Browse Source

Moved all words to words.yml

david 5 năm trước cách đây
mục cha
commit
d833acb531
3 tập tin đã thay đổi với 244 bổ sung và 49 xóa
  1. 2 1
      TODO.md
  2. 59 48
      dataLoad.py
  3. 183 0
      words.yml

+ 2 - 1
TODO.md

@@ -11,4 +11,5 @@
 * Change from `global` to pass around (COMPLETE)
 * Change `save_image()` to `output_image()` (COMPLETE)
 * Refactor variable pass around (COMPLETE)
-* Move all output to some form of storage in 1 file
+* Move all output to yaml file (COMPLETE)
+* Clean up the JSONME section that is currently disabled

+ 59 - 48
dataLoad.py

@@ -4,7 +4,7 @@ from PIL import Image
 from pprint import pprint
 
 import sys, time, os, requests, random, json, argparse
-import subprocess
+import subprocess, yaml
 
 parser = argparse.ArgumentParser(description="S0urce.io utility program.")
 parser.add_argument("--download", help="Download Images", action="store_true")
@@ -55,6 +55,9 @@ if not os.path.exists("images"):
 if not os.path.exists("data"):
     os.mkdir("data")
 
+if not os.path.exists("words.yml"):
+    with open('words.yml', 'w') as f: # Create a empty yaml file
+        f.write('')
 
 def download(howhard, index):
     global sess
@@ -206,7 +209,7 @@ def output_image(pix, size):
     return result
 
 
-def run(difficult):
+def run(difficult, index):
     """
     run, represents a single execution of components to the image, (Actually we do it 1 category at a time instead of just 1 single execution)
     those components do the following...                           (Each category has around 70 items so we standardize on 70, but           )
@@ -217,44 +220,43 @@ def run(difficult):
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     """
 
-    for x in range(0, 70):
-        fname = f"images/{difficult}_{x}.png"
+    fname = f"images/{difficult}_{x}.png"
 
-        if not os.path.exists(fname):
-            print("Could not find '{0}'".format(fname))
-            continue
+    if not os.path.exists(fname):
+        print("Could not find '{0}'".format(fname))
+        return False # We did not complete
 
-        print(f"Loading: {fname}")
-        im = Image.open(fname)
-        pix = im.load()
-        size = im.size
-        print(f"Size: {size[0]} x {size[1]}")
+    print(f"Loading: {fname}")
+    im = Image.open(fname)
+    pix = im.load()
+    size = im.size
+    print(f"Size: {size[0]} x {size[1]}")
 
-        pal = im.getpalette()
+    pal = im.getpalette()
 
-        sx = 0
-        ex = size[0]
-        sy = 0
-        ey = size[1]
-        total = 0
+    sx = 0
+    ex = size[0]
+    sy = 0
+    ey = size[1]
+    total = 0
 
-        sx, sy, ex, ey, total = scan_img(pix, size)
+    sx, sy, ex, ey, total = scan_img(pix, size)
 
-        print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
+    print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
 
-        img_s = output_image(pix, size)
-        for l in img_s:
-            print(l)
-        word = input("Word: ")
-        with open(f"{DIR}/{difficult}_{x}.txt", "w") as f:
-            f.write("{0}\n".format(word))
+    img_s = output_image(pix, size)
+    for l in img_s:
+        print(l)
+    word = input("Word: ")
+    # Returns word so it can be stored in dictionary
+    return word
 
-        print(f"Image saved to '{DIR}/{difficult}_{x}.txt' in byte string")
-        # os.remove(f'{fname}')  # Grr No bad bean, keep file for error checking
-        # print(f"File '{fname}' automatically removed")
+    #print(f"Image saved to '{DIR}/{difficult}_{x}.txt' in byte string")
+    # os.remove(f'{fname}')  # Grr No bad bean, keep file for error checking
+    # print(f"File '{fname}' automatically removed")
 
 
-def autotrain(difficult):
+def autotrain(difficult, index):
     """
     run, represents a single execution of components to the image, (Actually we do it 1 category at a time instead of just 1 single execution)
     those components do the following...                           (Each category has around 70 items so we standardize on 70, but           )
@@ -265,25 +267,27 @@ def autotrain(difficult):
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     """
 
-    for x in range(0, 70):
-        fname = f"images/{difficult}_{x}.png"
+    # Rearranged the code so I can have it return after each word
+    fname = f"images/{difficult}_{x}.png"
 
-        if not os.path.exists(fname):
-            print("Could not find '{0}'".format(fname))
-            continue
+    if not os.path.exists(fname):
+        print("Could not find '{0}'".format(fname))
+        return False # We did not complete
 
-        print(f"Loading: {fname}")
-        fileout = "data/{0}_{1}".format(difficult, x)
-        output = subprocess.run(
-            ["tesseract", fname, fileout],
-            stderr=subprocess.DEVNULL,
-            # capture_output=False,
-            shell=False,
-        )
+    print(f"Loading: {fname}")
+    fileout = "data/{0}_{1}".format(difficult, x)
+    output = subprocess.run(
+        ["tesseract", fname, fileout],
+        stderr=subprocess.DEVNULL,
+        # capture_output=False,
+        shell=False,
+    )
 
-        with open(fileout + ".txt", "r") as fp:
-            word = fp.read().strip()
-            print(word)
+    with open(fileout + ".txt", "r") as fp:
+        word = fp.read().strip()
+        print(word)
+
+    return word # Save this to the dict
 
 
 # Now to call all the previous functions
@@ -308,9 +312,16 @@ if args.download:
         # time.sleep(random.randint(10, 15))
 
 if args.train:
-    # Img Processing
+    # Img Processing: Run thru every single category and every single word
+    wordDict = {}
     for level in ["e", "m", "h"]:
-        autotrain(level)
+        for x in range(0, 66):
+            at = autotrain(level, x) 
+            if(at != False): # If it is complete store it
+                wordDict["{0}_{1}".format(level, x)] = at
+
+    with open('words.yml', 'w') as f:
+        yaml.dump(wordDict, f) # Writes it automatically into the file
 
 # ----------------------------------------------------------------------------------------
 # All below was in a separate dataJS.py file... but now I have fixed it so it's 1 script!

+ 183 - 0
words.yml

@@ -0,0 +1,183 @@
+e_0: anon
+e_1: buffer
+e_10: type
+e_11: host
+e_12: ping
+e_13: bytes
+e_14: into
+e_15: set
+e_16: gookies
+e_17: file
+e_18: add
+e_19: urd
+e_2: com
+e_20: add
+e_21: antel.
+e_22: yoot
+e_23: ''
+e_24: ''
+e_25: FOL
+e_26: ten
+e_27: write
+e_28: elient
+e_29: port
+e_3: reset
+e_30: key
+e_31: uplcad
+e_32: part
+e_33: qhost
+e_34: pass
+e_35: handle
+e_36: size
+e_37: bit
+e_38: global
+e_39: Licat
+e_4: pum
+e_40: ''
+e_41: stat
+e_42: net
+e_43: Temove
+e_44: delete
+e_45: user
+e_46: domain
+e_47: Signal
+e_48: Call
+e_49: socket
+e_5: ''
+e_50: log
+e_51: ''
+e_52: ain
+e_53: anit
+e_54: event
+e_55: List
+e_56: emit
+e_57: Loop
+e_58: data
+e_59: load
+e_6: status
+e_60: get
+e_61: poly
+e_7: point
+e_8: send
+e_9: cqount
+h_0: isadreqgisterlist
+h_1: decryptdatabatch
+h_10: blockthreat
+h_11: wordcounter
+h_12: removecidcookie
+h_13: calilmodule
+h_14: patcheventiog
+h_15: setnerproxy
+h_16: ghnostfilesystem
+h_17: rootcookieset
+h_18: ''
+h_19: encodenewiolder
+h_2: getfirewaillchannel
+h_20: systemportkey
+h_21: encryptunpackedbatch
+h_22: respondertimesut
+h_23: uploaduserstats
+h_24: emitconfigiist
+h_25: channel setpackage
+h_26: systemgridtype
+h_27: butiferpingset
+h_28: exportconfigpackage
+h_29: getpartoffile
+h_3: deleteallids
+h_30: createnewsocket
+h_31: eventlistdir
+h_32: unpacktnpfite
+h_33: disconnectserver
+h_34: loadilcggedpassword
+h_35: dodecahedron
+h_36: generatecoderack
+h_37: checkhttptype
+h_38: Greatetaxisvector
+h_39: batchallfiles
+h_4: chancepassword
+h_40: Foinnetworkclirent
+h_41: getrmiprotecol
+h_42: hostnewserver
+h_43: oreatenewpoackage
+h_44: sendintelpnass
+h_45: getdatapassword
+h_46: destrovbatch
+h_47: httpbutiersize
+h_48: changeusername
+h_49: ineludedirectory
+h_5: tempdatapass
+h_50: disconnectchannel
+h_51: removenewoookie
+h_52: statusofprocess
+h_53: loadaltevent
+h_54: fileexpressiog
+h_6: GreateZzaxisvector
+h_7: sizeofhexadon
+h_8: getmysqidomein
+h_9: ecreatefilethread
+m_0: sizeck
+m_1: Droxy
+m_10: gqridwiadth
+m_11: mysal
+m_12: userport
+m_13: decrypt
+m_14: Listcontig
+m_15: getping
+m_16: server
+m_17: findpackage
+m_18: channel
+m_19: encryptfile
+m_2: newline
+m_20: filedixr
+m_21: serverproxy
+m_22: account
+m_23: package
+m_24: length
+m_25: uricheck
+m_26: disconnect
+m_27: constructor
+m_28: getilag
+m_29: username
+m_3: loadbytes
+m_30: protocol
+m_31: response
+m_32: setping
+m_33: eventtype
+m_34: writetile
+m_35: filetype
+m_36: aqridheicht
+m_37: password
+m_38: accountname
+m_39: threat
+m_4: Fat hari
+m_40: number
+m_41: getid
+m_42: getinfo
+m_43: syscall
+m_44: command
+m_45: generate
+m_46: setstats
+m_47: newhost
+m_48: module
+m_49: getkey
+m_5: getoass
+m_50: qettite
+m_51: setport
+m_52: datatype
+m_53: thread
+m_54: hexacon
+m_55: decrypttite
+m_56: setnewid
+m_57: process
+m_58: setdoockie
+m_59: contig
+m_6: download
+m_60: hostserver
+m_61: export
+m_62: encode
+m_63: enerypt
+m_64: newserver
+m_65: userid
+m_7: connect
+m_8: responder
+m_9: veotoxr