|
@@ -4,7 +4,7 @@ from PIL import Image
|
|
|
from pprint import pprint
|
|
|
|
|
|
import sys, time, os, requests, random, json, argparse
|
|
|
-import subprocess
|
|
|
+import subprocess, yaml
|
|
|
|
|
|
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
|
|
|
parser.add_argument("--download", help="Download Images", action="store_true")
|
|
@@ -55,6 +55,9 @@ if not os.path.exists("images"):
|
|
|
if not os.path.exists("data"):
|
|
|
os.mkdir("data")
|
|
|
|
|
|
+if not os.path.exists("words.yml"):
|
|
|
+ with open('words.yml', 'w') as f: # Create an empty YAML file
|
|
|
+ f.write('')
|
|
|
|
|
|
def download(howhard, index):
|
|
|
global sess
|
|
@@ -206,7 +209,7 @@ def output_image(pix, size):
|
|
|
return result
|
|
|
|
|
|
|
|
|
-def run(difficult):
|
|
|
+def run(difficult, index):
|
|
|
"""
|
|
|
run, represents a single execution of components to the image, (Actuall we do it 1 category at a time instead of just 1 single execution )
|
|
|
those components do the following... (Each category has around 70 items so we standardize on 70, but )
|
|
@@ -217,44 +220,43 @@ def run(difficult):
|
|
|
Where we ask the user what the word is, and after that we save all that to a file in the data directory.
|
|
|
"""
|
|
|
|
|
|
- for x in range(0, 70):
|
|
|
- fname = f"images/{difficult}_{x}.png"
|
|
|
+ fname = f"images/{difficult}_{x}.png"
|
|
|
|
|
|
- if not os.path.exists(fname):
|
|
|
- print("Could not find '{0}'".format(fname))
|
|
|
- continue
|
|
|
+ if not os.path.exists(fname):
|
|
|
+ print("Could not find '{0}'".format(fname))
|
|
|
+ return False # We did not complete
|
|
|
|
|
|
- print(f"Loading: {fname}")
|
|
|
- im = Image.open(fname)
|
|
|
- pix = im.load()
|
|
|
- size = im.size
|
|
|
- print(f"Size: {size[0]} x {size[1]}")
|
|
|
+ print(f"Loading: {fname}")
|
|
|
+ im = Image.open(fname)
|
|
|
+ pix = im.load()
|
|
|
+ size = im.size
|
|
|
+ print(f"Size: {size[0]} x {size[1]}")
|
|
|
|
|
|
- pal = im.getpalette()
|
|
|
+ pal = im.getpalette()
|
|
|
|
|
|
- sx = 0
|
|
|
- ex = size[0]
|
|
|
- sy = 0
|
|
|
- ey = size[1]
|
|
|
- total = 0
|
|
|
+ sx = 0
|
|
|
+ ex = size[0]
|
|
|
+ sy = 0
|
|
|
+ ey = size[1]
|
|
|
+ total = 0
|
|
|
|
|
|
- sx, sy, ex, ey, total = scan_img(pix, size)
|
|
|
+ sx, sy, ex, ey, total = scan_img(pix, size)
|
|
|
|
|
|
- print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
|
|
|
+ print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
|
|
|
|
|
|
- img_s = output_image(pix, size)
|
|
|
- for l in img_s:
|
|
|
- print(l)
|
|
|
- word = input("Word: ")
|
|
|
- with open(f"{DIR}/{difficult}_{x}.txt", "w") as f:
|
|
|
- f.write("{0}\n".format(word))
|
|
|
+ img_s = output_image(pix, size)
|
|
|
+ for l in img_s:
|
|
|
+ print(l)
|
|
|
+ word = input("Word: ")
|
|
|
+ # Returns the word so it can be stored in the dictionary
|
|
|
+ return word
|
|
|
|
|
|
- print(f"Image saved to '{DIR}/{difficult}_{x}.txt' in byte string")
|
|
|
- # os.remove(f'{fname}') # Grr No bad bean, keep file for error checking
|
|
|
- # print(f"File '{fname}' automatically removed")
|
|
|
+ #print(f"Image saved to '{DIR}/{difficult}_{x}.txt' in byte string")
|
|
|
+ # os.remove(f'{fname}') # Grr No bad bean, keep file for error checking
|
|
|
+ # print(f"File '{fname}' automatically removed")
|
|
|
|
|
|
|
|
|
-def autotrain(difficult):
|
|
|
+def autotrain(difficult, index):
|
|
|
"""
|
|
|
run, represents a single execution of components to the image, (Actuall we do it 1 category at a time instead of just 1 single execution )
|
|
|
those components do the following... (Each category has around 70 items so we standardize on 70, but )
|
|
@@ -265,25 +267,27 @@ def autotrain(difficult):
|
|
|
Where we ask the user what the word is, and after that we save all that to a file in the data directory.
|
|
|
"""
|
|
|
|
|
|
- for x in range(0, 70):
|
|
|
- fname = f"images/{difficult}_{x}.png"
|
|
|
+ # Rearranged the code so it can return after each word
|
|
|
+ fname = f"images/{difficult}_{x}.png"
|
|
|
|
|
|
- if not os.path.exists(fname):
|
|
|
- print("Could not find '{0}'".format(fname))
|
|
|
- continue
|
|
|
+ if not os.path.exists(fname):
|
|
|
+ print("Could not find '{0}'".format(fname))
|
|
|
+ return False # We did not complete
|
|
|
|
|
|
- print(f"Loading: {fname}")
|
|
|
- fileout = "data/{0}_{1}".format(difficult, x)
|
|
|
- output = subprocess.run(
|
|
|
- ["tesseract", fname, fileout],
|
|
|
- stderr=subprocess.DEVNULL,
|
|
|
- # capture_output=False,
|
|
|
- shell=False,
|
|
|
- )
|
|
|
+ print(f"Loading: {fname}")
|
|
|
+ fileout = "data/{0}_{1}".format(difficult, x)
|
|
|
+ output = subprocess.run(
|
|
|
+ ["tesseract", fname, fileout],
|
|
|
+ stderr=subprocess.DEVNULL,
|
|
|
+ # capture_output=False,
|
|
|
+ shell=False,
|
|
|
+ )
|
|
|
|
|
|
- with open(fileout + ".txt", "r") as fp:
|
|
|
- word = fp.read().strip()
|
|
|
- print(word)
|
|
|
+ with open(fileout + ".txt", "r") as fp:
|
|
|
+ word = fp.read().strip()
|
|
|
+ print(word)
|
|
|
+
|
|
|
+ return word # Save this to the dict
|
|
|
|
|
|
|
|
|
# Now to call all the previous functions
|
|
@@ -308,9 +312,16 @@ if args.download:
|
|
|
# time.sleep(random.randint(10, 15))
|
|
|
|
|
|
if args.train:
|
|
|
- # Img Processing
|
|
|
+ # Img Processing: Run thru every single category and every single word
|
|
|
+ wordDict = {}
|
|
|
for level in ["e", "m", "h"]:
|
|
|
- autotrain(level)
|
|
|
+ for x in range(0, 66):
|
|
|
+ at = autotrain(level, x)
|
|
|
+ if(at != False): # If it is complete store it
|
|
|
+ wordDict["{0}_{1}".format(level, x)] = at
|
|
|
+
|
|
|
+ with open('words.yml', 'w') as f:
|
|
|
+ yaml.dump(wordDict, f) # Writes it automatically into the file
|
|
|
|
|
|
# ----------------------------------------------------------------------------------------
|
|
|
# All below was in a seperate dataJS.py file... but now I have fixed it so it's 1 script!
|