|
@@ -4,6 +4,7 @@ from PIL import Image
|
|
|
from pprint import pprint
|
|
|
|
|
|
import sys, time, os, requests, random, json, argparse
|
|
|
+import subprocess
|
|
|
|
|
|
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
|
|
|
parser.add_argument("--download", help="Download Images", action="store_true")
|
|
@@ -14,10 +15,6 @@ parser.add_argument(
|
|
|
args = parser.parse_args()
|
|
|
# pprint(args)
|
|
|
|
|
|
-# Should we spend the time to download image, and process it? (True = Yes, False = No)
|
|
|
-# DOWNLOAD = False
|
|
|
-DOWNLOAD = args.download
|
|
|
-
|
|
|
# Should we add the JSON in a file? (True is filename, False = do not do)
|
|
|
# JSONME = 'test.js'
|
|
|
JSONME = args.JSON
|
|
@@ -257,8 +254,40 @@ def run(difficult):
|
|
|
# print(f"File '{fname}' automatically removed")
|
|
|
|
|
|
|
|
|
def autotrain(difficult, count=70):
    """
    Label the images of one difficulty category automatically via tesseract OCR.

    For every index x in range(count), the image "in/{difficult}_{x}.png" is
    passed to the external `tesseract` program, which writes the recognised
    text to "data/{difficult}_{x}.txt". That text is then read back, stripped
    of surrounding whitespace and printed.

    Images that do not exist are reported and skipped (not every category has
    the full number of items). Images that tesseract fails on are reported and
    skipped as well, so a stale ".txt" from an earlier run is never mistaken
    for a fresh result.

    Args:
        difficult: Category prefix used in the image file names
            (the caller in this script passes "e", "m" and "h").
        count: Number of image indices to try, starting at 0. Defaults to 70.

    Raises:
        FileNotFoundError: If the `tesseract` executable cannot be launched.
    """
    for x in range(count):
        fname = f"in/{difficult}_{x}.png"

        if not os.path.exists(fname):
            print("Could not find '{0}'".format(fname))
            continue

        print(f"Loading: {fname}")
        fileout = "data/{0}_{1}".format(difficult, x)

        # tesseract cannot write its result if the output directory is
        # missing, so create it on demand.
        os.makedirs(os.path.dirname(fileout), exist_ok=True)

        # tesseract appends ".txt" to the output base name on its own.
        result = subprocess.run(
            ["tesseract", fname, fileout],
            stderr=subprocess.DEVNULL,
            shell=False,
        )
        if result.returncode != 0:
            # Do not fall through: the ".txt" is either missing or left
            # over from a previous run.
            print(f"tesseract failed on '{fname}' (exit code {result.returncode})")
            continue

        # tesseract emits UTF-8 text regardless of the platform locale.
        with open(fileout + ".txt", "r", encoding="utf-8") as fp:
            word = fp.read().strip()
        print(word)
|
|
|
+
|
|
|
+
|
|
|
# Now to call all the previous functions
|
|
|
-if DOWNLOAD == True:
|
|
|
+if args.download:
|
|
|
print("Downloading s0urce.io Words")
|
|
|
print("EASY")
|
|
|
# time.sleep(5)
|
|
@@ -280,9 +309,8 @@ if DOWNLOAD == True:
|
|
|
|
|
|
if args.train:
|
|
|
# Img Processing
|
|
|
- run("e") # Answer the questions
|
|
|
- run("m")
|
|
|
- run("h")
|
|
|
+ for level in ["e", "m", "h"]:
|
|
|
+ autotrain(level)
|
|
|
|
|
|
# ----------------------------------------------------------------------------------------
|
|
|
# All below was in a separate dataJS.py file... but now I have fixed it so it's 1 script!
|