|
@@ -3,8 +3,19 @@
|
|
|
from PIL import Image
|
|
|
from pprint import pprint
|
|
|
|
|
|
+<<<<<<< Updated upstream
|
|
|
import sys, time, os, requests, random, json, argparse
|
|
|
import subprocess, yaml
|
|
|
+=======
|
|
|
+import sys
|
|
|
+import time
|
|
|
+import os
|
|
|
+import requests
|
|
|
+import random
|
|
|
+import json
|
|
|
+import argparse
|
|
|
+import subprocess
|
|
|
+>>>>>>> Stashed changes
|
|
|
|
|
|
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
|
|
|
parser.add_argument("--download", help="Download Images", action="store_true")
|
|
@@ -208,6 +219,38 @@ def output_image(pix, size):
|
|
|
|
|
|
return result
|
|
|
|
|
|
+def image_filename(difficulty, index):
+    """Return the path of the raw image for one word.
+
+    difficulty -- category prefix that starts the file name
+    index      -- number of the word inside that category
+    """
+    return "images/{0}_{1}.png".format(difficulty, index)
|
|
|
+
|
|
|
+def cleaned_filename(difficulty, index):
+    """Return the path of the cleaned copy of one word image.
+
+    difficulty -- category prefix that starts the file name
+    index      -- number of the word inside that category
+    """
+    return "images/{0}_{1}_clean.png".format(difficulty, index)
|
|
|
+
|
|
|
+
|
|
|
+def image_cleaner(source, destination):
+    """Reduce the image at `source` to black and white and save it.
+
+    A pixel becomes opaque white when its green channel exceeds the larger
+    of its red and blue channels by more than GREEN_DIFF; every other pixel
+    becomes opaque black.
+
+    source      -- image to read, passed straight to Image.open
+    destination -- where the result is written, passed to Image.save
+    """
+    white = (255, 255, 255, 255)
+    black = (0, 0, 0, 255)
+
+    # Normalize to RGBA so every pixel is a 4-tuple: unpacking four values
+    # from an RGB, palette or greyscale pixel raises ValueError.  convert()
+    # returns an independent image, so the source file can be closed here.
+    with Image.open(source) as original:
+        image = original.convert("RGBA")
+
+    width, height = image.size
+    print(f"Size: {width} x {height}")
+
+    for y in range(height):
+        for x in range(width):
+            r, g, b, _ = image.getpixel((x, y))
+            # "Green" means green dominates both other colour channels.
+            is_green = g - max(r, b) > GREEN_DIFF
+            image.putpixel((x, y), white if is_green else black)
+
+    image.save(destination)
|
|
|
|
|
|
def run(difficult, index):
|
|
|
"""
|
|
@@ -220,11 +263,22 @@ def run(difficult, index):
|
|
|
Where we ask the user what the word is, and after that we save all that to a file in the data directory.
|
|
|
"""
|
|
|
|
|
|
+<<<<<<< Updated upstream
|
|
|
fname = f"images/{difficult}_{x}.png"
|
|
|
|
|
|
if not os.path.exists(fname):
|
|
|
print("Could not find '{0}'".format(fname))
|
|
|
return False # We did not complete
|
|
|
+=======
|
|
|
+ for x in range(0, 70):
|
|
|
+ fname = image_filename(difficult, x)
|
|
|
+
|
|
|
+ if not os.path.exists(fname):
|
|
|
+ # print("Could not find '{0}'".format(fname))
|
|
|
+ # continue
|
|
|
+ # We've reached the end, so stop looking. :P
|
|
|
+ break
|
|
|
+>>>>>>> Stashed changes
|
|
|
|
|
|
print(f"Loading: {fname}")
|
|
|
im = Image.open(fname)
|
|
@@ -256,7 +310,13 @@ def run(difficult, index):
|
|
|
# print(f"File '{fname}' automatically removed")
|
|
|
|
|
|
|
|
|
+<<<<<<< Updated upstream
|
|
|
def autotrain(difficult, index):
|
|
|
+=======
|
|
|
+key_word = {}
|
|
|
+
|
|
|
+def autotrain(difficult):
|
|
|
+>>>>>>> Stashed changes
|
|
|
"""
|
|
|
run, represents a single execution of components to the image, (Actuall we do it 1 category at a time instead of just 1 single execution )
|
|
|
those components do the following... (Each category has around 70 items so we standardize on 70, but )
|
|
@@ -267,6 +327,7 @@ def autotrain(difficult, index):
|
|
|
Where we ask the user what the word is, and after that we save all that to a file in the data directory.
|
|
|
"""
|
|
|
|
|
|
+<<<<<<< Updated upstream
|
|
|
# Re aranged the code so I can have it return after each word
|
|
|
fname = f"images/{difficult}_{x}.png"
|
|
|
|
|
@@ -288,6 +349,34 @@ def autotrain(difficult, index):
|
|
|
print(word)
|
|
|
|
|
|
return word # Save this to the dict
|
|
|
+=======
|
|
|
+ for x in range(0, 70):
|
|
|
+ fname = image_filename(difficult, x)
|
|
|
+
|
|
|
+ if not os.path.exists(fname):
|
|
|
+ break
|
|
|
+ # print("Could not find '{0}'".format(fname))
|
|
|
+ # continue
|
|
|
+
|
|
|
+ cleaned = cleaned_filename(difficult, x)
|
|
|
+ if not os.path.exists(cleaned):
|
|
|
+ image_cleaner(fname, cleaned)
|
|
|
+
|
|
|
+ print(f"Loading: {cleaned}")
|
|
|
+ fileout = "data/{0}_{1}".format(difficult, x)
|
|
|
+
|
|
|
+ output = subprocess.run(
|
|
|
+ ["tesseract", cleaned, fileout],
|
|
|
+ stderr=subprocess.DEVNULL,
|
|
|
+ # capture_output=False,
|
|
|
+ shell=False,
|
|
|
+ )
|
|
|
+
|
|
|
+ with open(fileout + ".txt", "r") as fp:
|
|
|
+ word = fp.read().strip().lower()
|
|
|
+ key_word[f'{difficult}_{x}'] = word
|
|
|
+ print(word)
|
|
|
+>>>>>>> Stashed changes
|
|
|
|
|
|
|
|
|
# Now to call all the previous functions
|
|
@@ -315,6 +404,7 @@ if args.train:
|
|
|
# Img Processing: Run thru every single category and every single word
|
|
|
wordDict = {}
|
|
|
for level in ["e", "m", "h"]:
|
|
|
+<<<<<<< Updated upstream
|
|
|
for x in range(0, 66):
|
|
|
at = autotrain(level, x)
|
|
|
if(at != False): # If it is complete store it
|
|
@@ -322,6 +412,11 @@ if args.train:
|
|
|
|
|
|
with open('words.yml', 'w') as f:
|
|
|
yaml.dump(wordDict, f) # Writes it automatically into the file
|
|
|
+=======
|
|
|
+ autotrain(level)
|
|
|
+ with open(args.JSON, 'w') as fp:
|
|
|
+ json.dump(key_word, fp, sort_keys=True, indent=2)
|
|
|
+>>>>>>> Stashed changes
|
|
|
|
|
|
# ----------------------------------------------------------------------------------------
|
|
|
# All below was in a seperate dataJS.py file... but now I have fixed it so it's 1 script!
|