123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474 |
- #!/usr/bin/env python3
- from PIL import Image
- from pprint import pprint
- import sys
- import time
- import os
- import requests
- import random
- import json
- import argparse
- import subprocess
- import imager
# Command-line interface: three independent on/off action flags plus an
# optional positional filename for the JSON results.
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
for _flag, _help in (
    ("--download", "Download Images"),
    ("--train", "Convert Images to Text"),
    ("--update", "Update s0urce.js script"),
):
    parser.add_argument(_flag, action="store_true", help=_help)
parser.add_argument(
    "JSON", nargs="?", default="test.js", type=str, help="Filename to save results"
)
args = parser.parse_args()

# Name of the file the JSON results are written to / read from.
JSONME = args.JSON
# NOTE: The JSONified image/word entries are inserted between two marker
# lines in the target file:
#   // T   (a JS comment with an uppercase T) marks where insertion begins
#   // t   (a JS comment with a lowercase t) marks where it stops
# One shared HTTP session that sends a desktop-browser User-Agent with every
# request (httpbin.org/headers is handy for checking what is actually sent).
head = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
}
sess = requests.Session()
sess.headers.update(head)
# Characters used when an image is rendered as text:
# ON marks a dark pixel, OFF marks a light pixel.
ON, OFF = "X", "."

# Data directory name. Do we really need this? Is it really going to change?
DIR = "data"

# Average-brightness threshold: a pixel below this value counts as dark.
# Around 75 seems to remove the extra noise s0urce.io adds to stop plain
# image matching.
INTENSITY = 75

# How much brighter the green channel must be (compared to the others)
# for a pixel to be called green.
GREEN_DIFF = 10
# Check the environment: make sure the working directories and the word file
# exist before anything else runs.
for _needed_dir in ("images", "data"):
    if not os.path.exists(_needed_dir):
        os.mkdir(_needed_dir)
if not os.path.exists("words.yml"):
    # Create an empty yaml file.
    with open("words.yml", "w") as f:
        f.write("")
def download(howhard, index):
    """
    Download one word image and save it in the images directory.

    howhard is the difficulty segment of the URL (this script passes "e",
    "m" or "h") and index is the image number within that difficulty.

    On success the response body is written to images/{howhard}_{index}.png
    and an existing images/{howhard}_{index}_clean.png is deleted, so a
    cleaned copy made from an older download is not reused.

    Raises ConnectionError when the server answers with anything but 200.
    """
    url = f"http://s0urce.io/client/img/word/{howhard}/{index}"
    r = sess.get(url)
    if r.status_code != 200:
        # We did not get a 200 Okay; report which URL failed and how.
        raise ConnectionError(f"{url} returned status_code {r.status_code}")

    with open(f"images/{howhard}_{index}.png", "wb") as f:
        f.write(r.content)

    stale_clean = f"images/{howhard}_{index}_clean.png"
    if os.path.exists(stale_clean):
        os.remove(stale_clean)
def img_point(pix, x, y):
    """
    Return whatever pix stores at the coordinates (x, y).
    """
    coords = (x, y)
    return pix[coords]
def img_avg(pix, x, y):
    """
    Return the grey-scale brightness of the pixel at (x, y) as an int.

    The pixel is fetched through img_point; its first three channel values
    are summed, divided by three and truncated to an integer.
    """
    channels = img_point(pix, x, y)
    red, green, blue = channels[0], channels[1], channels[2]
    return int((red + green + blue) / 3)
def is_set(pix, x, y):
    """
    Tell whether the pixel at (x, y) is dark. (Grey scale)

    The average brightness from img_avg is compared with INTENSITY.

    True means the brightness is less than INTENSITY, i.e. the pixel is dark.
    False means the brightness is equal to or greater than INTENSITY,
    i.e. the pixel is bright.
    """
    # INTENSITY is only read here, so no `global` declaration is needed; the
    # docstring must be the first statement for Python to attach it.
    return img_avg(pix, x, y) < INTENSITY
def is_green(pix, x, y):
    """
    Is this pixel green?

    The pixel at (x, y) counts as green when its green channel exceeds the
    larger of its red and blue channels by more than GREEN_DIFF.

    Only the first three channels are read, so pixels with or without a
    fourth (alpha) channel are both accepted.
    """
    red, green, blue = img_point(pix, x, y)[:3]
    # Compare green against the stronger of the two other channels.
    return green - max(red, blue) > GREEN_DIFF
def scan_img(pix, size):
    """
    Find the bounding box of the dark pixels in an image. (Grey scale)

    Every coordinate inside size (width, height) is tested with is_set.
    Dark pixels are counted and the smallest / largest x and y at which one
    was seen are tracked; the box is then widened by one pixel on each side
    where the image bounds allow.

    Returns (start x, start y, end x, end y, total number of dark pixels).
    """
    width, height = size[0], size[1]
    left, top = width, height
    right = bottom = 0
    dark = 0

    for row in range(height):
        for col in range(width):
            if not is_set(pix, col, row):
                continue
            dark += 1
            left = min(left, col)
            right = max(right, col)
            top = min(top, row)
            bottom = max(bottom, row)

    # Give us a little border to work with.
    left = left - 1 if left > 0 else left
    right = right + 1 if right < width else right
    top = top - 1 if top > 0 else top
    bottom = bottom + 1 if bottom < height else bottom

    return (left, top, right, bottom, dark)
def output_image(pix, size):
    """
    Render the whole image as a list of text rows.

    One string is produced per image row, top to bottom. Within a row each
    pixel becomes OFF when is_green reports it as green and ON otherwise.

    Returns the list of row strings.
    """
    width, height = size[0], size[1]
    return [
        "".join(OFF if is_green(pix, col, row) else ON for col in range(width))
        for row in range(height)
    ]
def image_filename(difficulty, index):
    """
    Return the path of the downloaded image for this difficulty and index.
    """
    return "images/{0}_{1}.png".format(difficulty, index)
def cleaned_filename(difficulty, index):
    """
    Return the path of the cleaned image for this difficulty and index.
    """
    return "images/{0}_{1}_clean.png".format(difficulty, index)
def cleaner_filename(difficulty, index):
    """
    Return the path of the "cleaner" image for this difficulty and index.
    """
    return "images/{0}_{1}_cleaner.png".format(difficulty, index)
def image_cleaner(source, destination):
    """
    Save a black-and-white version of the image at source to destination.

    Every pixel whose green channel is strictly greater than both its red
    and its blue channel is painted opaque white; every other pixel is
    painted opaque black.

    Each pixel is unpacked into four channels, so the image is expected to
    be RGBA.
    """
    white = (255, 255, 255, 255)
    black = (0, 0, 0, 255)
    # The context manager makes sure the underlying file is released.
    with Image.open(source) as image:
        width, height = image.size
        for y in range(height):
            for x in range(width):
                (r, g, b, _) = image.getpixel((x, y))
                # Note: the threshold here is 0, not GREEN_DIFF.
                greenish = g > max(r, b)
                image.putpixel((x, y), white if greenish else black)
        image.save(destination)
def run(difficult, index):
    """
    Show a downloaded image as text and ask the user which word it contains.

    Images are looked up as images/{difficult}_{n}.png for n = 0..69 and the
    search stops at the first file that does not exist. For an image that
    exists, its size and the dark-pixel bounding box reported by scan_img
    are printed, followed by the ON/OFF text rendering from output_image,
    and the user is prompted for the word.

    index is accepted but not used.

    Returns the word typed by the user, or None when no image was found.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            # We've reached the end, so stop looking. :P
            break
        print(f"Loading: {fname}")
        im = Image.open(fname)
        pix = im.load()
        size = im.size
        print(f"Size: {size[0]} x {size[1]}")
        sx, sy, ex, ey, total = scan_img(pix, size)
        print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
        for line in output_image(pix, size):
            print(line)
        # Return the word so the caller can store it in a dictionary.
        # NOTE(review): the listing this was recovered from had lost its
        # indentation; the return is assumed to belong to the loop body
        # (so only the first image is handled per call) -- confirm.
        return input("Word: ")
    return None
# Maps "{difficulty}_{index}" to the word recognised in that image.
key_word = {}


def autotrain(difficult):
    """
    Recognise the word in every downloaded image of one difficulty.

    For images/{difficult}_{n}.png with n = 0..69 (stopping at the first
    file that does not exist):

    * a cleaned copy is created with imager.image_cleaner unless the cleaned
      file already exists,
    * the external tesseract program is run on the cleaned copy with
      data/{difficult}_{n} as its output base (its stderr is discarded),
    * the contents of data/{difficult}_{n}.txt are stripped, lower-cased,
      stored in key_word under "{difficult}_{n}" and printed.

    Nothing is returned; results are collected in the module-level key_word.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            break
        cleaned = cleaned_filename(difficult, x)
        if not os.path.exists(cleaned):
            imager.image_cleaner(fname, cleaned)
        print(f"Loading: {cleaned} ", end="")
        fileout = f"data/{difficult}_{x}"
        # tesseract writes its result to fileout + ".txt"; the completed
        # process object itself is not needed.
        subprocess.run(
            ["tesseract", cleaned, fileout],
            stderr=subprocess.DEVNULL,
            shell=False,
        )
        with open(fileout + ".txt", "r") as fp:
            word = fp.read().strip().lower()
        key_word[f"{difficult}_{x}"] = word
        print(word)
# Now to call all the previous functions
if args.download:
    print("Downloading s0urce.io Words")
    # (heading, difficulty letter, number of images to fetch)
    for _heading, _level, _count in (
        ("EASY", "e", 62),
        ("MEDIUM", "m", 66),
        ("HARD", "h", 55),
    ):
        print(_heading)
        for _n in range(_count):
            download(_level, _n)
if args.train:
    # Img Processing: run through every single category and every single
    # word, then save everything collected in key_word as JSON.
    for level in ("e", "m", "h"):
        autotrain(level)
    with open(args.JSON, "w") as fp:
        json.dump(key_word, fp, sort_keys=True, indent=2)
if args.update:
    # Load the word table produced by --train.
    with open(args.JSON, "r") as fp:
        key_word = json.load(fp)

    # Update the s0urce.js script.
    filename = "s0urce.js"
    with open(filename, "r") as fp:
        lines = fp.readlines()

    # Lines are now in memory. Time to update!
    # The list is deliberately iterated live: a line rewritten below is seen
    # in its new form when the loop reaches it.
    for i, line in enumerate(lines):
        if "http://s0urce.io/client/img/word/" not in line:
            continue
        # This is a target line; its last two path segments are the
        # difficulty and the index, which together form the lookup key.
        target = line.strip().strip(":").strip('"')
        parts = target.split("/")
        dif = parts[-2]
        index = parts[-1]
        key = f"{dif}_{index}"
        pprint(parts)
        pprint(key)
        word = key_word[key]
        # break;\n"  # You may need it... or may not.
        replacement = f' form.value = "{word}";' + "\n"
        if i + 1 < len(lines):
            # The line following the target is replaced by the assignment.
            lines[i + 1] = replacement
        else:
            # Target was the last line: there is nothing to overwrite, so
            # add the assignment instead of failing with IndexError.
            if not line.endswith("\n"):
                lines[i] = line + "\n"
            lines.append(replacement)

    with open(filename, "w") as fp:
        fp.writelines(lines)
# ----------------------------------------------------------------------------------------
# Everything below used to live in a separate dataJS.py file; it has been merged in so this is now a single script.
# Do we really need to worry about all this right now? (I think we have enough bugs to begin with.)
JSONME = "false"  # Do not execute
# NOTE(review): the listing this was recovered from had lost its indentation;
# the helpers and the export loop are assumed to live inside this disabled
# branch (otherwise the loop would always run against a file named "false")
# -- confirm.
if JSONME.lower() != "false":
    print("Now exporting to JSON")
    print(f"Targeting file: '{JSONME}'")
    time.sleep(5)

    def test(t):
        """
        Read {DIR}/{t}.txt and return its lines, each stripped of
        surrounding whitespace, as a list.

        Returns None when the file does not exist.
        """
        fname = f"{DIR}/{t}.txt"
        try:
            with open(fname, "r") as f:
                return [l.strip() for l in f]
        except FileNotFoundError:
            return None

    def insertJS(item):
        """
        Insert the JSON form of item into the file named by JSONME.

        item is serialised with json.dumps, a trailing comma is appended,
        and the result is written as a new line directly after every line
        that is exactly

            // T

        and that appears before the first line that is exactly

            // t

        All other lines are written back unchanged. If the file does not
        exist a message is printed and the program exits.
        """
        item = json.dumps(item)
        item = f"{item},"
        try:
            with open(f"{JSONME}", "r") as f:
                r = [l.strip("\n") for l in f]
        except FileNotFoundError:
            print(f"File {JSONME} Not Found!")
            sys.exit()

        # Build the output in a new list instead of editing r while
        # iterating over it.
        out = []
        searching = True
        for e in r:
            out.append(e)
            if not searching:
                continue
            if e == "// T":
                out.append(item)
            elif e == "// t":
                # Past the end marker nothing more is inserted.
                searching = False

        with open(f"{JSONME}", "w") as f:
            for e in out:
                f.write(f"{e}\n")

    for x in range(0, 183):
        te = test(x)
        if te is not None:
            insertJS(te)

# Regardless what we did let the user know we at least ran and we are now done
print("Complete")
|