Prechádzať zdrojové kódy

Add --train, look for mean green pixels.

Steve Thielemann pred 5 rokmi
rodič
commit
5cbb74de7d
1 zmenený súbor, 118 pridaní a 75 odobraní
  1. 118 75
      dataLoad.py

+ 118 - 75
dataLoad.py

@@ -5,11 +5,14 @@ from pprint import pprint
 
 import sys, time, os, requests, random, json, argparse
 
-parser = argparse.ArgumentParser(description='Unknown program that breaks in strange ways.')
+parser = argparse.ArgumentParser(description="S0urce.io utility program.")
 parser.add_argument("--download", help="Download Images", action="store_true")
-parser.add_argument("JSON", type=str, nargs="?", help="Filename to save results", default="test.js")
+parser.add_argument("--train", help="Convert Images to Text", action="store_true")
+parser.add_argument(
+    "JSON", type=str, nargs="?", help="Filename to save results", default="test.js"
+)
 args = parser.parse_args()
-pprint(args)
+# pprint(args)
 
 # Should we spend the time to download image, and process it? (True = Yes, False = No)
 # DOWNLOAD = False
@@ -28,40 +31,55 @@ JSONME = args.JSON
 
 # httpbin.org/headers
 sess = requests.Session()
-head = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
+head = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
+}
 sess.headers.update(head)
 
-ON = 'X'      # Dark pixel in an image
-OFF = '.'     # Light pixel in an image
-DIR = 'data'  # Data directory name, do we really need this? Is it really going to change?
+ON = "X"  # Dark pixel in an image
+OFF = "."  # Light pixel in an image
+DIR = (
+    "data"
+)  # Data directory name, do we really need this? Is it really going to change?
 
-INTENSITY = 75 # How bright does something have to be to trigger it being a dark or light pixel?
+INTENSITY = (
+    75
+)  # How bright does something have to be to trigger it being a dark or light pixel?
 # Looks like around 75 removes the extra stuff that s0urce.io does to prevent it from being just matching images.
 
+GREEN_DIFF = 10
+# How much brighter the green channel must be (compared to the others),
+# to be called green.
+
 # Check the environment, do we have all that we need?
-if not os.path.exists('in'):
-    os.mkdir('in')
+if not os.path.exists("in"):
+    os.mkdir("in")
+
+if not os.path.exists("data"):
+    os.mkdir("data")
 
-if not os.path.exists('data'):
-    os.mkdir('data')
 
 def download(howhard, index):
     global sess
     """
     Download an image based upon how hard it is.
 
-    You print out the return value.  Are you returning anything useful??
-    (Ah, yeah I could just have it print out the status code when there is a error,
-     Which would get rid of the need for it to return anything)
+    On success, it saves the image file.
+    Failure raises ConnectionError.
     """
-    r = sess.get(f'http://s0urce.io/client/img/word/{howhard}/{index}')
-    if(r.status_code == 200):
-        with open(f'in/{howhard}_{index}.png', 'wb') as f:
+    r = sess.get(f"http://s0urce.io/client/img/word/{howhard}/{index}")
+    if r.status_code == 200:
+        with open(f"in/{howhard}_{index}.png", "wb") as f:
             f.write(r.content)
     else:
         # We did not get a 200 Okay, log this... Hmm maybe we need to make a log file?
-        #print( f'{howhard}_{index}.png ' + str(r.status_code) )
-        raise ConnectionError('http://s0urce.io/client/img/word/{0}/{1} returned status_code {2}'.format(howhard, index, r.status_code))
+        # print( f'{howhard}_{index}.png ' + str(r.status_code) )
+        raise ConnectionError(
+            "http://s0urce.io/client/img/word/{0}/{1} returned status_code {2}".format(
+                howhard, index, r.status_code
+            )
+        )
+
 
 def img_point(pix, x, y):
     """
@@ -70,6 +88,7 @@ def img_point(pix, x, y):
     """
     return pix[x, y]
 
+
 def img_avg(pix, x, y):
     """
     img_avg, returns the average brightness 0-255,
@@ -77,14 +96,15 @@ def img_avg(pix, x, y):
     to get the individual rgb values to calculate,
     brightness. (Grey scale)
     """
-    rgb = img_point(pix, x,y)
-    #if(im.mode == 'P'):
+    rgb = img_point(pix, x, y)
+    # if(im.mode == 'P'):
     #    rgb = pal[rgb*3:(rgb+1)*3]
 
-    #if(im.mode == 'I'):
+    # if(im.mode == 'I'):
     #    return rgb >> 8
 
-    return int( ( rgb[0] + rgb[1] + rgb[2] ) / 3 )
+    return int((rgb[0] + rgb[1] + rgb[2]) / 3)
+
 
 def is_set(pix, x, y):
     global INTENSITY
@@ -99,8 +119,22 @@ def is_set(pix, x, y):
     False means the brightness at the given x and y,
     is Greater Than which means its bright. (Grey Scale)
     """
-    avg = img_avg(pix, x,y)
-    return (avg < INTENSITY)
+    avg = img_avg(pix, x, y)
+    return avg < INTENSITY
+
+
+def is_green(pix, x, y):
+    """
+    Is this pixel green?  Returns True when the green channel is more
+    than GREEN_DIFF brighter than both the red and the blue channel.
+    """
+    # Slice to three channels so RGB and RGBA pixels both unpack cleanly.
+    red, green, blue = img_point(pix, x, y)[:3]
+    # Compare green against the brighter of the other two channels.
+    other = max(red, blue)
+    diff = green - other
+    return diff > GREEN_DIFF
+
 
 def scan_img(pix, size):
     """
@@ -113,13 +147,15 @@ def scan_img(pix, size):
     returns start x, y and end x, y and total number of dark pixels.
     """
     total = 0
-    sx = size[0]; ex = 0
-    sy = size[1]; ey = 0
-
-    for y in range( 0,size[1] ):
-        for x in range( 0,size[0] ):
-            pnt_is = is_set(pix, x,y)
-            if (pnt_is):
+    sx = size[0]
+    ex = 0
+    sy = size[1]
+    ey = 0
+
+    for y in range(0, size[1]):
+        for x in range(0, size[0]):
+            pnt_is = is_set(pix, x, y)
+            if pnt_is:
                 total += 1
                 if x < sx:
                     sx = x
@@ -130,7 +166,7 @@ def scan_img(pix, size):
                 if y > ey:
                     ey = y
 
-    #print (sx,ex,sy,ey)
+    # print (sx,ex,sy,ey)
     # give us a little border to work with
     if sx > 0:
         sx -= 1
@@ -142,8 +178,9 @@ def scan_img(pix, size):
     if ey < size[1]:
         ey += 1
 
-    #print (sx,ex,sy,ey)
-    return(sx,sy,ex,ey,total)
+    # print (sx,ex,sy,ey)
+    return (sx, sy, ex, ey, total)
+
 
 def output_image(pix, size):
     """
@@ -155,12 +192,15 @@ def output_image(pix, size):
     the dark and light pixels of the image. (Grey Scale)
     """
     result = []
-    ex = size[0]; sx = 0
-    ey = size[1]; sy = 0
-    for y in range(sy,ey):
-        s = ''
-        for x in range(sx,ex):
-            if is_set(pix, x,y):
+    ex = size[0]
+    sx = 0
+    ey = size[1]
+    sy = 0
+    for y in range(sy, ey):
+        s = ""
+        for x in range(sx, ex):
+            # if is_set(pix, x, y):
+            if not is_green(pix, x, y):
                 s += ON
             else:
                 s += OFF
@@ -168,6 +208,7 @@ def output_image(pix, size):
 
     return result
 
+
 def run(difficult):
     """
     run, represents a single execution of components to the image. (Actually, we do it one category at a time instead of just one single execution.)
@@ -178,10 +219,10 @@ def run(difficult):
     then from that we output the image line by line onto the screen after it has been output_image d into list form,
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     """
-    
+
     for x in range(0, 70):
-        fname = f'in/{difficult}_{x}.png'
-    
+        fname = f"in/{difficult}_{x}.png"
+
         if not os.path.exists(fname):
             print("Could not find '{0}'".format(fname))
             continue
@@ -207,45 +248,47 @@ def run(difficult):
         img_s = output_image(pix, size)
         for l in img_s:
             print(l)
-        word = input('Word: ')
-        with open(f'{DIR}/{difficult}_{x}.txt', 'w') as f:
-            f.write('{0}\n'.format(word))
+        word = input("Word: ")
+        with open(f"{DIR}/{difficult}_{x}.txt", "w") as f:
+            f.write("{0}\n".format(word))
 
         print(f"Image saved to '{DIR}/{difficult}_{x}.txt' in byte string")
         # os.remove(f'{fname}')  # Grr No bad bean, keep file for error checking
         # print(f"File '{fname}' automatically removed")
 
+
 # Now to call all the previous functions
-if (DOWNLOAD == True):
+if DOWNLOAD == True:
     print("Downloading s0urce.io Words")
     print("EASY")
     # time.sleep(5)
     for e in range(0, 62):
-        download('e', e)
+        download("e", e)
         # time.sleep(random.randint(10, 15))
 
     print("MEDIUM")
     # time.sleep(5)
     for m in range(0, 66):
-        download('m', m)
+        download("m", m)
         # time.sleep(random.randint(10, 15))
 
     print("HARD")
     # time.sleep(5)
     for h in range(0, 55):
-        download('h', h)
+        download("h", h)
         # time.sleep(random.randint(10, 15))
 
+if args.train:
     # Img Processing
-    run('e') # Answer the questions
-    run('m')
-    run('h')
+    run("e")  # Answer the questions
+    run("m")
+    run("h")
 
 # ----------------------------------------------------------------------------------------
 # All below was in a separate dataJS.py file... but now I have fixed it so it's 1 script!
 # Do we really need to worry about all this right now? (I think we have enough bugs to begin with.)
-JSONME = 'false' # Do not execute
-if (JSONME.lower() != 'false'):
+JSONME = "false"  # Do not execute
+if JSONME.lower() != "false":
     print("Now exporting to JSON")
     print(f"Targeting file: '{JSONME}'")
     time.sleep(5)
@@ -255,10 +298,10 @@ if (JSONME.lower() != 'false'):
         """
         given the filename, we read it and add it to a list and return the list.
         """
-        fname = f'{DIR}/{t}.txt'
+        fname = f"{DIR}/{t}.txt"
         r = []
         try:
-            with open(fname, 'r') as f:
+            with open(fname, "r") as f:
                 for l in f:
                     r.append(l.strip())
             return r
@@ -275,37 +318,37 @@ if (JSONME.lower() != 'false'):
         In between the T and t will be replaced with the item.
         """
         item = json.dumps(item)
-        item = f'{item},'
+        item = f"{item},"
         r = []
         try:
-            with open(f'{JSONME}', 'r') as f:
+            with open(f"{JSONME}", "r") as f:
                 for l in f:
-                    if l != '':
-                        r.append(l.strip('\n'))
+                    if l != "":
+                        r.append(l.strip("\n"))
                     else:
-                        r.append('')
+                        r.append("")
         except FileNotFoundError:
             print(f"File {JSONME} Not Found!")
             sys.exit()
-        
+
         c = 0
         for e in r:
-            if('// T' == e):
-                temp = r[c+1]
-                del r[c+1]
-                r.insert(c+1, item)
-                r.insert(c+2, temp)
-            elif('// t' == e):
+            if "// T" == e:
+                temp = r[c + 1]
+                del r[c + 1]
+                r.insert(c + 1, item)
+                r.insert(c + 2, temp)
+            elif "// t" == e:
                 break
             c += 1
-        
-        with open(f'{JSONME}', 'w') as f:
+
+        with open(f"{JSONME}", "w") as f:
             for e in r:
-                f.write(f'{e}\n')
+                f.write(f"{e}\n")
 
     for x in range(0, 183):
         te = test(x)
-        if(te != None):
+        if te != None:
             word = te
             insertJS(word)