#!/usr/bin/env python3
- from PIL import Image
- from pprint import pprint
- import sys
- import time
- import os
- import requests
- import random
- import json
- import argparse
- import subprocess
- import imager
# Command-line interface: three optional on/off switches plus an optional
# positional file name for the JSON results.
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
for _flag, _summary in (
    ("--download", "Download Images"),
    ("--train", "Convert Images to Text"),
    ("--update", "Update s0urce.js script"),
):
    parser.add_argument(_flag, action="store_true", help=_summary)
parser.add_argument(
    "JSON",
    nargs="?",
    default="test.js",
    type=str,
    help="Filename to save results",
)
args = parser.parse_args()

# File name the JSON results are stored under (defaults to 'test.js').
JSONME = args.JSON
# NOTE: In the JS file, the insertion of the JSONified image and word begins at
#     // T
# (a JS comment with an uppercase T) and stops at
#     // t
# (a JS comment with a lowercase t).
- # httpbin.org/headers
# Headers sent with every request made through the shared session.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/39.0.2171.95 Safari/537.36"
    )
}
# One session object reused by all downloads.
sess = requests.Session()
sess.headers.update(head)
# Characters used when an image is rendered as text.
ON = "X"  # Dark pixel in an image
OFF = "."  # Light pixel in an image

# Data directory name, do we really need this? Is it really going to change?
DIR = "data"

# How bright does something have to be to trigger it being a dark or light
# pixel?  Around 75 seems to remove the extra noise s0urce.io adds to prevent
# plain image matching.
INTENSITY = 75

# How much brighter the green channel must be (compared to the others)
# to be called green.
GREEN_DIFF = 10
# Every word an OCR result is accepted as; anything else is treated as
# unrecognised.  Each word is listed exactly once (the original literal
# repeated "add", "net" and "dir").
VALID_WORDS = {
    "constructor", "info", "anon", "send", "com", "root", "port", "val",
    "add", "ghost", "net", "http", "status", "syscall", "part", "delete",
    "datatype", "loadbytes", "setping", "size", "system", "setstats",
    "join", "socket", "signal", "dir", "accountname", "decryptfile",
    "intel", "xml", "connect", "sizeof", "writefile", "call", "reset",
    "global", "user", "remove", "count", "set", "loop", "num",
    "client", "file", "channel", "right", "stat", "emit", "handle",
    "buffer", "mysql", "write", "type", "list", "temp", "getfile",
    "thread", "decrypt", "poly", "setcookie", "domain", "length",
    "gridwidth", "upload", "get", "generatecodepack", "data",
    "process", "download", "proxy", "fillgrid", "bit", "encryptfile",
    "host", "ping", "event", "url", "load", "key", "changepassword",
    "bufferpingset", "getfirewallchannel", "getinfo", "getping", "pass",
    "newserver", "username", "generate", "userport", "init",
    "left", "point", "cookies", "protocol", "responder", "getkey",
    "hostserver", "eventtype", "gridheight", "server", "setport",
    "getpass", "loadloggedpassword", "destroybatch", "getxmlprotocol",
    "channelsetpackage", "batchallfiles", "module", "response",
    "serverproxy", "filetype", "urlcheck", "config", "number",
    "ghostfilesystem", "disconnectserver", "emitconfiglist",
    "dodecahedron", "eventlistdir", "systemportkey", "setnewproxy",
    "createnewsocket", "changeusername", "tempdatapass", "blockthreat",
    "statusofprocess", "patcheventlog", "newline", "bytes",
    "findpackage", "package", "encode", "joinnetworkclient",
    "rootcookieset", "callmodule", "sizeofhexagon", "createfilethread",
    "includedirectory", "loadregisterlist", "encryptunpackedbatch",
    "getpartoffile", "getdatapassword", "create2axisvector",
    "create3axisvector", "disconnectchannel", "setnewid", "hexagon",
    "account", "removenewcookie", "getid", "encodenewfolder",
    "sendintelpass", "getlog", "command", "threat", "userid",
    "wordcounter", "removeoldcookie", "hostnewserver", "disconnect",
    "listconfig", "newhost", "createnewpackage", "loadaltevent", "log",
    "filedir", "fileexpresslog", "decryptdatabatch", "mergesocket",
    "unpacktmpfile", "uploaduserstats", "getmysqldomain",
    "checkhttptype", "encrypt", "vector", "httpbuffersize",
    "systemgridtype", "password", "respondertimeout", "deleteallids",
    "exportconfigpackage", "export",
}
# Check the environment, do we have all that we need?
# The working directories are created when missing; exist_ok avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
for _directory in ("images", "data"):
    os.makedirs(_directory, exist_ok=True)

# Create an empty yaml file only when none exists.  Mode "x" refuses to open
# an existing file, so an existing words.yml is never truncated.
try:
    with open("words.yml", "x"):
        pass
except FileExistsError:
    pass
def image_filename(difficulty, index):
    """Return the path of the downloaded image for a difficulty and index."""
    return "images/{}_{}.png".format(difficulty, index)
def cleaned_filename(difficulty, index):
    """Return the path of the cleaned copy of an image."""
    return "images/{}_{}_clean.png".format(difficulty, index)
def cleaner_filename(difficulty, index):
    """Return the path of the "cleaner" copy of an image."""
    return "images/{}_{}_cleaner.png".format(difficulty, index)
def download(howhard, index):
    """
    Download one word image and save it to disk.

    howhard: difficulty code used in the URL and in the file name.
    index: number of the image within that difficulty.

    On success the image bytes are written to image_filename(howhard, index)
    and any previously cleaned copy is removed, so stale cleaned images are
    not left around and get regenerated later.

    Raises ConnectionError when the server answers with anything but 200.
    """
    url = f"http://s0urce.io/client/img/word/{howhard}/{index}"
    # A timeout keeps a single stalled request from hanging the whole run.
    r = sess.get(url, timeout=30)
    if r.status_code != 200:
        # We did not get a 200 Okay; report which URL failed and how.
        raise ConnectionError(f"{url} returned status_code {r.status_code}")

    with open(image_filename(howhard, index), "wb") as f:
        f.write(r.content)

    # The cleaned image was derived from the old download: delete it so it
    # is rebuilt from the fresh file.
    try:
        os.remove(cleaned_filename(howhard, index))
    except FileNotFoundError:
        pass
def img_point(pix, x, y):
    """
    Return whatever *pix* stores at the coordinate (x, y).

    *pix* is indexed with the pair (x, y).
    """
    coordinate = (x, y)
    return pix[coordinate]
def img_avg(pix, x, y):
    """
    Return the grey-scale brightness of the pixel at (x, y).

    The pixel is fetched through img_point; the result is the mean of its
    first three components, truncated to an int.
    """
    channels = img_point(pix, x, y)
    red, green, blue = channels[0], channels[1], channels[2]
    return int((red + green + blue) / 3)
def is_set(pix, x, y):
    """
    Report whether the pixel at (x, y) counts as dark (grey scale).

    Returns True when the brightness computed by img_avg is strictly below
    INTENSITY (the pixel is dark), and False otherwise (the pixel is
    bright).
    """
    # INTENSITY is only read, so no ``global`` statement is needed; keeping
    # the docstring as the first statement makes it the real __doc__.
    return img_avg(pix, x, y) < INTENSITY
def is_green(pix, x, y):
    """
    Is this pixel green?

    A pixel is green when its green channel exceeds the larger of the red
    and blue channels by more than GREEN_DIFF.

    Only the first three components of the pixel are used, so pixels with
    or without a trailing alpha component are both accepted.
    """
    red, green, blue = img_point(pix, x, y)[:3]
    # Compare green against the stronger of the two other channels.
    return green - max(red, blue) > GREEN_DIFF
def scan_img(pix, size):
    """
    Find the bounding box of the dark pixels of an image (grey scale).

    pix: pixel accessor handed on to is_set.
    size: (width, height) of the image.

    Every coordinate is tested with is_set.  Returns the tuple
    (start x, start y, end x, end y, total) where total is the number of
    dark pixels and the box is widened by one pixel on each side where the
    image bounds allow it.
    """
    width, height = size[0], size[1]
    left, top = width, height
    right = bottom = 0
    dark = 0

    for row in range(height):
        for col in range(width):
            if not is_set(pix, col, row):
                continue
            dark += 1
            left = min(left, col)
            right = max(right, col)
            top = min(top, row)
            bottom = max(bottom, row)

    # give us a little border to work with
    left = left - 1 if left > 0 else left
    right = right + 1 if right < width else right
    top = top - 1 if top > 0 else top
    bottom = bottom + 1 if bottom < height else bottom

    return (left, top, right, bottom, dark)
def output_image(pix, size):
    """
    Render the whole image as text, one string per image row.

    pix: pixel accessor handed on to is_green.
    size: (width, height) of the image.

    Each character is OFF where is_green reports a green pixel and ON
    everywhere else.  Returns the list of row strings, top row first.
    """
    width, height = size[0], size[1]
    rows = []
    for row in range(height):
        rows.append(
            "".join(
                OFF if is_green(pix, col, row) else ON
                for col in range(width)
            )
        )
    return rows
def run(difficult, index):
    """
    Show an image of one difficulty as text and ask the user for its word.

    difficult: difficulty code used to build the image file name.
    index: accepted for interface compatibility; the body does not use it.

    Image indexes are tried from 0 up to 69; the search stops at the first
    file that does not exist.  For an existing image the size and the
    dark-pixel bounding box (scan_img) are printed, the image is printed as
    text (output_image), and the user is prompted for the word.

    Returns the word typed by the user, or None when no image was found.

    NOTE(review): the file's indentation was lost; the ``return`` is kept
    inside the loop (the trailing comments refer to the loop variables), so
    only the first image is processed per call -- confirm this is intended.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            # We've reached the end, so stop looking. :P
            break
        print(f"Loading: {fname}")
        # The context manager closes the file handle; all pixel access
        # happens while the image is still open.
        with Image.open(fname) as im:
            pix = im.load()
            size = im.size
            print(f"Size: {size[0]} x {size[1]}")
            sx, sy, ex, ey, total = scan_img(pix, size)
            print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
            img_s = output_image(pix, size)
        for l in img_s:
            print(l)
        word = input("Word: ")
        # Returns word so it can be stored in dictionary
        return word
# Maps "<difficulty>_<index>" to the word recognised (or typed in) for it.
key_word = {}


def autotrain(difficult):
    """
    Recognise the word of every downloaded image of one difficulty.

    difficult: difficulty code used to build the image file names.

    Image indexes are tried from 0 up to 69; the loop stops at the first
    image file that does not exist.  For each image:

    * a cleaned copy is produced with imager.image_cleaner when missing,
    * tesseract is run on the cleaned copy, writing data/<difficulty>_<index>.txt,
    * when the lower-cased result is in VALID_WORDS it is stored in
      key_word, otherwise the image is shown with imager.output_image and
      the user is asked to type the word.

    Nothing is returned; results are collected in the module-level
    key_word dictionary.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            break
        cleaned = cleaned_filename(difficult, x)
        if not os.path.exists(cleaned):
            imager.image_cleaner(fname, cleaned)
        print(f"Loading: {cleaned} ", end='')
        fileout = "data/{0}_{1}".format(difficult, x)
        ocr = subprocess.run(
            ["tesseract", cleaned, fileout],
            stderr=subprocess.DEVNULL,
            shell=False,
        )
        # Only trust the text file when tesseract succeeded; otherwise a
        # stale result from an earlier run could be picked up.  An empty
        # word falls through to the manual prompt below.
        word = ''
        if ocr.returncode == 0:
            try:
                with open(fileout + ".txt", "r", encoding="utf-8", errors="replace") as fp:
                    word = fp.read().strip().lower()
            except FileNotFoundError:
                pass
        key = f'{difficult}_{x}'
        if word in VALID_WORDS:
            key_word[key] = word
            print(word)
        else:
            print("UNKNOWN", word)
            # Output the image so the user can read the word.
            with Image.open(fname) as im:
                imager.output_image(im)
            key_word[key] = input("Word: ")
- # Now to call all the previous functions
if args.download:
    print("Downloading s0urce.io Words")
    # Number of images to fetch per difficulty.
    # smaller is better, and cleaner.
    tofetch = {'e': 62, 'm': 66, 'h': 55}
    # ``count`` rather than ``max``: the original name shadowed the builtin
    # for the rest of the module.
    for d, count in tofetch.items():
        print(d.upper())
        for i in range(count):
            download(d, i)
            # Optional delay between requests (disabled):
            # time.sleep(random.randint(10, 15))
if args.train:
    # Image processing: walk every difficulty, and within it every image,
    # collecting the words in key_word.
    for level in ("e", "m", "h"):
        autotrain(level)
    # Persist the collected words as sorted, indented JSON.
    with open(args.JSON, 'w') as fp:
        json.dump(key_word, fp, indent=2, sort_keys=True)
if args.update:
    # Load the "<difficulty>_<index>" -> word mapping saved by --train.
    with open(args.JSON, 'r') as fp:
        key_word = json.load(fp)
    # update the s0urce.js script
    filename = 's0urce.user.js'
    with open(filename, 'r') as fp:
        lines = fp.readlines()
    # Lines are now in memory. Time to update!
    for i, line in enumerate(lines):
        if 'http://s0urce.io/client/img/word/' not in line:
            continue
        # This is a target line: trim whitespace, the trailing ':' and the
        # closing quote so the URL path ends with the index.
        l = line.strip().strip(':').strip('"')
        # gets parts of the path; the last two are difficulty and index
        parts = l.split('/')
        dif = parts[-2]
        index = parts[-1]
        # build the key -- get the word
        key = f'{dif}_{index}'
        word = key_word[key]
        print("{0} : {1}".format(key, word))
        # The line after the URL carries the answer.
        # (A trailing " break;" may or may not be needed in the JS.)
        replacement = f' form.value = "{word}";' + "\n"
        if i + 1 < len(lines):
            lines[i + 1] = replacement
        else:
            # The URL was the last line of the file: add the answer line
            # instead of failing with IndexError.
            lines.append(replacement)
    with open(filename, 'w') as fp:
        fp.writelines(lines)
    upload = subprocess.run(
        ["scp", filename, "linode:/usr/share/nginx/bugz"],
        stderr=subprocess.DEVNULL,
        shell=False,
    )
    # Only announce the published script when the copy actually worked.
    if upload.returncode == 0:
        print("Please visit https://bugz.red-green.com/s0urce.user.js !")
    else:
        print(f"Upload of {filename} failed (scp exit status {upload.returncode})")
# Regardless what we did let the user know we at least ran and we are now done
print("Complete")
|