#!/usr/bin/env python3
import argparse
import json
import os
import random
import re
import subprocess
import sys
import time
from pprint import pprint

import requests
from PIL import Image

import imager
# Command-line interface: four independent action switches plus an optional
# filename for the word map.
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
for _flag, _help in (
    ("--download", "Download Images"),
    ("--train", "Convert Images to Text"),
    ("--quick", "Quick convert Images to Text"),
    ("--update", "Update s0urce.js script"),
):
    parser.add_argument(_flag, help=_help, action="store_true")
parser.add_argument(
    "JSON", type=str, nargs="?", help="Filename to save results", default="test.js"
)
args = parser.parse_args()

# With no action selected there is nothing to do: show the usage text and quit.
if not (args.download or args.train or args.quick or args.update):
    parser.print_help()
    sys.exit()
# Shared HTTP session whose default headers carry a browser-like User-Agent
# (httpbin.org/headers is handy for checking what actually goes out).
head = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/39.0.2171.95 Safari/537.36"
    )
}
sess = requests.Session()
sess.headers.update(head)
ON = "X"  # Dark pixel in an image
OFF = "."  # Light pixel in an image

# Data directory name, do we really need this? Is it really going to change?
DIR = "data"

# How bright does something have to be to trigger it being a dark or light
# pixel?  Around 75 looks like it removes the extra stuff that s0urce.io adds
# to prevent plain image matching.
INTENSITY = 75

# How much brighter the green channel must be (compared to the others) to be
# called green.
GREEN_DIFF = 10
# Every word accepted as a valid answer.  autotrain() only trusts an OCR
# result when it is a member of this set.
VALID_WORDS = set(
    """
    constructor info anon send com root port val
    add ghost net http status syscall part delete
    datatype loadbytes setping size system setstats
    join socket signal dir accountname decryptfile
    intel xml connect sizeof writefile call reset
    global user remove count set loop num
    client file channel right stat emit handle
    buffer mysql write type list temp getfile
    thread decrypt poly setcookie domain length
    gridwidth upload get generatecodepack data
    process download proxy fillgrid bit encryptfile
    host ping event url load key changepassword
    bufferpingset getfirewallchannel getinfo getping pass
    newserver username generate userport init
    left point cookies protocol responder getkey
    hostserver eventtype gridheight server setport
    getpass loadloggedpassword destroybatch getxmlprotocol
    channelsetpackage batchallfiles module response
    serverproxy filetype urlcheck config number
    ghostfilesystem disconnectserver emitconfiglist
    dodecahedron eventlistdir systemportkey setnewproxy
    createnewsocket changeusername tempdatapass blockthreat
    statusofprocess patcheventlog newline bytes
    findpackage package encode joinnetworkclient
    rootcookieset callmodule sizeofhexagon createfilethread
    includedirectory loadregisterlist encryptunpackedbatch
    getpartoffile getdatapassword create2axisvector
    create3axisvector disconnectchannel setnewid hexagon
    account removenewcookie getid encodenewfolder
    sendintelpass getlog command threat userid
    wordcounter removeoldcookie hostnewserver disconnect
    listconfig newhost createnewpackage loadaltevent log
    filedir fileexpresslog decryptdatabatch mergesocket
    unpacktmpfile uploaduserstats getmysqldomain
    checkhttptype encrypt vector httpbuffersize
    systemgridtype password respondertimeout deleteallids
    exportconfigpackage export
    """.split()
)
# Check the environment, do we have all that we need?
# Working directories first ...
for _needed_dir in ("images", "data"):
    if not os.path.exists(_needed_dir):
        os.mkdir(_needed_dir)

# ... then the word log, created empty when it is missing.
if not os.path.exists("words.txt"):
    with open('words.txt', 'w'):
        pass
def image_filename(difficulty, index):
    """Return the path of the downloaded image for *difficulty* / *index*."""
    return "images/{}_{}.png".format(difficulty, index)
def cleaned_filename(difficulty, index):
    """Return the path used for the cleaned copy of an image."""
    stem = "{}_{}".format(difficulty, index)
    return "images/" + stem + "_clean.png"
def cleaner_filename(difficulty, index):
    """Return the path used for the "cleaner" variant of an image."""
    return "".join(("images/", format(difficulty), "_", format(index), "_cleaner.png"))
def download(howhard, index):
    """
    Download one word image and save it locally.

    Args:
        howhard: Difficulty code used both in the URL path and in the local
            filename (the script calls this with "e", "m" or "h").
        index: Image number within that difficulty.

    On success the response body is written to
    ``image_filename(howhard, index)``.  Any existing cleaned copy of that
    image is deleted so it is regenerated from the fresh download instead of
    going stale.

    Raises:
        ConnectionError: The server answered with a status other than 200.
    """
    url = f"http://s0urce.io/client/img/word/{howhard}/{index}"
    r = sess.get(url)
    if r.status_code != 200:
        # We did not get a 200 Okay; report which URL failed and how.
        raise ConnectionError(
            "{0} returned status_code {1}".format(url, r.status_code)
        )

    with open(image_filename(howhard, index), "wb") as f:
        f.write(r.content)

    # Cleaned images are derived from the download, so drop the old one.
    cleaned = cleaned_filename(howhard, index)
    if os.path.exists(cleaned):
        os.remove(cleaned)
def run(difficult, index):
    """
    Show a downloaded image of one difficulty and ask the user for its word.

    Walks image numbers 0..69 for ``difficult`` and stops at the first file
    that does not exist (not every category has 70 images).  For an existing
    image it prints the filename and size, prints the five values returned by
    ``scan_img`` as a bounding box plus pixel total, prints every line
    returned by ``output_image``, then prompts for the word and returns it.

    NOTE(review): ``index`` is never used in the body.
    NOTE(review): ``scan_img`` and ``output_image`` are neither defined nor
    imported in the visible part of this file -- confirm where they come from
    (possibly the ``imager`` module) before calling this function.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            # We've reached the end, so stop looking. :P
            break
        print(f"Loading: {fname}")
        im = Image.open(fname)
        pix = im.load()
        size = im.size
        print(f"Size: {size[0]} x {size[1]}")
        # NOTE(review): pal is never read afterwards.
        pal = im.getpalette()
        # Defaults covering the whole image; all five are overwritten by the
        # scan_img() call right below.
        sx = 0
        ex = size[0]
        sy = 0
        ey = size[1]
        total = 0
        sx, sy, ex, ey, total = scan_img(pix, size)
        print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")
        img_s = output_image(pix, size)
        for l in img_s:
            print(l)
        word = input("Word: ")
        # Returns word so it can be stored in a dictionary.
        # NOTE(review): as laid out here the return sits inside the loop, so
        # only the first existing image is ever shown -- confirm intent.
        return word
        # The image file is deliberately kept on disk for error checking.
# Results gathered while training:
#   key_word -- "<difficulty>_<index>" -> word shown by that image
#   misery   -- bookkeeping that relates image fingerprints and words
key_word = dict()
misery = dict()
# Known image fingerprints, "<width>_<height>_<filesize in bytes>" -> word.
# Built once at import time instead of on every quicktrain() call.
_QUICK_WORDS = {
    "100_24_7874": "client",
    "100_24_7897": "status",
    "100_24_7929": "vector",
    "100_24_7937": "encode",
    "101_24_7761": "getkey",
    "101_24_7906": "server",
    "101_24_7922": "module",
    "101_24_7968": "socket",
    "101_24_7984": "config",
    "102_24_7983": "export",
    "102_24_7987": "number",
    "102_24_7997": "buffer",
    "102_24_8000": "getlog",
    "102_24_8010": "length",
    "102_24_8019": "global",
    "102_24_8075": "delete",
    "102_24_8087": "domain",
    "103_24_8085": "remove",
    "103_24_8099": "upload",
    "103_24_8102": "sizeof",
    "103_24_8122": "system",
    "103_24_8139": "threat",
    "103_24_8159": "userid",
    "104_24_8146": "thread",
    "114_24_9012": "getpass",
    "115_24_8980": "filedir",
    "115_24_9063": "account",
    "115_24_9113": "cookies",
    "116_24_9036": "newline",
    "116_24_9072": "getfile",
    "116_24_9089": "newhost",
    "116_24_9090": "process",
    "116_24_9122": "channel",
    "116_24_9136": "connect",
    "117_24_9101": "setping",
    "117_24_9168": "encrypt",
    "117_24_9176": "decrypt",
    "117_24_9182": "setport",
    "117_24_9248": "package",
    "117_24_9297": "hexagon",
    "118_24_9280": "getinfo",
    "118_24_9310": "getping",
    "119_24_9300": "syscall",
    "119_24_9321": "command",
    "131_24_10113": "generate",
    "131_24_10275": "userport",
    "132_24_10329": "download",
    "132_24_10342": "datatype",
    "132_24_10356": "username",
    "132_24_10379": "filetype",
    "132_24_10395": "protocol",
    "132_24_10404": "urlcheck",
    "133_24_10292": "response",
    "134_24_10420": "setstats",
    "134_24_10531": "setnewid",
    "134_24_10557": "password",
    "136_24_10707": "fillgrid",
    "145_24_11396": "loadbytes",
    "146_24_11393": "writefile",
    "147_24_11595": "setcookie",
    "148_24_11374": "eventtype",
    "148_24_11610": "newserver",
    "148_24_11626": "responder",
    "149_24_11614": "gridwidth",
    "161_24_12648": "hostserver",
    "163_24_12693": "listconfig",
    "163_24_12750": "callmodule",
    "164_24_12800": "disconnect",
    "164_24_12835": "gridheight",
    "176_24_13510": "mergesocket",
    "177_24_13789": "wordcounter",
    "177_24_13819": "accountname",
    "177_24_13824": "encryptfile",
    "177_24_13855": "serverproxy",
    "177_24_13871": "decryptfile",
    "177_24_13873": "constructor",
    "178_24_13825": "findpackage",
    "179_24_13843": "blockthreat",
    "179_24_14001": "setnewproxy",
    "192_24_14660": "dodecahedron",
    "192_24_15084": "destroybatch",
    "193_24_14925": "tempdatapass",
    "193_24_14979": "eventlistdir",
    "194_24_15129": "deleteallids",
    "195_24_15252": "loadaltevent",
    "207_24_16020": "batchallfiles",
    "207_24_16074": "sendintelpass",
    "208_24_16209": "getpartoffile",
    "208_24_16274": "unpacktmpfile",
    "208_24_16287": "hostnewserver",
    "208_24_16317": "systemportkey",
    "209_24_15651": "rootcookieset",
    "209_24_16185": "bufferpingset",
    "209_24_16243": "sizeofhexagon",
    "209_24_16338": "patcheventlog",
    "209_24_16357": "checkhttptype",
    "223_24_17412": "changeusername",
    "223_24_17418": "systemgridtype",
    "224_24_17265": "fileexpresslog",
    "224_24_17287": "getmysqldomain",
    "224_24_17484": "getxmlprotocol",
    "224_24_17487": "httpbuffersize",
    "224_24_17506": "emitconfiglist",
    "225_24_17005": "changepassword",
    "237_24_18534": "uploaduserstats",
    "238_24_18712": "encodenewfolder",
    "239_24_18670": "ghostfilesystem",
    "239_24_18700": "getdatapassword",
    "239_24_18710": "statusofprocess",
    "239_24_18713": "removeoldcookie",
    "239_24_18744": "removenewcookie",
    "241_24_18811": "createnewsocket",
    "253_24_19681": "generatecodepack",
    "254_24_19585": "createnewpackage",
    "254_24_19791": "disconnectserver",
    "254_24_19901": "decryptdatabatch",
    "255_24_19874": "includedirectory",
    "255_24_19938": "loadregisterlist",
    "256_24_19399": "createfilethread",
    "256_24_19577": "respondertimeout",
    "268_24_20945": "channelsetpackage",
    "268_24_20953": "disconnectchannel",
    "269_24_20857": "create2axisvector",
    "270_24_21037": "create3axisvector",
    "271_24_21016": "joinnetworkclient",
    "285_24_22237": "getfirewallchannel",
    "288_24_22241": "loadloggedpassword",
    "300_24_23305": "exportconfigpackage",
    "314_24_24079": "encryptunpackedbatch",
    "52_24_4127": "xml",
    "52_24_4170": "val",
    "53_24_4188": "url",
    "54_24_4316": "net",
    "54_24_4350": "key",
    "55_24_4381": "log",
    "55_24_4393": "set",
    "55_24_4417": "dir",
    "56_24_4487": "get",
    "57_24_4504": "num",
    "57_24_4524": "com",
    "57_24_4553": "bit",
    "57_24_4593": "add",
    "60_24_4770": "add",
    "68_24_5431": "http",
    "69_24_5525": "pass",
    "70_24_5392": "temp",
    "70_24_5444": "type",
    "70_24_5469": "list",
    "70_24_5529": "file",
    "70_24_5545": "loop",
    "70_24_5579": "ping",
    "70_24_5591": "port",
    "71_24_5544": "left",
    "71_24_5572": "size",
    "71_24_5610": "call",
    "71_24_5612": "root",
    "71_24_5632": "part",
    "71_24_5644": "init",
    "71_24_5649": "host",
    "71_24_5656": "poly",
    "71_24_5667": "info",
    "71_24_5748": "user",
    "72_24_5559": "join",
    "72_24_5699": "anon",
    "72_24_5753": "data",
    "73_24_5678": "stat",
    "74_24_5861": "send",
    "75_24_6004": "load",
    "83_24_6579": "proxy",
    "84_24_6417": "event",
    "85_24_6661": "intel",
    "85_24_6693": "right",
    "85_24_6735": "bytes",
    "86_24_6678": "mysql",
    "86_24_6762": "write",
    "87_24_6800": "ghost",
    "87_24_6911": "count",
    "88_24_6908": "reset",
    "88_24_6946": "point",
    "88_24_7003": "getid",
    "99_24_7741": "signal",
    "99_24_7796": "handle",
}


def quicktrain(difficult):
    """
    Quickly convert the images to text based upon filesize and image size.

    For each ``images/<difficult>_<n>.png`` (n = 0..69, stopping at the first
    missing file) the fingerprint "<width>_<height>_<filesize>" is looked up
    in the table of known fingerprints.

    Side effects:
        * ``key_word["<difficult>_<n>"]`` is set to the word found.
        * ``misery[word]`` is set to the fingerprint.
        * The filename and word are printed for every image.
        * On an unknown fingerprint the image is shown with
          ``imager.output_image`` and the process exits with status 5.

    NOTE(review): autotrain() stores misery the other way round
    (fingerprint -> word); confirm which direction is intended here.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            break

        # Open the image only long enough to read its size (and to display
        # it on a miss), and stat the file a single time.
        with Image.open(fname) as im:
            k = "{0}_{1}_{2}".format(*im.size, os.path.getsize(fname))
            correct = _QUICK_WORDS.get(k)
            if correct is None:
                imager.output_image(im)
                print("well, shit.")
                sys.exit(5)

        print(fname, correct)
        key_word[f'{difficult}_{x}'] = correct
        misery[correct] = k
def _ocr_word(image_path, out_base):
    """Run tesseract on image_path; return its stripped, lower-cased text or ''."""
    result = subprocess.run(
        ["tesseract", image_path, out_base],
        stderr=subprocess.DEVNULL,
        shell=False,
    )
    if result.returncode != 0:
        # Do not mistake a leftover <out_base>.txt from an earlier run for
        # the result of this (failed) run.
        return ""
    try:
        with open(out_base + ".txt", "r") as fp:
            return fp.read().strip().lower()
    except FileNotFoundError:
        return ""


def autotrain(difficult):
    """
    OCR every downloaded image of one difficulty and record the word it shows.

    For each ``images/<difficult>_<n>.png`` (n = 0..69, stopping at the first
    missing file):

    1. A cleaned copy is produced with ``imager.image_cleaner`` unless one
       already exists.
    2. ``tesseract`` is run on the cleaned copy, writing
       ``data/<difficult>_<n>.txt``.
    3. If the OCR text is one of VALID_WORDS it is accepted.  Otherwise
       (including when tesseract fails or writes no output) the image is
       displayed with ``imager.output_image`` and the user types the word.

    Side effects:
        * ``key_word["<difficult>_<n>"]`` is set to the word.
        * ``misery["<width>_<height>_<filesize>"]`` is set to the word.
        * One line per image is appended to ``words.txt``.
        * ``misery.json`` is rewritten once all images are processed.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            break

        cleaned = cleaned_filename(difficult, x)
        if not os.path.exists(cleaned):
            imager.image_cleaner(fname, cleaned)

        print(f"Loading: {cleaned} ", end='')
        word = _ocr_word(cleaned, "data/{0}_{1}".format(difficult, x))

        with Image.open(fname) as im:
            size = im.size
            if word in VALID_WORDS:
                print(word)
            else:
                print("UNKNOWN", word)
                # Show the image and let a human supply the answer.
                imager.output_image(im)
                word = input("Word: ")

        key_word[f'{difficult}_{x}'] = word

        # NOTE(review): misery is keyed by fingerprint in this function, so
        # this membership test compares the word against fingerprint keys --
        # confirm whether a duplicate-word or duplicate-fingerprint check is
        # what was intended.
        if word in misery:
            print("Awwww SHIT! ", word)
        misery["{0}_{1}_{2}".format(*size, os.path.getsize(fname))] = word

        with open('words.txt', 'a') as f:
            f.write(f'{difficult}_{x} = {size[0]} x {size[1]} is {word}\n')

    with open('misery.json', 'w') as fp:
        json.dump(misery, fp, indent=2, sort_keys=True)
# Now to call all the previous functions
if args.download:
    print("Downloading s0urce.io Words")
    # Number of images to fetch per difficulty; smaller is better, and cleaner.
    tofetch = {'e': 62, 'm': 66, 'h': 55}
    # "count" rather than "max": the loop variable lives at module level and
    # must not replace the builtin max() for the rest of the script.
    for d, count in tofetch.items():
        print(d.upper())
        for i in range(count):
            download(d, i)
if args.train:
    # OCR pass over every difficulty, then persist the collected word map.
    for level in ("e", "m", "h"):
        autotrain(level)
    with open(args.JSON, 'w') as fp:
        fp.write(json.dumps(key_word, sort_keys=True, indent=2))
if args.quick:
    # Fingerprint-lookup pass over every difficulty, then persist the word map.
    for level in "emh":
        quicktrain(level)
    with open(args.JSON, mode='w') as fp:
        json.dump(key_word, fp, indent=2, sort_keys=True)
def _bump_version(match):
    """Return the matched "<whole>.<decimal>" advanced by one decimal step.

    A decimal part that reaches 10 rolls over into the whole part.  Whatever
    trailing whitespace was captured is kept.
    """
    who = int(match.group(1))
    dec = int(match.group(2)) + 1
    if dec >= 10:
        who += 1
        dec = 0
    return f"{who}.{dec}{match.group(3)}"


if args.update:
    with open(args.JSON, 'r') as fp:
        key_word = json.load(fp)

    # update the s0urce.js script
    filename = 's0urce.user.js'
    with open(filename, 'r') as fp:
        lines = fp.readlines()

    # Trailing "<whole>.<decimal>" of a line plus any whitespace after it.
    version_re = re.compile(r'(\d+)\.(\d+)(\s*)$')

    # Lines are now in memory. Time to update!
    for i in range(len(lines)):
        if 'http://s0urce.io/client/img/word/' in lines[i]:
            # This is a target line: the URL ends in .../<difficulty>/<index>.
            l = lines[i].strip().strip(':').strip('"')
            parts = l.split('/')
            key = f'{parts[-2]}_{parts[-1]}'
            word = key_word[key]
            print("{0} : {1}".format(key, word))
            if i + 1 >= len(lines):
                # Nothing has been written yet, so stopping leaves the
                # script file untouched.
                sys.exit(f"{filename}: no line after the URL for {key} to hold its answer")
            # The line following the URL carries the answer; replace it.
            lines[i + 1] = f' form.value = "{word}";' + "\n"
        if '@version' in lines[i]:
            bumped, hits = version_re.subn(_bump_version, lines[i], count=1)
            if hits:
                lines[i] = bumped
            else:
                print(f"No version number found in: {lines[i].rstrip()}")

    with open(filename, 'w') as fp:
        fp.writelines(lines)

    result = subprocess.run(
        ["scp", filename, "linode:/usr/share/nginx/bugz"],
        stderr=subprocess.DEVNULL,
        shell=False,
    )
    if result.returncode == 0:
        print("Please visit https://bugz.red-green.com/s0urce.user.js !")
    else:
        # scp's own messages are discarded above, so at least say it failed.
        print(f"scp exited with status {result.returncode}; upload may have failed")

# Regardless what we did let the user know we at least ran and we are now done
print("Complete")
|