#!/usr/bin/env python3
# david / s0urceBOT

from PIL import Image
from pprint import pprint

import sys
import time
import os
import requests
import random
import json
import argparse
import subprocess

import imager

# Command line: four independent mode switches plus an optional results file.
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
for flag, text in (
    ("--download", "Download Images"),
    ("--train", "Convert Images to Text"),
    ("--quick", "Quick convert Images to Text"),
    ("--update", "Update s0urce.js script"),
):
    parser.add_argument(flag, help=text, action="store_true")
parser.add_argument("JSON", type=str, nargs="?", help="Filename to save results", default="test.js")
args = parser.parse_args()
# pprint(args)

# If no option, display help and exit
if not any((args.download, args.train, args.quick, args.update)):
    parser.print_help()
    sys.exit()

# httpbin.org/headers
# Every request made through `sess` carries the browser-like User-Agent below.
head = dict()
head["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
sess = requests.Session()
sess.headers.update(head)

ON = "X"  # Dark pixel in an image
OFF = "."  # Light pixel in an image

# Data directory name, do we really need this? Is it really going to change?
DIR = "data"

# How bright does something have to be to trigger it being a dark or light pixel?
# Looks like around 75 removes the extra stuff that s0urce.io does to prevent it
# from being just matching images.
INTENSITY = 75

# How much brighter the green channel must be (compared to the others),
# to be called green.
GREEN_DIFF = 10

# Words that autotrain() accepts straight from OCR: a tesseract result is only
# trusted when it appears in this set, anything else is confirmed by the user.
# Because this is a set, the entries that appear twice ("net", "dir") are harmless.
VALID_WORDS = {
 "constructor", "info", "anon", "send", "com", "root", "port", "val", 
 "add", "ghost", "net", "http", "status", "syscall", "part", "delete", 
 "datatype", "loadbytes", "setping", "size", "system", "setstats", 
 "join", "socket", "signal", "dir", "accountname", "decryptfile", 
 "intel", "xml", "connect", "sizeof", "writefile", "call", "reset", 
 "global", "user", "remove", "count", "set", "loop", "num", 
 "client", "file", "channel", "right", "stat", "emit", "handle", 
 "buffer", "mysql", "write", "type", "list", "temp", "getfile", 
 "thread", "decrypt", "poly", "setcookie", "domain", "length", 
 "gridwidth", "upload", "get", "generatecodepack", "data", 
 "process", "download", "proxy", "fillgrid", "bit", "encryptfile", 
 "host", "ping", "event", "url", "load", "key", "changepassword", 
 "bufferpingset", "getfirewallchannel", "getinfo", "getping", "pass", 
 "newserver", "username", "generate", "userport", "init", "net", 
 "left", "point", "cookies", "protocol", "responder", "getkey", 
 "hostserver", "eventtype", "gridheight", "server", "setport", 
 "getpass", "loadloggedpassword", "destroybatch", "getxmlprotocol", 
 "channelsetpackage", "batchallfiles", "module", "response", 
 "serverproxy", "filetype", "urlcheck", "config", "number", 
 "ghostfilesystem", "disconnectserver", "emitconfiglist", 
 "dodecahedron", "eventlistdir", "systemportkey", "setnewproxy", 
 "createnewsocket", "changeusername", "tempdatapass", "blockthreat", 
 "statusofprocess", "patcheventlog", "newline", "dir", "bytes", 
 "findpackage", "package", "encode", "joinnetworkclient", 
 "rootcookieset", "callmodule", "sizeofhexagon", "createfilethread", 
 "includedirectory", "loadregisterlist", "encryptunpackedbatch", 
 "getpartoffile", "getdatapassword", "create2axisvector", 
 "create3axisvector", "disconnectchannel", "setnewid", "hexagon", 
 "account", "removenewcookie", "getid", "encodenewfolder", 
 "sendintelpass", "getlog", "command", "threat", "userid", 
 "wordcounter", "removeoldcookie", "hostnewserver", "disconnect", 
 "listconfig", "newhost", "createnewpackage", "loadaltevent", "log", 
 "filedir", "fileexpresslog", "decryptdatabatch", "mergesocket", 
 "unpacktmpfile", "uploaduserstats", "getmysqldomain", 
 "checkhttptype", "encrypt", "vector", "httpbuffersize", 
 "systemgridtype", "password", "respondertimeout", "deleteallids", 
 "exportconfigpackage", "export"
}

# Check the environment, do we have all that we need?
for needed_dir in ("images", "data"):
    if not os.path.exists(needed_dir):
        os.mkdir(needed_dir)

if not os.path.exists("words.txt"):
    # Opening for writing and closing again leaves an empty file behind.
    with open('words.txt', 'w'):
        pass

def image_filename(difficulty, index):
    """Return the path of the downloaded image for this difficulty and index."""
    return "images/{0}_{1}.png".format(difficulty, index)

def cleaned_filename(difficulty, index):
    """Return the path of the cleaned copy of an image (the "_clean" variant)."""
    return "images/{0}_{1}_clean.png".format(difficulty, index)

def cleaner_filename(difficulty, index):
    """Return the path of the "_cleaner" variant of an image."""
    return "images/{0}_{1}_cleaner.png".format(difficulty, index)

def download(howhard, index):
    """
    Download an image based upon how hard it is.

    howhard: difficulty letter used in the URL and in the saved file name.
    index:   index of the image within that difficulty.

    On success, it saves the image file and removes any previously cleaned
    copy, so we don't leave stale cleaned images around (they get regenerated
    from the fresh download).
    Failure (any status other than 200) raises ConnectionError.
    A stalled connection raises the requests timeout exception instead of
    hanging forever.
    """
    url = f"http://s0urce.io/client/img/word/{howhard}/{index}"
    r = sess.get(url, timeout=30)
    if r.status_code != 200:
        # We did not get a 200 Okay, log this... Hmm maybe we need to make a log file?
        raise ConnectionError(f"{url} returned status_code {r.status_code}")

    with open(image_filename(howhard, index), "wb") as f:
        f.write(r.content)

    # cleaned images?  we need to delete & regenerate those.
    cleaned = cleaned_filename(howhard, index)
    if os.path.exists(cleaned):
        os.remove(cleaned)

def run(difficult, index):
    """
    Show one downloaded word image and ask the user which word it is.

    difficult: difficulty letter used in the image file name.
    index:     index of the image within that difficulty.

    We open the image named by image_filename(difficult, index), print its
    size, display it through imager.output_image (the same call autotrain and
    quicktrain use), and then ask the user what the word is.

    Returns the word typed by the user so the caller can store it.
    Image.open raises FileNotFoundError when the image was never downloaded.
    """
    fname = image_filename(difficult, index)

    print(f"Loading: {fname}")
    # The context manager closes the file handle once the image has been shown.
    with Image.open(fname) as im:
        size = im.size
        print(f"Size: {size[0]} x {size[1]}")
        imager.output_image(im)

    # Returns word so it can be stored in dictonary
    return input("Word: ")


# "<difficulty>_<index>" -> word; filled by autotrain()/quicktrain().
key_word = dict()
# autotrain() stores "<width>_<height>_<filesize>" -> word here and dumps it to
# misery.json; quicktrain() stores the opposite direction (word -> fingerprint).
misery = dict()

def quicktrain(difficult):
    """
    Quickly convert the images to text based upon filesize and image size.

    difficult: difficulty letter whose images (index 0..69, stopping at the
               first missing file) are processed.

    Each image is identified by the fingerprint
    "<width>_<height>_<file size in bytes>" and looked up in the table below.
    An entry mapped to '' means the word is not known yet: the image is shown
    and the user is asked for it.  A fingerprint that is not in the table at
    all shows the image and exits the program with status 5.

    Results go into the module-level key_word ("<difficulty>_<index>" -> word)
    and misery (word -> fingerprint) dictionaries.
    """

    quick = {
        "100_24_7874": "client",
        "100_24_7897": "status",
        "100_24_7929": "vector",
        "100_24_7937": "encode",
        "101_24_7761": "getkey",
        "101_24_7906": "server",
        "101_24_7922": "module",
        "101_24_7968": "socket",
        "101_24_7984": "config",
        "102_24_7983": "export",
        "102_24_7987": "number",
        "102_24_7997": "buffer",
        "102_24_8000": "getlog",
        "102_24_8010": "length",
        "102_24_8019": "global",
        "102_24_8075": "delete",
        "102_24_8087": "domain",
        "103_24_8085": "remove",
        "103_24_8099": "upload",
        "103_24_8102": "sizeof",
        "103_24_8122": "system",
        "103_24_8139": "threat",
        "103_24_8159": "userid",
        "104_24_8146": "thread",
        "114_24_9012": "getpass",
        "115_24_8980": "filedir",
        "115_24_9063": "account",
        "115_24_9113": "cookies",
        "116_24_9036": "newline",
        "116_24_9072": "getfile",
        "116_24_9089": "newhost",
        "116_24_9090": "process",
        "116_24_9122": "channel",
        "116_24_9136": "connect",
        "117_24_9101": "setping",
        "117_24_9168": "encrypt",
        "117_24_9176": "decrypt",
        "117_24_9182": "setport",
        "117_24_9248": "package",
        "117_24_9297": "hexagon",
        "118_24_9280": "getinfo",
        "118_24_9310": "getping",
        "119_24_9300": "syscall",
        "119_24_9321": "command",
        "131_24_10113": "generate",
        "131_24_10275": "userport",
        "132_24_10329": "download",
        "132_24_10342": "datatype",
        "132_24_10356": "username",
        "132_24_10379": "filetype",
        "132_24_10395": "protocol",
        "132_24_10404": "urlcheck",
        "133_24_10292": "response",
        "134_24_10420": "setstats",
        "134_24_10531": "setnewid",
        "134_24_10557": "password",
        "136_24_10707": "fillgrid",
        "145_24_11396": "loadbytes",
        "146_24_11393": "writefile",
        "147_24_11595": "setcookie",
        "148_24_11374": "eventtype",
        "148_24_11610": "newserver",
        "148_24_11626": "responder",
        "149_24_11614": "gridwidth",
        "161_24_12648": "hostserver",
        "163_24_12693": "listconfig",
        "163_24_12750": "callmodule",
        "164_24_12800": "disconnect",
        "164_24_12835": "gridheight",
        "176_24_13510": "mergesocket",
        "177_24_13789": "wordcounter",
        "177_24_13819": "accountname",
        "177_24_13824": "encryptfile",
        "177_24_13855": "serverproxy",
        "177_24_13871": "decryptfile",
        "177_24_13873": "constructor",
        "178_24_13825": "findpackage",
        "179_24_13843": "blockthreat",
        "179_24_14001": "setnewproxy",
        "192_24_14660": "dodecahedron",
        "192_24_15084": "destroybatch",
        "193_24_14925": "tempdatapass",
        "193_24_14979": "eventlistdir",
        "194_24_15129": "deleteallids",
        "195_24_15252": "loadaltevent",
        "207_24_16020": "batchallfiles",
        "207_24_16074": "sendintelpass",
        "208_24_16209": "getpartoffile",
        "208_24_16274": "unpacktmpfile",
        "208_24_16287": "hostnewserver",
        "208_24_16317": "systemportkey",
        "209_24_15651": "rootcookieset",
        "209_24_16185": "bufferpingset",
        "209_24_16243": "sizeofhexagon",
        "209_24_16338": "patcheventlog",
        "209_24_16357": "checkhttptype",
        "223_24_17412": "changeusername",
        "223_24_17418": "systemgridtype",
        "224_24_17265": "fileexpresslog",
        "224_24_17287": "getmysqldomain",
        "224_24_17484": "getxmlprotocol",
        "224_24_17487": "httpbuffersize",
        "224_24_17506": "emitconfiglist",
        "225_24_17005": "changepassword",
        "237_24_18534": "uploaduserstats",
        "238_24_18712": "encodenewfolder",
        "239_24_18670": "ghostfilesystem",
        "239_24_18700": "getdatapassword",
        "239_24_18710": "statusofprocess",
        "239_24_18713": "removeoldcookie",
        "239_24_18744": "removenewcookie",
        "241_24_18811": "createnewsocket",
        "253_24_19681": "generatecodepack",
        "254_24_19585": "createnewpackage",
        "254_24_19791": "disconnectserver",
        "254_24_19901": "decryptdatabatch",
        "255_24_19874": "includedirectory",
        "255_24_19938": "loadregisterlist",
        "256_24_19399": "createfilethread",
        "256_24_19577": "respondertimeout",
        "268_24_20945": "channelsetpackage",
        "268_24_20953": "disconnectchannel",
        "269_24_20857": "create2axisvector",
        "270_24_21037": "create3axisvector",
        "271_24_21016": "joinnetworkclient",
        "285_24_22237": "getfirewallchannel",
        "288_24_22241": "loadloggedpassword",
        "300_24_23305": "exportconfigpackage",
        "314_24_24079": "encryptunpackedbatch",
        "52_24_4127": "xml",
        "52_24_4170": "val",
        "53_24_4188": "url",
        "54_24_4316": "net",
        "54_24_4350": "key",
        "55_24_4381": "log",
        "55_24_4393": "set",
        "55_24_4417": "dir",
        "56_24_4487": "get",
        "57_24_4504": "num",
        "57_24_4524": "com",
        "57_24_4553": "bit",
        "57_24_4593": "add",
        "60_24_4770": "add",
        "68_24_5431": "http",
        "69_24_5525": "pass",
        "70_24_5392": "temp",
        "70_24_5444": "type",
        "70_24_5469": "list",
        "70_24_5529": "file",
        "70_24_5545": "loop",
        "70_24_5579": "ping",
        "70_24_5591": "port",
        "71_24_5544": "left",
        "71_24_5572": "size",
        "71_24_5610": "call",
        "71_24_5612": "root",
        "71_24_5632": "part",
        "71_24_5644": "init",
        "71_24_5649": "host",
        "71_24_5656": "poly",
        "71_24_5667": "info",
        "71_24_5748": "user",
        "72_24_5559": "join",
        "72_24_5699": "",
        "72_24_5753": "data",
        "73_24_5678": "stat",
        "74_24_5861": "send",
        "75_24_6004": "load",
        "83_24_6579": "proxy",
        "84_24_6417": "event",
        "85_24_6661": "intel",
        "85_24_6693": "right",
        "85_24_6735": "bytes",
        "86_24_6678": "mysql",
        "86_24_6762": "write",
        "87_24_6800": "ghost",
        "87_24_6911": "count",
        "88_24_6908": "reset",
        "88_24_6946": "point",
        "88_24_7003": "getid",
        "99_24_7741": "signal",
        "99_24_7796": "handle",
    }

    for x in range(0, 70):
        fname = image_filename(difficult, x)

        if not os.path.exists(fname):
            # First gap in the numbering: nothing more to process.
            break

        # The context manager closes the image file once we are done with it,
        # instead of leaking one handle per image.
        with Image.open(fname) as im:
            # Fingerprint is computed once and reused for lookup and bookkeeping.
            k = "{0}_{1}_{2}".format(*im.size, os.path.getsize(fname))
            if k not in quick:
                # Unknown fingerprint: show what we found and give up.
                imager.output_image(im)
                print("well, shit.")
                sys.exit(5)

            correct = quick[k]
            if correct == '':
                # Known fingerprint without a word yet: ask the user.
                imager.output_image(im)
                correct = input("Word: ")

        print(fname, correct)
        key_word[f'{difficult}_{x}'] = correct
        misery[correct] = k

def autotrain(difficult):
    """
    OCR every downloaded image of one difficulty and record the words.

    difficult: difficulty letter whose images (index 0..69, stopping at the
               first missing file) are processed.

    For each image we make sure a cleaned copy exists (imager.image_cleaner),
    run the `tesseract` command on it, and accept the result when it is one of
    VALID_WORDS.  Otherwise the image is displayed with imager.output_image
    and the user is asked what the word is.

    Side effects:
      * key_word["<difficulty>_<index>"] = word
      * misery["<width>_<height>_<filesize>"] = word
      * one line per image is appended to words.txt
      * misery.json is rewritten with the whole misery dictionary
    """

    for x in range(0, 70):
        fname = image_filename(difficult, x)

        if not os.path.exists(fname):
            # First gap in the numbering: nothing more to process.
            break

        cleaned = cleaned_filename(difficult, x)
        if not os.path.exists(cleaned):
            imager.image_cleaner(fname, cleaned)

        print(f"Loading: {cleaned} ", end='')
        fileout = "data/{0}_{1}".format(difficult, x)

        result = subprocess.run(
            ["tesseract", cleaned, fileout],
            stderr=subprocess.DEVNULL,
            # capture_output=False,
            shell=False,
        )

        # A failed tesseract run may leave no output file, or a stale one from
        # an earlier run; treat that as "not recognised" and ask the user.
        word = ''
        if result.returncode == 0:
            with open(fileout + ".txt", "r") as fp:
                word = fp.read().strip().lower()

        # The context manager closes the image file once we are done with it.
        with Image.open(fname) as im:
            size = im.size
            if (word != '') and (word in VALID_WORDS):
                print(word)
            else:
                print("UNKNOWN", word)
                # Output the image
                imager.output_image(im)
                word = input("Word: ")

        key_word[f'{difficult}_{x}'] = word
        with open('words.txt', 'a') as f:
            f.write(f'{difficult}_{x} = {size[0]} x {size[1]} is {word}\n')

        # misery is keyed by fingerprint, so a repeated word has to be looked
        # for among the values (testing the keys could never match).
        if word in misery.values():
            print("Awwww SHIT! ", word)

        misery["{0}_{1}_{2}".format(*size, os.path.getsize(fname))] = word

    with open('misery.json', 'w') as fp:
        json.dump(misery, fp, indent=2, sort_keys=True)

# Now to call all the previous functions
if args.download:
    print("Downloading s0urce.io Words")
    # smaller is better, and cleaner.
    # Number of images fetched per difficulty letter.
    tofetch = { 'e': 62, 'm': 66, 'h': 55 }
    # `count` rather than `max`, so the builtin max() is not rebound module-wide.
    for d, count in tofetch.items():
        print(d.upper())
        for i in range(count):
            download(d, i)
        # time.sleep(random.randint(10, 15))

# --train runs before --quick; each enabled mode walks every difficulty and
# then saves the words collected so far to the results file.
for enabled, trainer in ((args.train, autotrain), (args.quick, quicktrain)):
    if not enabled:
        continue
    # Img Processing: Run thru every single category and every single word
    for level in ("e", "m", "h"):
        trainer(level)
    with open(args.JSON, 'w') as fp:
        json.dump(key_word, fp, sort_keys=True, indent=2)



if args.update:
    import re  # only this branch needs it, so it is imported locally

    with open(args.JSON, 'r') as fp:
        key_word = json.load(fp)

    # update the s0urce.js script
    filename = 's0urce.user.js'
    with open(filename, 'r') as fp:
        lines = fp.readlines()
    # Lines are now in memory.  Time to update!

    # "<whole>.<decimal>" at the end of a line; group 3 preserves the line
    # ending (or its absence) exactly as it was.
    version_re = re.compile(r'(\d+)\.(\d+)(\s*)$')

    for i, line in enumerate(lines):
        if 'http://s0urce.io/client/img/word/' in line:
            # This is a target line, so:
            l = line.strip().strip(':').strip('"')
            # gets parts of the path
            parts = l.split('/')
            # get difficulty and index
            dif = parts[-2]
            index = parts[-1]
            # build the key -- get the word
            key = f'{dif}_{index}'
            # A missing key raises KeyError before anything is written back.
            word = key_word[key]
            print("{0} : {1}".format( key, word))
            # The line after the URL is replaced by the form.value line.
            # (A trailing "break;" may or may not be needed there.)
            lines[i+1] = f'                form.value = "{word}";' + "\n"
        if '@version' in line:
            # Bump the decimal part; rolling over from 9 increments the whole part.
            found = version_re.search(line)
            if found:
                who = int(found.group(1))  # Whole
                dec = int(found.group(2)) + 1  # Decimal
                if (dec >= 10):
                    who += 1
                    dec = 0
                lines[i] = f"{line[:found.start()]}{who}.{dec}{found.group(3)}"
            else:
                print(f"No version number found to bump in: {line.strip()}")

    with open(filename, 'w') as fp:
        fp.writelines(lines)

    output = subprocess.run(
        ["scp", filename, "linode:/usr/share/nginx/bugz"],
        stderr=subprocess.DEVNULL,
        # capture_output=False,
        shell=False,
    )
    # Only advertise the uploaded script when scp actually succeeded.
    if output.returncode == 0:
        print("Please visit https://bugz.red-green.com/s0urce.user.js !")
    else:
        print(f"scp failed with exit status {output.returncode}; {filename} was not uploaded")

# Regardless what we did let the user know we at least ran and we are now done
print("Complete")