david
/
s0urceBOT


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
							#!/usr/bin/env python3

from PIL import Image
from pprint import pprint

import sys
import time
import os
import requests
import random
import json
import argparse
import subprocess

import imager

# Command-line interface: three independent on/off action flags plus one
# optional positional argument naming the JSON file.
parser = argparse.ArgumentParser(description="S0urce.io utility program.")
parser.add_argument("--download", action="store_true", help="Download Images")
parser.add_argument("--train", action="store_true", help="Convert Images to Text")
parser.add_argument("--update", action="store_true", help="Update s0urce.js script")
parser.add_argument(
    "JSON",
    nargs="?",
    default="test.js",
    type=str,
    help="Filename to save results",
)
args = parser.parse_args()

# Filename for the JSON results, taken from the positional argument
# ("test.js" when none is given on the command line).
JSONME = args.JSON

# NOTE: The insert of the JSONIFIED image and word begins at the marker
# // T
# (a JS comment with an uppercase T)
# and stops at the marker
# // t
# (a JS comment with a lowercase t).

# One shared HTTP session for every request this script makes.
# The headers actually sent can be inspected at httpbin.org/headers
sess = requests.Session()
head = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/39.0.2171.95 Safari/537.36"
    )
}
sess.headers.update(head)

# Characters used when an image is rendered as text.
ON = "X"  # Dark pixel in an image
OFF = "."  # Light pixel in an image

# Data directory name, do we really need this? Is it really going to change?
DIR = "data"

# How bright does something have to be to count as a dark or a light pixel?
# Around 75 appears to remove the extra noise s0urce.io adds to stop plain
# image matching.
INTENSITY = 75

# How much brighter the green channel must be (compared to the others),
# to be called green.
GREEN_DIFF = 10

# Lowercase words that an OCR result is checked against (see autotrain);
# a result that is not in this set is treated as unknown.
# A few entries ("add", "net", "dir") are listed twice below; that is
# harmless because this is a set literal.
VALID_WORDS = {
 "constructor", "info", "anon", "send", "com", "root", "port", "val", 
 "add", "ghost", "net", "http", "status", "syscall", "part", "delete", 
 "datatype", "loadbytes", "setping", "size", "system", "setstats", 
 "join", "socket", "signal", "dir", "accountname", "decryptfile", 
 "intel", "xml", "connect", "sizeof", "writefile", "call", "reset", 
 "global", "user", "add", "remove", "count", "set", "loop", "num", 
 "client", "file", "channel", "right", "stat", "emit", "handle", 
 "buffer", "mysql", "write", "type", "list", "temp", "getfile", 
 "thread", "decrypt", "poly", "setcookie", "domain", "length", 
 "gridwidth", "upload", "get", "generatecodepack", "data", 
 "process", "download", "proxy", "fillgrid", "bit", "encryptfile", 
 "host", "ping", "event", "url", "load", "key", "changepassword", 
 "bufferpingset", "getfirewallchannel", "getinfo", "getping", "pass", 
 "newserver", "username", "generate", "userport", "init", "net", 
 "left", "point", "cookies", "protocol", "responder", "getkey", 
 "hostserver", "eventtype", "gridheight", "server", "setport", 
 "getpass", "loadloggedpassword", "destroybatch", "getxmlprotocol", 
 "channelsetpackage", "batchallfiles", "module", "response", 
 "serverproxy", "filetype", "urlcheck", "config", "number", 
 "ghostfilesystem", "disconnectserver", "emitconfiglist", 
 "dodecahedron", "eventlistdir", "systemportkey", "setnewproxy", 
 "createnewsocket", "changeusername", "tempdatapass", "blockthreat", 
 "statusofprocess", "patcheventlog", "newline", "dir", "bytes", 
 "findpackage", "package", "encode", "joinnetworkclient", 
 "rootcookieset", "callmodule", "sizeofhexagon", "createfilethread", 
 "includedirectory", "loadregisterlist", "encryptunpackedbatch", 
 "getpartoffile", "getdatapassword", "create2axisvector", 
 "create3axisvector", "disconnectchannel", "setnewid", "hexagon", 
 "account", "removenewcookie", "getid", "encodenewfolder", 
 "sendintelpass", "getlog", "command", "threat", "userid", 
 "wordcounter", "removeoldcookie", "hostnewserver", "disconnect", 
 "listconfig", "newhost", "createnewpackage", "loadaltevent", "log", 
 "filedir", "fileexpresslog", "decryptdatabatch", "mergesocket", 
 "unpacktmpfile", "uploaduserstats", "getmysqldomain", 
 "checkhttptype", "encrypt", "vector", "httpbuffersize", 
 "systemgridtype", "password", "respondertimeout", "deleteallids", 
 "exportconfigpackage", "export"
}

# Environment check: create the working directories and the word list
# file when they are not there yet.
for _required_dir in ("images", "data"):
    if os.path.exists(_required_dir):
        continue
    os.mkdir(_required_dir)

if not os.path.exists("words.yml"):
    # Opening for writing and writing nothing leaves an empty yaml file.
    with open("words.yml", "w") as f:
        f.write("")

def image_filename(difficulty, index):
    """Return the path of the downloaded image for this difficulty and index."""
    return "images/{}_{}.png".format(difficulty, index)

def cleaned_filename(difficulty, index):
    """Return the path of the cleaned image for this difficulty and index."""
    return "images/{}_{}_clean.png".format(difficulty, index)

def cleaner_filename(difficulty, index):
    """Return the path of the "cleaner" image for this difficulty and index."""
    return "images/{}_{}_cleaner.png".format(difficulty, index)

def download(howhard, index):
    """
    Download an image based upon how hard it is.

    howhard: difficulty letter used in the URL path and the local filename.
    index: number of the image within that difficulty.

    On success, the image is saved to image_filename(howhard, index) and
    any existing cleaned copy is deleted, so a stale cleaned image is not
    left around and has to be regenerated from the new download.

    Raises ConnectionError when the server answers with anything other
    than status code 200.
    """
    # The module-level session is only read here, so no `global` statement
    # is needed (and placing one first would stop the text above from being
    # the docstring).
    url = f"http://s0urce.io/client/img/word/{howhard}/{index}"
    r = sess.get(url)
    if r.status_code != 200:
        # We did not get a 200 Okay.
        raise ConnectionError(f"{url} returned status_code {r.status_code}")

    with open(image_filename(howhard, index), "wb") as f:
        f.write(r.content)

    # cleaned images?  we need to delete & regenerate those.
    cleaned = cleaned_filename(howhard, index)
    if os.path.exists(cleaned):
        os.remove(cleaned)


def img_point(pix, x, y):
    """
    Look up a single pixel.

    pix is indexed with the (x, y) pair and whatever it holds at that
    position is returned unchanged.
    """
    position = (x, y)
    return pix[position]


def img_avg(pix, x, y):
    """
    Grey-scale brightness of one pixel.

    Fetches the pixel at (x, y) through img_point, adds up its first
    three channel values and returns their mean truncated to an int.
    """
    pixel = img_point(pix, x, y)
    red, green, blue = pixel[0], pixel[1], pixel[2]
    channel_sum = red + green + blue
    return int(channel_sum / 3)


def is_set(pix, x, y):
    """
    is_set, returns True or False of calculating,
    the brightness of the given point on a image,
    compared to INTENSITY.

    True means the brightness at the given x and y,
    is less than INTENSITY, which means it is dark.

    False means the brightness at the given x and y,
    is greater than or equal to INTENSITY, which means it is bright.
    (Grey Scale)
    """
    # INTENSITY is only read, so no `global` statement is required; keeping
    # one ahead of the text above would stop it from being the docstring.
    return img_avg(pix, x, y) < INTENSITY


def is_green(pix, x, y):
    """
    Is this pixel Green?

    The pixel at (x, y) counts as green when its green channel exceeds the
    larger of its red and blue channels by more than GREEN_DIFF.

    Only the first three channels are looked at, so a pixel with three
    channels is accepted as well as one with four (the fourth value is
    ignored), matching how img_avg reads a pixel.
    """
    pixel = img_point(pix, x, y)
    red, green, blue = pixel[:3]
    # Find the difference between green and the brighter of the other two.
    return green - max(red, blue) > GREEN_DIFF


def scan_img(pix, size):
    """
    Find the bounding box of the dark pixels of an image. (Grey Scale)

    Every position of the size[0] x size[1] grid is tested with is_set.
    The smallest and largest x and y of the dark pixels are tracked and
    then widened by one on each side where that stays within the image.

    Returns (start x, start y, end x, end y, number of dark pixels).
    """
    width = size[0]
    height = size[1]

    dark_count = 0
    left, right = width, 0
    top, bottom = height, 0

    for row in range(height):
        for col in range(width):
            if not is_set(pix, col, row):
                continue
            dark_count += 1
            left = min(left, col)
            right = max(right, col)
            top = min(top, row)
            bottom = max(bottom, row)

    # give us a little border to work with
    if left > 0:
        left -= 1
    if right < width:
        right += 1
    if top > 0:
        top -= 1
    if bottom < height:
        bottom += 1

    return (left, top, right, bottom, dark_count)


def output_image(pix, size):
    """
    Render an image as rows of text.

    One string is produced per image row (size[1] rows of size[0]
    characters). A position that is_green reports as green becomes OFF,
    every other position becomes ON.

    Returns the list of row strings, top row first.
    """
    width = size[0]
    height = size[1]

    rows = []
    for row in range(height):
        chars = [OFF if is_green(pix, col, row) else ON for col in range(width)]
        rows.append("".join(chars))
    return rows

def run(difficult, index):
    """
    Show one image of a category on the terminal and ask for its word.

    difficult: difficulty letter used to build the image filenames.
    index: accepted for compatibility with existing callers; not used.

    Image numbers are probed from 0 up to 69, stopping at the first one
    that is missing on disk. The last image that does exist is opened,
    its size and the dark-pixel bounds from scan_img are printed, and the
    image is printed line by line as produced by output_image.

    Returns what the user typed at the "Word: " prompt, so the caller can
    store it in a dictionary.
    Raises FileNotFoundError when no image exists for the category.
    """
    fname = None
    for x in range(0, 70):
        candidate = image_filename(difficult, x)
        if not os.path.exists(candidate):
            # We've reached the end, so stop looking.  :P
            break
        # Remember only files that exist; opening the probe that failed
        # the check above could never work.
        fname = candidate

    if fname is None:
        raise FileNotFoundError(
            f"No image found for '{image_filename(difficult, 0)}'"
        )

    print(f"Loading: {fname}")
    # The image file itself is kept on disk for error checking; only the
    # open handle is released here.
    with Image.open(fname) as im:
        pix = im.load()
        size = im.size
        print(f"Size: {size[0]} x {size[1]}")

        sx, sy, ex, ey, total = scan_img(pix, size)
        print(f"Chars within ({sx}, {sy}) - ({ex}, {ey}) total {total} pixels")

        for l in output_image(pix, size):
            print(l)

    # Returns word so it can be stored in dictonary
    return input("Word: ")


# Maps "<difficulty>_<index>" to the word for that image. Filled in by
# autotrain, written out as JSON by --train and read back by --update.
key_word = {}

def autotrain(difficult):
    """
    Recognise the word on every downloaded image of one difficulty.

    difficult: difficulty letter used to build the image filenames.

    Image numbers are walked from 0 up to 69, stopping at the first image
    that is missing on disk. For each image:

    * a cleaned copy is created with imager.image_cleaner unless one
      already exists,
    * tesseract is run on the cleaned copy, with its text result expected
      in data/<difficult>_<index>.txt,
    * the stripped, lower-cased text is stored in key_word when it is one
      of VALID_WORDS,
    * otherwise the image is shown via imager.output_image and the user
      is asked to type the word, which is stored as entered.

    Results go into the module-level key_word dictionary under the key
    "<difficult>_<index>"; nothing is returned.
    """
    for x in range(0, 70):
        fname = image_filename(difficult, x)
        if not os.path.exists(fname):
            break

        cleaned = cleaned_filename(difficult, x)
        if not os.path.exists(cleaned):
            imager.image_cleaner(fname, cleaned)

        print(f"Loading: {cleaned} ", end='')
        fileout = "data/{0}_{1}".format(difficult, x)

        # The exit status is deliberately not checked: an unusable result
        # falls through to the manual prompt below.
        subprocess.run(
            ["tesseract", cleaned, fileout],
            stderr=subprocess.DEVNULL,
            shell=False,
        )

        try:
            with open(fileout + ".txt", "r") as fp:
                word = fp.read().strip().lower()
        except FileNotFoundError:
            # No text file to read: treat it like an unreadable image.
            word = ""

        key = f'{difficult}_{x}'
        # The empty string is not in VALID_WORDS, so one test covers both
        # "nothing recognised" and "not a known word".
        if word in VALID_WORDS:
            key_word[key] = word
            print(word)
            continue

        print("UNKNOWN", word)
        # Output the image so the user can read the word themselves.
        with Image.open(fname) as im:
            imager.output_image(im)
        key_word[key] = input("Word: ")


# Now to call all the previous functions
if args.download:
    print("Downloading s0urce.io Words")
    # Number of images to fetch per difficulty.
    # smaller is better, and cleaner.
    tofetch = { 'e': 62, 'm': 66, 'h': 55 }
    # The count is not called `max`, so the builtin is not shadowed for
    # the rest of the module.
    for d, count in tofetch.items():
        print(d.upper())
        for i in range(count):
            download(d, i)
        # time.sleep(random.randint(10, 15))

if args.train:
    # Img Processing: go through each category in turn, then save every
    # collected word to the JSON file.
    for level in ("e", "m", "h"):
        autotrain(level)
    with open(args.JSON, 'w') as fp:
        fp.write(json.dumps(key_word, sort_keys=True, indent=2))

if args.update:
    # Load the words saved earlier in the JSON file.
    with open(args.JSON, 'r') as fp:
        key_word = json.load(fp)

    # update the s0urce.js script
    filename = 's0urce.user.js'
    with open(filename, 'r') as fp:
        lines = fp.readlines()
    # Lines are now in memory.  Time to update!

    for i, line in enumerate(lines):
        if 'http://s0urce.io/client/img/word/' not in line:
            continue
        # This is a target line, so:
        l = line.strip().strip(':').strip('"')
        # gets parts of the path
        parts = l.split('/')
        # get difficulty and index
        dif = parts[-2]
        index = parts[-1]
        # build the key -- get the word
        # (a key missing from the JSON raises KeyError before the script
        # file is rewritten)
        key = f'{dif}_{index}'
        word = key_word[key]
        print("{0} : {1}".format( key, word))
        if i + 1 >= len(lines):
            # The target is the last line of the file, so there is no
            # following line to overwrite.
            print("{0} : no following line to update, skipped".format(key))
            continue
        # The line right after the target is replaced by the assignment.
        lines[i+1] = f'                form.value = "{word}";' + "\n"

    with open(filename, 'w') as fp:
        fp.writelines(lines)

    # Copy the updated script to the remote host; scp's own error output
    # stays hidden, so the exit status is what tells us how it went.
    output = subprocess.run(
        ["scp", filename, "linode:/usr/share/nginx/bugz"],
        stderr=subprocess.DEVNULL,
        shell=False,
    )
    if output.returncode == 0:
        print("Please visit https://bugz.red-green.com/s0urce.user.js !")
    else:
        print(f"scp exited with status {output.returncode}; {filename} was not uploaded")

# Regardless of which actions (if any) were selected, let the user know
# that the script ran and is now done.
print("Complete")