瀏覽代碼

Updated: json output, file cleaning.

bugz 5 年之前
父節點
當前提交
8fbadc5628
共有 2 個文件被更改,包括 280 次插入15 次删除
  1. 95 0
      dataLoad.py
  2. 185 15
      test.js

+ 95 - 0
dataLoad.py

@@ -3,8 +3,19 @@
 from PIL import Image
 from PIL import Image
 from pprint import pprint
 from pprint import pprint
 
 
+<<<<<<< Updated upstream
 import sys, time, os, requests, random, json, argparse
 import sys, time, os, requests, random, json, argparse
 import subprocess, yaml
 import subprocess, yaml
+=======
+import sys
+import time
+import os
+import requests
+import random
+import json
+import argparse
+import subprocess
+>>>>>>> Stashed changes
 
 
 parser = argparse.ArgumentParser(description="S0urce.io utility program.")
 parser = argparse.ArgumentParser(description="S0urce.io utility program.")
 parser.add_argument("--download", help="Download Images", action="store_true")
 parser.add_argument("--download", help="Download Images", action="store_true")
@@ -208,6 +219,38 @@ def output_image(pix, size):
 
 
     return result
     return result
 
 
+def image_filename(difficulty, index):
+    return f"images/{difficulty}_{index}.png"       
+
+def cleaned_filename(difficulty, index):
+    return f"images/{difficulty}_{index}_clean.png"       
+
+
+def image_cleaner(source, destination):
+    image = Image.open(source)
+    # pixels = image.load()
+    size = image.size
+    print(f"Size: {size[0]} x {size[1]}")
+
+    for y in range(0, size[1]):
+        s = ""
+        for x in range(0, size[0]):
+            (r, g, b, _) = image.getpixel( (x,y) )
+            high = r
+            if b > high:
+                high = b
+            diff = g - high
+            is_green = diff > GREEN_DIFF
+
+            if is_green:
+                image.putpixel( (x,y), (255,255,255,255) )
+            else:
+                image.putpixel( (x,y), (0, 0, 0, 255) )
+
+            # if is_set(pix, x, y):
+            # if not is_green(pix, x, y):
+        # result.append(s)
+    image.save(destination)
 
 
 def run(difficult, index):
 def run(difficult, index):
     """
     """
@@ -220,11 +263,22 @@ def run(difficult, index):
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     """
     """
 
 
+<<<<<<< Updated upstream
     fname = f"images/{difficult}_{x}.png"
     fname = f"images/{difficult}_{x}.png"
 
 
     if not os.path.exists(fname):
     if not os.path.exists(fname):
         print("Could not find '{0}'".format(fname))
         print("Could not find '{0}'".format(fname))
         return False # We did not complete
         return False # We did not complete
+=======
+    for x in range(0, 70):
+        fname = image_filename(difficult, x)
+
+        if not os.path.exists(fname):
+            # print("Could not find '{0}'".format(fname))
+            # continue
+            # We've reached the end, so stop looking.  :P
+            break
+>>>>>>> Stashed changes
 
 
     print(f"Loading: {fname}")
     print(f"Loading: {fname}")
     im = Image.open(fname)
     im = Image.open(fname)
@@ -256,7 +310,13 @@ def run(difficult, index):
     # print(f"File '{fname}' automatically removed")
     # print(f"File '{fname}' automatically removed")
 
 
 
 
+<<<<<<< Updated upstream
 def autotrain(difficult, index):
 def autotrain(difficult, index):
+=======
+key_word = {}
+
+def autotrain(difficult):
+>>>>>>> Stashed changes
     """
     """
     run, represents a single execution of components to the image, (Actually we do it 1 category at a time instead of just 1 single execution)
     run, represents a single execution of components to the image, (Actually we do it 1 category at a time instead of just 1 single execution)
     those components do the following...                           (Each category has around 70 items so we standardize on 70, but           )
     those components do the following...                           (Each category has around 70 items so we standardize on 70, but           )
@@ -267,6 +327,7 @@ def autotrain(difficult, index):
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     Where we ask the user what the word is, and after that we save all that to a file in the data directory.
     """
     """
 
 
+<<<<<<< Updated upstream
     # Rearranged the code so I can have it return after each word
     # Rearranged the code so I can have it return after each word
     fname = f"images/{difficult}_{x}.png"
     fname = f"images/{difficult}_{x}.png"
 
 
@@ -288,6 +349,34 @@ def autotrain(difficult, index):
         print(word)
         print(word)
 
 
     return word # Save this to the dict
     return word # Save this to the dict
+=======
+    for x in range(0, 70):
+        fname = image_filename(difficult, x)
+
+        if not os.path.exists(fname):
+            break
+            # print("Could not find '{0}'".format(fname))
+            # continue
+
+        cleaned = cleaned_filename(difficult, x)
+        if not os.path.exists(cleaned):
+            image_cleaner(fname, cleaned)
+
+        print(f"Loading: {cleaned}")
+        fileout = "data/{0}_{1}".format(difficult, x)
+
+        output = subprocess.run(
+            ["tesseract", cleaned, fileout],
+            stderr=subprocess.DEVNULL,
+            # capture_output=False,
+            shell=False,
+        )
+
+        with open(fileout + ".txt", "r") as fp:
+            word = fp.read().strip().lower()
+            key_word[f'{difficult}_{x}'] = word
+            print(word)
+>>>>>>> Stashed changes
 
 
 
 
 # Now to call all the previous functions
 # Now to call all the previous functions
@@ -315,6 +404,7 @@ if args.train:
     # Img Processing: Run through every single category and every single word
     # Img Processing: Run through every single category and every single word
     wordDict = {}
     wordDict = {}
     for level in ["e", "m", "h"]:
     for level in ["e", "m", "h"]:
+<<<<<<< Updated upstream
         for x in range(0, 66):
         for x in range(0, 66):
             at = autotrain(level, x) 
             at = autotrain(level, x) 
             if(at != False): # If it is complete store it
             if(at != False): # If it is complete store it
@@ -322,6 +412,11 @@ if args.train:
 
 
     with open('words.yml', 'w') as f:
     with open('words.yml', 'w') as f:
         yaml.dump(wordDict, f) # Writes it automatically into the file
         yaml.dump(wordDict, f) # Writes it automatically into the file
+=======
+        autotrain(level)
+    with open(args.JSON, 'w') as fp:
+        json.dump(key_word, fp, sort_keys=True, indent=2)
+>>>>>>> Stashed changes
 
 
 # ----------------------------------------------------------------------------------------
 # ----------------------------------------------------------------------------------------
 # All below was in a separate dataJS.py file... but now I have fixed it so it's 1 script!
 # All below was in a separate dataJS.py file... but now I have fixed it so it's 1 script!

+ 185 - 15
test.js

@@ -1,15 +1,185 @@
-
-// Create 'database' to store processed images and word
-let db = [
-// T
-
-
-// t
-];
-
-console.log("db contains " + db.length + " elements.");
-for(var x = 0; x < db.length; x++){
-    var temp = db[x].length;
-    console.log("db[" + x + "] = " + db[x][temp-1]);
-}
-
+{
+  "e_0": "anon",
+  "e_1": "buffer",
+  "e_10": "type",
+  "e_11": "host",
+  "e_12": "ping",
+  "e_13": "bytes",
+  "e_14": "info",
+  "e_15": "set",
+  "e_16": "cookies",
+  "e_17": "file",
+  "e_18": "add",
+  "e_19": "url",
+  "e_2": "com",
+  "e_20": "add",
+  "e_21": "intel",
+  "e_22": "root",
+  "e_23": "",
+  "e_24": "system",
+  "e_25": "join",
+  "e_26": "temp",
+  "e_27": "write",
+  "e_28": "client",
+  "e_29": "port",
+  "e_3": "reset",
+  "e_30": "key",
+  "e_31": "upload",
+  "e_32": "part",
+  "e_33": "ghost",
+  "e_34": "pass",
+  "e_35": "handle",
+  "e_36": "size",
+  "e_37": "bit",
+  "e_38": "global",
+  "e_39": "right",
+  "e_4": "num",
+  "e_40": "xml",
+  "e_41": "stat",
+  "e_42": "net",
+  "e_43": "remove",
+  "e_44": "delete",
+  "e_45": "user",
+  "e_46": "domain",
+  "e_47": "signal",
+  "e_48": "cail",
+  "e_49": "socket",
+  "e_5": "",
+  "e_50": "log",
+  "e_51": "val",
+  "e_52": "dir",
+  "e_53": "init",
+  "e_54": "event",
+  "e_55": "list",
+  "e_56": "emit",
+  "e_57": "loop",
+  "e_58": "data",
+  "e_59": "lead",
+  "e_6": "status",
+  "e_60": "get",
+  "e_61": "poly",
+  "e_7": "point",
+  "e_8": "send",
+  "e_9": "count",
+  "h_0": "leadregisterlist",
+  "h_1": "decryptdatabatch",
+  "h_10": "blockthreat",
+  "h_11": "wordcounter",
+  "h_12": "removeoldcookie",
+  "h_13": "callmodule",
+  "h_14": "patcheventlog",
+  "h_15": "setnewproxy",
+  "h_16": "ghestfilesystem",
+  "h_17": "rootcookieset",
+  "h_18": "mergesocket",
+  "h_19": "encodenewfolder",
+  "h_2": "getfirewallchannel",
+  "h_20": "systemportkey",
+  "h_21": "encryptunpackedbatch",
+  "h_22": "respondertimeout",
+  "h_23": "uploaduserstats",
+  "h_24": "emitconfiglist",
+  "h_25": "channelsetpackage",
+  "h_26": "systemgridtype",
+  "h_27": "bufferpingset",
+  "h_28": "exportconfigpackage",
+  "h_29": "getpartoffile",
+  "h_3": "deleteallids",
+  "h_30": "createnewsocket",
+  "h_31": "eventlistdir",
+  "h_32": "unpacktmpfile",
+  "h_33": "disconnectserver",
+  "h_34": "leadloggedpassword",
+  "h_35": "dodecahedron",
+  "h_36": "generatecodepack",
+  "h_37": "echeckhttptype",
+  "h_38": "create3axisvector",
+  "h_39": "batchallfiles",
+  "h_4": "changepassword",
+  "h_40": "joinnetworkclient",
+  "h_41": "getxmlprotecol",
+  "h_42": "hostnewserver",
+  "h_43": "createnewpackage",
+  "h_44": "sendintelpass",
+  "h_45": "getdatapassword",
+  "h_46": "destroybatch",
+  "h_47": "httpbuffersize",
+  "h_48": "changeusername",
+  "h_49": "includedirectory",
+  "h_5": "tempdatapass",
+  "h_50": "disconnectchannel",
+  "h_51": "removenewcookie",
+  "h_52": "statuscfprocess",
+  "h_53": "leadaltevent",
+  "h_54": "fileexpresslog",
+  "h_6": "create2axisvector",
+  "h_7": "sizeofhexagon",
+  "h_8": "getmysqldomain",
+  "h_9": "ecreatefilethread",
+  "m_0": "sizeoft",
+  "m_1": "proxy",
+  "m_10": "aqridwidth",
+  "m_11": "mysal",
+  "m_12": "userport",
+  "m_13": "decrypt",
+  "m_14": "llisteonfig",
+  "m_15": "getping",
+  "m_16": "server",
+  "m_17": "findpackade",
+  "m_18": "channel",
+  "m_19": "encryptfile",
+  "m_2": "newline",
+  "m_20": "filedir",
+  "m_21": "serverproxy",
+  "m_22": "account",
+  "m_23": "package",
+  "m_24": "length",
+  "m_25": "urlicheck",
+  "m_26": "disconnect",
+  "m_27": "constructer",
+  "m_28": "getlodg",
+  "m_29": "username",
+  "m_3": "loadbytes",
+  "m_30": "protocel",
+  "m_31": "response",
+  "m_32": "setping",
+  "m_33": "eventtype",
+  "m_34": "writefile",
+  "m_35": "filetype",
+  "m_36": "gridheight",
+  "m_37": "password",
+  "m_38": "accountname",
+  "m_39": "threat",
+  "m_4": "fillgrid",
+  "m_40": "number",
+  "m_41": "getid",
+  "m_42": "getinfo",
+  "m_43": "syscall",
+  "m_44": "command",
+  "m_45": "generate",
+  "m_46": "setstats",
+  "m_47": "newhost",
+  "m_48": "module",
+  "m_49": "getkey",
+  "m_5": "getpass",
+  "m_50": "getfile",
+  "m_51": "setport",
+  "m_52": "datatype",
+  "m_53": "thread",
+  "m_54": "hexagon",
+  "m_55": "decryptfile",
+  "m_56": "setnewid",
+  "m_57": "process",
+  "m_58": "setcookie",
+  "m_59": "config",
+  "m_6": "download",
+  "m_60": "hostserver",
+  "m_61": "export",
+  "m_62": "encode",
+  "m_63": "encrypt",
+  "m_64": "newserver",
+  "m_65": "userid",
+  "m_7": "connect",
+  "m_8": "responder",
+  "m_9": "vector"
+}