Browse Source

Fixing unicode processing. streamlined.

Instead of trying to replace every unicode
symbol we know, we pull the unicode char
out and replace it.
bugz 4 years ago
parent
commit
e2117ba5ab
3 changed files with 203 additions and 94 deletions
  1. 176 92
      doorman.cpp
  2. 25 1
      utils.cpp
  3. 2 1
      utils.h

+ 176 - 92
doorman.cpp

@@ -302,33 +302,22 @@ void terminal_output(TRANSLATE xlate, std::string &buffer) {
     // 1 char searches:
     if (length >= 1) {
       uchoad = buffer[length - 1];
-      if ((uchoad & 0xe0) == 0xc0) {
-        // CHOAD!
+      if (unicode_len(uchoad))
         save = 1;
-      } else {
-        if ((uchoad & 0xf0) == 0xe0) {
-          save = 1;
-        }
-      }
     }
 
     if (length >= 2) {
       uchoad = buffer[length - 2];
-      if ((uchoad & 0xf0) == 0xe0) {
-        // CHOAD
+      if (unicode_len(uchoad) > 2)
         save = 2;
-      }
     }
 
-    /*  // if we're going back 3 chars, and the len=3 chars, leave it along. :P
+    // We'll probably never see any of these, but it's here if we need it.
     if (length >= 3) {
       uchoad = buffer[length - 3];
-      if ((uchoad & 0xf0) == 0xe0) {
-        // CHOAD
+      if (unicode_len(uchoad) > 3)
         save = 3;
-      }
     }
-    */
 
     std::string saved;
 
@@ -341,8 +330,9 @@ void terminal_output(TRANSLATE xlate, std::string &buffer) {
       ZF_LOGV_MEM(saved.data(), saved.size(), "SAVED:");
     }
 
-    static std::map<const char *, int> utf8cp437 = {
-
+    // TO FIX:  Pull chars out, and look up in the map.  Don't brute force your
+    // way through all of the items.
+    static std::map<std::string, int> utf8cp437 = {
         {"\xe2\x98\xba", 1},
         {"\xe2\x98\xbb", 2},
         {"\xe2\x99\xa5", 3},
@@ -503,23 +493,56 @@ void terminal_output(TRANSLATE xlate, std::string &buffer) {
         {"\xe2\x96\xa0", 0xfe},
         {"\xc2\xa0", 0xff}};
 
-    if ((buffer.find('\xe2') != std::string::npos) ||
-        (buffer.find('\xc2') != std::string::npos) ||
-        (buffer.find('\xc3') != std::string::npos) ||
-        (buffer.find('\xce') != std::string::npos) ||
-        (buffer.find('\xcf') != std::string::npos)) {
-      int c = 0;
-
-      for (auto it = utf8cp437.begin(); it != utf8cp437.end(); ++it) {
-        while (replace(buffer, it->first, std::string(1, char(it->second)))) {
-          c++;
+    int c = 0;
+
+    size_t pos;
+    std::vector<char> keys = {'\xc2', '\xc3', '\xce', '\xcf', '\xe2'};
+    for (auto ikey = keys.begin(); ikey != keys.end(); ++ikey) {
+      size_t spos = 0;
+      while ((pos = buffer.find(*ikey, spos)) != std::string::npos) {
+        char uchoad = buffer[pos];
+        int ulen = unicode_len(uchoad);
+
+        std::string ustr = buffer.substr(pos, ulen);
+        ZF_LOGE_MEM(ustr.data(), ustr.length(), "unicode %d : (@%ld)", ulen,
+                    pos);
+        auto upos = utf8cp437.find(ustr.c_str());
+        if (upos != utf8cp437.end()) {
+          std::string to(1, (char)upos->second);
+          while (replace(buffer, ustr, to)) {
+            c++;
+          }
+          spos = 0;
+        } else {
+          ZF_LOGE("failed to locate.");
+          spos = pos + 1;
         }
       }
-      if (c) {
-        ZF_LOGE("Replaced %d", c);
-        ZF_LOGV_MEM(buffer.data(), buffer.size(), "After Replace:");
-      }
     }
+    if (c) {
+      ZF_LOGE("vector/map Replaced %d", c);
+      ZF_LOGV_MEM(buffer.data(), buffer.size(), "After Replace:");
+    }
+
+    /*
+        if ((buffer.find('\xe2') != std::string::npos) ||
+            (buffer.find('\xc2') != std::string::npos) ||
+            (buffer.find('\xc3') != std::string::npos) ||
+            (buffer.find('\xce') != std::string::npos) ||
+            (buffer.find('\xcf') != std::string::npos)) {
+          c = 0;
+
+          for (auto it = utf8cp437.begin(); it != utf8cp437.end(); ++it) {
+            while (replace(buffer, it->first, std::string(1, char(it->second))))
+       { c++;
+            }
+          }
+          if (c) {
+            ZF_LOGE("Replaced %d", c);
+            ZF_LOGV_MEM(buffer.data(), buffer.size(), "After Replace:");
+          }
+        }
+    */
 
     // Convert bright to bold.
     while (replace(buffer, "\x1b[90m", "\x1b[1;30m")) {
@@ -562,6 +585,9 @@ void terminal_output(TRANSLATE xlate, std::string &buffer) {
         │ │ │ │
         ├ ─ ┼ ┤
         └ ─ ┴ ┘
+
+        We cheat here.  Instead of converting to unicode, we convert to CP437
+        chars.
          */
         if (dcs_mode == 0) {
           switch (c) {
@@ -612,7 +638,7 @@ void terminal_output(TRANSLATE xlate, std::string &buffer) {
     buffer.clear();
     buffer.insert(0, saved);
     saved.clear();
-  };
+  }
 }
 
 void help(void) {
@@ -623,6 +649,7 @@ void help(void) {
   printf("\t-NOLOG\tNo logging\n");
   printf("\t-NOC\tDon't allow Ctrl-C\n");
   printf("\t-NOTERM\tDon't Translate Keys\n");
+  printf("\t-LOGTIME\tAdd Hours/Minutes to logfile name\n");
 }
 
 int main(int argc, char *argv[]) {
@@ -646,6 +673,7 @@ int main(int argc, char *argv[]) {
   int CATCH_CTRLC = 0;
   int NO_LOGGING = 0;
   int TERM_XLATE = 1;
+  int LOG_TIME = 0;
 
   int x;
   for (x = 1; x < argc; x++) {
@@ -669,6 +697,10 @@ int main(int argc, char *argv[]) {
       TERM_XLATE = 0;
       continue;
     }
+    if (strcasecmp("-LOGTIME", argv[x]) == 0) {
+      LOG_TIME = 1;
+      continue;
+    }
     if ((strcasecmp("-H", argv[x]) == 0) || (strcmp("-?", argv[x]) == 0)) {
       // display help information.
       help();
@@ -683,6 +715,7 @@ int main(int argc, char *argv[]) {
   }
 
   if (x == argc) {
+    printf("No command to run found.\n");
     help();
     return 0;
   }
@@ -698,9 +731,14 @@ int main(int argc, char *argv[]) {
 
     buffer << "doorman-" << tmp->tm_year + 1900 << "-" << std::setfill('0')
            << std::setw(2) << tmp->tm_mon + 1 << "-" << std::setfill('0')
-           << std::setw(2) << tmp->tm_mday << ".log";
+           << std::setw(2) << tmp->tm_mday;
+    if (LOG_TIME) {
+      buffer << "-" << std::setw(2) << tmp->tm_hour << std::setw(2)
+             << tmp->tm_min;
+    }
+    buffer << ".log";
     logfile = buffer.str();
-  };
+  }
 
   if (NO_LOGGING) {
     zf_log_set_output_level(ZF_LOG_NONE);
@@ -830,11 +868,11 @@ int main(int argc, char *argv[]) {
       // the descriptor must be unique to the program; the documentation
       // recommends computing it from the descriptors in use, plus 1
 
-      timeout.tv_sec = 0;
+      timeout.tv_sec = 60;
       timeout.tv_usec = 0;
       // if (select(master + 1, &read_fd, &write_fd, &except_fd, &timeout) == 0)
       // {
-      if (select(master + 1, &read_fd, &write_fd, &except_fd, NULL) == 0) {
+      if (select(master + 1, &read_fd, &write_fd, &except_fd, &timeout) == 0) {
         ZF_LOGI("TIMEOUT");
         // This means timeout!
       }
@@ -854,27 +892,10 @@ int main(int argc, char *argv[]) {
           buffer.append(read_buffer, total);
 
           terminal_output(translate, buffer);
-
-          /*
-          if (0) { // zmodem) {
-            // ZF_LOGI("Buffer %lu bytes, zmodem...", buffer.size());
-
-            write(STDOUT_FILENO, buffer.data(), buffer.size());
-            // console_receive(&console, buffer);
-            buffer.clear();
-          } else {
-            // ZF_LOGV_MEM(buffer + size, total, "Read %d bytes:", total);
-            // size += total;
-            ZF_LOGV_MEM(buffer.data(), buffer.size(), "Buffer now:");
-
-            write(STDOUT_FILENO, buffer.data(), buffer.size());
-            // console_receive(&console, buffer);
-            buffer.clear();
-          }
-          */
-
-        } else
+        } else {
+          ZF_LOGE("read = -1, break/exit");
           break;
+        }
       }
 
       // is standard input set in read_fd?
@@ -885,6 +906,25 @@ int main(int argc, char *argv[]) {
         std::string input_str(input, r);
         // input[r] = 0;
 
+        // Replacement escape sequences for F1..F12, indexed by
+        // (function key number - 1).  The F11 slot is left empty:
+        // F11 didn't work, it's a "fullscreen" key.
+        static const char *func_keys[12] = {"\x1bOP",
+                                            "\x1bOQ",
+                                            "\x1bOR",
+                                            "\x1bOS",
+                                            "\x1b[15\x7e",
+                                            "\x1b[17\x7e",
+                                            "\x1b[18\x7e",
+                                            "\x1b[19\x7e",
+                                            "\x1b[20\x7e",
+                                            "\x1b[21\x7e",
+                                            "",
+                                            "\x1b[24\x7e"};
+
+        // left right up down ins del home end PgUp PgDown
+        // End is "\x1bOF": the same sequence the "\x1b[K" translation emitted
+        // before this table existed, and the same form as Home ("\x1bOH").
+        static const char *dir_keys[] = {
+            "\x1bOD",     "\x1bOC", "\x1bOA", "\x1bOB",     "\x1b[2\x7e",
+            "\x1b[3\x7e", "\x1bOH", "\x1bOF", "\x1b[5\x7e", "\x1b[6\x7e"};
+
         // e escreva no bc
         ZF_LOGI_MEM(input_str.data(), input_str.size(), "INPUT <<");
 
@@ -895,39 +935,39 @@ int main(int argc, char *argv[]) {
         if (TERM_XLATE) {
           // We're going to try it without any buffer or understanding of ANSI
           // codes.
-          int c;
+          int c = 0;
 
-          while (replace(input_str, "\x1b[A", "\x1bOA")) {
+          while (replace(input_str, "\x1b[A", dir_keys[2])) { // OA
             c++;
           }
-          while (replace(input_str, "\x1b[B", "\x1bOB")) {
+          while (replace(input_str, "\x1b[B", dir_keys[3])) { // OB
             c++;
           }
-          while (replace(input_str, "\x1b[C", "\x1bOC")) {
+          while (replace(input_str, "\x1b[C", dir_keys[1])) { // OC
             c++;
           }
-          while (replace(input_str, "\x1b[D", "\x1bOD")) {
+          while (replace(input_str, "\x1b[D", dir_keys[0])) { // OD
             c++;
           }
 
-          while (replace(input_str, "\x1b[U", "\x1b[6\x7e")) {
+          while (replace(input_str, "\x1b[U", dir_keys[9])) { // [6~
             c++;
           }
-          while (replace(input_str, "\x1b[V", "\x1b[5\x7e")) {
+          while (replace(input_str, "\x1b[V", dir_keys[8])) { // [5~
             c++;
           }
-          while (replace(input_str, "\x1b[H", "\x1bOH")) {
+          while (replace(input_str, "\x1b[H", dir_keys[6])) { // OH
             c++;
           }
-          while (replace(input_str, "\x1b[K", "\x1bOF")) {
+          while (replace(input_str, "\x1b[K", dir_keys[7])) { // OF
             c++;
           }
-          while (replace(input_str, "\x1b[@", "\x1b[2\x7e")) {
+          while (replace(input_str, "\x1b[@", dir_keys[4])) { // [2~
             c++;
           }
 
           /*  F1-F5 are "broken".  They all map to this.
-          while (replace(input_str, "\x1b[1", "\x1bOR")) { // F3
+          while (replace(input_str, "\x1b[1", "\x1bOR")) { // F?
             c++;
           }
           */
@@ -935,7 +975,18 @@ int main(int argc, char *argv[]) {
             c++;
           }
 
+          // NON-Doorways, DEL = 0x7f
+          // Terminal DEL = "\x1b[3\x7e"
+
+          while (replace(input_str, std::string("\x7f"), dir_keys[5])) {
+            c++;
+          }
+
           // DOORWAYS mode:
+
+          // Future Fix:  Find char(0) pos, get next char, look those 2 chars up
+          // in a map.
+
           if (input_str.find(char(0)) != std::string::npos) {
             // We found a null character, TIAS
             ZF_LOGE("null found.  Start doorways replace mode.");
@@ -943,80 +994,95 @@ int main(int argc, char *argv[]) {
             using namespace std::string_literals;
 
             // F1-F12
-            while (replace(input_str, std::string("\x00\x3b"s), std::string("\x1bOA") )) {
-              c++;
-            }
-            while (replace(input_str, std::string("\x00\x3c"s), "\x1bOB")) {
+            while (replace(input_str, std::string("\x00\x3b"s),
+                           std::string(func_keys[0]))) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x3d"s), "\x1bOC")) {
+            while (replace(input_str, std::string("\x00\x3c"s), func_keys[1])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x3e"s), "\x1bOD")) {
+            while (replace(input_str, std::string("\x00\x3d"s), func_keys[2])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x3f"s), "\x1b[6\x7e")) {
+            while (replace(input_str, std::string("\x00\x3e"s), func_keys[3])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x40"s), "\x1b[5\x7e")) {
+            while (replace(input_str, std::string("\x00\x3f"s), func_keys[4])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x41"s), "\x1bOH")) {
+            while (replace(input_str, std::string("\x00\x40"s), func_keys[5])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x42"s), "\x1bOF")) {
+            while (replace(input_str, std::string("\x00\x41"s), func_keys[6])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x43"s), "\x1b[2\x7e")) {
+            while (replace(input_str, std::string("\x00\x42"s), func_keys[7])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x44"s), "F10")) {
+            while (replace(input_str, std::string("\x00\x43"s), func_keys[8])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x85"s), "F11")) {
+            while (replace(input_str, std::string("\x00\x44"s), func_keys[9])) {
               c++;
             }
-            while (replace(input_str, std::string("\x00\x86"s), "F12")) {
+            /*
+            while (replace(input_str, std::string("\x00\x85"s), func_keys[10]))
+            { // F11 c++;
+            } */
+            while (
+                replace(input_str, std::string("\x00\x86"s), func_keys[11])) {
               c++;
             }
 
             // Delete = 0x7f (not here)
             // Left, Right, Up, Down, Insert, Home, End, PgUp, PgDwn
-            while (replace(input_str, std::string("\x00\x4b"s), "L")) {
+            while (replace(input_str, std::string("\x00\x4b"s),
+                           dir_keys[0])) { // Left
               c++;
             }
-            while (replace(input_str, std::string("\x00\x4d"s), "R")) {
+            while (replace(input_str, std::string("\x00\x4d"s),
+                           dir_keys[1])) { // Right
               c++;
             }
-            while (replace(input_str, std::string("\x00\x48"s), "U")) {
+            while (replace(input_str, std::string("\x00\x48"s),
+                           dir_keys[2])) { // Up
               c++;
             }
-            while (replace(input_str, std::string("\x00\x50"s), "D")) {
+            while (replace(input_str, std::string("\x00\x50"s),
+                           dir_keys[3])) { // Down
               c++;
             }
-            while (replace(input_str, std::string("\x00\x52"s), "INS")) {
+            while (replace(input_str, std::string("\x00\x52"s),
+                           dir_keys[4])) { // Ins
               c++;
             }
             // Delete is 0x7f (no null)
-            while (replace(input_str, std::string("\x00\x47"s), "HOME")) {
+
+            while (replace(input_str, std::string("\x00\x47"s),
+                           dir_keys[6])) { // Home
               c++;
             }
-            while (replace(input_str, std::string("\x00\x4f"s), "END")) {
+            while (replace(input_str, std::string("\x00\x4f"s),
+                           dir_keys[7])) { // End
               c++;
             }
-            while (replace(input_str, std::string("\x00\x49"s), "PU")) {
+            while (replace(input_str, std::string("\x00\x49"s),
+                           dir_keys[8])) { // Page Up
               c++;
             }
-            while (replace(input_str, std::string("\x00\x51"s), "PD")) {
+            while (replace(input_str, std::string("\x00\x51"s),
+                           dir_keys[9])) { // Page Down
               c++;
             }
 
             if (c) {
-              ZF_LOGE_MEM(input_str.data(), input_str.size(), "Input (%d changed):", c);
+              ZF_LOGE_MEM(input_str.data(), input_str.size(),
+                          "Input (%d changed):", c);
             }
           }
 
-          ZF_LOGD_MEM(input_str.data(), input_str.size(), "Write Input String:");
+          ZF_LOGD_MEM(input_str.data(), input_str.size(),
+                      "Write Input String:");
 
           // write(master, &input, r);
           write(master, input_str.data(), input_str.size());
@@ -1027,6 +1093,24 @@ int main(int argc, char *argv[]) {
       }
     }
 
+    // Ok, we get to EXIT, but the ... program is still open/running.  Why?
+
+    // I think this is a netcat problem.  It can't tell that the program has
+    // been closed, so it just flips out.
+
+    // FIXED with pipexec
+
+    /*
+    close(master);
+    close(STDIN_FILENO);
+    close(STDOUT_FILENO);
+
+    int wstate;
+    int rc = waitpid(pid, &wstate, 0);
+
+    ZF_LOGE("waitpid %d (%d) = %d", rc, pid, wstate);
+    */
+
     // Restore terminal
     tcsetattr(1, TCSAFLUSH, &orig1);
     ZF_LOGD("exit");

+ 25 - 1
utils.cpp

@@ -220,8 +220,32 @@ int string_insert(char *buffer, size_t max_length, size_t pos,
 }
 
 void remove_all(std::string &str, char c) {  // remove every occurrence of c from str, in place
-str.erase(std::remove(str.begin(), str.end(), c), str.end());
+  str.erase(std::remove(str.begin(), str.end(), c), str.end());
 }
+
+int unicode_len(char choad) {
+  /*
+    Classify `choad` as the lead (first) byte of a UTF-8 sequence.
+
+    Returns the total length of the sequence in bytes (2, 3 or 4), or 0 if
+    `choad` is not a multi-byte lead byte.  Plain ASCII (0xxxxxxx) and
+    continuation bytes (10xxxxxx) both return 0.
+
+    Only the bit pattern of this one byte is inspected; the bytes that follow
+    it are not looked at.  Each mask keeps only bits of the low byte, so the
+    result is the same whether plain char is signed or unsigned.
+   */
+
+  // https://en.wikipedia.org/wiki/UTF-8
+  
+  // 110xxxxx
+  if ((choad & 0xe0) == 0xc0) {
+    // lead byte of a 2-byte sequence
+    return 2;
+  }
+  // 1110xxxx
+  if ((choad & 0xf0) == 0xe0) {
+    return 3;
+  }
+  // 11110xxx
+  if ((choad & 0xf8) == 0xf0) {
+    return 4;
+  }
+  return 0;
+}
+
 /*
 Pascal String Copy.  Copy from pascal string, to C String.
 

+ 2 - 1
utils.h

@@ -39,7 +39,8 @@ std::map<std::string, std::string> read_configfile(std::string filename);
 extern std::map<std::string, std::string> CONFIG;
 bool replace(std::string &str, const std::string &from, const std::string &to);
 void remove_all(std::string &str, char c);
-
+int unicode_len(char c); 
+// int find_unicode(std::string &str, int pos);
 int harry_level(void);
 
 #include <iconv.h>