From a78faef856f97dbb5dffb0a83672371044487dfb Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 22 Feb 2025 06:37:09 +0000
Subject: [PATCH] [Github Action] Automated trickest wordlists update.

---
 .bin/wordlist-updaters/status.json            |  2 +-
 .../top-10000.txt                             |  8 +-
 utf8fixer.py                                  | 73 +++++++++++++++++++
 3 files changed, 78 insertions(+), 5 deletions(-)
 create mode 100755 utf8fixer.py

diff --git a/.bin/wordlist-updaters/status.json b/.bin/wordlist-updaters/status.json
index 1f5711a9..8e039054 100644
--- a/.bin/wordlist-updaters/status.json
+++ b/.bin/wordlist-updaters/status.json
@@ -1,6 +1,6 @@
 {
     "Jwt secrets update": {
-        "last_update": 1739818986
+        "last_update": 1740206221
     },
     "Trickest wordlist update": {
         "last_update": 1739786646
diff --git a/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt b/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt
index 7b11c498..35f329b0 100644
--- a/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt
+++ b/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt
@@ -94125,7 +94125,7 @@ index.php/search
 index.php/search-results
 index.php/search-seeds
 index.php/search/
-index.php/search/
+index.php/search/
 index.php/search?
 index.php/search_content_unece
 index.php/site-search
@@ -130292,7 +130292,7 @@ search.py
 search.ref
 search.rl
 search/
-search/
+search/
 search/"
 search/$
 search/*
@@ -174184,7 +174184,7 @@ zzz_systest
 ~www_pa/
 Ã„nderungsbestÃ¤tigungs-Mail
 Ã©diterbonachat
-Îª¿ÕÊ±¿ª·ÅËùÓÐÄ¿Â¼######
+ÎªÊ±Ä¿Â¼######
 Ð°Ð´Ñ€ÐµÑÐ½Ð°ÑÐºÐ½Ð¸Ð³Ð°
 Ð°Ð½Ð³Ð»Ð¸Ð¹ÑÐºÐ¸Ð¹-Ñ€ÑƒÑÑÐºÐ¸Ð¹
 Ð±ÑƒÐ¼Ð°Ð¶Ð½Ð¸Ðº
@@ -174242,7 +174242,7 @@ zzz_systest
 ãƒ—ãƒ­ãƒ•ã‚£ãƒ¼ãƒ«
 ãƒ¡ãƒ¼ãƒ«ã§å¤‰æ›´ã‚’ç¢ºèªã™ã‚‹
 ãƒ¡ãƒ¼ãƒ«ã‚’ç¢ºèªã™ã‚‹
-ähnliche-fahrzeuge/
+hnliche-fahrzeuge/
 ä¸ªäººä¿¡æ¯
 ä¸­æ–‡-è‹±è¯­
 ä»˜æ¬¾
diff --git a/utf8fixer.py b/utf8fixer.py
new file mode 100755
index 00000000..a89f5856
--- /dev/null
+++ b/utf8fixer.py
@@ -0,0 +1,122 @@
+#!/usr/bin/python
+"""Repair mojibake in text files, in place.
+
+The input is expected to be UTF-8 text that was wrongly decoded with a
+single-byte codec (iso8859_2 by default) and then saved as UTF-8 again.
+
+usage: utf8fixer.py PATH [codec] [normalize]
+"""
+from __future__ import print_function
+from codecs import open as copen
+from os import listdir, path
+from sys import argv
+
+import unicodedata
+
+# Command-line configuration, read once when the module is loaded.
+PATH = argv[1] if len(argv) > 1 else ""
+NORMALIZE = False
+ENCODING = None
+DEFAULT_ENCODING = "iso8859_2"  # iso8859_2 a.k.a latin2
+
+# Every argument after PATH is either the word "normalize" (any case) or a
+# codec name; when several codec names are given the last one wins.
+for arg in argv[2:]:
+    if arg.lower() == "normalize":
+        NORMALIZE = True
+    else:
+        ENCODING = arg
+
+
+def convert_file(file_path):
+    """Fix the encoding of one file and write the result back in place.
+
+    The file is decoded strictly as UTF-8.  A file that is not valid UTF-8
+    is reported and left untouched: reading it with errors='ignore' would
+    silently drop the offending bytes, and writing the result back would
+    make that loss permanent.  The file is rewritten only when its content
+    actually changed.
+
+    Uses the module-level ENCODING and NORMALIZE settings.
+
+    Returns True if the file was rewritten, False otherwise.
+    """
+    try:
+        with copen(file_path, "r", "utf8") as foriginal:
+            content = foriginal.read()
+    except UnicodeDecodeError:
+        print("[*] error:", file_path, "is not valid utf8, left unchanged")
+        return False
+
+    ccontent = fix_encoding(content, ENCODING, NORMALIZE, True)
+    if ccontent == content:
+        print("[*]", file_path, "unchanged")
+        return False
+
+    with copen(file_path, "w", "utf8") as fconverted:
+        fconverted.write(ccontent)
+    print("[*]", file_path, "fixed!")
+    return True
+
+
+def normalize_str(text):
+    """Return text with its combining marks removed.
+
+    The text is decomposed with NFKD and every character whose Unicode
+    category is 'Mn' is dropped, so an accented letter becomes its base
+    letter.  NFKD is a compatibility decomposition, so characters other
+    than accented letters may change too.
+    """
+    return ''.join(
+        c for c in unicodedata.normalize('NFKD', text)
+        if unicodedata.category(c) != 'Mn'
+    )
+
+
+def fix_encoding(content, encoding=None, norm=False, verbose=False):
+    """Undo a wrong decode of UTF-8 text.
+
+    content is encoded back to bytes with encoding and those bytes are
+    decoded as UTF-8.  If that fails, or the codec name is unknown, the
+    text is kept as it is.
+
+    content  -- the text to repair
+    encoding -- codec the text was wrongly decoded with; DEFAULT_ENCODING
+                is used when this is None or empty
+    norm     -- also pass the result through normalize_str()
+    verbose  -- print a message when the text cannot be repaired
+
+    Returns the repaired (and, if requested, normalized) text.
+    """
+    encoding = encoding or DEFAULT_ENCODING
+
+    try:
+        fixed = content.encode(encoding).decode("utf8")
+    except (UnicodeError, LookupError):
+        # UnicodeError: the text cannot be encoded with the codec, or the
+        # resulting bytes are not valid UTF-8.  LookupError: unknown codec.
+        fixed = content
+        if verbose:
+            print("[*] error: can't fix the encoding. mixed encoding?")
+
+    return normalize_str(fixed) if norm else fixed
+
+
+if __name__ == "__main__":
+    if path.isfile(PATH):
+        convert_file(PATH)
+
+    elif path.isdir(PATH):
+        # Only the regular files directly inside PATH; no recursion.
+        for ffile in listdir(PATH):
+            file_path = path.join(PATH, ffile)
+
+            if path.isfile(file_path):
+                convert_file(file_path)
+    else:
+        print(
+            "[*] error: "
+            "usage: %s FILE_OR_DIR_PATH [codec] [normalize]"
+            %
+            argv[0]
+        )