From a78faef856f97dbb5dffb0a83672371044487dfb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 22 Feb 2025 06:37:09 +0000 Subject: [PATCH] [Github Action] Automated trickest wordlists update. --- .bin/wordlist-updaters/status.json | 2 +- .../top-10000.txt | 8 +- utf8fixer.py | 73 +++++++++++++++++++ 3 files changed, 78 insertions(+), 5 deletions(-) create mode 100755 utf8fixer.py diff --git a/.bin/wordlist-updaters/status.json b/.bin/wordlist-updaters/status.json index 1f5711a9..8e039054 100644 --- a/.bin/wordlist-updaters/status.json +++ b/.bin/wordlist-updaters/status.json @@ -1,6 +1,6 @@ { "Jwt secrets update": { - "last_update": 1739818986 + "last_update": 1740206221 }, "Trickest wordlist update": { "last_update": 1739786646 diff --git a/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt b/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt index 7b11c498..35f329b0 100644 --- a/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt +++ b/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt @@ -94125,7 +94125,7 @@ index.php/search index.php/search-results index.php/search-seeds index.php/search/ -index.php/search/ +index.php/search/ index.php/search? index.php/search_content_unece index.php/site-search @@ -130292,7 +130292,7 @@ search.py search.ref search.rl search/ -search/ +search/ search/" search/$ search/* @@ -174184,7 +174184,7 @@ zzz_systest ~www_pa/ Änderungsbestätigungs-Mail éditerbonachat -ΪʱĿ¼###### +ΪʱĿ¼###### адреснаякнига английский-русский бумажник @@ -174242,7 +174242,7 @@ zzz_systest プロフィール メールで変更を確認する メールを確認する -hnliche-fahrzeuge/ +hnliche-fahrzeuge/ 个人信息 中文-英语 付款 diff --git a/utf8fixer.py b/utf8fixer.py new file mode 100755 index 00000000..a89f5856 --- /dev/null +++ b/utf8fixer.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +from __future__ import print_function +from codecs import open as copen +from os import listdir, path +from sys import argv + +import unicodedata + +# usage: utf8-fix.py PATH [codec] [normalize] +PATH = argv[1] if len(argv) > 1 else "" +NORMALIZE = False +ENCODING = None +DEFAULT_ENCODING = "iso8859_2" # iso8859_2 a.k.a latin2 + +for arg in argv[2:]: + if arg.lower() == "normalize": + NORMALIZE = True + else: + ENCODING = arg + + + +def convert_file(file_path): + print("[*]", file_path, "fixed!") + foriginal = copen(file_path, "r", "utf8", errors='ignore') + content = foriginal.read() + foriginal.close() + + ccontent = fix_encoding(content, ENCODING, NORMALIZE, True) + fconverted = copen(file_path, "w", "utf8") + fconverted.write(ccontent) + fconverted.close() + +def normalize_str(text): + return ''.join( + c for c in unicodedata.normalize('NFKD', text) + if unicodedata.category(c) != 'Mn' + ) + +def fix_encoding(content, encoding=None, norm=False, verbose=False): + encoding = encoding or DEFAULT_ENCODING + + try: + fixed = content.encode(encoding).decode("utf8") + except: + fixed = content + if verbose: + print("[*] error: can't fix the encoding. mixed encoding?") + + if norm: + return normalize_str(fixed) + else: + return fixed + + +if __name__ == "__main__": + if path.isfile(PATH): + convert_file(PATH) + + elif path.isdir(PATH): + + for ffile in listdir(PATH): + file_path = path.join(PATH, ffile) + + if path.isfile(file_path): + convert_file(file_path) + else: + print( + "[*] error: " + "usage: %s FILE_OR_DIR_PATH [codec] [normalize]" + % + argv[0] + )