mirror of
https://github.com/danielmiessler/SecLists.git
synced 2025-04-30 18:46:39 -04:00
Merge pull request #1005 from molangning/patch-remote-wordlist-updater-rebase
Added a remote wordlist updater (rebase)
This commit is contained in:
commit
bcc0c2f093
9 changed files with 102166 additions and 1475 deletions
92
.bin/checkers/check-if-auto-updated.py
Executable file
92
.bin/checkers/check-if-auto-updated.py
Executable file
|
@ -0,0 +1,92 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os,sys,json
|
||||
|
||||
# Nothing to check when the caller passed no file list at all, or an empty
# one. Testing the length first avoids an IndexError on a bare invocation.
if len(sys.argv) < 2 or not sys.argv[1]:
    sys.exit(0)
|
||||
|
||||
# True only when the IS_RUNNING_UNDER_CALLER_SCRIPT environment variable is
# set to exactly "1"; unset or any other value leaves wrapped mode off.
IS_WRAPPED = os.environ.get("IS_RUNNING_UNDER_CALLER_SCRIPT") == "1"
|
||||
|
||||
def print_normal(msg):
    """Print *msg* for a human reader; stay silent in wrapped mode."""
    if not IS_WRAPPED:
        print(msg)
|
||||
|
||||
def print_err(file, line_number):
    """Emit an ``E,<file>,<line>`` record, but only in wrapped mode."""
    if not IS_WRAPPED:
        return
    print(f"E,{file},{line_number}")
|
||||
|
||||
def print_warn(file, line_number):
    """Emit a ``W,<file>,<line>`` record, but only in wrapped mode."""
    if not IS_WRAPPED:
        return
    print(f"W,{file},{line_number}")
|
||||
|
||||
# Announce the check. Wrapped runs get a plain title plus a one-line
# explanation instead of the "[+]" banner.
print_normal("[+] Remote wordlist overwrite check")
if IS_WRAPPED:
    print("Remote wordlist overwrite check")
    print("Files that the script catches will be overwritten next update.")

# The caller passes every changed file in one whitespace-separated argument.
# split() without a separator ignores doubled or trailing whitespace, so no
# empty entry is later reported as a missing file.
files = sys.argv[1].split()

# Every listed path must be an existing regular file; stop at the first that
# is not.
for i in files:
    if not os.path.isfile(i):
        print_err(i, 0)
        print_normal("[!] %s does not exist!" % (i))
        sys.exit(2)
|
||||
|
||||
overall_pass_status=True
|
||||
|
||||
# Load the updater configuration; the context manager closes the file handle
# as soon as parsing is done.
with open(".bin/wordlist-updaters/sources.json") as sources_file:
    sources = json.load(sources_file)

# Paths the remote updater writes to, split by what they currently are on disk.
overwritten_paths = {
    "dirs": [],
    "files": [],
}

for source in sources:
    found_paths = []

    if "output" in source:
        found_paths.append(source["output"])

    # Optional extra paths listed for the checker only.
    found_paths += source.get("additional_paths", [])

    for path in found_paths:
        # Paths that do not exist right now are neither dirs nor files and
        # are left out of both lists.
        if os.path.isdir(path):
            overwritten_paths["dirs"].append(path)
        elif os.path.isfile(path):
            overwritten_paths["files"].append(path)
|
||||
|
||||
# Flag every changed file that the remote updater would overwrite.
for i in files:
    # Normalise so "./a/b" and "a//b" compare equal to "a/b".
    changed_path = os.path.normpath(i)

    for dir_path in overwritten_paths["dirs"]:
        # Match on a path-component boundary: "dir/" must prefix the file, so
        # a sibling such as "dir_old/x" is not treated as living inside "dir".
        dir_prefix = os.path.normpath(dir_path) + os.sep
        if changed_path.startswith(dir_prefix):
            print_normal(f"[!] Warning: file {i} is in a directory that will get overwritten!")
            print_err(i, 0)
            overall_pass_status = False
            break

    for file_path in overwritten_paths["files"]:
        if changed_path == os.path.normpath(file_path):
            print_normal(f"[!] Warning: file {i} will get overwritten!")
            print_err(i, 0)
            overall_pass_status = False
            break

if overall_pass_status:
    print_normal("[+] All files passed overwrite checks")
    sys.exit(0)

print_normal("[!] Warning: One or more files failed to pass the overwrite checks")

# Wrapped runs have already reported each hit through print_err and exit 0;
# standalone runs signal the failure with exit code 2.
sys.exit(0 if IS_WRAPPED else 2)
|
|
@ -41,6 +41,7 @@ for i in os.listdir(INPUT_ROBOTS):
|
|||
shutil.copytree(path,OUTPUT_ROBOTS,dirs_exist_ok=True)
|
||||
|
||||
print("[+] Copied all the files")
|
||||
|
||||
for i in [OUTPUT_ROBOTS,OUTPUT_TECHNOLOGIES]:
|
||||
for root,_,file_list in os.walk(i):
|
||||
for file in file_list:
|
||||
|
@ -64,6 +65,3 @@ for i in [OUTPUT_ROBOTS,OUTPUT_TECHNOLOGIES]:
|
|||
|
||||
if len(contents)!=len(patch_content):
|
||||
open(path,"wb").write(b"\n".join(patch_content))
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
#!/usr/bin/bash
# Clone the trickest wordlists into a scratch directory, run the patcher
# against the checkout, then remove the scratch directory.

set -euo pipefail

WORK_DIR=".working_space"

# Remove the scratch directory on every exit path, including a failed clone
# or a failing patcher (set -e would otherwise leave it behind).
trap 'rm -rf "$WORK_DIR"' EXIT

# Start from a clean slate so a leftover checkout cannot make the clone fail.
rm -rf "$WORK_DIR"
mkdir -p "$WORK_DIR"

git clone --depth=1 https://github.com/trickest/wordlists.git "$WORK_DIR/wordlists"

./.bin/trickest-patcher.py
|
56
.bin/wordlist-updaters/README.md
Normal file
56
.bin/wordlist-updaters/README.md
Normal file
|
@ -0,0 +1,56 @@
|
|||
# Wordlist updaters
|
||||
|
||||
## Overview
|
||||
The purpose of these scripts is to update wordlists from remote sources defined in `sources.json`.
|
||||
|
||||
A GitHub Action runs every hour, checks whether each source's update conditions are met, and updates the wordlists accordingly.
|
||||
|
||||
`status.json` is written by the updater and is not meant to be edited in a pull request.
|
||||
|
||||
## Format
|
||||
|
||||
Example sources.json
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"name": "Jwt secrets update",
|
||||
"type": "file",
|
||||
"source": "https://raw.githubusercontent.com/wallarm/jwt-secrets/master/jwt.secrets.list",
|
||||
"output": "Passwords/scraped-JWT-secrets.txt",
|
||||
"post_run_script": "",
|
||||
"frequency": "3h"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
All fields are required unless otherwise stated.
|
||||
|
||||
`name` is the name of the task.
|
||||
|
||||
`type` can be one of the following: `file, git_dir`.
|
||||
|
||||
`source` specifies the remote location. If `type` is `git_dir`, the repository at that location will be cloned using git, and the URL must end in `.git`.
|
||||
|
||||
`frequency` is the update frequency. The script uses the `status.json` file to know when to update. Accepted units of time are `h,H` for hours and `d,D` for days. Frequency can be specified with only days, only hours, or both. When both are given, days must come before hours: `1d6h` is valid, `6h1d` is not.
|
||||
|
||||
`update_time` specifies the time of day at which the daily update runs, in UTC, using 24-hour `HHMM` syntax (e.g. `0300`). Only one scheduling field can be set at a time: either `frequency` or `update_time`.
|
||||
|
||||
`output` is the file or directory the script writes the downloaded content to. It must be a relative path.
|
||||
|
||||
`post_run_script` is the script to be run after pulling the list successfully. This field is optional.
|
||||
|
||||
`additional_paths` lists additional paths that the workflow checker should warn about when a pull request changes a file under them. This field is optional; it is used only by the checker, not by the updater.
|
||||
|
||||
- - -
|
||||
|
||||
Example status.json
|
||||
|
||||
```json
|
||||
{
|
||||
"Jwt secrets update": {
|
||||
"last_update" : 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
22
.bin/wordlist-updaters/sources.json
Normal file
22
.bin/wordlist-updaters/sources.json
Normal file
|
@ -0,0 +1,22 @@
|
|||
[
|
||||
{
|
||||
"name": "Jwt secrets update",
|
||||
"type": "file",
|
||||
"source": "https://raw.githubusercontent.com/wallarm/jwt-secrets/master/jwt.secrets.list",
|
||||
"output": "Passwords/scraped-JWT-secrets.txt",
|
||||
"post_run_script": "",
|
||||
"frequency": "6h"
|
||||
},
|
||||
{
|
||||
"name": "Trickest wordlist update",
|
||||
"type": "git_dir",
|
||||
"source": "https://github.com/trickest/wordlists.git",
|
||||
"output": ".working_space",
|
||||
"post_run_script": ".bin/trickest-patcher.py",
|
||||
"update_time": "1030",
|
||||
"additional_paths": [
|
||||
"Discovery/Web-Content/trickest-robots-disallowed-wordlists/",
|
||||
"Discovery/Web-Content/CMS/trickest-cms-wordlist/"
|
||||
]
|
||||
}
|
||||
]
|
8
.bin/wordlist-updaters/status.json
Normal file
8
.bin/wordlist-updaters/status.json
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"Jwt secrets update": {
|
||||
"last_update": 1712376971
|
||||
},
|
||||
"Trickest wordlist update": {
|
||||
"last_update": 1712310048
|
||||
}
|
||||
}
|
178
.bin/wordlist-updaters/updater.py
Executable file
178
.bin/wordlist-updaters/updater.py
Executable file
|
@ -0,0 +1,178 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import requests
|
||||
import subprocess
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# TODO Summary file
|
||||
# TODO Advanced crontab syntax
|
||||
|
||||
# Configuration and state files. The paths are relative, so they resolve
# against the current working directory of the process.
BASE_PATH = ".bin/wordlist-updaters"
SOURCE_PATH = os.path.join(BASE_PATH, "sources.json")
STATUS_PATH = os.path.join(BASE_PATH, "status.json")

# Values accepted in a source's "type" field.
VALID_TYPES = ["file", "git_dir"]

# An optional "<n>d" followed by an optional "<n>h". Each alternative has an
# empty capture group as its fallback, so a match always yields four groups:
# (days, "", hours, "").
FREQUENCY_REGEX = (
    r"^(?:([0-9]+)d|())"
    r"(?:([0-9]+)h|())"
    r"(?!.*?d)$"
)

# One reference timestamp shared by every scheduling decision in this run
# (naive local time, as returned by datetime.now()).
TIME_NOW = datetime.now()
|
||||
|
||||
def request_wrapper(url, retries=3, timeout=30):
    """Download *url* and return the response body as text.

    The request is attempted up to *retries* times. An attempt counts as
    failed when the server answers with anything other than HTTP 200 or when
    requests raises a network-level error (connection failure, timeout, ...).

    Args:
        url: Address to fetch.
        retries: Maximum number of attempts.
        timeout: Seconds to wait on the server per attempt; without it a
            stalled connection would block the run indefinitely.

    Returns:
        The text of the first response that came back with status 200.

    Raises:
        SystemExit: With exit code 2 once every attempt has failed.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as error:
            # Treat transport errors like a bad status so they are retried.
            print("[!] Request to %s raised %r" % (url, error))
        else:
            if response.status_code == 200:
                return response.text

        if attempt < retries:
            print("[!] Getting %s failed(%i/%i)" % (url, attempt, retries))

    print("[!] Failed to get %s." % (url))
    raise SystemExit(2)
|
||||
|
||||
# Both the configuration and the state file must exist before anything runs.
if not os.path.isfile(SOURCE_PATH):
    print("[!] Sources.json is missing!")
    raise SystemExit(2)

if not os.path.isfile(STATUS_PATH):
    print("[!] Status.json is missing!")
    raise SystemExit(2)

# Context managers close the handles as soon as the JSON has been parsed.
with open(SOURCE_PATH, "r") as sources_file:
    SOURCES = json.load(sources_file)

with open(STATUS_PATH, "r") as status_file:
    STATUS = json.load(status_file)
|
||||
|
||||
# Decide which configured sources are due for an update in this run.
# NOTE(review): TIME_NOW is naive local time while the README describes
# update_time as UTC; the two agree only when the host clock is UTC - confirm.
to_check = []

for source in SOURCES:
    task_name = source.get("name")

    if not isinstance(task_name, str) or not task_name:
        print("[!] Skipping a task as its name field is missing/invalid")
        continue

    # Structural validation runs before any queuing decision, so a task that
    # was never run before cannot reach the execution stage with a broken
    # definition.
    if not isinstance(source.get("output"), str):
        print(f"[!] Skipping task {task_name} as output field is missing/invalid")
        continue

    if not isinstance(source.get("type"), str):
        print(f"[!] Skipping task {task_name} as type field is missing/invalid")
        continue

    if source["type"] not in VALID_TYPES:
        print(f"[!] Skipping task {task_name} as type is invalid")
        continue

    if source["output"].startswith("/"):
        print(f"[!] Skipping task {task_name} as output path is not relative.")
        continue

    if not isinstance(source.get("source"), str):
        print(f"[!] Skipping task {task_name} as source field is missing/invalid")
        continue

    if source["type"].startswith("git_") and not source["source"].endswith(".git"):
        print(f"[!] Skipping task {task_name} as a git task was defined with a non git url.")
        continue

    if task_name not in STATUS:
        print(f"[+] Queuing task {task_name} as task was never checked before")
        to_check.append(source)
        continue

    task_status = STATUS[task_name]

    if not isinstance(task_status, dict) or not isinstance(task_status.get("last_update"), int):
        print(f"[!] Queuing task {task_name} as last_update field is missing/invalid")
        to_check.append(source)
        continue

    # Exactly one of the two scheduling fields must be present.
    has_frequency = "frequency" in source
    has_update_time = "update_time" in source

    if has_frequency == has_update_time:
        print(f"[!] Skipping task {task_name} as only frequency or update_time can be specified")
        continue

    if has_frequency:
        frequency = source["frequency"]
        regex_match = None

        if isinstance(frequency, str):
            # IGNORECASE accepts the upper-case D/H units the README allows.
            regex_match = re.search(FREQUENCY_REGEX, frequency, re.IGNORECASE)

        if not regex_match:
            print(f"[!] Skipping task {task_name} as frequency field contains invalid formatting of days and hours")
            continue

        days, _, hours, _ = regex_match.groups()

        # An absent unit is captured as None or ""; both mean zero.
        days = int(days or 0)
        hours = int(hours or 0)

        if days == 0 and hours == 0:
            print(f"[!] Skipping task {task_name} as frequency must be greater than zero")
            continue

        next_update_time = datetime.fromtimestamp(task_status["last_update"]) + timedelta(days=days, hours=hours)

        if TIME_NOW < next_update_time:
            # total_seconds() covers the whole interval; .seconds would drop
            # any whole days and report a far-off update as imminent.
            seconds_to_update = int((next_update_time - TIME_NOW).total_seconds())

            if seconds_to_update <= 300:
                print(f"[+] Queuing task {task_name} as it is less than 5 minutes to update. ({seconds_to_update} seconds to update)")
                to_check.append(source)
                continue

            print(f"[!] Skipping task {task_name} as it is more than 5 minutes to update ({seconds_to_update} seconds to update)")
            continue

        seconds_overdue = int((TIME_NOW - next_update_time).total_seconds())
        print(f"[+] Queuing task {task_name} as it is {seconds_overdue} seconds after scheduled update time.")
        to_check.append(source)
        continue

    update_time = source["update_time"]

    if not isinstance(update_time, str):
        print(f"[!] Skipping task {task_name} as update_time field is invalid")
        continue

    # Must be exactly four ASCII digits (HHMM) before it is sliced and parsed.
    if not re.fullmatch(r"[0-9]{4}", update_time):
        print(f"[!] Skipping task {task_name} as it is in a incorrect format")
        continue

    hours = int(update_time[:2])
    minutes = int(update_time[2:])

    # Ranges follow datetime.replace(): 00-23 hours and 00-59 minutes.
    if hours not in range(24):
        print(f"[!] Skipping task {task_name} as hours is not in range 0-23.")
        continue

    if minutes not in range(60):
        print(f"[!] Skipping task {task_name} as minutes is not in range 0-59.")
        continue

    # Pin seconds to zero so the slot is a fixed instant, not "now" shifted;
    # otherwise an on-the-hour slot matches two consecutive hourly runs.
    scheduled_update_time = TIME_NOW.replace(hour=hours, minute=minutes, second=0, microsecond=0)

    # A slot already passed today next occurs tomorrow; this lets a run
    # shortly before midnight pick up a slot shortly after it.
    if scheduled_update_time < TIME_NOW:
        scheduled_update_time += timedelta(days=1)

    if scheduled_update_time - TIME_NOW <= timedelta(hours=1):
        print(f"[+] Queuing task {task_name} as update time is within the next hour")
        to_check.append(source)
        continue

    print(f"[!] Skipping task {task_name} as update time is not within the next hour")
|
||||
|
||||
# Run every queued task, then persist the updated timestamps.
if not to_check:
    print("[!] No task were queued. Exiting.")
    raise SystemExit(0)

print(f"[+] Queued a total of {len(to_check)} tasks to run.")

for task in to_check:
    task_name = task["name"]
    print(f"[+] Starting task {task_name}")

    # Make sure there is a dict to record the timestamp in.
    if not isinstance(STATUS.get(task_name), dict):
        STATUS[task_name] = {}

    task_type = task.get("type")

    if task_type == "file":
        content = request_wrapper(task["source"])

        with open(task["output"], "w") as output_file:
            output_file.write(content)

        print("[+] Saved file to output location")

    elif task_type == "git_dir":
        if not os.path.exists(task["output"]):
            print(f"[+] Making directory {task['output']}")
            os.makedirs(task["output"])

        clone = subprocess.run(["git", "clone", "-q", "--depth=1", task["source"]], cwd=task["output"])

        # A failed clone must not be recorded as a successful update, nor
        # trigger the post run script.
        if clone.returncode != 0:
            print(f"[!] Skipping task {task_name} as git clone failed with exit code {clone.returncode}")
            continue

    else:
        print(f"[!] Skipping task {task_name} as type is invalid")
        continue

    STATUS[task_name]["last_update"] = int(datetime.now().timestamp())

    # post_run_script is optional: it may be absent or an empty string.
    post_run_script = task.get("post_run_script")

    if post_run_script:
        print("[+] Running post run script")
        post_run = subprocess.run(post_run_script)

        if post_run.returncode != 0:
            print(f"[!] Post run script exited with code {post_run.returncode}")

        print("[+] Finished running post run script")

    print(f"[+] Finished task {task_name}")

with open(STATUS_PATH, "w") as status_file:
    json.dump(STATUS, status_file, indent=4)
|
|
@ -1,8 +1,8 @@
|
|||
name: Wordlist Updater - Trickest wordlists updater
|
||||
name: Wordlist Updater - Remote wordlists updater
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 0 * * *
|
||||
- cron: 0 * * * *
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
|
@ -13,7 +13,7 @@ jobs:
|
|||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Update lists
|
||||
run: ./.bin/trickest-updater.sh
|
||||
run: ./.bin/wordlist-updaters/updater.py
|
||||
|
||||
- name: Commit files if changed
|
||||
run: |
|
||||
|
@ -23,7 +23,7 @@ jobs:
|
|||
echo "[+] No files were changed"
|
||||
else
|
||||
echo "[+] Files were changed! Pushing changed..."
|
||||
git add -A
|
||||
git add --renormalize -A && git add -A
|
||||
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
|
||||
git config --local user.email "example@github.com"
|
||||
git config --local user.name "GitHub Action"
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue