Here's a quick AHK script that calls a Python library (PyWebCopy) compiled to a 9 MB executable. It downloads websites, webpages and their assets, and can bypass robots.txt. It handles threading, zips, HTML, whatever you need.
The compiled exe and the AHK script can be found here; no need to know Python, since it is already compiled.
https://github.com/samfisherirl/ahk_pywebcopy/releases
Code: Select all
; Field order: mode ("website" or "webpage") || url || local folder to save into || project name
; Example 1: copy a whole website.
definitions := [
    "website",
    "http://nytimes.com",
    A_ScriptDir "\temp",
    "NameProj"
]
; Example 2: copy a single webpage (this assignment replaces the one above).
definitions := [
    "webpage",
    "http://www.nysed.gov/college-transcripts",
    A_ScriptDir "\differentFolder",
    "DifferentName"
]
Code: Select all
; Field order: mode ("website" or "webpage") || url || local folder to save into || project name
; Whole-website example.
definitions := [
    "website",
    "http://nytimes.com",
    A_ScriptDir "\temp",
    "NameProj"
]
; Single-webpage example; being assigned last, this is the job that actually runs.
definitions := [
    "webpage",
    "http://www.nysed.gov/college-transcripts",
    A_ScriptDir "\differentFolder",
    "DifferentName"
]
; Write the job to command.txt, then launch the downloader.
obj := Web(definitions)
obj.runIt()
; Web: stores one download job in command.txt (next to the script) and
; launches ahk_webcopy.exe with the script folder as its working directory.
class Web
{
    ; def: Array of four strings, in this order:
    ;   [1] "website" or "webpage"
    ;   [2] url
    ;   [3] local folder to save into
    ;   [4] project name
    __New(def) {
        this.config := A_ScriptDir "\command.txt"
        this.command := def[1]
        this.url := def[2]
        this.path_to_save := def[3]
        this.name := def[4]

        ; Fields are joined with ",," in the order command, url, path, name.
        ; A stray "," before this.name used to end the assignment early
        ; (comma is the multi-statement operator), so the project name was
        ; never written to the file.
        command_storage := this.command ",," this.url ",," this.path_to_save ",," this.name ",,"

        ; FileAppend appends to an existing file, so clear any temp file left
        ; behind by an earlier failed run before writing the new job.
        tempFile := A_ScriptDir "\temp.txt"
        if FileExist(tempFile)
            FileDelete(tempFile)
        FileAppend(command_storage, tempFile)
        ; Overwrite (1) any previous command.txt with the freshly written job.
        FileMove(tempFile, this.config, 1)
    }

    ; Launch the downloader executable from the script folder.
    runIt() {
        Run("ahk_webcopy.exe", A_ScriptDir)
    }
}
Python code:
Code: Select all
from pywebcopy import save_webpage, save_website
from pathlib import Path
from os import mkdir
class Save:
    """Read a download job from a config file and run it with PyWebCopy.

    The config file holds at least four fields separated by ``",,"`` in this
    order: command, url, save folder, project name.
    """

    # Separator between the fields of the config file.
    _FIELD_SEPARATOR = ",,"

    def __init__(self, path):
        """Remember *path* (the config file) and start with empty job fields."""
        self.path = path
        self.config = ""
        self.command = ""
        self.save_path = ""
        self.name = ""
        self.url = ""

    def get_config(self):
        """Parse the config file, make sure the save folder exists, run the job.

        ``webpage()`` runs when the command contains ``"webpage"`` and
        ``website()`` when it contains ``"website"``.

        Raises:
            IndexError: if the config file holds fewer than four fields.
        """
        with open(self.path, "r") as f:
            self.config = f.read()

        # Strip each field so a trailing newline or stray spaces in the file
        # do not end up inside the url, folder or project name.
        fields = [
            field.strip()
            for field in self.config.split(self._FIELD_SEPARATOR)
        ]
        if len(fields) < 4:
            raise IndexError(
                f"expected 4 fields separated by {self._FIELD_SEPARATOR!r} "
                f"in {self.path}, found {len(fields)}"
            )
        self.command, self.url, self.save_path, self.name = fields[:4]

        # Best effort: create missing parent folders too, and stay silent when
        # the folder already exists (e.g. on a second run).
        try:
            Path(self.save_path).mkdir(parents=True, exist_ok=True)
        except OSError as e:
            print(e)

        if "webpage" in self.command:
            self.webpage()
        if "website" in self.command:
            self.website()

    def _download(self, saver):
        """Call *saver* (``save_webpage`` or ``save_website``) for this job."""
        saver(
            url=str(self.url),
            project_folder=str(self.save_path),
            project_name=str(self.name),
            bypass_robots=True,
            debug=True,
            open_in_browser=True,
            delay=None,
            threaded=False,
        )

    def webpage(self):
        """Save the single page at ``self.url``."""
        self._download(save_webpage)

    def website(self):
        """Save the whole site at ``self.url``."""
        self._download(save_website)
if __name__ == "__main__":
    # The job file is looked up in the current working directory.
    Save(Path.cwd() / "command.txt").get_config()