From 14cb9f5003ad3b7a665015c3c1f735cb1a2be353 Mon Sep 17 00:00:00 2001 From: Jacob Stevens Date: Sun, 19 Mar 2023 03:53:24 -0500 Subject: [PATCH] Switched from the pip wget package to urllib. Now checks whether the file already exists (and whether its size matches) to avoid re-downloading. --- calibre_tools.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/calibre_tools.py b/calibre_tools.py index 989e688..a790335 100644 --- a/calibre_tools.py +++ b/calibre_tools.py @@ -1,11 +1,12 @@ import os import re -import wget +import cgi import time import requests from log_tools import * from bs4 import BeautifulSoup from _thread import start_new_thread +from urllib.request import urlopen, urlretrieve class CalibreTools: active_threads = 0 @@ -62,7 +63,6 @@ class CalibreTools: printInfo(f'Found {len(download_links)} books') printInfo(f'Downloading Books. This can take a while') while downloaded_books != len(download_links): - print(CalibreTools.active_threads) if CalibreTools.active_threads != dlthread: start_new_thread(CalibreTools.threadDownloadBook, (download_links[downloaded_books], directory_name, )) CalibreTools.active_threads += 1 @@ -92,16 +92,25 @@ class CalibreTools: @staticmethod def threadDownloadBook(link, directory_name): + filename = link try: - fragment_removed = link.split("#")[0] - query_string_removed = fragment_removed.split("?")[0] - scheme_removed = query_string_removed.split("://")[-1].split(":")[-1].replace("%20", " ") - if scheme_removed.find("/") != -1: - if not os.path.exists(directory_name + "/" + os.path.basename(scheme_removed)): - printInfo(f'Downloading {link}') - wget.download(link, directory_name) - else: - printInfo(f'File Exists... 
Skipping {link}') + remotefile = urlopen(link) + remoteinfo = remotefile.info() + contentdisposition = remoteinfo['Content-Disposition'] + _, params = cgi.parse_header(contentdisposition) + filename = params["filename"] + filesize = remoteinfo['Content-Length'] + if not os.path.exists(directory_name + "/" + filename): + printInfo(f'Downloading {filename}') + urlretrieve(link, directory_name + "/" + filename) + printInfo(f'Completed {filename}') + elif not int(filesize) == os.path.getsize(directory_name + "/" + filename): + printInfo(f'Filesize does not match... Downloading file. {filename}') + os.remove(directory_name + "/" + filename) + urlretrieve(link, directory_name + "/" + filename) + printInfo(f'Completed {filename}') + else: + printInfo(f'File Exists... Skipping {filename}') except: - pass + printError(f'Encountered a issue downloading file. Skipping {filename}') CalibreTools.active_threads -= 1