Switched from the wget pip package to urllib. Now checks whether the file already exists (and whether its size matches) to prevent redownloading.

This commit is contained in:
Jacob Stevens 2023-03-19 03:53:24 -05:00
parent 6d011ffbd4
commit 14cb9f5003

View File

@ -1,11 +1,12 @@
import os import os
import re import re
import wget import cgi
import time import time
import requests import requests
from log_tools import * from log_tools import *
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from _thread import start_new_thread from _thread import start_new_thread
from urllib.request import urlopen, urlretrieve
class CalibreTools: class CalibreTools:
active_threads = 0 active_threads = 0
@ -62,7 +63,6 @@ class CalibreTools:
printInfo(f'Found {len(download_links)} books') printInfo(f'Found {len(download_links)} books')
printInfo(f'Downloading Books. This can take a while') printInfo(f'Downloading Books. This can take a while')
while downloaded_books != len(download_links): while downloaded_books != len(download_links):
print(CalibreTools.active_threads)
if CalibreTools.active_threads != dlthread: if CalibreTools.active_threads != dlthread:
start_new_thread(CalibreTools.threadDownloadBook, (download_links[downloaded_books], directory_name, )) start_new_thread(CalibreTools.threadDownloadBook, (download_links[downloaded_books], directory_name, ))
CalibreTools.active_threads += 1 CalibreTools.active_threads += 1
@ -92,16 +92,25 @@ class CalibreTools:
@staticmethod @staticmethod
def threadDownloadBook(link, directory_name): def threadDownloadBook(link, directory_name):
filename = link
try: try:
fragment_removed = link.split("#")[0] remotefile = urlopen(link)
query_string_removed = fragment_removed.split("?")[0] remoteinfo = remotefile.info()
scheme_removed = query_string_removed.split("://")[-1].split(":")[-1].replace("%20", " ") contentdisposition = remoteinfo['Content-Disposition']
if scheme_removed.find("/") != -1: _, params = cgi.parse_header(contentdisposition)
if not os.path.exists(directory_name + "/" + os.path.basename(scheme_removed)): filename = params["filename"]
printInfo(f'Downloading {link}') filesize = remoteinfo['Content-Length']
wget.download(link, directory_name) if not os.path.exists(directory_name + "/" + filename):
else: printInfo(f'Downloading {filename}')
printInfo(f'File Exists... Skipping {link}') urlretrieve(link, directory_name + "/" + filename)
printInfo(f'Completed {filename}')
elif not int(filesize) == os.path.getsize(directory_name + "/" + filename):
printInfo(f'Filesize does not match... Downloading file. {filename}')
os.remove(directory_name + "/" + filename)
urlretrieve(link, directory_name + "/" + filename)
printInfo(f'Completed {filename}')
else:
printInfo(f'File Exists... Skipping {filename}')
except: except:
pass printError(f'Encountered a issue downloading file. Skipping {filename}')
CalibreTools.active_threads -= 1 CalibreTools.active_threads -= 1