From 4c6d31d5e4f78ae962807b7985a41e46816a3df9 Mon Sep 17 00:00:00 2001 From: Jacob Stevens Date: Wed, 12 Apr 2023 16:08:51 -0500 Subject: [PATCH] Now exports to file instead of using RAM. Also should work with format filters. --- calibre_tools.py | 29 +++++++++++++++++------------ main.py | 9 ++++++++- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/calibre_tools.py b/calibre_tools.py index be647be..09038ef 100644 --- a/calibre_tools.py +++ b/calibre_tools.py @@ -3,6 +3,7 @@ import re import cgi import time import requests +import linecache from tools import * from bs4 import BeautifulSoup from _thread import start_new_thread @@ -23,7 +24,6 @@ class CalibreTools: @staticmethod def getLibraries(ip_port): - # TODO: Fix by getting link to each library to list available libraries libraries = [] request = requests.get(f'http://{ip_port}/mobile') soap = BeautifulSoup(request.text, features='html.parser') @@ -47,9 +47,10 @@ class CalibreTools: return str(span.text).split('of')[1].strip() @staticmethod - def downloadBooks(ip_port, library_name, max=None, dlthread=1): + def downloadBooks(ip_port, library_name, max=None, update=False, format=None, dlthread=1): downloaded_books = 0 - download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max) + CalibreTools.getBooksLink(ip_port, library_name, 25, max, update, format) + total_book_links = sum(1 for line in open(ip_port + " " + library_name + " links.txt")) directory_ip = "'" if re.match('^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+$', ip_port): directory_ip = str(ip_port).split(':')[0] @@ -60,23 +61,23 @@ class CalibreTools: os.mkdir(f'./{directory_name}') except FileExistsError: pass - printInfo(f'Found {len(download_links)} books') + printInfo(f'Found {total_book_links} books') printInfo(f'Downloading Books. 
This can take a while') TerminalInterface.clearTerminal() TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1) - while downloaded_books != len(download_links): + while downloaded_books != total_book_links: if CalibreTools.active_threads != dlthread: for x in range(3, 3+dlthread): if CalibreTools.threads.count(x) == 0: - start_new_thread(CalibreTools.threadDownloadBook, (x, download_links[downloaded_books], directory_name, )) + start_new_thread(CalibreTools.threadDownloadBook, (x, linecache.getline((ip_port + " " + library_name + " links.txt"), downloaded_books), directory_name, )) CalibreTools.active_threads += 1 downloaded_books += 1 - TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(len(download_links)), 2) + TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(total_book_links), 2) else: TerminalInterface.runInterfaceSchedule() @staticmethod - def getBooksLink(ip_port, library_name, multiple, max=None): + def getBooksLink(ip_port, library_name, multiple, max, update, format): n_books = int(CalibreTools.getTotalBooks(ip_port, library_name)) if max is not None: n_books = max @@ -84,16 +85,20 @@ class CalibreTools: if difference_next_multiple == multiple: difference_next_multiple = 0 major_number_it = (n_books + difference_next_multiple) // 25 - books_links = [] + if update: + if os.path.exists(ip_port + " " + library_name + " links.txt"): + os.remove(ip_port + " " + library_name + " links.txt") for i in range(major_number_it): start = multiple * i + 1 request = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={n_books}&order=descending&start={start}') soup = BeautifulSoup(request.text, features='html.parser') - links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi')}) + links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi|\\.cbr')}) + 
if format != None: + links_soup = soup.findAll('a', attrs={'href': re.compile(format)}) for link in links_soup: download_link = f'http://{ip_port}{link.get("href")}' - books_links.append(download_link) - return books_links + with open((ip_port + " " + library_name + " links.txt"), "a") as f: + f.write(download_link + "\n") @staticmethod def threadDownloadBook(line, link, directory_name): diff --git a/main.py b/main.py index 3857a43..ab1db63 100644 --- a/main.py +++ b/main.py @@ -23,6 +23,7 @@ def calibre_implementation(ip_port): libraries = [] thr = 1 if args.lib is None: + # TODO: Export to a txt file to free up ram once completed libraries = CalibreTools.getLibraries(ip_port) if len(libraries) == 0: printError("Could not find any libraries") @@ -34,19 +35,25 @@ def calibre_implementation(ip_port): thr = args.thread for library in libraries: printInfo(f'Counting books from library: {library}') - CalibreTools.downloadBooks(ip_port, library, None, thr) + CalibreTools.downloadBooks(ip_port, library, None, update, format, thr) printSuccess("Download Complete!") exit(0) +# TODO: Add support to specify what format you want to download. +# Ex. -f epub,pdf +# -f epub if __name__ == '__main__': parser = argparse.ArgumentParser("Calibre Dumper") parser.add_argument('-c', '--calibre-host', type=str, dest='ip_port', action='store', help='Provide ip and port of calibre server, Format ip:port') parser.add_argument('-l', '--library', type=str, dest='lib', action='store', help='Specify which library to download') parser.add_argument('-t', '--threads', type=int, dest='thread', action='store', help='Specify how many download threads to use') + parser.add_argument('-f', '--format', type=str, dest='format', action='store', help='Specify what format youd like to download. Ex. \\.epub|\\.mobi|\\.pdf') + parser.add_argument('-u', '--update', type=bool, dest='update', action='store', help='Force Scraper to get updated list of books') parser.usage = ''' CALIBRE DUMPER 1. 
Download all books from all libraries of calibre server using --calibre-host or -c 2. Specify library using --library or -l + 4. Specify what format youd like to download. Ex -f Ex. \\.epub|\\.mobi|\\.pdf 3. Specify how many download threads to use --threads or -t ''' args = parser.parse_args()