From 7b70a73ac2c7f152e505ba2789109123aad685c3 Mon Sep 17 00:00:00 2001
From: Jacob Stevens
Date: Sun, 19 Mar 2023 02:40:18 -0500
Subject: [PATCH] Functional Calibre Dumper with Threading

---
 calibre_tools.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++++
 log_tools.py     |  22 ++++++++
 main.py          |  75 +++++++++++++++++++++++++++++
 3 files changed, 223 insertions(+)
 create mode 100644 calibre_tools.py
 create mode 100644 log_tools.py
 create mode 100644 main.py

diff --git a/calibre_tools.py b/calibre_tools.py
new file mode 100644
index 0000000..ceb8091
--- /dev/null
+++ b/calibre_tools.py
@@ -0,0 +1,126 @@
+"""Helpers for listing and downloading books from a Calibre content server."""
+import os
+import re
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+import requests
+import wget
+from bs4 import BeautifulSoup
+
+from log_tools import printInfo, printWarning
+
+# Seconds to wait on each request so an unresponsive server cannot hang the run.
+REQUEST_TIMEOUT = 30
+# Matches the href of the e-book formats that are downloaded.
+BOOK_HREF = re.compile(r'\.epub|\.pdf|\.mobi')
+
+
+class CalibreTools:
+    """Static helpers that talk to the /mobile pages of a Calibre server."""
+
+    # Downloads handed to the worker pool that have not finished yet.
+    active_threads = 0
+    # Not used by this class; kept so existing references keep working.
+    threads = []
+    # Guards active_threads, which several worker threads update.
+    _counter_lock = threading.Lock()
+
+    @staticmethod
+    def checkServer(ip_port):
+        """Return True if a known endpoint of the server answers HTTP 200."""
+        dorks = ['/interface-data/init', '/browse/categories/allbooks']
+        for dork in dorks:
+            try:
+                response = requests.get(f'http://{ip_port}{dork}', timeout=REQUEST_TIMEOUT)
+            except requests.RequestException:
+                # Unreachable host or timeout: try the next endpoint.
+                continue
+            if response.status_code == 200:
+                return True
+        return False
+
+    @staticmethod
+    def getLibraries(ip_port):
+        """Return the library names listed on /mobile, with spaces as '_'.
+
+        Falls back to ['random'] when the page has no library chooser.
+        """
+        # TODO: Fix by getting link to each library to list available libraries
+        response = requests.get(f'http://{ip_port}/mobile', timeout=REQUEST_TIMEOUT)
+        soup = BeautifulSoup(response.text, features='html.parser')
+        library_div = soup.find('div', attrs={'id': 'choose_library'})
+        if library_div is None:
+            return ['random']
+        options = library_div.find_all_next('option')
+        return [str(option.text).replace(' ', '_') for option in options]
+
+    @staticmethod
+    def getTotalBooks(ip_port, library_name):
+        """Return the library's book count as a string, or None if not shown."""
+        response = requests.get(f'http://{ip_port}/mobile?library_id={library_name}', timeout=REQUEST_TIMEOUT)
+        soup = BeautifulSoup(response.text, features='html.parser')
+        for span in soup.find_all('span'):
+            counter = re.search(r'Books \d+ to \d+ of (\d+)', span.text)
+            if counter:
+                return counter.group(1)
+        return None
+
+    @staticmethod
+    def downloadBooks(ip_port, library_name, max=None, dlthread=1):
+        """Download the books of a library into './HOST-LIBRARY_NAME'.
+
+        max caps how many books are listed (None lists all); dlthread is the
+        number of parallel downloads. Returns once every download has ended.
+        """
+        download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max)
+        # Strip only the trailing ':port' so an IPv6 host keeps its address,
+        # then swap the remaining colons, which Windows paths do not allow.
+        host = str(ip_port).rsplit(':', 1)[0].replace(':', '_')
+        directory_name = f'{host}-{library_name}'
+        os.makedirs(f'./{directory_name}', exist_ok=True)
+        printInfo(f'Found {len(download_links)} books')
+        printInfo('Downloading Books. This can take a while')
+        workers = dlthread if dlthread and dlthread > 0 else 1
+        # Leaving the with-block waits for every queued download to finish.
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            for link in download_links:
+                with CalibreTools._counter_lock:
+                    CalibreTools.active_threads += 1
+                pool.submit(CalibreTools.threadDownloadBook, link, directory_name)
+
+    @staticmethod
+    def getBooksLink(ip_port, library_name, multiple, max=None):
+        """Return the epub/pdf/mobi download URLs of a library's books.
+
+        The listing is read in pages of `multiple` books, sorted by timestamp
+        in descending order. max caps the number of books listed (None = all).
+        """
+        total = CalibreTools.getTotalBooks(ip_port, library_name)
+        # getTotalBooks returns None when the page shows no book counter.
+        n_books = int(total) if total is not None else 0
+        if max is not None:
+            n_books = max
+        books_links = []
+        for start in range(1, n_books + 1, multiple):
+            # Request only the books still missing so pages never overlap.
+            num = min(multiple, n_books - start + 1)
+            response = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={num}&order=descending&start={start}', timeout=REQUEST_TIMEOUT)
+            soup = BeautifulSoup(response.text, features='html.parser')
+            for link in soup.find_all('a', attrs={'href': BOOK_HREF}):
+                books_links.append(f'http://{ip_port}{link.get("href")}')
+        return books_links
+
+    @staticmethod
+    def threadDownloadBook(link, directory_name):
+        """Download one link into directory_name and release its counter slot.
+
+        A failed download is reported as a warning and does not stop the run.
+        """
+        try:
+            wget.download(link, directory_name)
+        except Exception as error:
+            printWarning(f'Could not download {link}: {error}')
+        finally:
+            with CalibreTools._counter_lock:
+                CalibreTools.active_threads -= 1
\ No newline at end of file
diff --git a/log_tools.py b/log_tools.py
new file mode 100644
index 0000000..013f13b
--- /dev/null
+++ b/log_tools.py
@@ -0,0 +1,22 @@
+"""Colored console logging helpers built on colorama."""
+from colorama import Fore, Style
+
+
+def printError(message):
+    """Print message in red with an '[Error]' prefix."""
+    print(f'{Fore.RED}[Error] - {message}{Style.RESET_ALL}')
+
+
+def printWarning(message):
+    """Print message in yellow with a '[Warning]' prefix."""
+    print(f'{Fore.YELLOW}[Warning] - {message}{Style.RESET_ALL}')
+
+
+def printInfo(message):
+    """Print message in blue with an '[Info]' prefix."""
+    print(f'{Fore.BLUE}[Info] - {message}{Style.RESET_ALL}')
+
+
+def printSuccess(message):
+    """Print message in green with a '[Success]' prefix."""
+    print(f'{Fore.GREEN}[Success] - {message}{Style.RESET_ALL}')
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..ad2147a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,75 @@
+"""Command line entry point of the Calibre dumper."""
+import argparse
+import ipaddress
+import sys
+
+from calibre_tools import CalibreTools
+from log_tools import printError, printInfo, printSuccess
+
+
+def _isValidHost(ip_port):
+    """Return True for 'ADDRESS:PORT' with an IPv4/IPv6 address and port 1-65535."""
+    host, _, port = str(ip_port).rpartition(':')
+    try:
+        ipaddress.ip_address(host)
+        return port.isdigit() and 0 < int(port) <= 65535
+    except ValueError:
+        return False
+
+
+def validateArgs(parser, args):
+    """Exit with status 1 unless the parsed arguments are usable.
+
+    parser is not used; it is kept so existing callers keep working.
+    """
+    if args.ip_port is None and args.lib is None:
+        printError("Minimum one parameter required")
+        sys.exit(1)
+    if args.ip_port is not None and not _isValidHost(args.ip_port):
+        printError("Invalid IP")
+        sys.exit(1)
+
+
+def calibre_implementation(ip_port, library=None, threads=None):
+    """Download one library, or every library found, from a Calibre server.
+
+    library selects a single library (None means all); threads is the number
+    of parallel downloads (None means 1). This function always exits.
+    """
+    printInfo("Checking if server is accessible")
+    if not CalibreTools.checkServer(ip_port):
+        printError("Server is not accessible or requires authentication.")
+        sys.exit(1)
+    printSuccess("Server is accessible")
+    printInfo("Getting Libraries")
+    if library is None:
+        libraries = CalibreTools.getLibraries(ip_port)
+        if not libraries:
+            printError("Could not find any libraries")
+            sys.exit(1)
+        printSuccess(f'Found {len(libraries)} libraries')
+    else:
+        libraries = [library]
+    thr = threads if threads is not None else 1
+    for library_name in libraries:
+        printInfo(f'Counting books from library: {library_name}')
+        CalibreTools.downloadBooks(ip_port, library_name, None, thr)
+    printSuccess("Download Complete!")
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser("Calibre Dumper")
+    parser.add_argument('-c', '--calibre-host', type=str, dest='ip_port', action='store', help='Provide ip and port of calibre server, Format ip:port')
+    parser.add_argument('-l', '--library', type=str, dest='lib', action='store', help='Specify which library to download')
+    parser.add_argument('-t', '--threads', type=int, dest='thread', action='store', help='Specify how many download threads to use')
+    parser.usage = '''
+    CALIBRE DUMPER
+    1. Download all books from all libraries of calibre server using --calibre-host or -c
+    2. Specify library using --library or -l
+    3. Specify how many download threads to use --threads or -t
+    '''
+    args = parser.parse_args()
+    validateArgs(parser, args)
+    if args.ip_port is not None:
+        calibre_implementation(args.ip_port, args.lib, args.thread)
\ No newline at end of file