Functional Calibre Dumper with Threading

This commit is contained in:
Jacob Stevens 2023-03-19 02:40:18 -05:00
parent ed475b8b0f
commit 7b70a73ac2
3 changed files with 167 additions and 0 deletions

99
calibre_tools.py Normal file
View File

@ -0,0 +1,99 @@
import os
import re
import threading
import time
from _thread import start_new_thread
from concurrent.futures import ThreadPoolExecutor

import requests
import wget
from bs4 import BeautifulSoup

from log_tools import *
class CalibreTools:
    """Static helpers for listing and downloading books from a Calibre server.

    Every method talks to the server's ``/mobile`` HTML pages over plain HTTP
    and takes the server address as an ``ip:port`` string.
    """

    # Number of download workers currently running; always modified while
    # holding _counter_lock because workers update it concurrently.
    active_threads = 0
    # Not used by any method of this class; kept so existing references work.
    threads = []
    # Seconds to wait for the server before a request is abandoned. Without a
    # timeout ``requests`` can block forever on an unresponsive host.
    REQUEST_TIMEOUT = 30

    _counter_lock = threading.Lock()
    _TOTAL_BOOKS_RE = re.compile(r'Books \d+ to \d+ of (\d+)')
    _BOOK_LINK_RE = re.compile(r'\.epub|\.pdf|\.mobi')

    @staticmethod
    def checkServer(ip_port):
        """Return True if any known Calibre URL answers with HTTP 200.

        A URL that cannot be reached at all (connection error, timeout) is
        treated as "not accessible" instead of raising.
        """
        dorks = ['/interface-data/init', '/browse/categories/allbooks']
        for dork in dorks:
            try:
                response = requests.get(
                    f'http://{ip_port}{dork}',
                    timeout=CalibreTools.REQUEST_TIMEOUT,
                )
            except requests.RequestException:
                continue
            if response.status_code == 200:
                return True
        return False

    @staticmethod
    def getLibraries(ip_port):
        """Return the library names offered by the server's ``/mobile`` page.

        Names are the text of every ``<option>`` following the
        ``choose_library`` div, with spaces replaced by underscores. When the
        div is missing the single placeholder name ``"random"`` is returned.
        """
        # TODO: Fix by getting link to each library to list available libraries
        response = requests.get(
            f'http://{ip_port}/mobile',
            timeout=CalibreTools.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, features='html.parser')
        library_div = soup.find('div', attrs={'id': 'choose_library'})
        if library_div is None:
            return ["random"]
        return [
            str(option.text).replace(' ', '_')
            for option in library_div.find_all_next('option')
        ]

    @staticmethod
    def getTotalBooks(ip_port, library_name):
        """Return the library's book count as a string, or None if not found.

        The count is read from the first ``<span>`` whose text contains
        ``Books <a> to <b> of <total>``.
        """
        response = requests.get(
            f'http://{ip_port}/mobile?library_id={library_name}',
            timeout=CalibreTools.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, features='html.parser')
        for span in soup.find_all('span'):
            found = CalibreTools._TOTAL_BOOKS_RE.search(span.text)
            if found:
                # Capture the number itself instead of splitting on "of",
                # which breaks when other text in the span contains "of".
                return found.group(1)
        return None

    @staticmethod
    def downloadBooks(ip_port, library_name, max=None, dlthread=1):
        """Download every book of ``library_name`` into ``./<host>-<library>``.

        ``max`` optionally limits how many books are listed and ``dlthread``
        is the number of parallel downloads (values below 1 are treated as 1).
        The call returns only after all downloads have finished.
        """
        download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max)
        directory_name = f'{CalibreTools._directoryHost(ip_port)}-{library_name}'
        os.makedirs(f'./{directory_name}', exist_ok=True)
        printInfo(f'Found {len(download_links)} books')
        printInfo(f'Downloading Books. This can take a while')
        workers = dlthread if dlthread and dlthread > 0 else 1
        # Leaving the ``with`` block waits for every queued download, so the
        # caller cannot report completion (or exit) while files are in flight.
        with ThreadPoolExecutor(max_workers=workers) as pool:
            for link in download_links:
                pool.submit(CalibreTools._runDownload, link, directory_name)

    @staticmethod
    def getBooksLink(ip_port, library_name, multiple, max=None):
        """Return download URLs for epub/pdf/mobi files in ``library_name``.

        The listing is fetched ``multiple`` books per request. ``max``, when
        given, replaces the server-reported total as the number of books to
        list. An empty list is returned when the total cannot be determined.
        """
        if max is not None:
            n_books = max
        else:
            total = CalibreTools.getTotalBooks(ip_port, library_name)
            n_books = int(total) if total is not None else 0
        books_links = []
        for page in range(CalibreTools._pageCount(n_books, multiple)):
            start = multiple * page + 1
            # NOTE(review): the previous code sent num=<total> on every page
            # while stepping ``start`` by ``multiple``; if the server honours
            # ``num`` the pages overlap and links repeat. Asking for one page
            # worth (less on the last page) keeps the pages disjoint.
            num = min(multiple, n_books - start + 1)
            response = requests.get(
                f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}'
                f'&num={num}&order=descending&start={start}',
                timeout=CalibreTools.REQUEST_TIMEOUT,
            )
            soup = BeautifulSoup(response.text, features='html.parser')
            for link in soup.find_all('a', attrs={'href': CalibreTools._BOOK_LINK_RE}):
                books_links.append(f'http://{ip_port}{link.get("href")}')
        return books_links

    @staticmethod
    def threadDownloadBook(link, directory_name):
        """Download ``link`` into ``directory_name`` and decrement ``active_threads``.

        A failed download is reported as a warning and does not raise, so one
        bad link cannot stop the remaining downloads.
        """
        try:
            wget.download(link, directory_name)
        except Exception as error:
            printWarning(f'Failed to download {link}: {error}')
        finally:
            with CalibreTools._counter_lock:
                CalibreTools.active_threads -= 1

    @staticmethod
    def _runDownload(link, directory_name):
        """Count one running worker, then delegate to ``threadDownloadBook``."""
        with CalibreTools._counter_lock:
            CalibreTools.active_threads += 1
        CalibreTools.threadDownloadBook(link, directory_name)

    @staticmethod
    def _directoryHost(ip_port):
        """Return the host part of ``ip_port`` in a form usable as a directory name."""
        # Only the last colon separates the port; IPv6 hosts contain colons too.
        host = str(ip_port).rsplit(':', 1)[0]
        # ':' is not allowed in directory names on Windows.
        return host.replace(':', '_')

    @staticmethod
    def _pageCount(n_books, multiple):
        """Return how many pages of ``multiple`` books cover ``n_books`` books."""
        if n_books <= 0:
            return 0
        return -(-n_books // multiple)

13
log_tools.py Normal file
View File

@ -0,0 +1,13 @@
from colorama import Style, Fore
def printError(message):
    """Print ``message`` in red with an ``[Error]`` prefix, then reset the style."""
    line = f'{Fore.RED}[Error] - {message}{Style.RESET_ALL}'
    print(line)
def printWarning(message):
    """Print ``message`` in yellow with a ``[Warning]`` prefix, then reset the style."""
    line = f'{Fore.YELLOW}[Warning] - {message}{Style.RESET_ALL}'
    print(line)
def printInfo(message):
    """Print ``message`` in blue with an ``[Info]`` prefix, then reset the style."""
    line = f'{Fore.BLUE}[Info] - {message}{Style.RESET_ALL}'
    print(line)
def printSuccess(message):
    """Print ``message`` in green with a ``[Success]`` prefix, then reset the style."""
    line = f'{Fore.GREEN}[Success] - {message}{Style.RESET_ALL}'
    print(line)

55
main.py Normal file
View File

@ -0,0 +1,55 @@
import re
import argparse
from log_tools import *
from calibre_tools import CalibreTools
# ``<ipv4>:<port>``; octets are only checked for being 1-3 digits long.
_IPV4_PORT_RE = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+$')
# ``<ipv6>:<port>``; one alternative per accepted IPv6 spelling.
_IPV6_PORT_RE = re.compile(
    r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}'
    r'|([0-9a-fA-F]{1,4}:){1,7}:'
    r'|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}'
    r'|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}'
    r'|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}'
    r'|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}'
    r'|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}'
    r'|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})'
    r'|:((:[0-9a-fA-F]{1,4}){1,7}|:)'
    r'|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}'
    r'|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
    r'|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
    r'):\d+$'
)


def validateArgs(parser, args):
    """Validate parsed command-line arguments, exiting with status 1 on error.

    Args:
        parser: The argument parser; accepted for interface compatibility and
            not used.
        args: Parsed namespace providing ``ip_port`` and ``lib``.

    Raises:
        SystemExit: If neither ``ip_port`` nor ``lib`` was given, or if
            ``ip_port`` is not of the form ``<ipv4>:<port>`` or
            ``<ipv6>:<port>``.
    """
    # The library option is stored under ``lib`` (its argparse ``dest``);
    # reading ``args.library`` raised AttributeError.
    if args.ip_port is None and args.lib is None:
        printError("Minimum one parameter required")
        raise SystemExit(1)
    if args.ip_port is None:
        return
    if _IPV4_PORT_RE.match(args.ip_port) or _IPV6_PORT_RE.match(args.ip_port):
        return
    printError("Invalid IP")
    raise SystemExit(1)
def calibre_implementation(ip_port):
    """Download every selected library from the Calibre server at ``ip_port``.

    Reads the module-level ``args`` namespace created in the ``__main__``
    block: ``args.lib`` restricts the run to one library and ``args.thread``
    sets the number of download threads (default 1).

    Raises:
        SystemExit: With status 1 when the server check fails or no library
            is found, and with status 0 once all downloads were requested.
    """
    printInfo("Checking if server is accessible")
    if not CalibreTools.checkServer(ip_port):
        # A failed check only means no probe URL returned 200; authentication
        # is one possible cause, not the only one.
        printError("Server is not accessible (unreachable or requires authentication).")
        raise SystemExit(1)
    printSuccess("Server is accessible")

    printInfo("Getting Libraries")
    if args.lib is None:
        libraries = CalibreTools.getLibraries(ip_port)
        if not libraries:
            printError("Could not find any libraries")
            raise SystemExit(1)
        printSuccess(f'Found {len(libraries)} libraries')
    else:
        libraries = [args.lib]

    # Was written ``not is None``, which is a syntax error.
    thr = args.thread if args.thread is not None else 1
    for library in libraries:
        printInfo(f'Counting books from library: {library}')
        CalibreTools.downloadBooks(ip_port, library, None, thr)
    printSuccess("Download Complete!")
    raise SystemExit(0)
# Script entry point: build the CLI, validate the arguments, then start the dump.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        prog="Calibre Dumper",
        usage='''
CALIBRE DUMPER
1. Download all books from all libraries of calibre server using --calibre-host <IP:PORT> or -c <IP:PORT>
2. Specify library using --library <LIBRARY NAME> or -l <LIBRARY NAME>
3. Specify how many download threads to use --threads <NUMBER> or -t <NUMBER>
''',
    )
    parser.add_argument(
        '-c', '--calibre-host',
        dest='ip_port',
        type=str,
        action='store',
        help='Provide ip and port of calibre server, Format ip:port',
    )
    parser.add_argument(
        '-l', '--library',
        dest='lib',
        type=str,
        action='store',
        help='Specify which library to download',
    )
    parser.add_argument(
        '-t', '--threads',
        dest='thread',
        type=int,
        action='store',
        help='Specify how many download threads to use',
    )
    # ``args`` stays a module-level name because calibre_implementation reads it.
    args = parser.parse_args()
    validateArgs(parser, args)
    if args.ip_port is not None:
        calibre_implementation(args.ip_port)