Calibre-Dumper/calibre_tools.py

130 lines
6.8 KiB
Python
Raw Normal View History

import os
import re
import cgi
import time
import requests
import linecache
from tools import *
from bs4 import BeautifulSoup
from _thread import start_new_thread
from urllib.request import urlopen, urlretrieve
class CalibreTools:
    """Probe a Calibre content server and bulk-download the books it lists.

    Every method is static. The two public class attributes are shared
    bookkeeping for the download worker threads started by ``downloadBooks``.
    """

    # Number of workers started by downloadBooks that have not finished yet.
    # Kept for code that reads it; scheduling decisions are taken from
    # ``threads`` instead, because ``+=`` / ``-=`` issued from several threads
    # can lose updates.
    active_threads = 0
    # Status-line slots currently owned by a running download worker.
    threads = []

    # Worker status lines start at this row; downloadBooks writes its header
    # to row 1 and its overall progress to row 2.
    _FIRST_SLOT = 3
    _TOTAL_BOOKS_RE = re.compile(r'Books \d+ to \d+ of (\d+)')
    _DEFAULT_FORMATS_RE = re.compile(r'\.epub|\.pdf|\.mobi|\.cbr')

    @staticmethod
    def checkServer(ip_port):
        """Return True if ``ip_port`` answers HTTP 200 on one of the probe paths.

        Errors raised by ``requests.get`` (unreachable host, ...) propagate to
        the caller.
        """
        dorks = ['/interface-data/init', '/browse/categories/allbooks']
        return any(
            requests.get(f'http://{ip_port}{dork}').status_code == 200
            for dork in dorks
        )

    @staticmethod
    def getLibraries(ip_port):
        """Return the library names offered on the server's ``/mobile`` page.

        Spaces in names are replaced by underscores. When the page has no
        ``choose_library`` element, the single placeholder name ``"random"``
        is returned.
        """
        response = requests.get(f'http://{ip_port}/mobile')
        soup = BeautifulSoup(response.text, features='html.parser')
        library_div = soup.find('div', attrs={'id': 'choose_library'})
        if library_div is None:
            return ["random"]
        return [
            str(option.text).replace(' ', '_')
            for option in library_div.find_all_next('option')
        ]

    @staticmethod
    def getTotalBooks(ip_port, library_name):
        """Return the total book count of a library as a string of digits.

        The count is read from the ``Books X to Y of N`` text on the
        ``/mobile`` page. Returns ``None`` when no such text is found.
        """
        response = requests.get(f'http://{ip_port}/mobile?library_id={library_name}')
        soup = BeautifulSoup(response.text, features='html.parser')
        for span in soup.find_all('span'):
            match = CalibreTools._TOTAL_BOOKS_RE.search(span.text)
            if match:
                # Capture the number directly instead of splitting on "of",
                # which breaks when the span holds any other text.
                return match.group(1)
        return None

    @staticmethod
    def downloadBooks(ip_port, library_name, max=None, update=False, file_format=None, dlthread=1):
        """Collect the download links of a library and fetch them in parallel.

        Args:
            ip_port: ``host:port`` of the server.
            library_name: library id as used in the ``library_id`` query field.
            max: optional upper bound on the number of books to list.
            update: when true, an existing links file is deleted first.
            file_format: optional regular expression the link ``href`` must
                match; the default matches epub, pdf, mobi and cbr links.
            dlthread: number of simultaneous download workers (minimum 1).

        Files are stored in ``./<host>-<library_name>``. The call returns once
        every worker it started has finished.
        """
        CalibreTools.getBooksLink(ip_port, library_name, 25, max, update, file_format)
        links = CalibreTools._readLinks(ip_port, library_name)
        total_book_links = len(links)

        directory_name = f'{CalibreTools._directoryHost(ip_port)}-{library_name}'
        try:
            os.mkdir(f'./{directory_name}')
        except FileExistsError:
            pass

        printInfo(f'Found {total_book_links} books')
        printInfo(f'Downloading Books. This can take a while')
        TerminalInterface.clearTerminal()
        TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)

        if dlthread < 1:
            # Zero workers would never make progress.
            dlthread = 1
        slots = range(CalibreTools._FIRST_SLOT, CalibreTools._FIRST_SLOT + dlthread)

        downloaded_books = 0
        while downloaded_books < total_book_links:
            launched = False
            for slot in slots:
                if downloaded_books >= total_book_links:
                    # Never dispatch past the end of the list, even when
                    # several slots are free at once.
                    break
                if slot in CalibreTools.threads:
                    continue
                # Claim the slot here, before the worker runs, so the same
                # slot cannot be handed out twice.
                CalibreTools.threads.append(slot)
                CalibreTools.active_threads += 1
                start_new_thread(
                    CalibreTools.threadDownloadBook,
                    (slot, links[downloaded_books], directory_name),
                )
                downloaded_books += 1
                launched = True
                TerminalInterface.appendInterface(
                    "Info",
                    "Downloading Books " + str(downloaded_books) + " out of " + str(total_book_links),
                    2,
                )
            if not launched:
                TerminalInterface.runInterfaceSchedule()

        # Keep servicing the interface until the in-flight workers release
        # their slots; returning earlier could let the process exit while
        # downloads are still being written.
        while any(slot in CalibreTools.threads for slot in slots):
            TerminalInterface.runInterfaceSchedule()

    @staticmethod
    def getBooksLink(ip_port, library_name, multiple, max, update, file_format):
        """Append the download links of a library to its links file.

        The ``/mobile`` listing is walked newest-first in pages of
        ``multiple`` books. ``max`` (when not ``None``) caps the number of
        books listed. With ``update`` true an existing links file is removed
        before new links are written. ``file_format`` is an optional regular
        expression applied to each link ``href``.
        """
        links_path = CalibreTools._linksPath(ip_port, library_name)
        total = CalibreTools.getTotalBooks(ip_port, library_name)
        # A page without a "Books X to Y of N" marker yields no total; treat
        # that as an empty library instead of failing on int(None).
        n_books = int(total) if total is not None else 0
        if max is not None:
            n_books = min(n_books, max)

        if update and os.path.exists(links_path):
            os.remove(links_path)

        if file_format is not None:
            href_pattern = re.compile(file_format)
        else:
            href_pattern = CalibreTools._DEFAULT_FORMATS_RE

        for start, num in CalibreTools._pageWindows(n_books, multiple):
            response = requests.get(
                f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}'
                f'&num={num}&order=descending&start={start}'
            )
            soup = BeautifulSoup(response.text, features='html.parser')
            anchors = soup.find_all('a', attrs={'href': href_pattern})
            with open(links_path, "a") as f:
                f.writelines(f'http://{ip_port}{anchor.get("href")}\n' for anchor in anchors)

    @staticmethod
    def threadDownloadBook(line, link, directory_name):
        """Download one book into ``directory_name``; meant to run in a thread.

        Args:
            line: status-line slot of this worker; also its entry in
                ``CalibreTools.threads``.
            link: URL of the book (surrounding whitespace is ignored).
            directory_name: existing directory that receives the file.

        The file name is taken from the response headers. An existing file is
        kept when its size equals the announced ``Content-Length`` (or when no
        length is announced) and replaced otherwise. Failures are reported on
        the status line and never raised. On exit the slot is released and
        ``active_threads`` is decremented.
        """
        if line not in CalibreTools.threads:
            # Direct callers have not claimed the slot; downloadBooks has.
            CalibreTools.threads.append(line)
        worker = line - CalibreTools._FIRST_SLOT
        link = link.strip()
        filename = link
        try:
            try:
                # Only the headers are needed here; close the probe
                # connection before urlretrieve opens its own.
                with urlopen(link) as remotefile:
                    remoteinfo = remotefile.info()
                served_name = remoteinfo.get_filename()
                if not served_name:
                    raise ValueError('response does not announce a file name')
                # The name comes from the remote server: keep only its last
                # path component so it cannot escape directory_name.
                safe_name = os.path.basename(served_name.replace('\\', '/'))
                if safe_name in ('', '.', '..'):
                    raise ValueError('unusable file name in response')
                filename = safe_name
                filesize = remoteinfo['Content-Length']
                target = directory_name + "/" + filename
                if not os.path.exists(target):
                    TerminalInterface.appendInterface("Info", f"Thread-{worker} Downloading {filename}", line)
                    urlretrieve(link, target)
                    TerminalInterface.appendInterface("Success", f"Thread-{worker} Completed", line)
                elif filesize is not None and int(filesize) != os.path.getsize(target):
                    TerminalInterface.appendInterface("Warning", f'Thread-{worker} Filesize does not match... Downloading file. {filename}', line)
                    os.remove(target)
                    urlretrieve(link, target)
                    TerminalInterface.appendInterface("Success", f'Thread-{worker} Completed {filename}', line)
                else:
                    TerminalInterface.appendInterface("Success", f'Thread-{worker} File Exists... Skipping {filename}', line)
            except Exception:
                TerminalInterface.appendInterface("Error", f'Thread-{worker} Encountered a issue downloading file. Skipping {filename}', line)
            # Hold the slot for a moment before it can be reused.
            time.sleep(2)
        finally:
            # Always release the slot, otherwise downloadBooks waits forever.
            CalibreTools.active_threads -= 1
            if line in CalibreTools.threads:
                CalibreTools.threads.remove(line)

    @staticmethod
    def _linksPath(ip_port, library_name):
        """Return the path of the text file holding one download link per line."""
        return ip_port + " " + library_name + " links.txt"

    @staticmethod
    def _readLinks(ip_port, library_name):
        """Return the non-empty, stripped lines of the links file ([] if absent)."""
        try:
            with open(CalibreTools._linksPath(ip_port, library_name)) as f:
                return [entry.strip() for entry in f if entry.strip()]
        except FileNotFoundError:
            return []

    @staticmethod
    def _directoryHost(ip_port):
        """Return the host part of ``host:port`` in a directory-name-safe form.

        The port is everything after the last colon. Square brackets around
        an IPv6 literal are dropped and remaining colons become underscores.
        A value without any colon is returned unchanged.
        """
        text = str(ip_port)
        host, separator, _port = text.rpartition(':')
        if not separator:
            host = text
        return host.strip('[]').replace(':', '_')

    @staticmethod
    def _pageWindows(n_books, multiple):
        """Return ``(start, num)`` for each listing page covering ``n_books``.

        ``start`` is 1-based; ``num`` is ``multiple`` except on a shorter
        final page, so the windows never overlap and never exceed ``n_books``.
        """
        return [
            (first + 1, min(multiple, n_books - first))
            for first in range(0, n_books, multiple)
        ]