Calibre-Dumper/calibre_tools.py

99 lines
4.5 KiB
Python
Raw Normal View History

import os
import re
import threading
import time
from _thread import start_new_thread
from concurrent.futures import ThreadPoolExecutor

import requests
import wget
from bs4 import BeautifulSoup

from log_tools import *
class CalibreTools:
    """Static helpers for probing a Calibre content server and dumping books.

    ``ip_port`` is always the server address exactly as it is placed after
    ``http://`` in a URL, e.g. ``"192.168.1.5:8080"``.
    """

    # Number of downloads handed to worker threads that have not finished.
    active_threads = 0
    # Kept for backward compatibility; nothing in this class populates it.
    threads = []
    # Seconds to wait on any single HTTP request before giving up.
    REQUEST_TIMEOUT = 30
    # Guards active_threads, which is updated from several threads.
    _counter_lock = threading.Lock()

    @staticmethod
    def checkServer(ip_port):
        """Return True when one of the known Calibre URLs answers HTTP 200.

        Connection problems (refused, timeout, DNS failure, ...) count as
        "not a Calibre server" for that URL instead of raising.
        """
        dorks = ('/interface-data/init', '/browse/categories/allbooks')
        for dork in dorks:
            try:
                response = requests.get(
                    f'http://{ip_port}{dork}',
                    timeout=CalibreTools.REQUEST_TIMEOUT,
                )
            except requests.RequestException:
                # Unreachable via this path; try the next one.
                continue
            if response.status_code == 200:
                return True
        return False

    @staticmethod
    def getLibraries(ip_port):
        """Return the library names offered on the server's ``/mobile`` page.

        Spaces in each name are replaced by underscores. When the page has no
        ``choose_library`` element the single placeholder ``"random"`` is
        returned.
        """
        # TODO: Fix by getting link to each library to list available libraries
        response = requests.get(
            f'http://{ip_port}/mobile',
            timeout=CalibreTools.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, features='html.parser')
        library_div = soup.find('div', attrs={'id': 'choose_library'})
        if library_div is None:
            return ['random']
        return [
            str(option.text).replace(' ', '_')
            for option in library_div.find_all_next('option')
        ]

    @staticmethod
    def getTotalBooks(ip_port, library_name):
        """Return the library's book count as a string of digits, or None.

        The count is read from the "Books X to Y of Z" text on the
        ``/mobile`` page; None is returned when no span carries that text.
        """
        response = requests.get(
            f'http://{ip_port}/mobile?library_id={library_name}',
            timeout=CalibreTools.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, features='html.parser')
        counter_pattern = re.compile(r'Books \d+ to \d+ of (\d+)')
        for span in soup.find_all('span'):
            found = counter_pattern.search(span.text)
            if found:
                # Capture only the digits so surrounding text in the span
                # cannot leak into the value callers pass to int().
                return found.group(1)
        return None

    @staticmethod
    def downloadBooks(ip_port, library_name, max=None, dlthread=1):
        """Download the books of ``library_name`` into ``./<host>-<library>``.

        ``max`` limits how many books are listed (None means all of them) and
        ``dlthread`` is the number of simultaneous downloads. The call returns
        only after every download has finished or failed.
        """
        download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max)
        directory_name = f'{CalibreTools._directoryHost(ip_port)}-{library_name}'
        os.makedirs(f'./{directory_name}', exist_ok=True)
        printInfo(f'Found {len(download_links)} books')
        printInfo('Downloading Books. This can take a while')

        # `max` is shadowed by the parameter, so clamp the worker count by hand.
        try:
            workers = int(dlthread)
        except (TypeError, ValueError):
            workers = 1
        if workers < 1:
            workers = 1

        # Leaving the with-block waits for all submitted downloads, so the
        # interpreter cannot exit while files are still being written.
        with ThreadPoolExecutor(max_workers=workers) as pool:
            for link in download_links:
                with CalibreTools._counter_lock:
                    CalibreTools.active_threads += 1
                pool.submit(CalibreTools.threadDownloadBook, link, directory_name)

    @staticmethod
    def getBooksLink(ip_port, library_name, multiple, max=None):
        """Return the download URLs (epub/pdf/mobi) found in the library.

        The listing is read ``multiple`` books per request. ``max`` caps the
        number of books listed; it is ignored when larger than the library.
        An empty list is returned when the total is unknown and no ``max``
        was given.
        """
        total_text = CalibreTools.getTotalBooks(ip_port, library_name)
        if total_text is not None:
            n_books = int(total_text)
            if max is not None and max < n_books:
                n_books = max
        elif max is not None:
            # Total could not be read; fall back to the caller's limit.
            n_books = max
        else:
            return []

        link_pattern = re.compile(r'\.epub|\.pdf|\.mobi')
        books_links = []
        for start, count in CalibreTools._pageWindows(n_books, multiple):
            # Request exactly one window per call (assuming the server honours
            # `num` as the page size) so consecutive pages do not overlap.
            response = requests.get(
                f'http://{ip_port}/mobile?sort=timestamp'
                f'&library_id={library_name}&num={count}'
                f'&order=descending&start={start}',
                timeout=CalibreTools.REQUEST_TIMEOUT,
            )
            soup = BeautifulSoup(response.text, features='html.parser')
            for link in soup.find_all('a', attrs={'href': link_pattern}):
                books_links.append(f'http://{ip_port}{link.get("href")}')
        # Drop repeated URLs while keeping first-seen order.
        return list(dict.fromkeys(books_links))

    @staticmethod
    def threadDownloadBook(link, directory_name):
        """Download ``link`` into ``directory_name`` and release one slot.

        A failed download is reported and does not raise; ``active_threads``
        is decremented in every case.
        """
        try:
            wget.download(link, directory_name)
        except Exception as error:
            printInfo(f'Failed to download {link}: {error}')
        finally:
            with CalibreTools._counter_lock:
                CalibreTools.active_threads -= 1

    @staticmethod
    def _directoryHost(ip_port):
        """Return the host part of ``ip_port`` made safe for a directory name.

        A trailing ``:<port>`` and IPv6 brackets are removed, then remaining
        colons become underscores because colons are not valid in directory
        names on every platform.
        """
        address = str(ip_port)
        if address.startswith('['):
            host = address[1:].split(']', 1)[0]
        else:
            host, separator, port = address.rpartition(':')
            if not separator or not port.isdigit():
                # No port present: the whole string is the host.
                host = address
        return host.replace(':', '_')

    @staticmethod
    def _pageWindows(n_books, multiple):
        """Return the ``(start, count)`` pair of each page covering n_books.

        ``start`` is 1-based; the last page is shortened so the counts sum to
        ``n_books``. Raises ValueError when ``multiple`` is not positive.
        """
        if multiple < 1:
            raise ValueError('multiple must be a positive integer')
        return [
            (start, min(multiple, n_books - start + 1))
            for start in range(1, n_books + 1, multiple)
        ]