Now exports the scraped book links to a file instead of holding them in RAM. Should also work with format filters.

This commit is contained in:
Jacob Stevens 2023-04-12 16:08:51 -05:00
parent 6189258a6f
commit 4c6d31d5e4
2 changed files with 25 additions and 13 deletions

View File

@ -3,6 +3,7 @@ import re
import cgi
import time
import requests
import linecache
from tools import *
from bs4 import BeautifulSoup
from _thread import start_new_thread
@ -23,7 +24,6 @@ class CalibreTools:
@staticmethod
def getLibraries(ip_port):
# TODO: Fix by getting link to each library to list available libraries
libraries = []
request = requests.get(f'http://{ip_port}/mobile')
soap = BeautifulSoup(request.text, features='html.parser')
@ -47,9 +47,10 @@ class CalibreTools:
return str(span.text).split('of')[1].strip()
@staticmethod
def downloadBooks(ip_port, library_name, max=None, dlthread=1):
def downloadBooks(ip_port, library_name, max=None, update=False, format=None, dlthread=1):
downloaded_books = 0
download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max)
CalibreTools.getBooksLink(ip_port, library_name, 25, max, update, format)
total_book_links = sum(1 for line in open(ip_port + " " + library_name + " links.txt"))
directory_ip = "'"
if re.match('^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+$', ip_port):
directory_ip = str(ip_port).split(':')[0]
@ -60,23 +61,23 @@ class CalibreTools:
os.mkdir(f'./{directory_name}')
except FileExistsError:
pass
printInfo(f'Found {len(download_links)} books')
printInfo(f'Found {total_book_links} books')
printInfo(f'Downloading Books. This can take a while')
TerminalInterface.clearTerminal()
TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)
while downloaded_books != len(download_links):
while downloaded_books != total_book_links:
if CalibreTools.active_threads != dlthread:
for x in range(3, 3+dlthread):
if CalibreTools.threads.count(x) == 0:
start_new_thread(CalibreTools.threadDownloadBook, (x, download_links[downloaded_books], directory_name, ))
start_new_thread(CalibreTools.threadDownloadBook, (x, linecache.getline((ip_port + " " + library_name + " links.txt"), downloaded_books), directory_name, ))
CalibreTools.active_threads += 1
downloaded_books += 1
TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(len(download_links)), 2)
TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(total_book_links), 2)
else:
TerminalInterface.runInterfaceSchedule()
@staticmethod
def getBooksLink(ip_port, library_name, multiple, max=None):
def getBooksLink(ip_port, library_name, multiple, max, update, format):
n_books = int(CalibreTools.getTotalBooks(ip_port, library_name))
if max is not None:
n_books = max
@ -84,16 +85,20 @@ class CalibreTools:
if difference_next_multiple == multiple:
difference_next_multiple = 0
major_number_it = (n_books + difference_next_multiple) // 25
books_links = []
if update:
if os.path.exists(ip_port + " " + library_name + " links.txt"):
os.remove(ip_port + " " + library_name + " links.txt")
for i in range(major_number_it):
start = multiple * i + 1
request = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={n_books}&order=descending&start={start}')
soup = BeautifulSoup(request.text, features='html.parser')
links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi')})
links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi|\\.cbr')})
if format != None:
links_soup = soup.findAll('a', attrs={'href': re.compile(format)})
for link in links_soup:
download_link = f'http://{ip_port}{link.get("href")}'
books_links.append(download_link)
return books_links
with open((ip_port + " " + library_name + " links.txt"), "a") as f:
f.write(download_link + "\n")
@staticmethod
def threadDownloadBook(line, link, directory_name):

View File

@ -23,6 +23,7 @@ def calibre_implementation(ip_port):
libraries = []
thr = 1
if args.lib is None:
# TODO: Export to a txt file to free up ram once completed
libraries = CalibreTools.getLibraries(ip_port)
if len(libraries) == 0:
printError("Could not find any libraries")
@ -34,19 +35,25 @@ def calibre_implementation(ip_port):
thr = args.thread
for library in libraries:
printInfo(f'Counting books from library: {library}')
CalibreTools.downloadBooks(ip_port, library, None, thr)
CalibreTools.downloadBooks(ip_port, library, None, update, format, thr)
printSuccess("Download Complete!")
exit(0)
# TODO: Add support to specify what format you want to download.
# Ex. -f epub,pdf
# -f epub
if __name__ == '__main__':
parser = argparse.ArgumentParser("Calibre Dumper")
parser.add_argument('-c', '--calibre-host', type=str, dest='ip_port', action='store', help='Provide ip and port of calibre server, Format ip:port')
parser.add_argument('-l', '--library', type=str, dest='lib', action='store', help='Specify which library to download')
parser.add_argument('-t', '--threads', type=int, dest='thread', action='store', help='Specify how many download threads to use')
parser.add_argument('-f', '--format', type=str, dest='format', action='store', help='Specify what format youd like to download. Ex. \\.epub|\\.mobi|\\.pdf')
parser.add_argument('-u', '--update', type=bool, dest='update', action='store', help='Force Scraper to get updated list of books')
parser.usage = '''
CALIBRE DUMPER
1. Download all books from all libraries of calibre server using --calibre-host <IP:PORT> or -c <IP:PORT>
2. Specify library using --library <LIBRARY NAME> or -l <LIBRARY NAME>
4. Specify what format youd like to download. Ex -f Ex. \\.epub|\\.mobi|\\.pdf
3. Specify how many download threads to use --threads <NUMBER> or -t <NUMBER>
'''
args = parser.parse_args()