Now exports to file instead of using RAM. Also should work with format filters.
This commit is contained in:
parent
6189258a6f
commit
4c6d31d5e4
@ -3,6 +3,7 @@ import re
|
|||||||
import cgi
|
import cgi
|
||||||
import time
|
import time
|
||||||
import requests
|
import requests
|
||||||
|
import linecache
|
||||||
from tools import *
|
from tools import *
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from _thread import start_new_thread
|
from _thread import start_new_thread
|
||||||
@ -23,7 +24,6 @@ class CalibreTools:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getLibraries(ip_port):
|
def getLibraries(ip_port):
|
||||||
# TODO: Fix by getting link to each library to list available libraries
|
|
||||||
libraries = []
|
libraries = []
|
||||||
request = requests.get(f'http://{ip_port}/mobile')
|
request = requests.get(f'http://{ip_port}/mobile')
|
||||||
soap = BeautifulSoup(request.text, features='html.parser')
|
soap = BeautifulSoup(request.text, features='html.parser')
|
||||||
@ -47,9 +47,10 @@ class CalibreTools:
|
|||||||
return str(span.text).split('of')[1].strip()
|
return str(span.text).split('of')[1].strip()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def downloadBooks(ip_port, library_name, max=None, dlthread=1):
|
def downloadBooks(ip_port, library_name, max=None, update=False, format=None, dlthread=1):
|
||||||
downloaded_books = 0
|
downloaded_books = 0
|
||||||
download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max)
|
CalibreTools.getBooksLink(ip_port, library_name, 25, max, update, format)
|
||||||
|
total_book_links = sum(1 for line in open(ip_port + " " + library_name + " links.txt"))
|
||||||
directory_ip = "'"
|
directory_ip = "'"
|
||||||
if re.match('^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+$', ip_port):
|
if re.match('^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+$', ip_port):
|
||||||
directory_ip = str(ip_port).split(':')[0]
|
directory_ip = str(ip_port).split(':')[0]
|
||||||
@ -60,23 +61,23 @@ class CalibreTools:
|
|||||||
os.mkdir(f'./{directory_name}')
|
os.mkdir(f'./{directory_name}')
|
||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
pass
|
pass
|
||||||
printInfo(f'Found {len(download_links)} books')
|
printInfo(f'Found {total_book_links} books')
|
||||||
printInfo(f'Downloading Books. This can take a while')
|
printInfo(f'Downloading Books. This can take a while')
|
||||||
TerminalInterface.clearTerminal()
|
TerminalInterface.clearTerminal()
|
||||||
TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)
|
TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)
|
||||||
while downloaded_books != len(download_links):
|
while downloaded_books != total_book_links:
|
||||||
if CalibreTools.active_threads != dlthread:
|
if CalibreTools.active_threads != dlthread:
|
||||||
for x in range(3, 3+dlthread):
|
for x in range(3, 3+dlthread):
|
||||||
if CalibreTools.threads.count(x) == 0:
|
if CalibreTools.threads.count(x) == 0:
|
||||||
start_new_thread(CalibreTools.threadDownloadBook, (x, download_links[downloaded_books], directory_name, ))
|
start_new_thread(CalibreTools.threadDownloadBook, (x, linecache.getline((ip_port + " " + library_name + " links.txt"), downloaded_books), directory_name, ))
|
||||||
CalibreTools.active_threads += 1
|
CalibreTools.active_threads += 1
|
||||||
downloaded_books += 1
|
downloaded_books += 1
|
||||||
TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(len(download_links)), 2)
|
TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(total_book_links), 2)
|
||||||
else:
|
else:
|
||||||
TerminalInterface.runInterfaceSchedule()
|
TerminalInterface.runInterfaceSchedule()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getBooksLink(ip_port, library_name, multiple, max=None):
|
def getBooksLink(ip_port, library_name, multiple, max, update, format):
|
||||||
n_books = int(CalibreTools.getTotalBooks(ip_port, library_name))
|
n_books = int(CalibreTools.getTotalBooks(ip_port, library_name))
|
||||||
if max is not None:
|
if max is not None:
|
||||||
n_books = max
|
n_books = max
|
||||||
@ -84,16 +85,20 @@ class CalibreTools:
|
|||||||
if difference_next_multiple == multiple:
|
if difference_next_multiple == multiple:
|
||||||
difference_next_multiple = 0
|
difference_next_multiple = 0
|
||||||
major_number_it = (n_books + difference_next_multiple) // 25
|
major_number_it = (n_books + difference_next_multiple) // 25
|
||||||
books_links = []
|
if update:
|
||||||
|
if os.path.exists(ip_port + " " + library_name + " links.txt"):
|
||||||
|
os.remove(ip_port + " " + library_name + " links.txt")
|
||||||
for i in range(major_number_it):
|
for i in range(major_number_it):
|
||||||
start = multiple * i + 1
|
start = multiple * i + 1
|
||||||
request = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={n_books}&order=descending&start={start}')
|
request = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={n_books}&order=descending&start={start}')
|
||||||
soup = BeautifulSoup(request.text, features='html.parser')
|
soup = BeautifulSoup(request.text, features='html.parser')
|
||||||
links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi')})
|
links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi|\\.cbr')})
|
||||||
|
if format != None:
|
||||||
|
links_soup = soup.findAll('a', attrs={'href': re.compile(format)})
|
||||||
for link in links_soup:
|
for link in links_soup:
|
||||||
download_link = f'http://{ip_port}{link.get("href")}'
|
download_link = f'http://{ip_port}{link.get("href")}'
|
||||||
books_links.append(download_link)
|
with open((ip_port + " " + library_name + " links.txt"), "a") as f:
|
||||||
return books_links
|
f.write(download_link + "\n")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def threadDownloadBook(line, link, directory_name):
|
def threadDownloadBook(line, link, directory_name):
|
||||||
|
9
main.py
9
main.py
@ -23,6 +23,7 @@ def calibre_implementation(ip_port):
|
|||||||
libraries = []
|
libraries = []
|
||||||
thr = 1
|
thr = 1
|
||||||
if args.lib is None:
|
if args.lib is None:
|
||||||
|
# TODO: Export to a txt file to free up ram once completed
|
||||||
libraries = CalibreTools.getLibraries(ip_port)
|
libraries = CalibreTools.getLibraries(ip_port)
|
||||||
if len(libraries) == 0:
|
if len(libraries) == 0:
|
||||||
printError("Could not find any libraries")
|
printError("Could not find any libraries")
|
||||||
@ -34,19 +35,25 @@ def calibre_implementation(ip_port):
|
|||||||
thr = args.thread
|
thr = args.thread
|
||||||
for library in libraries:
|
for library in libraries:
|
||||||
printInfo(f'Counting books from library: {library}')
|
printInfo(f'Counting books from library: {library}')
|
||||||
CalibreTools.downloadBooks(ip_port, library, None, thr)
|
CalibreTools.downloadBooks(ip_port, library, None, update, format, thr)
|
||||||
printSuccess("Download Complete!")
|
printSuccess("Download Complete!")
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
|
# TODO: Add support to specify what format you want to download.
|
||||||
|
# Ex. -f epub,pdf
|
||||||
|
# -f epub
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser("Calibre Dumper")
|
parser = argparse.ArgumentParser("Calibre Dumper")
|
||||||
parser.add_argument('-c', '--calibre-host', type=str, dest='ip_port', action='store', help='Provide ip and port of calibre server, Format ip:port')
|
parser.add_argument('-c', '--calibre-host', type=str, dest='ip_port', action='store', help='Provide ip and port of calibre server, Format ip:port')
|
||||||
parser.add_argument('-l', '--library', type=str, dest='lib', action='store', help='Specify which library to download')
|
parser.add_argument('-l', '--library', type=str, dest='lib', action='store', help='Specify which library to download')
|
||||||
parser.add_argument('-t', '--threads', type=int, dest='thread', action='store', help='Specify how many download threads to use')
|
parser.add_argument('-t', '--threads', type=int, dest='thread', action='store', help='Specify how many download threads to use')
|
||||||
|
parser.add_argument('-f', '--format', type=str, dest='format', action='store', help='Specify what format youd like to download. Ex. \\.epub|\\.mobi|\\.pdf')
|
||||||
|
parser.add_argument('-u', '--update', type=bool, dest='update', action='store', help='Force Scraper to get updated list of books')
|
||||||
parser.usage = '''
|
parser.usage = '''
|
||||||
CALIBRE DUMPER
|
CALIBRE DUMPER
|
||||||
1. Download all books from all libraries of calibre server using --calibre-host <IP:PORT> or -c <IP:PORT>
|
1. Download all books from all libraries of calibre server using --calibre-host <IP:PORT> or -c <IP:PORT>
|
||||||
2. Specify library using --library <LIBRARY NAME> or -l <LIBRARY NAME>
|
2. Specify library using --library <LIBRARY NAME> or -l <LIBRARY NAME>
|
||||||
|
4. Specify what format youd like to download. Ex -f Ex. \\.epub|\\.mobi|\\.pdf
|
||||||
3. Specify how many download threads to use --threads <NUMBER> or -t <NUMBER>
|
3. Specify how many download threads to use --threads <NUMBER> or -t <NUMBER>
|
||||||
'''
|
'''
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
Loading…
Reference in New Issue
Block a user