Now exports book links to a file instead of keeping them in RAM. Should also work with format filters.

2023-04-12 16:08:51 -05:00 · 2023-04-12 16:08:51 -05:00 · 4c6d31d5e4
commit 4c6d31d5e4
parent 6189258a6f
2 changed files with 25 additions and 13 deletions
--- a/calibre_tools.py
+++ b/calibre_tools.py
@ -3,6 +3,7 @@ import re
 import cgi
 import time
 import requests
 import linecache
 from tools import *
 from bs4 import BeautifulSoup
 from _thread import start_new_thread
@ -23,7 +24,6 @@ class CalibreTools:
    @staticmethod
    def getLibraries(ip_port):
        # TODO: Fix by getting link to each library to list available libraries
        libraries = []
        request = requests.get(f'http://{ip_port}/mobile')
        soap = BeautifulSoup(request.text, features='html.parser')
@ -47,9 +47,10 @@ class CalibreTools:
                return str(span.text).split('of')[1].strip()
    @staticmethod
-    def downloadBooks(ip_port, library_name, max=None, dlthread=1):
+    def downloadBooks(ip_port, library_name, max=None, update=False, format=None, dlthread=1):
        downloaded_books = 0
-        download_links = CalibreTools.getBooksLink(ip_port, library_name, 25, max)
+        CalibreTools.getBooksLink(ip_port, library_name, 25, max, update, format)
        total_book_links = sum(1 for line in open(ip_port + "  " + library_name + " links.txt"))
        directory_ip = "'"
        if re.match('^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+$', ip_port): 
            directory_ip = str(ip_port).split(':')[0]
@ -60,23 +61,23 @@ class CalibreTools:
            os.mkdir(f'./{directory_name}')
        except FileExistsError:
            pass
-        printInfo(f'Found {len(download_links)} books')
+        printInfo(f'Found {total_book_links} books')
        printInfo(f'Downloading Books. This can take a while')
        TerminalInterface.clearTerminal()
        TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)
-        while downloaded_books != len(download_links):
+        while downloaded_books != total_book_links:
            if CalibreTools.active_threads != dlthread:
                for x in range(3, 3+dlthread):
                    if CalibreTools.threads.count(x) == 0:
-                        start_new_thread(CalibreTools.threadDownloadBook, (x, download_links[downloaded_books], directory_name, ))
+                        start_new_thread(CalibreTools.threadDownloadBook, (x, linecache.getline((ip_port + "  " + library_name + " links.txt"), downloaded_books), directory_name, ))
                        CalibreTools.active_threads += 1
                        downloaded_books += 1
-                TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(len(download_links)), 2)
+                TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(total_book_links), 2)
            else:
                TerminalInterface.runInterfaceSchedule()
    @staticmethod
-    def getBooksLink(ip_port, library_name, multiple, max=None):
+    def getBooksLink(ip_port, library_name, multiple, max, update, format):
        n_books = int(CalibreTools.getTotalBooks(ip_port, library_name))
        if max is not None:
            n_books = max
@ -84,16 +85,20 @@ class CalibreTools:
        if difference_next_multiple == multiple:
            difference_next_multiple = 0
        major_number_it = (n_books + difference_next_multiple) // 25
-        books_links = []
+        if update:
            if os.path.exists(ip_port + "  " + library_name + " links.txt"):
                os.remove(ip_port + "  " + library_name + " links.txt")
        for i in range(major_number_it):
            start = multiple * i + 1
            request = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={n_books}&order=descending&start={start}')
            soup = BeautifulSoup(request.text, features='html.parser')
-            links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi')})
+            links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub|\\.pdf|\\.mobi|\\.cbr')})
            if format != None:
                links_soup = soup.findAll('a', attrs={'href': re.compile(format)})
            for link in links_soup:
                download_link = f'http://{ip_port}{link.get("href")}'
-                books_links.append(download_link)
+                with open((ip_port + "  " + library_name + " links.txt"), "a") as f:
-        return books_links
+                    f.write(download_link + "\n")
    @staticmethod
    def threadDownloadBook(line, link, directory_name):
--- a/main.py
+++ b/main.py
@ -23,6 +23,7 @@ def calibre_implementation(ip_port):
    libraries = []
    thr = 1
    if args.lib is None:
        # TODO: Export to a txt file to free up ram once completed
        libraries = CalibreTools.getLibraries(ip_port)
        if len(libraries) == 0:
            printError("Could not find any libraries")
@ -34,19 +35,25 @@ def calibre_implementation(ip_port):
        thr = args.thread
    for library in libraries:
        printInfo(f'Counting books from library: {library}')
-        CalibreTools.downloadBooks(ip_port, library, None, thr)
+        CalibreTools.downloadBooks(ip_port, library, None, update, format, thr)
    printSuccess("Download Complete!")
    exit(0)
 # TODO: Add support to specify what format you want to download. 
 # Ex. -f epub,pdf
 #     -f epub
 if __name__ == '__main__':
    parser = argparse.ArgumentParser("Calibre Dumper")
    parser.add_argument('-c', '--calibre-host', type=str, dest='ip_port', action='store', help='Provide ip and port of calibre server, Format ip:port')
    parser.add_argument('-l', '--library', type=str, dest='lib', action='store', help='Specify which library to download')
    parser.add_argument('-t', '--threads', type=int, dest='thread', action='store', help='Specify how many download threads to use')
    parser.add_argument('-f', '--format', type=str, dest='format', action='store', help='Specify what format youd like to download. Ex. \\.epub|\\.mobi|\\.pdf')
    parser.add_argument('-u', '--update', type=bool, dest='update', action='store', help='Force Scraper to get updated list of books')
    parser.usage = '''
    CALIBRE DUMPER
    1. Download all books from all libraries of calibre server using --calibre-host <IP:PORT> or -c <IP:PORT>
    2. Specify library using --library <LIBRARY NAME> or -l <LIBRARY NAME>
    4. Specify what format youd like to download. Ex -f Ex. \\.epub|\\.mobi|\\.pdf
    3. Specify how many download threads to use --threads <NUMBER> or -t <NUMBER>
    '''
    args = parser.parse_args()