Calibre-Dumper/calibre_tools.py

import cgi
import linecache
import os
import re
import threading
import time
from _thread import start_new_thread
from urllib.request import urlopen, urlretrieve

import requests
from bs4 import BeautifulSoup

from tools import *

class CalibreTools:
    """Probe a Calibre content server and bulk-download the books it lists.

    Every method is a static method; the class acts as a namespace plus a small
    amount of shared state (``active_threads`` / ``threads``) describing the
    download workers that are currently running.
    """

    # Number of download workers currently running.
    active_threads = 0
    # Interface line numbers ("slots") currently owned by a running worker.
    threads = []

    # Seconds to wait for a single probe/listing request before giving up.
    REQUEST_TIMEOUT = 30
    # Number of books requested per page of the /mobile listing.
    PAGE_SIZE = 25
    # Interface lines 1 and 2 carry the header and the progress counter, so
    # worker N reports on line FIRST_THREAD_LINE + N and is labelled "Thread-N".
    FIRST_THREAD_LINE = 3
    # Default link filter: hrefs that point at one of these e-book formats.
    _DEFAULT_FORMATS = r'\.epub|\.pdf|\.mobi|\.cbr'
    # Matches the pager text and captures the total book count.
    _TOTAL_BOOKS_RE = re.compile(r'Books \d+ to \d+ of (\d+)')
    # Guards active_threads / threads, which are touched by the scheduler
    # and by every worker thread.
    _state_lock = threading.Lock()

    @staticmethod
    def checkServer(ip_port):
        """Return True if ``ip_port`` answers HTTP 200 on a known Calibre path.

        Args:
            ip_port: ``host:port`` of the server to probe.

        Returns:
            True as soon as one probe path returns status 200, otherwise False.
            Connection errors and timeouts count as "not a server" (False)
            instead of propagating.
        """
        for dork in ('/interface-data/init', '/browse/categories/allbooks'):
            try:
                response = requests.get(f'http://{ip_port}{dork}',
                                        timeout=CalibreTools.REQUEST_TIMEOUT)
            except requests.RequestException:
                # Unreachable, timed out or malformed address: try the next path.
                continue
            if response.status_code == 200:
                return True
        return False

    @staticmethod
    def getLibraries(ip_port):
        """Return the library names offered by the server's ``/mobile`` page.

        Names are taken from the ``<option>`` tags that follow the
        ``choose_library`` div, with spaces replaced by underscores.

        Returns:
            A list of library names. When the page has no ``choose_library``
            div the list is ``["random"]`` -- presumably a placeholder id for
            single-library servers; TODO confirm against the caller.
        """
        response = requests.get(f'http://{ip_port}/mobile',
                                timeout=CalibreTools.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, features='html.parser')
        library_div = soup.find('div', attrs={'id': 'choose_library'})
        if library_div is None:
            return ["random"]
        return [str(option.text).replace(' ', '_')
                for option in library_div.find_all_next('option')]

    @staticmethod
    def getTotalBooks(ip_port, library_name):
        """Return the total number of books in ``library_name`` as a string.

        The count is read from the first ``<span>`` on the ``/mobile`` page
        whose text looks like ``Books 1 to 25 of 1234``.

        Returns:
            The count as a string of digits, or None when no span matches.
        """
        response = requests.get(f'http://{ip_port}/mobile?library_id={library_name}',
                                timeout=CalibreTools.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, features='html.parser')
        for span in soup.find_all('span'):
            total = CalibreTools._parseTotalBooks(span.text)
            if total is not None:
                return total
        return None

    @staticmethod
    def downloadBooks(ip_port, library_name, max=None, update=False, file_format=None, dlthread=1):
        """Collect the download links of a library and fetch them concurrently.

        Args:
            ip_port: ``host:port`` of the Calibre server.
            library_name: library id as returned by ``getLibraries``.
            max: optional upper bound on the number of books to list.
            update: when true, the cached links file is rebuilt from scratch.
            file_format: optional regular expression selecting link hrefs.
            dlthread: number of concurrent download workers (at least 1).

        Files are stored in ``<host>-<library_name>`` under the current
        directory. The call returns only after every worker has finished.
        """
        CalibreTools.getBooksLink(ip_port, library_name, CalibreTools.PAGE_SIZE,
                                  max, update, file_format)
        links_path = CalibreTools._linksFile(ip_port, library_name)
        with open(links_path) as links_file:
            total_book_links = sum(1 for entry in links_file if entry.strip())

        directory_name = f'{CalibreTools._directoryHost(ip_port)}-{library_name}'
        os.makedirs(directory_name, exist_ok=True)

        printInfo(f'Found {total_book_links} books')
        printInfo('Downloading Books. This can take a while')
        TerminalInterface.clearTerminal()
        TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)

        if dlthread < 1:
            # Zero workers could never make progress.
            dlthread = 1
        first_line = CalibreTools.FIRST_THREAD_LINE
        slots = range(first_line, first_line + dlthread)
        started = 0
        # The links are streamed from the file so the whole list never has to
        # be held in memory.
        with open(links_path) as links_file:
            for entry in links_file:
                link = entry.strip()
                if not link:
                    continue
                # Wait for a free worker slot, keeping the interface serviced.
                slot = CalibreTools._claimFreeSlot(slots)
                while slot is None:
                    TerminalInterface.runInterfaceSchedule()
                    slot = CalibreTools._claimFreeSlot(slots)
                try:
                    start_new_thread(CalibreTools._slotWorker,
                                     (slot, link, directory_name))
                except Exception:
                    # The worker never ran, so it cannot give the slot back.
                    CalibreTools._releaseSlot(slot)
                    raise
                started += 1
                TerminalInterface.appendInterface(
                    "Info",
                    "Downloading Books " + str(started) + " out of " + str(total_book_links),
                    2)

        # Starting the last worker is not the same as finishing it: wait for
        # every worker to hand its slot back before returning.
        while CalibreTools.active_threads > 0:
            TerminalInterface.runInterfaceSchedule()

    @staticmethod
    def getBooksLink(ip_port, library_name, multiple, max, update, file_format):
        """Append the download links of a library to its links file.

        The ``/mobile`` listing is walked ``multiple`` books at a time and
        every ``<a>`` whose href matches the format filter is written, one URL
        per line, to the file named by ``_linksFile``.

        Args:
            ip_port: ``host:port`` of the Calibre server.
            library_name: library id to list.
            multiple: page size, i.e. books requested per listing page.
            max: optional cap on the number of books to list.
            update: when true, an existing links file is deleted first.
            file_format: optional regular expression matched against hrefs;
                defaults to the epub/pdf/mobi/cbr filter.

        Raises:
            TypeError: when ``max`` is None and the server's book count could
                not be determined (type kept for backward compatibility).
        """
        total = CalibreTools.getTotalBooks(ip_port, library_name)
        if max is not None:
            n_books = int(max)
            if total is not None:
                # Never page past the end of the library.
                n_books = min(n_books, int(total))
        elif total is not None:
            n_books = int(total)
        else:
            raise TypeError(
                f'could not determine the number of books in {library_name!r} on {ip_port}')

        links_path = CalibreTools._linksFile(ip_port, library_name)
        if update and os.path.exists(links_path):
            os.remove(links_path)
        # NOTE(review): when update is false and the file already exists the new
        # links are appended to it, so repeated runs accumulate duplicates --
        # confirm whether that is intended.

        pattern = re.compile(file_format if file_format is not None
                             else CalibreTools._DEFAULT_FORMATS)
        with open(links_path, "a") as links_file:
            for start, num in CalibreTools._pageWindows(n_books, multiple):
                response = requests.get(
                    f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}'
                    f'&num={num}&order=descending&start={start}',
                    timeout=CalibreTools.REQUEST_TIMEOUT)
                soup = BeautifulSoup(response.text, features='html.parser')
                for link in soup.find_all('a', attrs={'href': pattern}):
                    links_file.write(f'http://{ip_port}{link.get("href")}\n')

    @staticmethod
    def threadDownloadBook(line, link, directory_name):
        """Download ``link`` into ``directory_name``, reporting on ``line``.

        The caller is expected to have incremented ``active_threads``; this
        method registers ``line`` in ``threads`` and, when done, removes it
        again and decrements ``active_threads``.

        Args:
            line: interface line number owned by this worker.
            link: URL of the book to download.
            directory_name: existing directory that receives the file.
        """
        with CalibreTools._state_lock:
            CalibreTools.threads.append(line)
        CalibreTools._slotWorker(line, link, directory_name)

    @staticmethod
    def _slotWorker(line, link, directory_name):
        """Download one book on an already-registered slot, then release it."""
        label = f'Thread-{line - CalibreTools.FIRST_THREAD_LINE}'
        # Until the real name is known the URL identifies the book in errors.
        filename = link
        try:
            try:
                # Only the response headers are needed here; the connection is
                # closed again before the actual transfer.
                with urlopen(link) as remotefile:
                    remoteinfo = remotefile.info()
                # The header object parses Content-Disposition itself, which
                # avoids cgi.parse_header (deprecated, removed in Python 3.13).
                filename = CalibreTools._safeFilename(remoteinfo.get_filename())
                target = os.path.join(directory_name, filename)
                if not os.path.exists(target):
                    TerminalInterface.appendInterface("Info", f"{label} Downloading {filename}", line)
                    urlretrieve(link, target)
                    TerminalInterface.appendInterface("Success", f"{label} Completed", line)
                else:
                    declared_size = remoteinfo['Content-Length']
                    if declared_size is not None and int(declared_size) != os.path.getsize(target):
                        TerminalInterface.appendInterface(
                            "Warning",
                            f'{label} Filesize does not match... Downloading file. {filename}',
                            line)
                        os.remove(target)
                        urlretrieve(link, target)
                        TerminalInterface.appendInterface("Success", f'{label} Completed {filename}', line)
                    else:
                        # Same size, or the server sent no size to compare with.
                        TerminalInterface.appendInterface(
                            "Success", f'{label} File Exists... Skipping {filename}', line)
            except Exception:
                TerminalInterface.appendInterface(
                    "Error",
                    f'{label} Encountered a issue downloading file. Skipping {filename}',
                    line)
            # Pause before the slot is handed to the next download.
            time.sleep(2)
        finally:
            # Always give the slot back, otherwise the scheduler waits forever.
            CalibreTools._releaseSlot(line)

    @staticmethod
    def _claimFreeSlot(slots):
        """Reserve and return the first slot in ``slots`` that is not in use.

        The slot is registered and ``active_threads`` incremented before the
        worker starts, so the scheduler can never hand out the same slot twice.
        Returns None when every slot is busy.
        """
        with CalibreTools._state_lock:
            for slot in slots:
                if slot not in CalibreTools.threads:
                    CalibreTools.threads.append(slot)
                    CalibreTools.active_threads += 1
                    return slot
        return None

    @staticmethod
    def _releaseSlot(line):
        """Mark the worker on ``line`` as finished."""
        with CalibreTools._state_lock:
            CalibreTools.active_threads -= 1
            if line in CalibreTools.threads:
                CalibreTools.threads.remove(line)

    @staticmethod
    def _linksFile(ip_port, library_name):
        """Return the name of the file caching the links of a library."""
        return ip_port + "  " + library_name + " links.txt"

    @staticmethod
    def _directoryHost(ip_port):
        """Return the host part of ``ip_port`` for use in a directory name.

        A trailing ``:port`` is dropped and the brackets of a bracketed IPv6
        address are removed; input without a numeric port is returned as is.
        """
        text = str(ip_port)
        host, separator, port = text.rpartition(':')
        if not (separator and host and port.isdigit()):
            host = text
        return host.strip('[]')

    @staticmethod
    def _pageWindows(n_books, multiple):
        """Return ``(start, num)`` pairs covering ``n_books`` in pages of ``multiple``.

        ``start`` is 1-based; the last page is shortened so that the pages
        never overlap and never exceed ``n_books`` in total.
        """
        return [(offset + 1, min(multiple, n_books - offset))
                for offset in range(0, n_books, multiple)]

    @staticmethod
    def _parseTotalBooks(text):
        """Return the total from ``Books X to Y of N`` in ``text``, or None."""
        found = CalibreTools._TOTAL_BOOKS_RE.search(text)
        return found.group(1) if found else None

    @staticmethod
    def _safeFilename(name):
        """Reduce a server-supplied filename to a bare, non-empty file name.

        The name comes from a remote header, so directory components are
        stripped to keep the download inside the target directory.

        Raises:
            ValueError: when no usable file name remains.
        """
        if not name:
            raise ValueError('server did not provide a filename')
        base = os.path.basename(str(name).replace('\\', '/'))
        if base in ('', '.', '..'):
            raise ValueError(f'unusable filename: {name!r}')
        return base
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`import os`
			`import re`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`import cgi`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`import time`
			`import requests`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`import linecache`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`from tools import *`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`from bs4 import BeautifulSoup`
			`from _thread import start_new_thread`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`from urllib.request import urlopen, urlretrieve`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00
			`class CalibreTools:`
			`active_threads = 0`
			`threads = []`

			`@staticmethod`
			`def checkServer(ip_port):`
			`dorks = ['/interface-data/init', '/browse/categories/allbooks']`
			`for dork in dorks:`
			`request = requests.get(f'http://{ip_port}{dork}')`
			`if request.status_code == 200:`
			`return True`
			`return False`

			`@staticmethod`
			`def getLibraries(ip_port):`
			`libraries = []`
			`request = requests.get(f'http://{ip_port}/mobile')`
			`soap = BeautifulSoup(request.text, features='html.parser')`
			`library_div = soap.find('div', attrs={'id': 'choose_library'})`
			`try:`
			`results = library_div.find_all_next('option')`
			`for result in results:`
			`library = str(result.text).replace(' ', '_')`
			`libraries.append(library)`
			`except AttributeError:`
			`libraries.append("random")`
			`return libraries`

			`@staticmethod`
			`def getTotalBooks(ip_port, library_name):`
			`request = requests.get(f'http://{ip_port}/mobile?library_id={library_name}')`
			`soap = BeautifulSoup(request.text, features='html.parser')`
			`span_tags = soap.findAll('span')`
			`for span in span_tags:`
			`if re.search('Books \\d+ to \\d+ of \\d+', span.text):`
			`return str(span.text).split('of')[1].strip()`

			`@staticmethod`
Fixed some issues and cleaned up some code 2023-04-12 23:08:38 -05:00			`def downloadBooks(ip_port, library_name, max=None, update=False, file_format=None, dlthread=1):`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`downloaded_books = 0`
Fixed some issues and cleaned up some code 2023-04-12 23:08:38 -05:00			`CalibreTools.getBooksLink(ip_port, library_name, 25, max, update, file_format)`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`total_book_links = sum(1 for line in open(ip_port + " " + library_name + " links.txt"))`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`directory_ip = "'"`
			`if re.match('^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d+$', ip_port):`
			`directory_ip = str(ip_port).split(':')[0]`
			if re.match('(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}\|([0-9a-fA-F]{1,4}:){1,7}:\|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}\|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}\|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}\|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}\|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}\|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})\|:((:[0-9a-fA-F]{1,4}){1,7}\|:)\|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}\|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]\|(2[0-4]\|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]\|(2[0-4]\|1{0,1}[0-9]){0,1}[0-9])\|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]\|(2[0-4]\|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]\|(2[0-4]\|1{0,1}[0-9]){0,1}[0-9])):\\d+$', ip_port):
			`directory_ip = str(ip_port).split(':')[-1]`
			`directory_name = f'{directory_ip}-{library_name}'`
			`try:`
			`os.mkdir(f'./{directory_name}')`
			`except FileExistsError:`
			`pass`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`printInfo(f'Found {total_book_links} books')`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`printInfo(f'Downloading Books. This can take a while')`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.clearTerminal()`
			`TerminalInterface.appendInterface("Info", "Currently Downloading from " + ip_port, 1)`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`while downloaded_books != total_book_links:`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`if CalibreTools.active_threads != dlthread:`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`for x in range(3, 3+dlthread):`
			`if CalibreTools.threads.count(x) == 0:`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`start_new_thread(CalibreTools.threadDownloadBook, (x, linecache.getline((ip_port + " " + library_name + " links.txt"), downloaded_books), directory_name, ))`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`CalibreTools.active_threads += 1`
			`downloaded_books += 1`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`TerminalInterface.appendInterface("Info", "Downloading Books " + str(downloaded_books) + " out of " + str(total_book_links), 2)`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`else:`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.runInterfaceSchedule()`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00
			`@staticmethod`
Fixed some issues and cleaned up some code 2023-04-12 23:08:38 -05:00			`def getBooksLink(ip_port, library_name, multiple, max, update, file_format):`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`n_books = int(CalibreTools.getTotalBooks(ip_port, library_name))`
			`if max is not None:`
			`n_books = max`
			`difference_next_multiple = multiple - (n_books % multiple)`
			`if difference_next_multiple == multiple:`
			`difference_next_multiple = 0`
			`major_number_it = (n_books + difference_next_multiple) // 25`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`if update:`
			`if os.path.exists(ip_port + " " + library_name + " links.txt"):`
			`os.remove(ip_port + " " + library_name + " links.txt")`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`for i in range(major_number_it):`
			`start = multiple * i + 1`
			`request = requests.get(f'http://{ip_port}/mobile?sort=timestamp&library_id={library_name}&num={n_books}&order=descending&start={start}')`
			`soup = BeautifulSoup(request.text, features='html.parser')`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`links_soup = soup.findAll('a', attrs={'href': re.compile('\\.epub\|\\.pdf\|\\.mobi\|\\.cbr')})`
Fixed some issues and cleaned up some code 2023-04-12 23:08:38 -05:00			`if file_format is not None:`
			`links_soup = soup.findAll('a', attrs={'href': re.compile(file_format)})`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`for link in links_soup:`
			`download_link = f'http://{ip_port}{link.get("href")}'`
Now exports to file instead of using RAM. Also should with format filters. 2023-04-12 16:08:51 -05:00			`with open((ip_port + " " + library_name + " links.txt"), "a") as f:`
			`f.write(download_link + "\n")`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00
			`@staticmethod`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`def threadDownloadBook(line, link, directory_name):`
			`CalibreTools.threads.append(line)`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`filename = link`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`try:`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`remotefile = urlopen(link)`
			`remoteinfo = remotefile.info()`
			`contentdisposition = remoteinfo['Content-Disposition']`
			`_, params = cgi.parse_header(contentdisposition)`
			`filename = params["filename"]`
			`filesize = remoteinfo['Content-Length']`
			`if not os.path.exists(directory_name + "/" + filename):`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.appendInterface("Info", f"Thread-{line-3} Downloading {filename}", line)`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`urlretrieve(link, directory_name + "/" + filename)`
Removed sleep timer and made a successful download green 2023-03-19 06:02:28 -05:00			`TerminalInterface.appendInterface("Success", f"Thread-{line-3} Completed", line)`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`elif not int(filesize) == os.path.getsize(directory_name + "/" + filename):`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.appendInterface("Warning", f'Thread-{line-3} Filesize does not match... Downloading file. {filename}', line)`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`os.remove(directory_name + "/" + filename)`
			`urlretrieve(link, directory_name + "/" + filename)`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.appendInterface("Success", f'Thread-{line-3} Completed {filename}', line)`
Switched from wget pip to urllib. Now check for file to prevent redownloading and such. 2023-03-19 03:53:24 -05:00			`else:`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.appendInterface("Success", f'Thread-{line-3} File Exists... Skipping {filename}', line)`
Functional Calibre Dumper with Threading 2023-03-19 02:40:18 -05:00			`except:`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`TerminalInterface.appendInterface("Error", f'Thread-{line-3} Encountered a issue downloading file. Skipping {filename}', line)`
			`time.sleep(2)`
Fixed typo and implemented file checking to prevent redownloading 2023-03-19 03:05:59 -05:00			`CalibreTools.active_threads -= 1`
Implemented a form of tui to make it easier to understand and watch. 2023-03-19 05:50:48 -05:00			`CalibreTools.threads.remove(line)`