# Recovered from a collapsed git patch:
#
#   commit f65fe8f93f496816eb17ea9f83867c2782cf5c52
#   Author: Kenwood
#   Date:   Fri Nov 27 19:11:21 2020 -0500
#
#       Push it to the init
#
# The same commit added channels.dat (no trailing newline) with this content:
#
#   # Store channels in here
#
#   # Kenwood (for testing)
#   https://www.youtube.com/channel/UCoypvzQtMdlfZdJnS48WlNg
#
#   # Incidental Information
#   https://www.youtube.com/channel/UCheAPvWdDad0KzDPRCqnqLg
#
#   # Unus Annus
#   https://www.youtube.com/channel/UCIcgBZ9hEJxHv6r_jDYOMqg
#
#   # Saberspark
#   https://www.youtube.com/c/Saberspark
#
#   # Pyrocynical
#   https://www.youtube.com/c/Pyrocynical
#
# What follows is youtube_archive.py from that commit.
"""Archive YouTube channels with youtube-dl and report progress to Discord."""

import logging
import os

import youtube_dl
from discord_webhook import DiscordWebhook
from numpy import loadtxt

logger = logging.getLogger(__name__)


class youtube_archive:
    """Download every channel listed in a text file and narrate it on Discord.

    Constructing an instance runs the whole job: announce start-up, read the
    channel list, archive each channel, then announce that nothing is left.

    Args:
        webhook_urls: Discord webhook URL(s) to post to. Defaults to
            ``DEFAULT_WEBHOOK_URLS``.
        channels_file: Text file with one channel URL per line; ``#`` starts
            a comment. Defaults to ``DEFAULT_CHANNELS_FILE``.
        archive_root: Directory under which one folder per uploader is
            created. Defaults to ``DEFAULT_ARCHIVE_ROOT``.
    """

    DEFAULT_WEBHOOK_URLS = ['https://discordapp.com/api/webhooks/ah-ah-ah/you_dident_say_the_magic_word']
    DEFAULT_CHANNELS_FILE = "/mnt/YouTube/channels.dat"
    DEFAULT_ARCHIVE_ROOT = "/mnt/YouTube/"

    # Percent thresholds at which a progress message is posted, ascending.
    PROGRESS_MILESTONES = (1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99, 100)

    def __init__(self, webhook_urls=None, channels_file=None, archive_root=None):
        self.webhook_urls = self.DEFAULT_WEBHOOK_URLS if webhook_urls is None else webhook_urls
        self.channels_file = self.DEFAULT_CHANNELS_FILE if channels_file is None else channels_file
        self.archive_root = self.DEFAULT_ARCHIVE_ROOT if archive_root is None else archive_root

        self.webhook = None          # Last webhook object built by publish(), if any
        self.last_percent = None     # Last milestone announced for the current file
        self.last_filename = None    # File the milestone above belongs to
        self.urls = []               # Filled in by extract_urls()

        self.publish("Youtube archiver is online, and ready to archive.")

        try:
            self.extract_urls()  # Extract the urls
        except Exception:
            # Top-level boundary: report and carry on to the closing message.
            logger.exception("Could not read channel list %s", self.channels_file)
            self.publish("There was an error extracting URLS")
        else:
            try:
                self.archive()  # Begin the archive
            except Exception:
                logger.exception("Archiving aborted")
                self.publish("There was an error archiving")

        self.publish("Nothing to do.")

    def publish(self, content):
        """Post a message to Discord.

        Args:
            content: Either a plain string to post verbatim, or a youtube-dl
                progress-hook status dict. Status dicts only produce a
                message when a new entry of ``PROGRESS_MILESTONES`` has been
                reached for the file being downloaded; anything else is
                silently ignored.

        Delivery is best effort: a failed webhook call is logged and never
        raised, so that a Discord outage cannot abort a download.
        """
        if isinstance(content, str):
            message = content
        elif isinstance(content, dict):
            message = self._progress_message(content)
        else:
            message = None

        if message is None:
            # Nothing to say; make sure no stale webhook is kept around.
            self.webhook = None
            return

        self.webhook = DiscordWebhook(url=self.webhook_urls, content=message)
        try:
            self.webhook.execute()
        except Exception:
            logger.exception("Could not deliver Discord message: %s", message)

    def _progress_message(self, status):
        """Return the progress text for a youtube-dl status dict, or None."""
        percent = self._parse_percent(status.get('_percent_str'))
        if percent is None:
            return None

        filename = status.get('filename')
        if filename != self.last_filename:
            # New file: start counting milestones from scratch.
            self.last_filename = filename
            self.last_percent = None

        reached = None
        for milestone in self.PROGRESS_MILESTONES:
            if percent >= milestone:
                reached = milestone

        if reached is None or reached == self.last_percent:
            return None
        self.last_percent = reached

        eta = str(status.get('_eta_str', 'Unknown')).strip()
        return "Downloading {0} {1}, eta {2}".format(filename, "{0}%".format(reached), eta)

    @staticmethod
    def _parse_percent(text):
        """Convert a percent string such as ' 10.0%' to a float, or None."""
        try:
            return float(str(text).strip().rstrip('%'))
        except ValueError:
            return None

    def extract_urls(self):
        """Load the channel URLs from ``self.channels_file`` into ``self.urls``."""
        # ndmin=1 keeps a one-line file from collapsing into a 0-d array,
        # which supports neither len() nor iteration.
        rows = loadtxt(self.channels_file, dtype=str, comments="#", ndmin=1)
        self.urls = [str(row) for row in rows]
        self.publish("Found {0} urls in database, cataloging and begining backup now.".format(len(self.urls)))
        print(self.urls)

    def archive(self):
        """Download every channel in ``self.urls`` into its own folder.

        Each channel gets ``<archive_root>/<uploader>/`` with a
        ``history.dat`` download archive next to the videos. A
        ``DownloadError`` on one channel is reported and the loop moves on
        to the next channel.
        """
        for url in self.urls:  # For each of the urls on file
            try:
                self._archive_channel(url)
            except youtube_dl.utils.DownloadError:
                logger.exception("Could not archive %s", url)
                self.publish("There was an error archiving")

    def _archive_channel(self, url):
        """Fetch metadata for one channel URL, then download it."""
        with youtube_dl.YoutubeDL({}) as ydl:  # Metadata-only session
            self.meta = ydl.extract_info(url, download=False)

        uploader = self._channel_name(self.meta)
        self.publish("Begining download of channel {0}.".format(uploader))  # Publish who we're archiving

        # Trailing "" keeps the trailing separator the rest of the code expects.
        self.base_url = os.path.join(self.archive_root, uploader, "")
        print(self.base_url)
        print(url)

        ydl_opts = {  # Setup the options
            'format': 'bestvideo/best',
            'progress_hooks': [self.publish],
            'download_archive': os.path.join(self.base_url, 'history.dat'),
            'outtmpl': os.path.join(self.base_url, '%(title)s.%(ext)s'),
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:  # Download session
            ydl.download([url])
        self.publish("Finished download of channel {0}. Grabbed {1} new videos and archived them.".format(uploader, "UNKNOWN"))

    @staticmethod
    def _channel_name(meta):
        """Pick a folder name from extracted metadata.

        Raises:
            KeyError: If the metadata carries no uploader, title or id.
        """
        for key in ('uploader', 'title', 'id'):
            name = meta.get(key)
            if name:
                return name
        raise KeyError('uploader')


if __name__ == '__main__':
    youtube_archive()