Push it to the init

2020-11-27 19:11:21 -05:00
commit f65fe8f93f
2 changed files with 87 additions and 0 deletions
--- a/channels.dat
+++ b/channels.dat
@@ -0,0 +1,16 @@
+# Store channels in here
+
+# Kenwood (for testing)
+https://www.youtube.com/channel/UCoypvzQtMdlfZdJnS48WlNg
+
+# Incidental Information
+https://www.youtube.com/channel/UCheAPvWdDad0KzDPRCqnqLg
+
+# Unus Annus
+https://www.youtube.com/channel/UCIcgBZ9hEJxHv6r_jDYOMqg
+
+# Saberspark
+https://www.youtube.com/c/Saberspark
+
+# Pyrocynical
+https://www.youtube.com/c/Pyrocynical
--- a/youtube_archive.py
+++ b/youtube_archive.py
@@ -0,0 +1,71 @@
+from discord_webhook import DiscordWebhook
+from numpy import loadtxt
+import os, youtube_dl
+
+class youtube_archive:
+    """Back up the YouTube channels listed in a text file and report to Discord.
+
+    Instantiating the class runs the whole job: it announces itself on the
+    configured Discord webhooks, reads the channel list, downloads every
+    listed channel with youtube-dl and finally reports that nothing is left
+    to do.
+    """
+
+    # Text file with one channel URL per line; '#' starts a comment.
+    CHANNELS_FILE = "/mnt/YouTube/channels.dat"
+    # Directory under which one sub-directory per uploader is created.
+    ARCHIVE_ROOT = "/mnt/YouTube/"
+    # Whole-percent marks at which download progress is worth a message.
+    PROGRESS_MILESTONES = (1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99, 100)
+
+    def __init__(self):
+        """Announce startup, load the channel list and archive every channel.
+
+        Raises:
+            Exception: whatever ``archive`` raised, re-raised after the
+                failure has been reported on the webhooks.
+        """
+        self.webhook_urls = ['https://discordapp.com/api/webhooks/ah-ah-ah/you_dident_say_the_magic_word']
+        self.last_percent = None  # (filename, milestone) of the last progress message
+        self.webhook = None  # most recently sent DiscordWebhook, if any
+        self.urls = []  # stays empty when the channel list cannot be read
+
+        self.publish("Youtube archiver is online, and ready to archive.")
+
+        try:
+            self.extract_urls() # Extract the urls
+        except (OSError, UnicodeError) as error:
+            print("Could not read {0}: {1}".format(self.CHANNELS_FILE, error))
+            self.publish("There was an error extracting URLS")
+
+        try:
+            self.archive() # Begin the archive
+        except Exception:
+            # Only report an error when one happened, then let the traceback
+            # surface instead of hiding it.
+            self.publish("There was an error archiving")
+            raise
+        self.publish("Nothing to do.")
+
+    def publish(self, content):
+        """Send a message to every configured Discord webhook.
+
+        Args:
+            content: either the text to send, or a youtube-dl progress
+                dictionary. A progress dictionary only produces a message
+                when a new entry of ``PROGRESS_MILESTONES`` has been reached
+                for the file being downloaded; anything else is ignored.
+        """
+        message = None
+        if isinstance(content, str):
+            message = content
+        elif isinstance(content, dict) and "_percent_str" in content:
+            message = self._progress_message(content)
+
+        if message is None:
+            # Nothing new to say; never re-send the previous webhook.
+            return
+
+        self.webhook = DiscordWebhook(url=self.webhook_urls, content=message)
+        try:
+            self.webhook.execute()
+        except Exception as error:
+            # Notifications are best effort: a Discord outage must not abort
+            # the backup, but the failure should at least be visible locally.
+            print("Could not deliver Discord message: {0}".format(error))
+
+    def _progress_message(self, status):
+        """Return the progress text for *status*, or None if there is no news."""
+        raw = str(status['_percent_str'])
+        print("PERCENTAGE IS " + raw + " LAST PERCENT WAS " + str(self.last_percent))
+        try:
+            # Parse numerically so padded or decimal forms such as " 10.0%"
+            # are recognised as well as a plain "10%".
+            value = float(raw.strip().rstrip('%'))
+        except ValueError:
+            return None  # no numeric percentage available
+
+        reached = [mark for mark in self.PROGRESS_MILESTONES if mark <= value]
+        if not reached:
+            return None
+
+        filename = status.get('filename', 'unknown file')
+        # Keyed by file as well, so every new download starts reporting afresh.
+        key = (filename, max(reached))
+        if key == self.last_percent:
+            return None
+        self.last_percent = key
+
+        eta = status.get('_eta_str', 'unknown')
+        return "Downloading {0} {1}, eta {2}".format(filename, raw.strip(), eta)
+
+    def extract_urls(self):
+        """Load the channel URLs from ``CHANNELS_FILE`` into ``self.urls``.
+
+        Everything from a '#' to the end of the line is treated as a comment
+        and blank lines are skipped.
+
+        Raises:
+            OSError: if the file cannot be opened or read.
+        """
+        # Plain line parsing always yields a list, including for a file that
+        # holds a single URL or none at all.
+        with open(self.CHANNELS_FILE, encoding="utf-8") as handle:
+            stripped = (line.split("#", 1)[0].strip() for line in handle)
+            self.urls = [line for line in stripped if line]
+        self.publish("Found {0} urls in database, cataloging and begining backup now.".format(len(self.urls)))
+        print(self.urls)
+
+    def archive(self):
+        """Download every channel in ``self.urls`` into its own directory.
+
+        For each URL the metadata is fetched first to learn the uploader
+        name, which names the target directory below ``ARCHIVE_ROOT``. The
+        download itself uses a per-channel ``history.dat`` download archive.
+        """
+        for url in self.urls: # For each of the urls on file
+            with youtube_dl.YoutubeDL({}) as ydl: # Startup a new youtube-dl session to grab the meta
+                self.meta = ydl.extract_info(url, download=False) # First extract the metadata
+            uploader = self.meta['uploader']
+            self.publish("Begining download of channel {0}.".format(uploader)) # Publish who we're archiving
+            # A path separator inside the name would otherwise create nested
+            # directories; the trailing "" keeps the trailing separator.
+            self.base_url = os.path.join(self.ARCHIVE_ROOT, uploader.replace(os.sep, "_"), "")
+            print(self.base_url)
+            print(url)
+
+            finished = set()  # files youtube-dl reported as completely downloaded
+
+            def track(status, finished=finished):
+                """Count finished files, then forward the event to publish()."""
+                if status.get('status') == 'finished':
+                    finished.add(status.get('filename'))
+                self.publish(status)
+
+            ydl_opts = { # Setup the options
+                'format': 'bestvideo/best',
+                'progress_hooks': [track],
+                'download_archive': self.base_url + 'history.dat',
+                'outtmpl': self.base_url + '%(title)s.%(ext)s',
+            }
+            with youtube_dl.YoutubeDL(ydl_opts) as ydl: # Startup a new youtube-dl session
+                ydl.download([url])
+            self.publish("Finished download of channel {0}. Grabbed {1} new videos and archived them.".format(uploader, len(finished)))
+
+# Script entry point: constructing the class performs the whole archive run.
+if __name__ == "__main__":
+    youtube_archive()