Push it to the init

This commit is contained in:
Kenwood 2020-11-27 19:11:21 -05:00
commit f65fe8f93f
2 changed files with 87 additions and 0 deletions

16
channels.dat Normal file
View File

@ -0,0 +1,16 @@
# Store channels in here
# Kenwood (for testing)
https://www.youtube.com/channel/UCoypvzQtMdlfZdJnS48WlNg
# Incidental Information
https://www.youtube.com/channel/UCheAPvWdDad0KzDPRCqnqLg
# Unus Annus
https://www.youtube.com/channel/UCIcgBZ9hEJxHv6r_jDYOMqg
# Saberspark
https://www.youtube.com/c/Saberspark
# Pyrocynical
https://www.youtube.com/c/Pyrocynical

71
youtube_archive.py Normal file
View File

@ -0,0 +1,71 @@
from discord_webhook import DiscordWebhook
from numpy import loadtxt
import os, youtube_dl
class youtube_archive:
    """Archive YouTube channels to disk and report progress to Discord.

    Instantiating the class runs the whole job: it announces itself on the
    configured Discord webhooks, reads the channel list, downloads every
    channel with youtube-dl and finally publishes "Nothing to do.".
    """

    # File listing one channel URL per line; '#' starts a comment.
    CHANNELS_FILE = "/mnt/YouTube/channels.dat"
    # Directory under which one sub-directory per uploader is created.
    ARCHIVE_ROOT = "/mnt/YouTube/"
    # Download percentages at which a progress message is published.
    PROGRESS_MILESTONES = (1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99, 100)

    def __init__(self):
        """Run the complete archive job (announce, load URLs, download)."""
        self.webhook_urls = ['https://discordapp.com/api/webhooks/ah-ah-ah/you_dident_say_the_magic_word']
        self.webhook = None        # last webhook object sent, or None
        self.urls = []             # channel URLs read from CHANNELS_FILE
        self.meta = None           # youtube-dl metadata of the current channel
        self.base_url = None       # output directory of the current channel
        self.last_percent = None   # last progress milestone announced
        self.last_filename = None  # file the milestone above belongs to
        self.publish("Youtube archiver is online, and ready to archive.")
        try:
            self.extract_urls()
        except Exception as error:
            print("Could not read channel list: {0}".format(error))
            self.publish("There was an error extracting URLS")
        else:
            # Only archive when the URL list was actually loaded.
            try:
                self.archive()
            except Exception as error:
                print("Archiving aborted: {0}".format(error))
                self.publish("There was an error archiving")
        self.publish("Nothing to do.")

    def publish(self, content):
        """Send a message to Discord, or stay silent for uninteresting ticks.

        Args:
            content: Either a ``str`` that is sent verbatim, or a youtube-dl
                progress-hook dict. A dict only produces a message when it
                carries ``_percent_str`` and a new milestone was reached.

        Delivery is best effort: any failure while building or sending the
        webhook is printed and otherwise ignored so a Discord outage cannot
        stop a download.
        """
        if isinstance(content, str):
            message = content
        elif "_percent_str" in content:
            message = self._progress_message(content)
        else:
            message = None

        # Always drop the previous webhook so it can never be re-sent.
        self.webhook = None
        if message is None:
            return
        try:
            self.webhook = DiscordWebhook(url=self.webhook_urls, content=message)
            self.webhook.execute()
        except Exception as error:
            print("Failed to publish to Discord: {0}".format(error))

    def _progress_message(self, status):
        """Return the progress text for *status*, or None if nothing is new.

        A message is produced the first time the download of a given file
        reaches or passes one of ``PROGRESS_MILESTONES``.
        """
        raw = str(status['_percent_str']).strip()
        print("PERCENTAGE IS " + raw + " LAST PERCENT WAS " + str(self.last_percent))
        # NOTE(review): youtube-dl appears to format this value as a padded
        # decimal such as " 10.0%", so it is parsed numerically instead of
        # being compared against exact strings like "10%" - confirm.
        try:
            value = float(raw.rstrip('%'))
        except ValueError:
            return None

        filename = status.get('filename')
        if filename != self.last_filename:
            # A new file starts counting from scratch.
            self.last_filename = filename
            self.last_percent = None

        milestone = max(
            (mark for mark in self.PROGRESS_MILESTONES if mark <= value),
            default=None,
        )
        if milestone == self.last_percent:
            return None
        self.last_percent = milestone
        if milestone is None:
            return None
        eta = status.get('_eta_str', 'unknown')
        return "Downloading {0} {1}, eta {2}".format(filename, raw, eta)

    @staticmethod
    def _parse_channel_lines(lines):
        """Return the non-empty entries of *lines* with '#' comments removed."""
        entries = (line.split("#", 1)[0].strip() for line in lines)
        return [entry for entry in entries if entry]

    def extract_urls(self):
        """Load the channel URLs from ``CHANNELS_FILE`` into ``self.urls``.

        Raises:
            OSError: If the channel file cannot be opened or read.
        """
        with open(self.CHANNELS_FILE, encoding="utf-8") as handle:
            self.urls = self._parse_channel_lines(handle)
        self.publish("Found {0} urls in database, cataloging and begining backup now.".format(len(self.urls)))
        print(self.urls)

    def archive(self):
        """Download every channel in ``self.urls``.

        A failure on one channel is reported and does not prevent the
        remaining channels from being archived.
        """
        for url in self.urls:
            try:
                self._archive_channel(url)
            except Exception as error:
                print("Archiving {0} failed: {1}".format(url, error))
                self.publish("There was an error archiving {0}".format(url))

    def _archive_channel(self, url):
        """Fetch metadata for *url*, then download it below ARCHIVE_ROOT."""
        # First pass: metadata only, to learn who the uploader is.
        with youtube_dl.YoutubeDL({}) as ydl:
            self.meta = ydl.extract_info(url, download=False)
        # NOTE(review): assumes the extracted info carries an 'uploader'
        # key for channel URLs - confirm; a missing key raises KeyError.
        uploader = self.meta['uploader']
        self.publish("Begining download of channel {0}.".format(uploader))
        self.base_url = self.ARCHIVE_ROOT + uploader + "/"
        print(self.base_url)
        print(url)

        # Progress milestones are tracked per channel and per file.
        self.last_percent = None
        self.last_filename = None
        ydl_opts = {
            'format': 'bestvideo/best',
            'progress_hooks': [self.publish],
            # Remembers already-downloaded video ids between runs.
            'download_archive': self.base_url + 'history.dat',
            'outtmpl': self.base_url + '%(title)s.%(ext)s',
        }
        # Second pass: the actual download with the options above.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        self.publish("Finished download of channel {0}. Grabbed {1} new videos and archived them.".format(uploader, "UNKNOWN"))
# Script entry point: constructing the archiver runs the whole job.
if __name__ == "__main__":
    youtube_archive()