// Mirror of https://github.com/fiso64/slsk-batchdl.git
// Synced 2024-12-22 22:42:41 +00:00 (527 lines, 19 KiB, C#)
using Google.Apis.YouTube.v3;
|
||
using Google.Apis.Services;
|
||
using System.Xml;
|
||
using YoutubeExplode;
|
||
using System.Text.RegularExpressions;
|
||
using YoutubeExplode.Common;
|
||
using System.Diagnostics;
|
||
using HtmlAgilityPack;
|
||
using System.Text;
|
||
using System.Threading.Channels;
|
||
using System.Collections.Concurrent;
|
||
using System;
|
||
|
||
public static class YouTube
|
||
{
|
||
/// <summary>YoutubeExplode client shared by <see cref="GetVideoInfo"/> and <see cref="UrlToId"/>.</summary>
private static YoutubeClient? youtube = new();

/// <summary>YouTube Data API service; stays null until <see cref="StartService"/> creates it.</summary>
private static YouTubeService? youtubeService;

/// <summary>YouTube Data API key; an empty string means no key has been configured.</summary>
public static string apiKey = string.Empty;
|
||
|
||
/// <summary>
/// Reads a playlist through the YouTube Data API and converts its items to tracks.
/// Items whose video id is present in the <see cref="GetDictYtExplode"/> result reuse that
/// track; every other item is looked up with a <c>videos.list</c> request and parsed with
/// <see cref="ParseTrackInfo"/>.
/// </summary>
/// <param name="url">Playlist URL; resolved to a playlist id by <see cref="UrlToId"/>.</param>
/// <param name="max">Number of playlist positions to process after <paramref name="offset"/>.</param>
/// <param name="offset">Number of leading playlist positions to skip.</param>
/// <returns>The playlist title and the collected tracks in playlist order.</returns>
/// <exception cref="Exception">Thrown by <see cref="StartService"/> when no API key is set.</exception>
public static async Task<(string, List<Track>)> GetTracksApi(string url, int max = int.MaxValue, int offset = 0)
{
    // playlistItems.list documents maxResults as 0..50, so never ask for more per page.
    const int maxPageSize = 50;

    StartService();

    string playlistId = await UrlToId(url);

    var playlistRequest = youtubeService.Playlists.List("snippet");
    playlistRequest.Id = playlistId;
    var playlistResponse = playlistRequest.Execute();

    string playlistName = playlistResponse.Items[0].Snippet.Title;

    // Exclusive end position. Computed as long: the default max (int.MaxValue) plus any
    // positive offset overflows int, which previously made the loop below never run.
    long end = (long)max + offset;

    var playlistItemsRequest = youtubeService.PlaylistItems.List("snippet,contentDetails");
    playlistItemsRequest.PlaylistId = playlistId;
    playlistItemsRequest.MaxResults = Math.Min(end, maxPageSize);

    var tracksDict = await GetDictYtExplode(url, max, offset);
    var tracks = new List<Track>();
    long count = 0;

    while (playlistItemsRequest != null && count < end)
    {
        var playlistItemsResponse = playlistItemsRequest.Execute();

        foreach (var playlistItem in playlistItemsResponse.Items)
        {
            if (count >= offset)
            {
                string videoId = playlistItem.Snippet.ResourceId.VideoId;

                if (tracksDict.TryGetValue(videoId, out var knownTrack))
                {
                    tracks.Add(knownTrack);
                }
                else
                {
                    var videoRequest = youtubeService.Videos.List("contentDetails,snippet");
                    videoRequest.Id = videoId;
                    var videoResponse = videoRequest.Execute();

                    // No details returned for this id: skip the item. As in the original
                    // code, a skipped item does not advance the position counter.
                    if (videoResponse.Items.Count == 0)
                        continue;

                    var video = videoResponse.Items[0];
                    string title = playlistItem.Snippet.Title;
                    string uploader = video.Snippet.ChannelTitle;
                    int length = (int)XmlConvert.ToTimeSpan(video.ContentDetails.Duration).TotalSeconds;
                    string desc = video.Snippet.Description;

                    tracks.Add(await ParseTrackInfo(title, uploader, videoId, length, desc));
                }
            }

            if (++count >= end)
                break;
        }

        // Progress line, only shown when the YoutubeExplode lookup returned 200 or more tracks.
        if (tracksDict.Count >= 200 && !Console.IsOutputRedirected)
        {
            Console.SetCursorPosition(0, Console.CursorTop);
            Console.Write($"Loaded: {tracks.Count}");
        }

        playlistItemsRequest.PageToken = playlistItemsResponse.NextPageToken;
        if (playlistItemsRequest.PageToken == null || count >= end)
            playlistItemsRequest = null;
        else
            playlistItemsRequest.MaxResults = Math.Min(end - count, maxPageSize);
    }

    Console.WriteLine();

    return (playlistName, tracks);
}
|
||
|
||
/// <summary>
/// Creates a <see cref="Track"/> from a video's title and uploader name, guessing which
/// part is the artist and which is the track title.
/// </summary>
/// <remarks>Passing <paramref name="requestInfoIfNeeded"/> = true is way too slow.</remarks>
/// <param name="title">Video title.</param>
/// <param name="uploader">Uploader (channel) name.</param>
/// <param name="id">Stored as the track URI and used for <see cref="GetVideoInfo"/> lookups.</param>
/// <param name="length">Length in seconds; a value &lt;= 0 may be replaced by a fetched length.</param>
/// <param name="desc">Video description, when already known.</param>
/// <param name="requestInfoIfNeeded">Allows calls to <see cref="GetVideoInfo"/> for a missing description or length.</param>
/// <returns>The populated track.</returns>
public static async Task<Track> ParseTrackInfo(string title, string uploader, string id, int length, string desc = "", bool requestInfoIfNeeded=false)
{
    // En dashes become hyphens, both ends are trimmed, whitespace runs collapse to one space.
    static string Tidy(string text) => Regex.Replace(text.Replace("–", "-").Trim(), @"\s+", " ");

    (string title, string uploader, int length, string desc) fetched = ("", "", -1, "");

    var track = new Track();
    track.URI = id;

    uploader = Tidy(uploader);
    title = Tidy(title);

    string artist = uploader;
    string trackTitle = title;

    if (!artist.EndsWith(" - Topic"))
    {
        // Regular upload: the uploader is only trusted as the artist when the title or the
        // description mentions it.
        bool mentioned = title.ContainsWithBoundary(artist, true) || desc.ContainsWithBoundary(artist, true);
        track.ArtistMaybeWrong = !mentioned;

        string[] parts = title.Split(" - ", StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);

        if (parts.Length == 2)
        {
            // "Artist - Title"
            (artist, trackTitle) = (parts[0], parts[1]);
            track.ArtistMaybeWrong = false;
        }
        else if (parts.Length > 2)
        {
            // Several separators: split at the first segment that mentions the uploader,
            // provided something is left over to serve as the title.
            int artistPos = Array.FindIndex(parts, part => part.ContainsWithBoundary(artist, true));
            if (artistPos >= 0 && artistPos < parts.Length - 1)
            {
                artist = parts[artistPos];
                trackTitle = String.Join(" - ", parts[(artistPos + 1)..]);
                track.ArtistMaybeWrong = false;
            }
        }

        if (track.ArtistMaybeWrong && requestInfoIfNeeded && desc == "")
        {
            fetched = await GetVideoInfo(id);
            track.ArtistMaybeWrong = !fetched.desc.ContainsWithBoundary(artist, true);
        }
    }
    else
    {
        // "<artist> - Topic" channel: drop the suffix. Removing seven characters leaves one
        // space behind, which Trim() takes care of.
        artist = artist[..^7].Trim();

        if (artist == "Various Artists")
        {
            if (desc == "" && requestInfoIfNeeded && id != "")
            {
                fetched = await GetVideoInfo(id);
                desc = fetched.desc;
            }

            if (desc != "")
            {
                // Take the second " · "-separated field of the first description line that has one.
                string[] descLines = desc.Split(new[] { "\r\n", "\r", "\n" }, StringSplitOptions.RemoveEmptyEntries);
                string? creditLine = Array.Find(descLines, line => line.Contains(" · "));

                if (creditLine != null)
                    artist = creditLine.Split(new[] { " · " }, StringSplitOptions.None)[1];
            }
        }
    }

    if (requestInfoIfNeeded && id != "" && length <= 0)
    {
        // Reuse a length fetched above when there is one, otherwise request the info now.
        if (fetched.length <= 0)
            fetched = await GetVideoInfo(id);

        length = fetched.length;
    }

    track.Length = length;
    track.Artist = artist;
    track.Title = trackTitle;

    return track;
}
|
||
|
||
/// <summary>
/// Fetches title, uploader, length in seconds and description for a video. YoutubeExplode
/// is tried first; if it throws and <see cref="apiKey"/> is set, the YouTube Data API is
/// queried instead. Errors from either source are swallowed.
/// </summary>
/// <param name="id">Video id passed to both lookups.</param>
/// <returns>
/// The collected values; anything that could not be obtained keeps its default
/// (empty strings, length -1).
/// </returns>
public static async Task<(string title, string uploader, int length, string desc)> GetVideoInfo(string id)
{
    var result = (title: "", uploader: "", length: -1, desc: "");

    try
    {
        var video = await youtube.Videos.GetAsync(id);
        result.title = video.Title;
        result.uploader = video.Author.ChannelTitle;
        result.desc = video.Description;
        result.length = (int)video.Duration.Value.TotalSeconds;
        return result;
    }
    catch
    {
        // Without an API key there is no fallback; return whatever was filled in so far.
        if (apiKey == "")
            return result;
    }

    try
    {
        StartService();

        var request = youtubeService.Videos.List("contentDetails,snippet");
        request.Id = id;
        var response = request.Execute();

        result.title = response.Items[0].Snippet.Title;
        result.uploader = response.Items[0].Snippet.ChannelTitle;
        result.length = (int)XmlConvert.ToTimeSpan(response.Items[0].ContentDetails.Duration).TotalSeconds;
        result.desc = response.Items[0].Snippet.Description;
    }
    catch
    {
        // Best effort: the fallback failing is not reported either.
    }

    return result;
}
|
||
|
||
/// <summary>
/// Creates the YouTube Data API service from <see cref="apiKey"/> unless one already exists.
/// </summary>
/// <exception cref="Exception">No service exists yet and <see cref="apiKey"/> is empty.</exception>
public static void StartService()
{
    // Already created: nothing to do.
    if (youtubeService != null)
        return;

    if (apiKey == "")
        throw new Exception("No API key");

    var initializer = new BaseClientService.Initializer();
    initializer.ApiKey = apiKey;
    initializer.ApplicationName = "slsk-batchdl";

    youtubeService = new YouTubeService(initializer);
}
|
||
|
||
/// <summary>
/// Drops the current YouTube Data API service so the next <see cref="StartService"/> call
/// creates a new one.
/// </summary>
public static void StopService() => youtubeService = null;
|
||
|
||
/// <summary>
/// Enumerates a playlist with YoutubeExplode and returns the parsed tracks keyed by video id.
/// </summary>
/// <param name="url">Playlist URL.</param>
/// <param name="max">Maximum number of videos to include after <paramref name="offset"/>.</param>
/// <param name="offset">Number of leading videos to skip.</param>
/// <returns>Video id to track; a later video with the same id replaces an earlier one.</returns>
public static async Task<Dictionary<string, Track>> GetDictYtExplode(string url, int max = int.MaxValue, int offset = 0)
{
    var youtube = new YoutubeClient();
    var playlist = await youtube.Playlists.GetAsync(url);

    var tracks = new Dictionary<string, Track>();
    if (max <= 0)
        return tracks;

    // Exclusive end position. Computed as long: with the default max (int.MaxValue) any
    // positive offset overflowed int and the method returned an empty dictionary.
    long end = (long)offset + max;
    long position = 0;

    await foreach (var video in youtube.Playlists.GetVideosAsync(playlist.Id))
    {
        if (position >= offset)
        {
            var ytId = video.Id.Value;

            // The duration is nullable; use -1 (the "unknown" value GetVideoInfo starts
            // from) instead of throwing and losing the whole playlist.
            int length = video.Duration is TimeSpan duration ? (int)duration.TotalSeconds : -1;

            tracks[ytId] = await ParseTrackInfo(video.Title, video.Author.Title, ytId, length);
        }

        // Stop right after the last wanted video instead of enumerating one more.
        if (++position >= end)
            break;
    }

    return tracks;
}
|
||
|
||
/// <summary>
/// Enumerates a playlist with YoutubeExplode and returns its title together with the
/// parsed tracks in playlist order.
/// </summary>
/// <param name="url">Playlist URL.</param>
/// <param name="max">Maximum number of videos to include after <paramref name="offset"/>.</param>
/// <param name="offset">Number of leading videos to skip.</param>
/// <returns>The playlist title and the parsed tracks.</returns>
public static async Task<(string, List<Track>)> GetTracksYtExplode(string url, int max = int.MaxValue, int offset = 0)
{
    var youtube = new YoutubeClient();
    var playlist = await youtube.Playlists.GetAsync(url);

    var playlistTitle = playlist.Title;
    var tracks = new List<Track>();
    if (max <= 0)
        return (playlistTitle, tracks);

    // Exclusive end position. Computed as long: with the default max (int.MaxValue) any
    // positive offset overflowed int and the method returned no tracks.
    long end = (long)offset + max;
    long position = 0;

    await foreach (var video in youtube.Playlists.GetVideosAsync(playlist.Id))
    {
        if (position >= offset)
        {
            // The duration is nullable; use -1 (the "unknown" value GetVideoInfo starts
            // from) instead of throwing and losing the whole playlist.
            int length = video.Duration is TimeSpan duration ? (int)duration.TotalSeconds : -1;

            tracks.Add(await ParseTrackInfo(video.Title, video.Author.Title, video.Id.Value, length));
        }

        // Stop right after the last wanted video instead of enumerating one more.
        if (++position >= end)
            break;
    }

    return (playlistTitle, tracks);
}
|
||
|
||
|
||
/// <summary>Resolves a playlist URL to its playlist id using YoutubeExplode.</summary>
/// <param name="url">Playlist URL.</param>
/// <returns>The string form of the id of the playlist YoutubeExplode returned.</returns>
public static async Task<string> UrlToId(string url)
    => (await youtube.Playlists.GetAsync(url)).Id.ToString();
|
||
|
||
public class YouTubeArchiveRetriever
|
||
{
|
||
// HTTP client used for the archive index requests made by this retriever.
private HttpClient _client;

/// <summary>Creates the retriever with an HTTP client whose timeout is 10 seconds.</summary>
public YouTubeArchiveRetriever()
{
    _client = new HttpClient
    {
        Timeout = TimeSpan.FromSeconds(10),
    };
}
|
||
|
||
/// <summary>
/// Runs yt-dlp on <paramref name="url"/> to print the page URL of every entry that has no
/// uploader (these are treated as deleted videos), then looks up the oldest web archive
/// snapshot of each one and parses title, uploader and duration from it.
/// </summary>
/// <param name="url">URL handed to yt-dlp.</param>
/// <returns>
/// Tracks for the videos whose snapshot yielded a title; order is not defined because
/// four workers process the URLs concurrently.
/// </returns>
public async Task<List<Track>> RetrieveDeleted(string url)
{
    var deletedVideoUrls = new BlockingCollection<string>();
    var tracks = new ConcurrentBag<Track>();

    var startInfo = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        RedirectStandardOutput = true,
        UseShellExecute = false,
        CreateNoWindow = true,
    };

    // Separate arguments: the URL is passed verbatim and can neither be split into
    // several arguments nor be mistaken for quoting.
    foreach (var argument in new[] { "--ignore-no-formats-error", "--no-warn", "--match-filter", "!uploader", "--print", "webpage_url", url })
        startInfo.ArgumentList.Add(argument);

    using var process = new Process { StartInfo = startInfo };

    bool gotFirst = false;
    process.OutputDataReceived += (sender, e) =>
    {
        // A null line marks the end of the output stream. Completing the queue here rather
        // than in an Exited handler guarantees that no line arrives after completion
        // (Add would throw) and keeps the null from being queued as a URL.
        if (e.Data == null)
        {
            deletedVideoUrls.CompleteAdding();
            return;
        }

        if (string.IsNullOrWhiteSpace(e.Data))
            return;

        if (!gotFirst) { Console.WriteLine("Got first video"); gotFirst = true; }
        deletedVideoUrls.Add(e.Data);
    };

    process.Start();
    process.BeginOutputReadLine();

    const int workerCount = 4;
    var workers = new List<Task>(workerCount);
    for (int i = 0; i < workerCount; i++)
    {
        workers.Add(Task.Run(async () =>
        {
            foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
            {
                var waybackUrl = await GetOldestArchiveUrl(videoUrl);
                if (string.IsNullOrEmpty(waybackUrl))
                    continue;

                var details = await GetVideoDetails(waybackUrl);
                if (string.IsNullOrEmpty(details.title))
                    continue;

                // The snapshot URL is passed as the track id.
                tracks.Add(await ParseTrackInfo(details.title, details.uploader, waybackUrl, details.duration));

                if (!Console.IsOutputRedirected)
                {
                    Console.SetCursorPosition(0, Console.CursorTop);
                    Console.Write($"Deleted videos processed: {tracks.Count}");
                }
            }
        }));
    }

    await Task.WhenAll(workers);
    process.WaitForExit();
    Console.WriteLine();

    return tracks.ToList();
}
|
||
|
||
/// <summary>
/// Queries the web.archive.org CDX index for the oldest capture of <paramref name="url"/>
/// that has status code 200.
/// </summary>
/// <param name="url">URL to look up; inserted into the query string as given.</param>
/// <returns>
/// The web.archive.org URL of that capture, or null when all three request attempts fail,
/// the response is not successful, or it contains no usable line.
/// </returns>
private async Task<string?> GetOldestArchiveUrl(string url)
{
    const int maxAttempts = 3;

    var query = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";

    HttpResponseMessage? response = null;
    for (int attempt = 0; attempt < maxAttempts && response == null; attempt++)
    {
        try
        {
            response = await _client.GetAsync(query);
        }
        catch (Exception)
        {
            // Best effort: any failure (timeout, connection error, ...) just leads to the
            // next attempt.
        }
    }

    if (response == null)
        return null;

    using (response)
    {
        if (!response.IsSuccessStatusCode)
            return null;

        var content = await response.Content.ReadAsStringAsync();
        var firstLine = content.Split('\n').FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));
        if (firstLine == null)
            return null;

        // Expected shape: "<timestamp> <original url>". Anything shorter is treated as
        // "no capture" instead of failing with an index error.
        var parts = firstLine.Split(' ', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        if (parts.Length < 2)
            return null;

        var timestamp = parts[0];
        var originalUrl = parts[1];
        return $"http://web.archive.org/web/{timestamp}/{originalUrl}";
    }
}
|
||
|
||
/// <summary>
/// Downloads the page at <paramref name="url"/> and extracts the video title, uploader
/// name and duration by trying a list of XPath patterns for each.
/// </summary>
/// <param name="url">Page to load.</param>
/// <returns>
/// Title and uploader (empty strings when no pattern produced a value) and the duration in
/// seconds (-1 when the page has no duration meta element or its value cannot be parsed).
/// </returns>
public async Task<(string title, string uploader, int duration)> GetVideoDetails(string url)
{
    var web = new HtmlWeb();
    var doc = await web.LoadFromWebAsync(url);

    var titlePatterns = new[]
    {
        "//h1[@id='video_title']",
        "//meta[@name='title']",
    };

    var usernamePatterns = new[]
    {
        "//div[@id='userInfoDiv']/b/a",
        "//a[contains(@class, 'contributor')]",
        "//a[@id='watch-username']",
        "//a[contains(@class, 'author')]",
        "//div[@class='yt-user-info']/a",
        "//div[@id='upload-info']//yt-formatted-string/a",
        "//span[@itemprop='author']//link[@itemprop='name']",
        "//a[contains(@class, 'yt-user-name')]",
    };

    // Returns the first non-empty value found by the patterns, in order. Meta and itemprop
    // patterns read the "content" attribute, all others the node's inner text.
    string getItem(string[] patterns)
    {
        foreach (var pattern in patterns)
        {
            var match = doc.DocumentNode.SelectSingleNode(pattern);
            if (match == null)
                continue;

            bool readsAttribute = pattern.StartsWith("//meta") || pattern.Contains("@itemprop");
            var value = readsAttribute ? match.GetAttributeValue("content", "") : match.InnerText;

            if (!string.IsNullOrEmpty(value))
                return value;
        }

        return "";
    }

    int duration = -1;
    var durationNode = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
    if (durationNode != null)
    {
        try
        {
            duration = (int)XmlConvert.ToTimeSpan(durationNode.GetAttributeValue("content", "")).TotalSeconds;
        }
        catch (Exception ex) when (ex is FormatException or OverflowException)
        {
            // Missing or malformed duration value: leave it at -1 rather than discarding
            // the title and uploader that may still be extracted below.
        }
    }

    return (getItem(titlePatterns), getItem(usernamePatterns), duration);
}
|
||
}
|
||
|
||
/// <summary>
/// Runs a yt-dlp search (<c>ytsearch3:</c>) for the track and returns the results that
/// yt-dlp printed in the expected "duration === id === title" form.
/// </summary>
/// <param name="track">
/// Track to search for; the query is "Artist - Title", or just the title when the artist
/// is empty.
/// </param>
/// <returns>Length in seconds, video id and title of every parsed result line.</returns>
public static async Task<List<(int length, string id, string title)>> YtdlpSearch(Track track)
{
    string search = track.Artist != "" ? $"{track.Artist} - {track.Title}" : track.Title;

    var startInfo = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
    };

    // Separate arguments instead of one quoted string: artist and title come from external
    // metadata, and a quote in them could otherwise end the argument and smuggle extra
    // options into the yt-dlp command line.
    startInfo.ArgumentList.Add($"ytsearch3:{search}");
    startInfo.ArgumentList.Add("--print");
    startInfo.ArgumentList.Add("%(duration>%s)s === %(id)s === %(title)s");

    using var process = new Process { StartInfo = startInfo };
    process.Start();

    // stderr is redirected, so it has to be drained; an unread pipe that fills up would
    // block yt-dlp and with it the read loop below. The lines are discarded.
    process.BeginErrorReadLine();

    var results = new List<(int length, string id, string title)>();
    var resultLine = new Regex(@"^(\d+) === ([\w-]+) === (.+)$");

    string? output;
    while ((output = await process.StandardOutput.ReadLineAsync()) != null)
    {
        Match match = resultLine.Match(output);

        // TryParse: a digit run that does not fit an int is skipped rather than thrown on.
        if (match.Success && int.TryParse(match.Groups[1].Value, out int seconds))
            results.Add((seconds, match.Groups[2].Value, match.Groups[3].Value));
    }

    await process.WaitForExitAsync();
    return results;
}
|
||
|
||
/// <summary>
/// Downloads a video's audio with yt-dlp and returns the path of the resulting file.
/// </summary>
/// <param name="id">Value substituted for <c>{id}</c> in the argument template.</param>
/// <param name="savePathNoExt">
/// Target path without extension; substituted for <c>{savepath}</c> and
/// <c>{savepath-noext}</c>, and its directory for <c>{savedir}</c>.
/// </param>
/// <param name="ytdlpArgument">
/// yt-dlp argument template; when empty, a default that extracts the best audio to
/// <c>{savepath-noext}.%(ext)s</c> is used.
/// </param>
/// <returns>
/// <c>savePathNoExt + ".opus"</c> if that file exists; otherwise a music file in the target
/// directory, preferring one whose name without extension matches the requested file name;
/// otherwise an empty string.
/// </returns>
public static async Task<string> YtdlpDownload(string id, string savePathNoExt, string ytdlpArgument="")
{
    if (ytdlpArgument == "")
        ytdlpArgument = "\"{id}\" -f bestaudio/best -ci -o \"{savepath-noext}.%(ext)s\" -x";

    string? parentDirectory = Path.GetDirectoryName(savePathNoExt);

    var startInfo = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        Arguments = ytdlpArgument
            .Replace("{id}", id)
            .Replace("{savepath}", savePathNoExt)
            .Replace("{savepath-noext}", savePathNoExt)
            .Replace("{savedir}", parentDirectory),
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
    };

    using var process = new Process { StartInfo = startInfo };
    process.Start();

    // Both streams are redirected, so both must be drained: once a pipe buffer fills up
    // yt-dlp blocks on writing and the wait below would never return. The output itself is
    // discarded, as before.
    process.BeginOutputReadLine();
    process.BeginErrorReadLine();

    await process.WaitForExitAsync();

    if (File.Exists(savePathNoExt + ".opus"))
        return savePathNoExt + ".opus";

    // No directory can be derived from the path: nothing to scan.
    if (parentDirectory == null)
        return "";

    // A bare file name has an empty directory part; scan the working directory then.
    string scanDirectory = parentDirectory.Length == 0 ? "." : parentDirectory;
    if (!Directory.Exists(scanDirectory))
        return "";

    string[] musicFiles = Directory.GetFiles(scanDirectory, "*", SearchOption.TopDirectoryOnly)
        .Where(file => Utils.IsMusicFile(file))
        .ToArray();

    // Prefer the file that carries the requested name, so another track stored in the same
    // directory is not returned by accident; fall back to the first music file (the
    // previous behavior) for custom output templates that use a different name.
    string wantedName = Path.GetFileName(savePathNoExt);
    string? downloaded = musicFiles.FirstOrDefault(file => Path.GetFileNameWithoutExtension(file) == wantedName)
        ?? musicFiles.FirstOrDefault();

    return downloaded ?? "";
}
|
||
}
|