1
0
Fork 0
mirror of https://github.com/fiso64/slsk-batchdl.git synced 2025-01-18 11:20:33 +00:00
This commit is contained in:
fiso64 2024-05-28 14:02:03 +02:00
parent 36d85ec33e
commit 11579385a9
4 changed files with 153 additions and 50 deletions

View file

@ -134,6 +134,8 @@ Options:
--youtube-key <key> Youtube data API key --youtube-key <key> Youtube data API key
--get-deleted Attempt to retrieve titles of deleted videos from wayback --get-deleted Attempt to retrieve titles of deleted videos from wayback
machine. Requires yt-dlp. machine. Requires yt-dlp.
--deleted-only Only retrieve & download deleted music. Combine with --print
tracks-full to display a list of all deleted titles & urls.
--time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms --time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms
for durations like 1:04:35.123). Default: s for durations like 1:04:35.123). Default: s

View file

@ -114,6 +114,7 @@ static class Program
static bool useTagsCheckExisting = false; static bool useTagsCheckExisting = false;
static bool removeTracksFromSource = false; static bool removeTracksFromSource = false;
static bool getDeleted = false; static bool getDeleted = false;
static bool deletedOnly = false;
static bool removeSingleCharacterSearchTerms = false; static bool removeSingleCharacterSearchTerms = false;
static int maxTracks = int.MaxValue; static int maxTracks = int.MaxValue;
static int minUsersAggregate = 2; static int minUsersAggregate = 2;
@ -201,6 +202,8 @@ static class Program
"\n --youtube-key <key> Youtube data API key" + "\n --youtube-key <key> Youtube data API key" +
"\n --get-deleted Attempt to retrieve titles of deleted videos from wayback" + "\n --get-deleted Attempt to retrieve titles of deleted videos from wayback" +
"\n machine. Requires yt-dlp." + "\n machine. Requires yt-dlp." +
"\n --deleted-only Only retrieve & download deleted music. Combine with --print" +
"\n tracks-full to display a list of all deleted titles & urls." +
"\n" + "\n" +
"\n --time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms" + "\n --time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms" +
"\n for durations like 1:04:35.123). Default: s" + "\n for durations like 1:04:35.123). Default: s" +
@ -221,7 +224,7 @@ static class Program
"\n --pref-format <format> Preferred file format(s), comma-separated (default: mp3)" + "\n --pref-format <format> Preferred file format(s), comma-separated (default: mp3)" +
"\n --pref-length-tol <sec> Preferred length tolerance in seconds (default: 2)" + "\n --pref-length-tol <sec> Preferred length tolerance in seconds (default: 2)" +
"\n --pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)" + "\n --pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)" +
"\n --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200)" + "\n --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2500)" +
"\n --pref-min-samplerate <rate> Preferred minimum sample rate" + "\n --pref-min-samplerate <rate> Preferred minimum sample rate" +
"\n --pref-max-samplerate <rate> Preferred maximum sample rate (default: 48000)" + "\n --pref-max-samplerate <rate> Preferred maximum sample rate (default: 48000)" +
"\n --pref-min-bitdepth <depth> Preferred minimum bit depth" + "\n --pref-min-bitdepth <depth> Preferred minimum bit depth" +
@ -566,6 +569,11 @@ static class Program
case "--get-deleted": case "--get-deleted":
getDeleted = true; getDeleted = true;
break; break;
case "--do":
case "--deleted-only":
getDeleted = true;
deletedOnly = true;
break;
case "--re": case "--re":
case "--regex": case "--regex":
string s = args[++i].Replace("\\;", "<<semicol>>"); string s = args[++i].Replace("\\;", "<<semicol>>");
@ -992,14 +1000,16 @@ static class Program
string name; string name;
List<Track>? deleted = null; List<Track>? deleted = null;
List<Track> tracks; List<Track> tracks = new();
if (getDeleted) if (getDeleted)
{ {
Console.WriteLine("Getting deleted videos.."); Console.WriteLine("Getting deleted videos..");
var archive = new YouTube.YouTubeArchiveRetriever(); var archive = new YouTube.YouTubeArchiveRetriever();
deleted = await archive.RetrieveDeleted(ytUrl); deleted = await archive.RetrieveDeleted(ytUrl, printFailed: deletedOnly);
} }
if (!deletedOnly)
{
if (YouTube.apiKey != "") if (YouTube.apiKey != "")
{ {
Console.WriteLine("Loading YouTube playlist (API)"); Console.WriteLine("Loading YouTube playlist (API)");
@ -1010,6 +1020,11 @@ static class Program
Console.WriteLine("Loading YouTube playlist"); Console.WriteLine("Loading YouTube playlist");
(name, tracks) = await YouTube.GetTracksYtExplode(ytUrl, max, off); (name, tracks) = await YouTube.GetTracksYtExplode(ytUrl, max, off);
} }
}
else
{
name = await YouTube.GetPlaylistTitle(ytUrl);
}
if (deleted != null) if (deleted != null)
{ {
tracks.InsertRange(0, deleted); tracks.InsertRange(0, deleted);
@ -2582,7 +2597,7 @@ static class Program
if (!noRemoveSpecialChars) if (!noRemoveSpecialChars)
{ {
old = str; old = str;
str = str.ReplaceSpecialChars(" ").RemoveConsecutiveWs().Trim(); str = str.ReplaceSpecialChars(" ").Trim().RemoveConsecutiveWs();
if (str == "") str = old; if (str == "") str = old;
} }
foreach (var banned in bannedTerms) foreach (var banned in bannedTerms)
@ -2602,7 +2617,7 @@ static class Program
public static Track InferTrack(string filename, Track defaultTrack) public static Track InferTrack(string filename, Track defaultTrack)
{ {
Track t = new Track(defaultTrack); Track t = new Track(defaultTrack);
filename = GetFileNameWithoutExtSlsk(filename).Replace(" — ", " - ").Replace("_", " ").RemoveConsecutiveWs().Trim(); filename = GetFileNameWithoutExtSlsk(filename).Replace(" — ", " - ").Replace("_", " ").Trim().RemoveConsecutiveWs();
var trackNumStart = new Regex(@"^(?:(?:[0-9][-\.])?\d{2,3}[. -]|\b\d\.\s|\b\d\s-\s)(?=.+\S)"); var trackNumStart = new Regex(@"^(?:(?:[0-9][-\.])?\d{2,3}[. -]|\b\d\.\s|\b\d\s-\s)(?=.+\S)");
//var trackNumMiddle = new Regex(@"\s+-\s+(\d{2,3})(?: -|\.|)\s+|\s+-(\d{2,3})-\s+"); //var trackNumMiddle = new Regex(@"\s+-\s+(\d{2,3})(?: -|\.|)\s+|\s+-(\d{2,3})-\s+");
@ -3131,11 +3146,11 @@ static class Program
fname = fname.Replace("_", " ").ReplaceInvalidChars(" ", true, false); fname = fname.Replace("_", " ").ReplaceInvalidChars(" ", true, false);
fname = regexRemove != "" ? Regex.Replace(fname, regexRemove, "") : fname; fname = regexRemove != "" ? Regex.Replace(fname, regexRemove, "") : fname;
fname = diacrRemove ? fname.RemoveDiacritics() : fname; fname = diacrRemove ? fname.RemoveDiacritics() : fname;
fname = fname.Trim(); fname = fname.Trim().RemoveConsecutiveWs();
tname = tname.Replace("_", " ").ReplaceInvalidChars(" ", true, false); tname = tname.Replace("_", " ").ReplaceInvalidChars(" ", true, false);
tname = regexRemove != "" ? Regex.Replace(tname, regexRemove, "") : tname; tname = regexRemove != "" ? Regex.Replace(tname, regexRemove, "") : tname;
tname = diacrRemove ? tname.RemoveDiacritics() : tname; tname = diacrRemove ? tname.RemoveDiacritics() : tname;
tname = tname.Trim(); tname = tname.Trim().RemoveConsecutiveWs();
if (boundarySkipWs) if (boundarySkipWs)
return fname.ContainsWithBoundaryIgnoreWs(tname, ignoreCase, acceptLeftDigit: true); return fname.ContainsWithBoundaryIgnoreWs(tname, ignoreCase, acceptLeftDigit: true);
@ -4073,6 +4088,8 @@ static class Program
Console.WriteLine($" Album: {tracks[i].Album}"); Console.WriteLine($" Album: {tracks[i].Album}");
if (!string.IsNullOrEmpty(tracks[i].URI)) if (!string.IsNullOrEmpty(tracks[i].URI))
Console.WriteLine($" URL/ID: {tracks[i].URI}"); Console.WriteLine($" URL/ID: {tracks[i].URI}");
if (!string.IsNullOrEmpty(tracks[i].Other))
Console.WriteLine($" Other: {tracks[i].Other}");
if (tracks[i].ArtistMaybeWrong) if (tracks[i].ArtistMaybeWrong)
Console.WriteLine($" Artist maybe wrong: {tracks[i].ArtistMaybeWrong}"); Console.WriteLine($" Artist maybe wrong: {tracks[i].ArtistMaybeWrong}");
if (tracks[i].Downloads != null) { if (tracks[i].Downloads != null) {
@ -4358,6 +4375,7 @@ public struct Track
public bool IsNotAudio = false; public bool IsNotAudio = false;
public string FailureReason = ""; public string FailureReason = "";
public string DownloadPath = ""; public string DownloadPath = "";
public string Other = "";
public State TrackState = State.Initial; public State TrackState = State.Initial;
public SlDictionary? Downloads = null; public SlDictionary? Downloads = null;
@ -4387,6 +4405,7 @@ public struct Track
TrackState = other.TrackState; TrackState = other.TrackState;
FailureReason = other.FailureReason; FailureReason = other.FailureReason;
DownloadPath = other.DownloadPath; DownloadPath = other.DownloadPath;
Other = other.Other;
} }
public override readonly string ToString() public override readonly string ToString()

View file

@ -1,6 +1,5 @@
using System.Net; using System.Net;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Xml.Linq;
public static class Utils public static class Utils
{ {
@ -112,7 +111,7 @@ public static class Utils
/// <summary>
/// Collapses every run of whitespace in <paramref name="input"/> into a single space
/// and drops leading and trailing whitespace.
/// </summary>
/// <param name="input">Text to normalize.</param>
/// <returns>
/// The whitespace-separated tokens of <paramref name="input"/> joined by single spaces;
/// an empty string when there are no tokens.
/// </returns>
public static string RemoveConsecutiveWs(this string input)
{
    // An empty separator array makes Split break on all white-space characters
    // (tabs, newlines, ...), not only on U+0020. Splitting on ' ' alone left
    // runs of other whitespace untouched.
    var tokens = input.Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
    return string.Join(' ', tokens);
}
public static string RemoveSquareBrackets(this string str) public static string RemoveSquareBrackets(this string str)

View file

@ -6,10 +6,7 @@ using System.Text.RegularExpressions;
using YoutubeExplode.Common; using YoutubeExplode.Common;
using System.Diagnostics; using System.Diagnostics;
using HtmlAgilityPack; using HtmlAgilityPack;
using System.Text;
using System.Threading.Channels;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System;
public static class YouTube public static class YouTube
{ {
@ -98,8 +95,8 @@ public static class YouTube
var track = new Track(); var track = new Track();
track.URI = id; track.URI = id;
uploader = Regex.Replace(uploader.Replace("", "-").Trim(), @"\s+", " "); uploader = uploader.Replace("", "-").Trim().RemoveConsecutiveWs();
title = Regex.Replace(title.Replace("", "-").Trim(), @"\s+", " "); title = title.Replace("", "-").Replace(" -- ", " - ").Trim().RemoveConsecutiveWs();
var artist = uploader; var artist = uploader;
var trackTitle = title; var trackTitle = title;
@ -258,6 +255,13 @@ public static class YouTube
return tracks; return tracks;
} }
/// <summary>
/// Fetches the title of the YouTube playlist at <paramref name="url"/>.
/// </summary>
/// <param name="url">Playlist URL, handed to the client's <c>Playlists.GetAsync</c>.</param>
/// <returns>The <c>Title</c> of the playlist returned by the client.</returns>
public static async Task<string> GetPlaylistTitle(string url)
{
    var client = new YoutubeClient();
    return (await client.Playlists.GetAsync(url)).Title;
}
public static async Task<(string, List<Track>)> GetTracksYtExplode(string url, int max = int.MaxValue, int offset = 0) public static async Task<(string, List<Track>)> GetTracksYtExplode(string url, int max = int.MaxValue, int offset = 0)
{ {
var youtube = new YoutubeClient(); var youtube = new YoutubeClient();
@ -305,12 +309,34 @@ public static class YouTube
_client.Timeout = TimeSpan.FromSeconds(10); _client.Timeout = TimeSpan.FromSeconds(10);
} }
public async Task<List<Track>> RetrieveDeleted(string url) public async Task<List<Track>> RetrieveDeleted(string url, bool printFailed = true)
{ {
var deletedVideoUrls = new BlockingCollection<string>(); var deletedVideoUrls = new BlockingCollection<string>();
var tracks = new ConcurrentBag<Track>();
var process = new Process() int totalCount = 0;
int archivedCount = 0;
var tracks = new ConcurrentBag<Track>();
var noArchive = new ConcurrentBag<string>();
var failRetrieve = new ConcurrentBag<string>();
int workerCount = 4;
var workers = new List<Task>();
var consoleLock = new object();
// Rewrites the one-line progress counter (total / archived / retrieved) in place.
// Guarded by consoleLock because both the process output handler and the worker
// tasks call it.
void updateInfo()
{
    lock (consoleLock)
    {
        // Cursor positioning is skipped entirely when output is redirected.
        if (Console.IsOutputRedirected)
            return;

        // Jump back to column 0 of the current row so the new text overwrites the old.
        Console.SetCursorPosition(0, Console.CursorTop);
        Console.Write($"Deleted metadata total/archived/retrieved: {totalCount}/{archivedCount}/{tracks.Count}");
    }
}
var process = new Process
{ {
StartInfo = new ProcessStartInfo StartInfo = new ProcessStartInfo
{ {
@ -319,14 +345,17 @@ public static class YouTube
RedirectStandardOutput = true, RedirectStandardOutput = true,
UseShellExecute = false, UseShellExecute = false,
CreateNoWindow = true, CreateNoWindow = true,
} },
EnableRaisingEvents = true
}; };
process.EnableRaisingEvents = true;
bool ok = false;
process.OutputDataReceived += (sender, e) => process.OutputDataReceived += (sender, e) =>
{ {
if (!ok) { Console.WriteLine("Got first video"); ok = true; } if (!string.IsNullOrWhiteSpace(e.Data))
{
deletedVideoUrls.Add(e.Data); deletedVideoUrls.Add(e.Data);
Interlocked.Increment(ref totalCount);
updateInfo();
}
}; };
process.Exited += (sender, e) => process.Exited += (sender, e) =>
{ {
@ -336,29 +365,42 @@ public static class YouTube
process.Start(); process.Start();
process.BeginOutputReadLine(); process.BeginOutputReadLine();
List<Task> workers = new List<Task>();
int workerCount = 4;
for (int i = 0; i < workerCount; i++) for (int i = 0; i < workerCount; i++)
{ {
workers.Add(Task.Run(async () => workers.Add(Task.Run(async () =>
{ {
foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable()) foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
{ {
var waybackUrl = await GetOldestArchiveUrl(videoUrl); var waybackUrls = await GetOldestArchiveUrls(videoUrl, limit: 2);
if (!string.IsNullOrEmpty(waybackUrl)) if (waybackUrls != null && waybackUrls.Count > 0)
{ {
var x = await GetVideoDetails(waybackUrl); Interlocked.Increment(ref archivedCount);
if (!string.IsNullOrEmpty(x.title))
bool good = false;
foreach (var waybackUrl in waybackUrls)
{ {
var track = await ParseTrackInfo(x.title, x.uploader, waybackUrl, x.duration); var (title, uploader, duration) = await GetVideoDetails(waybackUrl);
if (!string.IsNullOrWhiteSpace(title))
{
var track = await ParseTrackInfo(title, uploader, waybackUrl, duration);
track.Other = $"{{\"t\":\"{title.Trim()}\",\"u\":\"{uploader.Trim()}\"}}";
tracks.Add(track); tracks.Add(track);
if (!Console.IsOutputRedirected) good = true;
break;
}
}
if (!good)
{ {
Console.SetCursorPosition(0, Console.CursorTop); failRetrieve.Add(waybackUrls[0]);
Console.Write($"Deleted videos processed: {tracks.Count}");
} }
} }
else
{
noArchive.Add(videoUrl);
} }
updateInfo();
} }
})); }));
} }
@ -367,12 +409,32 @@ public static class YouTube
process.WaitForExit(); process.WaitForExit();
deletedVideoUrls.CompleteAdding(); deletedVideoUrls.CompleteAdding();
Console.WriteLine(); Console.WriteLine();
if (printFailed)
{
if (archivedCount < totalCount)
{
Console.WriteLine("No archived version found for the following:");
foreach (var x in noArchive)
Console.WriteLine($" {x}");
Console.WriteLine();
}
if (tracks.Count < archivedCount)
{
Console.WriteLine("Failed to parse archived version for the following:");
foreach (var x in failRetrieve)
Console.WriteLine($" {x}");
Console.WriteLine();
}
}
return tracks.ToList(); return tracks.ToList();
} }
private async Task<string> GetOldestArchiveUrl(string url) private async Task<List<string>> GetOldestArchiveUrls(string url, int limit)
{ {
var url2 = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1"; var url2 = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit={limit}";
HttpResponseMessage response = null; HttpResponseMessage response = null;
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
@ -388,13 +450,16 @@ public static class YouTube
{ {
var content = await response.Content.ReadAsStringAsync(); var content = await response.Content.ReadAsStringAsync();
var lines = content.Split("\n").Where(line => !string.IsNullOrWhiteSpace(line)).ToList(); var lines = content.Split("\n").Where(line => !string.IsNullOrWhiteSpace(line)).ToList();
if (lines.Any()) if (lines.Count > 0)
{ {
var parts = lines[0].Split(" "); for (int i = 0; i < lines.Count; i++)
{
var parts = lines[i].Split(" ");
var timestamp = parts[0]; var timestamp = parts[0];
var originalUrl = parts[1]; var originalUrl = parts[1];
var oldestArchive = $"http://web.archive.org/web/{timestamp}/{originalUrl}"; lines[i] = $"http://web.archive.org/web/{timestamp}/{originalUrl}";
return oldestArchive; }
return lines;
} }
} }
return null; return null;
@ -428,25 +493,43 @@ public static class YouTube
foreach (var pattern in patterns) foreach (var pattern in patterns)
{ {
var node = doc.DocumentNode.SelectSingleNode(pattern); var node = doc.DocumentNode.SelectSingleNode(pattern);
var res = "";
if (node != null) if (node != null)
{ {
var res = "";
if (pattern.StartsWith("//meta") || pattern.Contains("@itemprop")) if (pattern.StartsWith("//meta") || pattern.Contains("@itemprop"))
res = node.GetAttributeValue("content", ""); res = node.GetAttributeValue("content", "");
else else
res = node.InnerText; res = node.InnerText;
if (!string.IsNullOrEmpty(res)) return res; if (!string.IsNullOrEmpty(res))
return Utils.UnHtmlString(res);
} }
} }
return ""; return "";
} }
var title = getItem(titlePatterns);
if (string.IsNullOrEmpty(title))
{
var pattern = @"document\.title\s*=\s*""(.+?) - YouTube"";";
var match = Regex.Match(doc.Text, pattern);
if (match.Success)
title = match.Groups[1].Value;
}
var username = getItem(usernamePatterns);
int duration = -1; int duration = -1;
var node = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']"); var node = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
if (node != null) if (node != null)
{
try
{
duration = (int)XmlConvert.ToTimeSpan(node.GetAttributeValue("content", "")).TotalSeconds; duration = (int)XmlConvert.ToTimeSpan(node.GetAttributeValue("content", "")).TotalSeconds;
}
catch { }
}
return (getItem(titlePatterns), getItem(usernamePatterns), duration); return (title, username, duration);
} }
} }