1
0
Fork 0
mirror of https://github.com/fiso64/slsk-batchdl.git synced 2024-12-22 14:32:40 +00:00
This commit is contained in:
fiso64 2024-05-28 14:02:03 +02:00
parent 36d85ec33e
commit 11579385a9
4 changed files with 153 additions and 50 deletions

View file

@ -134,6 +134,8 @@ Options:
--youtube-key <key> Youtube data API key
--get-deleted Attempt to retrieve titles of deleted videos from wayback
machine. Requires yt-dlp.
--deleted-only Only retrieve & download deleted music. Combine with --print
tracks-full to display a list of all deleted titles & urls.
--time-format <format> Time format in Length column of the csv file (e.g. h:m:s.ms
for durations like 1:04:35.123). Default: s

View file

@ -114,6 +114,7 @@ static class Program
static bool useTagsCheckExisting = false;
static bool removeTracksFromSource = false;
static bool getDeleted = false;
static bool deletedOnly = false;
static bool removeSingleCharacterSearchTerms = false;
static int maxTracks = int.MaxValue;
static int minUsersAggregate = 2;
@ -201,6 +202,8 @@ static class Program
"\n --youtube-key <key> Youtube data API key" +
"\n --get-deleted Attempt to retrieve titles of deleted videos from wayback" +
"\n machine. Requires yt-dlp." +
"\n --deleted-only Only retrieve & download deleted music. Combine with --print" +
"\n tracks-full to display a list of all deleted titles & urls." +
"\n" +
"\n --time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms" +
"\n for durations like 1:04:35.123). Default: s" +
@ -221,7 +224,7 @@ static class Program
"\n --pref-format <format> Preferred file format(s), comma-separated (default: mp3)" +
"\n --pref-length-tol <sec> Preferred length tolerance in seconds (default: 2)" +
"\n --pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)" +
"\n --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200)" +
"\n --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2500)" +
"\n --pref-min-samplerate <rate> Preferred minimum sample rate" +
"\n --pref-max-samplerate <rate> Preferred maximum sample rate (default: 48000)" +
"\n --pref-min-bitdepth <depth> Preferred minimum bit depth" +
@ -566,6 +569,11 @@ static class Program
case "--get-deleted":
getDeleted = true;
break;
case "--do":
case "--deleted-only":
getDeleted = true;
deletedOnly = true;
break;
case "--re":
case "--regex":
string s = args[++i].Replace("\\;", "<<semicol>>");
@ -992,23 +1000,30 @@ static class Program
string name;
List<Track>? deleted = null;
List<Track> tracks;
List<Track> tracks = new();
if (getDeleted)
{
Console.WriteLine("Getting deleted videos..");
var archive = new YouTube.YouTubeArchiveRetriever();
deleted = await archive.RetrieveDeleted(ytUrl);
deleted = await archive.RetrieveDeleted(ytUrl, printFailed: deletedOnly);
}
if (YouTube.apiKey != "")
if (!deletedOnly)
{
Console.WriteLine("Loading YouTube playlist (API)");
(name, tracks) = await YouTube.GetTracksApi(ytUrl, max, off);
if (YouTube.apiKey != "")
{
Console.WriteLine("Loading YouTube playlist (API)");
(name, tracks) = await YouTube.GetTracksApi(ytUrl, max, off);
}
else
{
Console.WriteLine("Loading YouTube playlist");
(name, tracks) = await YouTube.GetTracksYtExplode(ytUrl, max, off);
}
}
else
{
Console.WriteLine("Loading YouTube playlist");
(name, tracks) = await YouTube.GetTracksYtExplode(ytUrl, max, off);
name = await YouTube.GetPlaylistTitle(ytUrl);
}
if (deleted != null)
{
@ -2582,7 +2597,7 @@ static class Program
if (!noRemoveSpecialChars)
{
old = str;
str = str.ReplaceSpecialChars(" ").RemoveConsecutiveWs().Trim();
str = str.ReplaceSpecialChars(" ").Trim().RemoveConsecutiveWs();
if (str == "") str = old;
}
foreach (var banned in bannedTerms)
@ -2602,7 +2617,7 @@ static class Program
public static Track InferTrack(string filename, Track defaultTrack)
{
Track t = new Track(defaultTrack);
filename = GetFileNameWithoutExtSlsk(filename).Replace(" — ", " - ").Replace("_", " ").RemoveConsecutiveWs().Trim();
filename = GetFileNameWithoutExtSlsk(filename).Replace(" — ", " - ").Replace("_", " ").Trim().RemoveConsecutiveWs();
var trackNumStart = new Regex(@"^(?:(?:[0-9][-\.])?\d{2,3}[. -]|\b\d\.\s|\b\d\s-\s)(?=.+\S)");
//var trackNumMiddle = new Regex(@"\s+-\s+(\d{2,3})(?: -|\.|)\s+|\s+-(\d{2,3})-\s+");
@ -3131,11 +3146,11 @@ static class Program
fname = fname.Replace("_", " ").ReplaceInvalidChars(" ", true, false);
fname = regexRemove != "" ? Regex.Replace(fname, regexRemove, "") : fname;
fname = diacrRemove ? fname.RemoveDiacritics() : fname;
fname = fname.Trim();
fname = fname.Trim().RemoveConsecutiveWs();
tname = tname.Replace("_", " ").ReplaceInvalidChars(" ", true, false);
tname = regexRemove != "" ? Regex.Replace(tname, regexRemove, "") : tname;
tname = diacrRemove ? tname.RemoveDiacritics() : tname;
tname = tname.Trim();
tname = tname.Trim().RemoveConsecutiveWs();
if (boundarySkipWs)
return fname.ContainsWithBoundaryIgnoreWs(tname, ignoreCase, acceptLeftDigit: true);
@ -4073,6 +4088,8 @@ static class Program
Console.WriteLine($" Album: {tracks[i].Album}");
if (!string.IsNullOrEmpty(tracks[i].URI))
Console.WriteLine($" URL/ID: {tracks[i].URI}");
if (!string.IsNullOrEmpty(tracks[i].Other))
Console.WriteLine($" Other: {tracks[i].Other}");
if (tracks[i].ArtistMaybeWrong)
Console.WriteLine($" Artist maybe wrong: {tracks[i].ArtistMaybeWrong}");
if (tracks[i].Downloads != null) {
@ -4358,6 +4375,7 @@ public struct Track
public bool IsNotAudio = false;
public string FailureReason = "";
public string DownloadPath = "";
public string Other = "";
public State TrackState = State.Initial;
public SlDictionary? Downloads = null;
@ -4387,6 +4405,7 @@ public struct Track
TrackState = other.TrackState;
FailureReason = other.FailureReason;
DownloadPath = other.DownloadPath;
Other = other.Other;
}
public override readonly string ToString()

View file

@ -1,6 +1,5 @@
using System.Net;
using System.Text.RegularExpressions;
using System.Xml.Linq;
public static class Utils
{
@ -112,7 +111,7 @@ public static class Utils
/// <summary>
/// Collapses repeated whitespace in <paramref name="input"/> so that the
/// remaining text is separated by single spaces.
/// </summary>
/// <param name="input">The string to normalize.</param>
/// <returns>A new string with whitespace runs collapsed; see the notes on each return statement below.</returns>
public static string RemoveConsecutiveWs(this string input)
{
// Regex form: every run matched by \s+ (spaces, tabs, newlines, ...) becomes one ' '.
// A leading or trailing run is reduced to a single space, not removed.
return Regex.Replace(input, @"\s+", " ");
// NOTE(review): unreachable as written, because the return above always executes.
// This looks like the post-change line of a diff whose pre-change line is the
// Regex call above - confirm which one is meant to survive.
// The two are not equivalent: this form splits on ' ' only, drops empty entries and
// re-joins with ' ', so it also strips leading/trailing spaces and leaves tabs and
// newlines untouched.
return string.Join(' ', input.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
}
public static string RemoveSquareBrackets(this string str)

View file

@ -6,10 +6,7 @@ using System.Text.RegularExpressions;
using YoutubeExplode.Common;
using System.Diagnostics;
using HtmlAgilityPack;
using System.Text;
using System.Threading.Channels;
using System.Collections.Concurrent;
using System;
public static class YouTube
{
@ -98,8 +95,8 @@ public static class YouTube
var track = new Track();
track.URI = id;
uploader = Regex.Replace(uploader.Replace("", "-").Trim(), @"\s+", " ");
title = Regex.Replace(title.Replace("", "-").Trim(), @"\s+", " ");
uploader = uploader.Replace("", "-").Trim().RemoveConsecutiveWs();
title = title.Replace("", "-").Replace(" -- ", " - ").Trim().RemoveConsecutiveWs();
var artist = uploader;
var trackTitle = title;
@ -258,6 +255,13 @@ public static class YouTube
return tracks;
}
/// <summary>
/// Looks up a playlist through a fresh <c>YoutubeClient</c> and returns its title.
/// </summary>
/// <param name="url">Playlist URL, handed unchanged to <c>Playlists.GetAsync</c>.</param>
/// <returns>The <c>Title</c> of the playlist object returned by the client.</returns>
public static async Task<string> GetPlaylistTitle(string url)
{
    var client = new YoutubeClient();
    return (await client.Playlists.GetAsync(url)).Title;
}
public static async Task<(string, List<Track>)> GetTracksYtExplode(string url, int max = int.MaxValue, int offset = 0)
{
var youtube = new YoutubeClient();
@ -305,12 +309,34 @@ public static class YouTube
_client.Timeout = TimeSpan.FromSeconds(10);
}
public async Task<List<Track>> RetrieveDeleted(string url)
public async Task<List<Track>> RetrieveDeleted(string url, bool printFailed = true)
{
var deletedVideoUrls = new BlockingCollection<string>();
var tracks = new ConcurrentBag<Track>();
var process = new Process()
int totalCount = 0;
int archivedCount = 0;
var tracks = new ConcurrentBag<Track>();
var noArchive = new ConcurrentBag<string>();
var failRetrieve = new ConcurrentBag<string>();
int workerCount = 4;
var workers = new List<Task>();
var consoleLock = new object();
// Rewrites the current console line with the running total/archived/retrieved
// counters. Writes are serialized through consoleLock, and nothing is printed
// when standard output is redirected.
void updateInfo()
{
    const string prefix = "Deleted metadata total/archived/retrieved: ";

    lock (consoleLock)
    {
        if (Console.IsOutputRedirected)
            return;

        // Move back to column 0 of the current row so the new text overwrites the old.
        Console.SetCursorPosition(0, Console.CursorTop);
        Console.Write($"{prefix}{totalCount}/{archivedCount}/{tracks.Count}");
    }
}
var process = new Process
{
StartInfo = new ProcessStartInfo
{
@ -319,14 +345,17 @@ public static class YouTube
RedirectStandardOutput = true,
UseShellExecute = false,
CreateNoWindow = true,
}
},
EnableRaisingEvents = true
};
process.EnableRaisingEvents = true;
bool ok = false;
process.OutputDataReceived += (sender, e) =>
{
if (!ok) { Console.WriteLine("Got first video"); ok = true; }
deletedVideoUrls.Add(e.Data);
if (!string.IsNullOrWhiteSpace(e.Data))
{
deletedVideoUrls.Add(e.Data);
Interlocked.Increment(ref totalCount);
updateInfo();
}
};
process.Exited += (sender, e) =>
{
@ -336,29 +365,42 @@ public static class YouTube
process.Start();
process.BeginOutputReadLine();
List<Task> workers = new List<Task>();
int workerCount = 4;
for (int i = 0; i < workerCount; i++)
{
workers.Add(Task.Run(async () =>
{
foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
{
var waybackUrl = await GetOldestArchiveUrl(videoUrl);
if (!string.IsNullOrEmpty(waybackUrl))
var waybackUrls = await GetOldestArchiveUrls(videoUrl, limit: 2);
if (waybackUrls != null && waybackUrls.Count > 0)
{
var x = await GetVideoDetails(waybackUrl);
if (!string.IsNullOrEmpty(x.title))
Interlocked.Increment(ref archivedCount);
bool good = false;
foreach (var waybackUrl in waybackUrls)
{
var track = await ParseTrackInfo(x.title, x.uploader, waybackUrl, x.duration);
tracks.Add(track);
if (!Console.IsOutputRedirected)
var (title, uploader, duration) = await GetVideoDetails(waybackUrl);
if (!string.IsNullOrWhiteSpace(title))
{
Console.SetCursorPosition(0, Console.CursorTop);
Console.Write($"Deleted videos processed: {tracks.Count}");
var track = await ParseTrackInfo(title, uploader, waybackUrl, duration);
track.Other = $"{{\"t\":\"{title.Trim()}\",\"u\":\"{uploader.Trim()}\"}}";
tracks.Add(track);
good = true;
break;
}
}
if (!good)
{
failRetrieve.Add(waybackUrls[0]);
}
}
else
{
noArchive.Add(videoUrl);
}
updateInfo();
}
}));
}
@ -367,12 +409,32 @@ public static class YouTube
process.WaitForExit();
deletedVideoUrls.CompleteAdding();
Console.WriteLine();
if (printFailed)
{
if (archivedCount < totalCount)
{
Console.WriteLine("No archived version found for the following:");
foreach (var x in noArchive)
Console.WriteLine($" {x}");
Console.WriteLine();
}
if (tracks.Count < archivedCount)
{
Console.WriteLine("Failed to parse archived version for the following:");
foreach (var x in failRetrieve)
Console.WriteLine($" {x}");
Console.WriteLine();
}
}
return tracks.ToList();
}
private async Task<string> GetOldestArchiveUrl(string url)
private async Task<List<string>> GetOldestArchiveUrls(string url, int limit)
{
var url2 = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";
var url2 = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit={limit}";
HttpResponseMessage response = null;
for (int i = 0; i < 3; i++)
{
@ -388,13 +450,16 @@ public static class YouTube
{
var content = await response.Content.ReadAsStringAsync();
var lines = content.Split("\n").Where(line => !string.IsNullOrWhiteSpace(line)).ToList();
if (lines.Any())
if (lines.Count > 0)
{
var parts = lines[0].Split(" ");
var timestamp = parts[0];
var originalUrl = parts[1];
var oldestArchive = $"http://web.archive.org/web/{timestamp}/{originalUrl}";
return oldestArchive;
for (int i = 0; i < lines.Count; i++)
{
var parts = lines[i].Split(" ");
var timestamp = parts[0];
var originalUrl = parts[1];
lines[i] = $"http://web.archive.org/web/{timestamp}/{originalUrl}";
}
return lines;
}
}
return null;
@ -428,25 +493,43 @@ public static class YouTube
foreach (var pattern in patterns)
{
var node = doc.DocumentNode.SelectSingleNode(pattern);
var res = "";
if (node != null)
{
var res = "";
if (pattern.StartsWith("//meta") || pattern.Contains("@itemprop"))
res = node.GetAttributeValue("content", "");
else
res = node.InnerText;
if (!string.IsNullOrEmpty(res)) return res;
if (!string.IsNullOrEmpty(res))
return Utils.UnHtmlString(res);
}
}
return "";
}
var title = getItem(titlePatterns);
if (string.IsNullOrEmpty(title))
{
var pattern = @"document\.title\s*=\s*""(.+?) - YouTube"";";
var match = Regex.Match(doc.Text, pattern);
if (match.Success)
title = match.Groups[1].Value;
}
var username = getItem(usernamePatterns);
int duration = -1;
var node = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
if (node != null)
duration = (int)XmlConvert.ToTimeSpan(node.GetAttributeValue("content", "")).TotalSeconds;
{
try
{
duration = (int)XmlConvert.ToTimeSpan(node.GetAttributeValue("content", "")).TotalSeconds;
}
catch { }
}
return (getItem(titlePatterns), getItem(usernamePatterns), duration);
return (title, username, duration);
}
}