mirror of
https://github.com/fiso64/slsk-batchdl.git
synced 2024-12-22 14:32:40 +00:00
commit
This commit is contained in:
parent
36d85ec33e
commit
11579385a9
4 changed files with 153 additions and 50 deletions
|
@ -134,6 +134,8 @@ Options:
|
|||
--youtube-key <key> Youtube data API key
|
||||
--get-deleted Attempt to retrieve titles of deleted videos from wayback
|
||||
machine. Requires yt-dlp.
|
||||
--deleted-only Only retrieve & download deleted music. Combine with --print
|
||||
tracks-full to display a list of all deleted titles & urls.
|
||||
|
||||
--time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms
|
||||
for durations like 1:04:35.123). Default: s
|
||||
|
|
|
@ -114,6 +114,7 @@ static class Program
|
|||
static bool useTagsCheckExisting = false;
|
||||
static bool removeTracksFromSource = false;
|
||||
static bool getDeleted = false;
|
||||
static bool deletedOnly = false;
|
||||
static bool removeSingleCharacterSearchTerms = false;
|
||||
static int maxTracks = int.MaxValue;
|
||||
static int minUsersAggregate = 2;
|
||||
|
@ -201,6 +202,8 @@ static class Program
|
|||
"\n --youtube-key <key> Youtube data API key" +
|
||||
"\n --get-deleted Attempt to retrieve titles of deleted videos from wayback" +
|
||||
"\n machine. Requires yt-dlp." +
|
||||
"\n --deleted-only Only retrieve & download deleted music. Combine with --print" +
|
||||
"\n tracks-full to display a list of all deleted titles & urls." +
|
||||
"\n" +
|
||||
"\n --time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms" +
|
||||
"\n for durations like 1:04:35.123). Default: s" +
|
||||
|
@ -221,7 +224,7 @@ static class Program
|
|||
"\n --pref-format <format> Preferred file format(s), comma-separated (default: mp3)" +
|
||||
"\n --pref-length-tol <sec> Preferred length tolerance in seconds (default: 2)" +
|
||||
"\n --pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)" +
|
||||
"\n --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200)" +
|
||||
"\n --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2500)" +
|
||||
"\n --pref-min-samplerate <rate> Preferred minimum sample rate" +
|
||||
"\n --pref-max-samplerate <rate> Preferred maximum sample rate (default: 48000)" +
|
||||
"\n --pref-min-bitdepth <depth> Preferred minimum bit depth" +
|
||||
|
@ -566,6 +569,11 @@ static class Program
|
|||
case "--get-deleted":
|
||||
getDeleted = true;
|
||||
break;
|
||||
case "--do":
|
||||
case "--deleted-only":
|
||||
getDeleted = true;
|
||||
deletedOnly = true;
|
||||
break;
|
||||
case "--re":
|
||||
case "--regex":
|
||||
string s = args[++i].Replace("\\;", "<<semicol>>");
|
||||
|
@ -992,23 +1000,30 @@ static class Program
|
|||
|
||||
string name;
|
||||
List<Track>? deleted = null;
|
||||
List<Track> tracks;
|
||||
List<Track> tracks = new();
|
||||
|
||||
if (getDeleted)
|
||||
{
|
||||
Console.WriteLine("Getting deleted videos..");
|
||||
var archive = new YouTube.YouTubeArchiveRetriever();
|
||||
deleted = await archive.RetrieveDeleted(ytUrl);
|
||||
deleted = await archive.RetrieveDeleted(ytUrl, printFailed: deletedOnly);
|
||||
}
|
||||
if (YouTube.apiKey != "")
|
||||
if (!deletedOnly)
|
||||
{
|
||||
Console.WriteLine("Loading YouTube playlist (API)");
|
||||
(name, tracks) = await YouTube.GetTracksApi(ytUrl, max, off);
|
||||
if (YouTube.apiKey != "")
|
||||
{
|
||||
Console.WriteLine("Loading YouTube playlist (API)");
|
||||
(name, tracks) = await YouTube.GetTracksApi(ytUrl, max, off);
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine("Loading YouTube playlist");
|
||||
(name, tracks) = await YouTube.GetTracksYtExplode(ytUrl, max, off);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine("Loading YouTube playlist");
|
||||
(name, tracks) = await YouTube.GetTracksYtExplode(ytUrl, max, off);
|
||||
name = await YouTube.GetPlaylistTitle(ytUrl);
|
||||
}
|
||||
if (deleted != null)
|
||||
{
|
||||
|
@ -2582,7 +2597,7 @@ static class Program
|
|||
if (!noRemoveSpecialChars)
|
||||
{
|
||||
old = str;
|
||||
str = str.ReplaceSpecialChars(" ").RemoveConsecutiveWs().Trim();
|
||||
str = str.ReplaceSpecialChars(" ").Trim().RemoveConsecutiveWs();
|
||||
if (str == "") str = old;
|
||||
}
|
||||
foreach (var banned in bannedTerms)
|
||||
|
@ -2602,7 +2617,7 @@ static class Program
|
|||
public static Track InferTrack(string filename, Track defaultTrack)
|
||||
{
|
||||
Track t = new Track(defaultTrack);
|
||||
filename = GetFileNameWithoutExtSlsk(filename).Replace(" — ", " - ").Replace("_", " ").RemoveConsecutiveWs().Trim();
|
||||
filename = GetFileNameWithoutExtSlsk(filename).Replace(" — ", " - ").Replace("_", " ").Trim().RemoveConsecutiveWs();
|
||||
|
||||
var trackNumStart = new Regex(@"^(?:(?:[0-9][-\.])?\d{2,3}[. -]|\b\d\.\s|\b\d\s-\s)(?=.+\S)");
|
||||
//var trackNumMiddle = new Regex(@"\s+-\s+(\d{2,3})(?: -|\.|)\s+|\s+-(\d{2,3})-\s+");
|
||||
|
@ -3131,11 +3146,11 @@ static class Program
|
|||
fname = fname.Replace("_", " ").ReplaceInvalidChars(" ", true, false);
|
||||
fname = regexRemove != "" ? Regex.Replace(fname, regexRemove, "") : fname;
|
||||
fname = diacrRemove ? fname.RemoveDiacritics() : fname;
|
||||
fname = fname.Trim();
|
||||
fname = fname.Trim().RemoveConsecutiveWs();
|
||||
tname = tname.Replace("_", " ").ReplaceInvalidChars(" ", true, false);
|
||||
tname = regexRemove != "" ? Regex.Replace(tname, regexRemove, "") : tname;
|
||||
tname = diacrRemove ? tname.RemoveDiacritics() : tname;
|
||||
tname = tname.Trim();
|
||||
tname = tname.Trim().RemoveConsecutiveWs();
|
||||
|
||||
if (boundarySkipWs)
|
||||
return fname.ContainsWithBoundaryIgnoreWs(tname, ignoreCase, acceptLeftDigit: true);
|
||||
|
@ -4073,6 +4088,8 @@ static class Program
|
|||
Console.WriteLine($" Album: {tracks[i].Album}");
|
||||
if (!string.IsNullOrEmpty(tracks[i].URI))
|
||||
Console.WriteLine($" URL/ID: {tracks[i].URI}");
|
||||
if (!string.IsNullOrEmpty(tracks[i].Other))
|
||||
Console.WriteLine($" Other: {tracks[i].Other}");
|
||||
if (tracks[i].ArtistMaybeWrong)
|
||||
Console.WriteLine($" Artist maybe wrong: {tracks[i].ArtistMaybeWrong}");
|
||||
if (tracks[i].Downloads != null) {
|
||||
|
@ -4358,6 +4375,7 @@ public struct Track
|
|||
public bool IsNotAudio = false;
|
||||
public string FailureReason = "";
|
||||
public string DownloadPath = "";
|
||||
public string Other = "";
|
||||
public State TrackState = State.Initial;
|
||||
|
||||
public SlDictionary? Downloads = null;
|
||||
|
@ -4387,6 +4405,7 @@ public struct Track
|
|||
TrackState = other.TrackState;
|
||||
FailureReason = other.FailureReason;
|
||||
DownloadPath = other.DownloadPath;
|
||||
Other = other.Other;
|
||||
}
|
||||
|
||||
public override readonly string ToString()
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Xml.Linq;
|
||||
|
||||
public static class Utils
|
||||
{
|
||||
|
@ -112,7 +111,7 @@ public static class Utils
|
|||
|
||||
public static string RemoveConsecutiveWs(this string input)
|
||||
{
|
||||
return Regex.Replace(input, @"\s+", " ");
|
||||
return string.Join(' ', input.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
|
||||
}
|
||||
|
||||
public static string RemoveSquareBrackets(this string str)
|
||||
|
|
|
@ -6,10 +6,7 @@ using System.Text.RegularExpressions;
|
|||
using YoutubeExplode.Common;
|
||||
using System.Diagnostics;
|
||||
using HtmlAgilityPack;
|
||||
using System.Text;
|
||||
using System.Threading.Channels;
|
||||
using System.Collections.Concurrent;
|
||||
using System;
|
||||
|
||||
public static class YouTube
|
||||
{
|
||||
|
@ -98,8 +95,8 @@ public static class YouTube
|
|||
var track = new Track();
|
||||
track.URI = id;
|
||||
|
||||
uploader = Regex.Replace(uploader.Replace("–", "-").Trim(), @"\s+", " ");
|
||||
title = Regex.Replace(title.Replace("–", "-").Trim(), @"\s+", " ");
|
||||
uploader = uploader.Replace("–", "-").Trim().RemoveConsecutiveWs();
|
||||
title = title.Replace("–", "-").Replace(" -- ", " - ").Trim().RemoveConsecutiveWs();
|
||||
|
||||
var artist = uploader;
|
||||
var trackTitle = title;
|
||||
|
@ -258,6 +255,13 @@ public static class YouTube
|
|||
return tracks;
|
||||
}
|
||||
|
||||
public static async Task<string> GetPlaylistTitle(string url)
|
||||
{
|
||||
var youtube = new YoutubeClient();
|
||||
var playlist = await youtube.Playlists.GetAsync(url);
|
||||
return playlist.Title;
|
||||
}
|
||||
|
||||
public static async Task<(string, List<Track>)> GetTracksYtExplode(string url, int max = int.MaxValue, int offset = 0)
|
||||
{
|
||||
var youtube = new YoutubeClient();
|
||||
|
@ -305,12 +309,34 @@ public static class YouTube
|
|||
_client.Timeout = TimeSpan.FromSeconds(10);
|
||||
}
|
||||
|
||||
public async Task<List<Track>> RetrieveDeleted(string url)
|
||||
public async Task<List<Track>> RetrieveDeleted(string url, bool printFailed = true)
|
||||
{
|
||||
var deletedVideoUrls = new BlockingCollection<string>();
|
||||
var tracks = new ConcurrentBag<Track>();
|
||||
|
||||
var process = new Process()
|
||||
int totalCount = 0;
|
||||
int archivedCount = 0;
|
||||
var tracks = new ConcurrentBag<Track>();
|
||||
var noArchive = new ConcurrentBag<string>();
|
||||
var failRetrieve = new ConcurrentBag<string>();
|
||||
|
||||
int workerCount = 4;
|
||||
var workers = new List<Task>();
|
||||
var consoleLock = new object();
|
||||
|
||||
void updateInfo()
|
||||
{
|
||||
lock (consoleLock)
|
||||
{
|
||||
if (!Console.IsOutputRedirected)
|
||||
{
|
||||
string info = "Deleted metadata total/archived/retrieved: ";
|
||||
Console.SetCursorPosition(0, Console.CursorTop);
|
||||
Console.Write($"{info}{totalCount}/{archivedCount}/{tracks.Count}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
|
@ -319,14 +345,17 @@ public static class YouTube
|
|||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true,
|
||||
}
|
||||
},
|
||||
EnableRaisingEvents = true
|
||||
};
|
||||
process.EnableRaisingEvents = true;
|
||||
bool ok = false;
|
||||
process.OutputDataReceived += (sender, e) =>
|
||||
{
|
||||
if (!ok) { Console.WriteLine("Got first video"); ok = true; }
|
||||
deletedVideoUrls.Add(e.Data);
|
||||
if (!string.IsNullOrWhiteSpace(e.Data))
|
||||
{
|
||||
deletedVideoUrls.Add(e.Data);
|
||||
Interlocked.Increment(ref totalCount);
|
||||
updateInfo();
|
||||
}
|
||||
};
|
||||
process.Exited += (sender, e) =>
|
||||
{
|
||||
|
@ -336,29 +365,42 @@ public static class YouTube
|
|||
process.Start();
|
||||
process.BeginOutputReadLine();
|
||||
|
||||
List<Task> workers = new List<Task>();
|
||||
int workerCount = 4;
|
||||
for (int i = 0; i < workerCount; i++)
|
||||
{
|
||||
workers.Add(Task.Run(async () =>
|
||||
{
|
||||
foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
|
||||
{
|
||||
var waybackUrl = await GetOldestArchiveUrl(videoUrl);
|
||||
if (!string.IsNullOrEmpty(waybackUrl))
|
||||
var waybackUrls = await GetOldestArchiveUrls(videoUrl, limit: 2);
|
||||
if (waybackUrls != null && waybackUrls.Count > 0)
|
||||
{
|
||||
var x = await GetVideoDetails(waybackUrl);
|
||||
if (!string.IsNullOrEmpty(x.title))
|
||||
Interlocked.Increment(ref archivedCount);
|
||||
|
||||
bool good = false;
|
||||
foreach (var waybackUrl in waybackUrls)
|
||||
{
|
||||
var track = await ParseTrackInfo(x.title, x.uploader, waybackUrl, x.duration);
|
||||
tracks.Add(track);
|
||||
if (!Console.IsOutputRedirected)
|
||||
var (title, uploader, duration) = await GetVideoDetails(waybackUrl);
|
||||
if (!string.IsNullOrWhiteSpace(title))
|
||||
{
|
||||
Console.SetCursorPosition(0, Console.CursorTop);
|
||||
Console.Write($"Deleted videos processed: {tracks.Count}");
|
||||
var track = await ParseTrackInfo(title, uploader, waybackUrl, duration);
|
||||
track.Other = $"{{\"t\":\"{title.Trim()}\",\"u\":\"{uploader.Trim()}\"}}";
|
||||
tracks.Add(track);
|
||||
good = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!good)
|
||||
{
|
||||
failRetrieve.Add(waybackUrls[0]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
noArchive.Add(videoUrl);
|
||||
}
|
||||
|
||||
updateInfo();
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
@ -367,12 +409,32 @@ public static class YouTube
|
|||
process.WaitForExit();
|
||||
deletedVideoUrls.CompleteAdding();
|
||||
Console.WriteLine();
|
||||
|
||||
if (printFailed)
|
||||
{
|
||||
if (archivedCount < totalCount)
|
||||
{
|
||||
Console.WriteLine("No archived version found for the following:");
|
||||
foreach (var x in noArchive)
|
||||
Console.WriteLine($" {x}");
|
||||
Console.WriteLine();
|
||||
|
||||
}
|
||||
if (tracks.Count < archivedCount)
|
||||
{
|
||||
Console.WriteLine("Failed to parse archived version for the following:");
|
||||
foreach (var x in failRetrieve)
|
||||
Console.WriteLine($" {x}");
|
||||
Console.WriteLine();
|
||||
}
|
||||
}
|
||||
|
||||
return tracks.ToList();
|
||||
}
|
||||
|
||||
private async Task<string> GetOldestArchiveUrl(string url)
|
||||
private async Task<List<string>> GetOldestArchiveUrls(string url, int limit)
|
||||
{
|
||||
var url2 = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";
|
||||
var url2 = $"http://web.archive.org/cdx/search/cdx?url={url}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit={limit}";
|
||||
HttpResponseMessage response = null;
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
|
@ -388,13 +450,16 @@ public static class YouTube
|
|||
{
|
||||
var content = await response.Content.ReadAsStringAsync();
|
||||
var lines = content.Split("\n").Where(line => !string.IsNullOrWhiteSpace(line)).ToList();
|
||||
if (lines.Any())
|
||||
if (lines.Count > 0)
|
||||
{
|
||||
var parts = lines[0].Split(" ");
|
||||
var timestamp = parts[0];
|
||||
var originalUrl = parts[1];
|
||||
var oldestArchive = $"http://web.archive.org/web/{timestamp}/{originalUrl}";
|
||||
return oldestArchive;
|
||||
for (int i = 0; i < lines.Count; i++)
|
||||
{
|
||||
var parts = lines[i].Split(" ");
|
||||
var timestamp = parts[0];
|
||||
var originalUrl = parts[1];
|
||||
lines[i] = $"http://web.archive.org/web/{timestamp}/{originalUrl}";
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
@ -428,25 +493,43 @@ public static class YouTube
|
|||
foreach (var pattern in patterns)
|
||||
{
|
||||
var node = doc.DocumentNode.SelectSingleNode(pattern);
|
||||
var res = "";
|
||||
if (node != null)
|
||||
{
|
||||
var res = "";
|
||||
if (pattern.StartsWith("//meta") || pattern.Contains("@itemprop"))
|
||||
res = node.GetAttributeValue("content", "");
|
||||
else
|
||||
res = node.InnerText;
|
||||
if (!string.IsNullOrEmpty(res)) return res;
|
||||
if (!string.IsNullOrEmpty(res))
|
||||
return Utils.UnHtmlString(res);
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
var title = getItem(titlePatterns);
|
||||
if (string.IsNullOrEmpty(title))
|
||||
{
|
||||
var pattern = @"document\.title\s*=\s*""(.+?) - YouTube"";";
|
||||
var match = Regex.Match(doc.Text, pattern);
|
||||
if (match.Success)
|
||||
title = match.Groups[1].Value;
|
||||
}
|
||||
|
||||
var username = getItem(usernamePatterns);
|
||||
|
||||
int duration = -1;
|
||||
var node = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
|
||||
if (node != null)
|
||||
duration = (int)XmlConvert.ToTimeSpan(node.GetAttributeValue("content", "")).TotalSeconds;
|
||||
{
|
||||
try
|
||||
{
|
||||
duration = (int)XmlConvert.ToTimeSpan(node.GetAttributeValue("content", "")).TotalSeconds;
|
||||
}
|
||||
catch { }
|
||||
}
|
||||
|
||||
return (getItem(titlePatterns), getItem(usernamePatterns), duration);
|
||||
return (title, username, duration);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue