1
0
Fork 0
mirror of https://github.com/fiso64/slsk-batchdl.git synced 2024-12-22 22:42:41 +00:00
This commit is contained in:
fiso64 2023-11-15 22:45:51 +01:00
parent a76abef429
commit f35f1e1e6c
5 changed files with 809 additions and 605 deletions

100
README.md
View file

@ -2,32 +2,32 @@
A batch downloader for Soulseek using Soulseek.NET. Accepts CSV files and Spotify or YouTube urls. A batch downloader for Soulseek using Soulseek.NET. Accepts CSV files and Spotify or YouTube urls.
##### Download tracks from a csv file: #### Download tracks from a csv file:
``` ```
slsk-batchdl -i test.csv slsk-batchdl -i test.csv
``` ```
Use `--print tracks` before downloading to check if everything has been parsed correctly. The names of the columns should be: `Artist`, `Title`, `Album`, `Length`. Only the title column is required, but any additional info improves search. Use `--print tracks` before downloading to check if everything has been parsed correctly. The names of the columns should be: `Artist`, `Title`, `Album`, `Length`. Only the title column is required, but any additional info improves search.
##### Download spotify likes while skipping existing songs and creating an m3u file: #### Download spotify likes while skipping existing songs:
``` ```
slsk-batchdl -i spotify-likes --m3u --skip-existing slsk-batchdl -i spotify-likes --skip-existing
``` ```
You might need to provide an id and secret when using spotify (e.g when downloading a private playlist), which you can get here https://developer.spotify.com/dashboard/applications. Create an app, then select it and add `http://localhost:48721/callback` as a redirect url in the settings. To download private playlists or liked songs you will need to provide a client id and secret, which you can get here https://developer.spotify.com/dashboard/applications. Create an app and add `http://localhost:48721/callback` as a redirect url in its settings.
##### Download the first 10 songs of a youtube playlist: #### Download youtube playlist (with fallback to yt-dlp), including deleted videos:
``` ```
slsk-batchdl -n 10 -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX" slsk-batchdl --get-deleted --yt-dlp -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
``` ```
To include unavailable videos, you will need to provide an api key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts. Playlists are retrieved using the YoutubeExplode library which unfortunately doesn't always return all videos. You can use the official API by providing a key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.
##### Search & download a specific song, preferring high quality: #### Search & download a specific song:
``` ```
slsk-batchdl -i "title=MC MENTAL @ HIS BEST,length=242" --pref-format "flac,wav" slsk-batchdl -i "title=MC MENTAL @ HIS BEST,length=242" --pref-format "flac,wav"
``` ```
##### Find an artist's songs which aren't in your library: #### Find an artist's songs which aren't in your library:
``` ```
slsk-batchdl -i "artist=MC MENTAL" -a --print tracks --skip-existing --music-dir "path\to\music" slsk-batchdl -i "artist=MC MENTAL" --aggregate --print tracks --skip-existing --music-dir "path\to\music"
``` ```
### Options: ### Options:
@ -48,47 +48,43 @@ Usage: slsk-batchdl -i <input> [OPTIONS]
Title, Album, Length. Only the title column is required, but Title, Album, Length. Only the title column is required, but
any extra info improves search results. any extra info improves search results.
String for the track, album, or artist to search for: Name of the track, album, or artist to search for:
Can either be any typical search text like "Artist - Title" Can either be any typical search string or a comma-separated
or a comma-separated list like "title=Song,artist=Artist" list like "title=Song Name,artist=Artist Name,length=215"
Available fields: title, artist, album, length (in seconds). Allowed properties are: title, artist, album, length (sec)
Options: Options:
--user <username> Soulseek username --user <username> Soulseek username
--pass <password> Soulseek password --pass <password> Soulseek password
--spotify Input is a spotify url (override automatic parsing) --spotify-id <id> spotify client ID
--spotify-id <id> spotify client ID (required for private playlists) --spotify-secret <secret> spotify client secret
--spotify-secret <secret> spotify client secret (required for private playlists)
--youtube Input is a youtube url (override automatic parsing)
--youtube-key <key> Youtube data API key --youtube-key <key> Youtube data API key
--get-deleted Attempt to retrieve titles of deleted videos from wayback
machine. Requires yt-dlp.
--csv Input is a path to a local CSV (override automatic parsing)
--time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms --time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms
for durations like 1:04:35.123). Default: s (seconds) for durations like 1:04:35.123). Default: s
--yt-parse Enable if the csv file contains YouTube video titles and --yt-parse Enable if the csv file contains YouTube video titles and
channel names; attempt to parse them into proper title and channel names; attempt to parse them into title and artist
artist. If the csv contains an "ID", "URL", or names.
"Description" column then those will be used for parsing as
well.
--string Input is a search string (override automatic parsing) -a --aggregate When input is a string: Instead of downloading a single
-a --aggregate Instead of downloading a single track matching the search track matching the search string, find and download all
string, find and download all distinct songs associated with distinct songs associated with the provided artist, album,
the provided artist, album, or track title. Search string must or track title. Input string must be a list of properties.
be a list of properties.
--min-users-aggregate <num> Minimum number of users sharing a track before it is --min-users-aggregate <num> Minimum number of users sharing a track before it is
downloaded in aggregate mode. Setting it to 2 or more will downloaded in aggregate mode. Setting it to higher values
significantly reduce false positives, but may introduce false will significantly reduce false positives, but may introduce
negatives. Default: 1 false negatives. Default: 2
-p --path <path> Where to place downloaded files -p --path <path> Download folder
-f --folder <name> Subfolder name -f --folder <name> Subfolder name (default: playlist/csv name)
-n --number <maxtracks> Download the first n tracks of a playlist -n --number <maxtracks> Download the first n tracks of a playlist
-o --offset <offset> Skip a specified number of tracks -o --offset <offset> Skip a specified number of tracks
--reverse Download tracks in reverse order --reverse Download tracks in reverse order
--remove-from-playlist Remove downloaded tracks from playlist (spotify only) --remove-from-playlist Remove downloaded tracks from playlist (for spotify only)
--name-format <format> Name format for downloaded tracks, e.g "{artist} - {title}" --name-format <format> Name format for downloaded tracks, e.g "{artist} - {title}"
--m3u Create an m3u8 playlist file --m3u Create an m3u8 playlist file
@ -102,54 +98,50 @@ Options:
--banned-users <list> Comma-separated list of users to ignore --banned-users <list> Comma-separated list of users to ignore
--danger-words <list> Comma-separated list of words that must appear in either --danger-words <list> Comma-separated list of words that must appear in either
both search result and track title or in neither of the both search result and track title or in neither of the
two. Case-insensitive. (default:"mix, edit, dj, cover") two. Case-insensitive. (default:"remix, edit,cover")
--pref-format <format> Preferred file format(s), comma-separated (default: mp3) --pref-format <format> Preferred file format(s), comma-separated (default: mp3)
--pref-length-tol <tol> Preferred length tolerance in seconds (default: 3) --pref-length-tol <tol> Preferred length tolerance in seconds (default: 2)
--pref-min-bitrate <rate> Preferred minimum bitrate (default: 200) --pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)
--pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200) --pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200)
--pref-max-samplerate <rate> Preferred maximum sample rate (default: 96000) --pref-max-samplerate <rate> Preferred maximum sample rate (default: 96000)
--pref-strict-title Prefer download if filename contains track title
--pref-strict-artist Prefer download if filepath contains track artist --pref-strict-artist Prefer download if filepath contains track artist
--pref-banned-users <list> Comma-separated list of users to deprioritize --pref-banned-users <list> Comma-separated list of users to deprioritize
--pref-danger-words <list> Comma-separated list of words that should appear in either --pref-danger-words <list> Comma-separated list of words that should appear in either
both search result and track title or in neither of the both search result and track title or in neither of the
two. two. (default: see github)
-s --skip-existing Skip if a track matching file conditions is found in the -s --skip-existing Skip if a track matching file conditions is found in the
output folder or your music library (if provided) output folder or your music library (if provided)
--skip-mode <mode> Sets the way the program checks if a track exists --skip-mode <mode> name: Use only filenames to check if a track exists
name: Use only filenames
name-precise (default): Use filenames and check conditions name-precise (default): Use filenames and check conditions
tag: Use file tags (slower) tag: Use file tags (slower)
tag-precise: Use file tags and check file conditions tag-precise: Use file tags and check file conditions
--music-dir <path> Specify to skip downloading tracks found in a music library --music-dir <path> Specify to skip downloading tracks found in a music library
use with --skip-existing Use with --skip-existing
--skip-not-found Skip searching for tracks that weren't found on Soulseek --skip-not-found Skip searching for tracks that weren't found on Soulseek
during the last run. during the last run.
--remove-ft Remove "ft." or "feat." and everything after from the --remove-ft Remove "ft." or "feat." and everything after from the
track names before searching. track names before searching
--remove-brackets Remove text in square brackets from track names before --remove-regex <regex> Remove a regex from all track names and artist names
searching.
--no-artist-search Perform a search without artist name if nothing was --no-artist-search Perform a search without artist name if nothing was
found. Only use for sources such as youtube or soundcloud found. Only use for sources such as youtube or soundcloud
where the "artist" could just be an uploader. where the "artist" could just be an uploader.
--artist-search Also try to find track by searching for the artist only --artist-search Also try to find track by searching for the artist only
--no-regex-search <reg> Perform an additional search without a regex pattern --no-diacr-search Also perform a search without diacritics
--no-diacr-search Perform an additional search without diacritics --no-regex-search <regex> Also perform a search without a regex pattern
-d --desperate Equivalent to enabling all additional searches, slower.
--yt-dlp Use yt-dlp to download tracks that weren't found on --yt-dlp Use yt-dlp to download tracks that weren't found on
Soulseek. yt-dlp must be available from the command line. Soulseek. yt-dlp must be available from the command line.
--config <path> Specify config file location --config <path> Specify config file location
--search-timeout <ms> Max search time in ms (default: 6000) --search-timeout <ms> Max search time in ms (default: 6000)
--max-stale-time <ms> Max download time without progress in ms (default: 50000) --max-stale-time <ms> Max download time without progress in ms (default: 50000)
--concurrent-processes <num> Max concurrent searches & downloads (default: 2) --concurrent-downloads <num> Max concurrent searches & downloads (default: 2)
--display <option> Changes how searches and downloads are displayed. --display <option> Changes how searches and downloads are displayed:
single (default): Show transfer state and percentage. single (default): Show transfer state and percentage
double: Also show a progress bar. double: Transfer state and a large progress bar
simple: Only printing simple: No download bars or changing percentages
--print <option> Only print tracks or results instead of downloading. --print <option> Print tracks or search results instead of downloading:
tracks: Print all tracks to be downloaded tracks: Print all tracks to be downloaded
tracks-full: Print extended information about all tracks tracks-full: Print extended information about all tracks
results: Print search results satisfying file conditions results: Print search results satisfying file conditions

File diff suppressed because it is too large Load diff

View file

@ -85,7 +85,7 @@ public class Spotify
public async Task<List<Track>> GetLikes(int max = int.MaxValue, int offset = 0) public async Task<List<Track>> GetLikes(int max = int.MaxValue, int offset = 0)
{ {
if (!loggedIn) if (!loggedIn)
throw new Exception("Can't get liked music, not logged in"); throw new Exception("Can't get liked music as user is not logged in");
List<Track> res = new List<Track>(); List<Track> res = new List<Track>();
int limit = Math.Min(max, 50); int limit = Math.Min(max, 50);

View file

@ -3,7 +3,12 @@ using Google.Apis.Services;
using System.Xml; using System.Xml;
using YoutubeExplode; using YoutubeExplode;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using YoutubeExplode.Common;
using System.Diagnostics;
using HtmlAgilityPack;
using System.Text;
using System.Threading.Channels;
using System.Collections.Concurrent;
public static class YouTube public static class YouTube
{ {
@ -67,10 +72,10 @@ public static class YouTube
break; break;
} }
if (tracksDict.Count >= 200) if (tracksDict.Count >= 200 && !Console.IsOutputRedirected)
{ {
Console.SetCursorPosition(0, Console.CursorTop); Console.SetCursorPosition(0, Console.CursorTop);
Console.Write(tracks.Count); Console.Write($"Loaded: {tracks.Count}");
} }
playlistItemsRequest.PageToken = playlistItemsResponse.NextPageToken; playlistItemsRequest.PageToken = playlistItemsResponse.NextPageToken;
@ -93,30 +98,6 @@ public static class YouTube
title = title.Replace("", "-"); title = title.Replace("", "-");
var stringsToRemove = new string[] { "(Official music video)", "(Official video)", "(Official audio)",
"(Lyrics)", "(Official)", "(Lyric Video)", "(Official Lyric Video)", "(Official HD Video)",
"(Official 4K Video)", "(Video)", "[HD]", "[4K]", "(Original Mix)", "(Lyric)", "(Music Video)",
"(Visualizer)", "(Audio)", "Official Lyrics" };
foreach (string s in stringsToRemove)
{
var t = title;
title = Regex.Replace(title, Regex.Escape(s), "", RegexOptions.IgnoreCase);
if (t == title)
{
if (s.Contains("["))
{
string s2 = s.Replace("[", "(").Replace("]", ")");
title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
}
else if (s.Contains("("))
{
string s2 = s.Replace("(", "[").Replace(")", "]");
title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
}
}
}
var trackTitle = title.Trim(); var trackTitle = title.Trim();
trackTitle = Regex.Replace(trackTitle, @"\s+", " "); trackTitle = Regex.Replace(trackTitle, @"\s+", " ");
var artist = uploader.Trim(); var artist = uploader.Trim();
@ -297,4 +278,159 @@ public static class YouTube
var playlist = await youtube.Playlists.GetAsync(url); var playlist = await youtube.Playlists.GetAsync(url);
return playlist.Id.ToString(); return playlist.Id.ToString();
} }
/// <summary>
/// Recovers title, uploader and duration for playlist entries that yt-dlp reports
/// without an uploader (treated here as deleted videos) by scraping the oldest
/// Wayback Machine snapshot of each video page.
/// </summary>
public class YouTubeArchiveRetriever
{
    private readonly HttpClient _client;

    public YouTubeArchiveRetriever()
    {
        _client = new HttpClient();
        _client.Timeout = TimeSpan.FromSeconds(10);
    }

    /// <summary>
    /// Runs yt-dlp on <paramref name="url"/> to list entries without an uploader and
    /// resolves each of them through the Wayback Machine using four concurrent workers.
    /// </summary>
    /// <param name="url">Playlist url handed to yt-dlp.</param>
    /// <returns>
    /// The tracks that could be recovered. Videos whose snapshot cannot be found or
    /// parsed are skipped. The order of the result is not defined.
    /// </returns>
    public async Task<List<Track>> RetrieveDeleted(string url)
    {
        var deletedVideoUrls = new BlockingCollection<string>();
        var tracks = new ConcurrentBag<Track>();
        var consoleLock = new object();

        var startInfo = new ProcessStartInfo
        {
            FileName = "yt-dlp",
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true,
        };

        // ArgumentList passes every entry verbatim, so a url containing spaces or
        // quotes cannot be split into (or injected as) additional arguments.
        foreach (var arg in new[] { "--ignore-no-formats-error", "--no-warn", "--match-filter", "!uploader", "--print", "webpage_url", url })
        {
            startInfo.ArgumentList.Add(arg);
        }

        using (var process = new Process { StartInfo = startInfo })
        {
            bool ok = false;

            process.OutputDataReceived += (sender, e) =>
            {
                // A null line marks the end of the redirected stream. Completing the
                // queue here (rather than in Process.Exited) guarantees that no Add
                // can happen after CompleteAdding, because Exited may be raised while
                // output lines are still pending.
                if (e.Data == null)
                {
                    deletedVideoUrls.CompleteAdding();
                    return;
                }

                if (string.IsNullOrWhiteSpace(e.Data))
                {
                    return;
                }

                if (!ok) { Console.WriteLine("Got first video"); ok = true; }
                deletedVideoUrls.Add(e.Data.Trim());
            };

            process.Start();
            process.BeginOutputReadLine();

            List<Task> workers = new List<Task>();
            int workerCount = 4;

            for (int i = 0; i < workerCount; i++)
            {
                workers.Add(Task.Run(async () =>
                {
                    foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
                    {
                        try
                        {
                            var waybackUrl = await GetOldestArchiveUrl(videoUrl);
                            if (string.IsNullOrEmpty(waybackUrl))
                            {
                                continue;
                            }

                            var details = await GetVideoDetails(waybackUrl);
                            if (string.IsNullOrEmpty(details.title))
                            {
                                continue;
                            }

                            var track = await ParseTrackInfo(details.title, details.uploader, waybackUrl, details.duration, false);
                            tracks.Add(track);

                            if (!Console.IsOutputRedirected)
                            {
                                // Moving the cursor and writing are two calls; serialize
                                // them so concurrent workers cannot interleave output.
                                lock (consoleLock)
                                {
                                    Console.SetCursorPosition(0, Console.CursorTop);
                                    Console.Write($"Deleted videos processed: {tracks.Count}");
                                }
                            }
                        }
                        catch (Exception)
                        {
                            // Retrieval is best effort: a snapshot that cannot be loaded
                            // or parsed must not fault the worker, which would make
                            // Task.WhenAll throw and discard every track recovered so far.
                        }
                    }
                }));
            }

            await Task.WhenAll(workers);
            process.WaitForExit();
        }

        Console.WriteLine();
        return tracks.ToList();
    }

    /// <summary>
    /// Queries the Wayback Machine CDX endpoint for the first successfully archived
    /// capture of <paramref name="url"/>, retrying the request up to three times.
    /// </summary>
    /// <returns>The snapshot url, or null when no usable capture was returned.</returns>
    private async Task<string> GetOldestArchiveUrl(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return null;
        }

        // The video url carries its own '?' and possibly '&'; escape it so it stays a
        // single value of the "url" query parameter.
        var url2 = $"http://web.archive.org/cdx/search/cdx?url={Uri.EscapeDataString(url)}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";

        HttpResponseMessage response = null;
        for (int i = 0; i < 3 && response == null; i++)
        {
            try
            {
                response = await _client.GetAsync(url2);
            }
            catch (HttpRequestException) { /* transient network failure: retry */ }
            catch (OperationCanceledException) { /* HttpClient timeout: retry */ }
        }

        if (response == null)
        {
            return null;
        }

        using (response)
        {
            if (!response.IsSuccessStatusCode)
            {
                return null;
            }

            var content = await response.Content.ReadAsStringAsync();
            var firstLine = content.Split('\n').FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));
            if (firstLine == null)
            {
                return null;
            }

            // Requested fields are "timestamp original", separated by a space.
            var parts = firstLine.Trim().Split(' ');
            if (parts.Length < 2)
            {
                return null;
            }

            return $"http://web.archive.org/web/{parts[0]}/{parts[1]}";
        }
    }

    /// <summary>
    /// Loads the page at <paramref name="url"/> and extracts title, uploader and
    /// duration by trying a list of XPath patterns in order.
    /// </summary>
    /// <returns>
    /// Title and uploader are empty strings when no pattern matches; duration is in
    /// seconds, or -1 when the page has no parsable duration meta tag.
    /// </returns>
    public async Task<(string title, string uploader, int duration)> GetVideoDetails(string url)
    {
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);

        var titlePatterns = new[]
        {
            "//h1[@id='video_title']",
            "//meta[@name='title']",
        };

        var usernamePatterns = new[]
        {
            "//div[@id='userInfoDiv']/b/a",
            "//a[contains(@class, 'contributor')]",
            "//a[@id='watch-username']",
            "//a[contains(@class, 'author')]",
            "//div[@class='yt-user-info']/a",
            "//div[@id='upload-info']//yt-formatted-string/a",
            "//span[@itemprop='author']//link[@itemprop='name']",
            "//a[contains(@class, 'yt-user-name')]",
        };

        // Returns the first non-empty value produced by any of the patterns.
        string getItem(string[] patterns)
        {
            foreach (var pattern in patterns)
            {
                var match = doc.DocumentNode.SelectSingleNode(pattern);
                if (match == null)
                {
                    continue;
                }

                // meta / itemprop elements carry their value in the "content" attribute.
                var value = pattern.StartsWith("//meta") || pattern.Contains("@itemprop")
                    ? match.GetAttributeValue("content", "")
                    : match.InnerText;

                if (!string.IsNullOrEmpty(value))
                {
                    return value;
                }
            }

            return "";
        }

        int duration = -1;
        var durationNode = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
        if (durationNode != null)
        {
            try
            {
                duration = (int)XmlConvert.ToTimeSpan(durationNode.GetAttributeValue("content", "")).TotalSeconds;
            }
            catch (FormatException)
            {
                // Missing or malformed duration value: keep the "unknown" marker.
                duration = -1;
            }
        }

        return (getItem(titlePatterns), getItem(usernamePatterns), duration);
    }
}
} }

View file

@ -19,12 +19,13 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="Goblinfactory.ProgressBar" Version="1.0.0" /> <PackageReference Include="Goblinfactory.ProgressBar" Version="1.0.0" />
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945" /> <PackageReference Include="Google.Apis.YouTube.v3" Version="1.63.0.3205" />
<PackageReference Include="Soulseek" Version="6.1.1" /> <PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
<PackageReference Include="SpotifyAPI.Web" Version="7.0.0" /> <PackageReference Include="Soulseek" Version="6.1.3" />
<PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.0" /> <PackageReference Include="SpotifyAPI.Web" Version="7.0.2" />
<PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.2" />
<PackageReference Include="TagLibSharp" Version="2.3.0" /> <PackageReference Include="TagLibSharp" Version="2.3.0" />
<PackageReference Include="YoutubeExplode" Version="6.2.12" /> <PackageReference Include="YoutubeExplode" Version="6.3.7" />
</ItemGroup> </ItemGroup>
</Project> </Project>