1
0
Fork 0
mirror of https://github.com/fiso64/slsk-batchdl.git synced 2024-12-22 14:32:40 +00:00
This commit is contained in:
fiso64 2023-11-15 22:45:51 +01:00
parent a76abef429
commit f35f1e1e6c
5 changed files with 809 additions and 605 deletions

100
README.md
View file

@ -2,32 +2,32 @@
A batch downloader for Soulseek using Soulseek.NET. Accepts CSV files and Spotify or YouTube urls.
##### Download tracks from a csv file:
#### Download tracks from a csv file:
```
slsk-batchdl -i test.csv
```
Use `--print tracks` before downloading to check if everything has been parsed correctly. The names of the columns should be: `Artist`, `Title`, `Album`, `Length`. Only the title column is required, but any additional info improves search.
##### Download spotify likes while skipping existing songs and creating an m3u file:
#### Download spotify likes while skipping existing songs:
```
slsk-batchdl -i spotify-likes --m3u --skip-existing
slsk-batchdl -i spotify-likes --skip-existing
```
You might need to provide an id and secret when using spotify (e.g when downloading a private playlist), which you can get here https://developer.spotify.com/dashboard/applications. Create an app, then select it and add `http://localhost:48721/callback` as a redirect url in the settings.
To download private playlists or liked songs you will need to provide a client id and secret, which you can get here https://developer.spotify.com/dashboard/applications. Create an app and add `http://localhost:48721/callback` as a redirect url in its settings.
##### Download the first 10 songs of a youtube playlist:
#### Download youtube playlist (with fallback to yt-dlp), including deleted videos:
```
slsk-batchdl -n 10 -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
slsk-batchdl --get-deleted --yt-dlp -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
```
To include unavailable videos, you will need to provide an api key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.
Playlists are retrieved using the YoutubeExplode library, which unfortunately doesn't always return all videos. You can use the official API by providing a key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.
##### Search & download a specific song, preferring high quality:
#### Search & download a specific song:
```
slsk-batchdl -i "title=MC MENTAL @ HIS BEST,length=242" --pref-format "flac,wav"
```
##### Find an artist's songs which aren't in your library:
#### Find an artist's songs which aren't in your library:
```
slsk-batchdl -i "artist=MC MENTAL" -a --print tracks --skip-existing --music-dir "path\to\music"
slsk-batchdl -i "artist=MC MENTAL" --aggregate --print tracks --skip-existing --music-dir "path\to\music"
```
### Options:
@ -48,47 +48,43 @@ Usage: slsk-batchdl -i <input> [OPTIONS]
Title, Album, Length. Only the title column is required, but
any extra info improves search results.
String for the track, album, or artist to search for:
Can either be any typical search text like "Artist - Title"
or a comma-separated list like "title=Song,artist=Artist"
Available fields: title, artist, album, length (in seconds).
Name of the track, album, or artist to search for:
Can either be any typical search string or a comma-separated
list like "title=Song Name,artist=Artist Name,length=215"
Allowed properties are: title, artist, album, length (sec)
Options:
--user <username> Soulseek username
--pass <password> Soulseek password
--spotify Input is a spotify url (override automatic parsing)
--spotify-id <id> spotify client ID (required for private playlists)
--spotify-secret <secret> spotify client secret (required for private playlists)
--spotify-id <id> spotify client ID
--spotify-secret <secret> spotify client secret
--youtube Input is a youtube url (override automatic parsing)
--youtube-key <key> Youtube data API key
--get-deleted Attempt to retrieve titles of deleted videos from wayback
machine. Requires yt-dlp.
--csv Input is a path to a local CSV (override automatic parsing)
--time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms
for durations like 1:04:35.123). Default: s (seconds)
for durations like 1:04:35.123). Default: s
--yt-parse Enable if the csv file contains YouTube video titles and
channel names; attempt to parse them into proper title and
artist. If the csv contains an "ID", "URL", or
"Description" column then those will be used for parsing as
well.
channel names; attempt to parse them into title and artist
names.
--string Input is a search string (override automatic parsing)
-a --aggregate Instead of downloading a single track matching the search
string, find and download all distinct songs associated with
the provided artist, album, or track title. Search string must
be a list of properties.
-a --aggregate When input is a string: Instead of downloading a single
track matching the search string, find and download all
distinct songs associated with the provided artist, album,
or track title. Input string must be a list of properties.
--min-users-aggregate <num> Minimum number of users sharing a track before it is
downloaded in aggregate mode. Setting it to 2 or more will
significantly reduce false positives, but may introduce false
negatives. Default: 1
downloaded in aggregate mode. Setting it to higher values
will significantly reduce false positives, but may introduce
false negatives. Default: 2
-p --path <path> Where to place downloaded files
-f --folder <name> Subfolder name
-p --path <path> Download folder
-f --folder <name> Subfolder name (default: playlist/csv name)
-n --number <maxtracks> Download the first n tracks of a playlist
-o --offset <offset> Skip a specified number of tracks
--reverse Download tracks in reverse order
--remove-from-playlist Remove downloaded tracks from playlist (spotify only)
--remove-from-playlist Remove downloaded tracks from playlist (for spotify only)
--name-format <format> Name format for downloaded tracks, e.g "{artist} - {title}"
--m3u Create an m3u8 playlist file
@ -102,54 +98,50 @@ Options:
--banned-users <list> Comma-separated list of users to ignore
--danger-words <list> Comma-separated list of words that must appear in either
both search result and track title or in neither of the
two. Case-insensitive. (default:"mix, edit, dj, cover")
two. Case-insensitive. (default:"remix, edit,cover")
--pref-format <format> Preferred file format(s), comma-separated (default: mp3)
--pref-length-tol <tol> Preferred length tolerance in seconds (default: 3)
--pref-length-tol <tol> Preferred length tolerance in seconds (default: 2)
--pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)
--pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200)
--pref-max-samplerate <rate> Preferred maximum sample rate (default: 96000)
--pref-strict-title Prefer download if filename contains track title
--pref-strict-artist Prefer download if filepath contains track artist
--pref-banned-users <list> Comma-separated list of users to deprioritize
--pref-danger-words <list> Comma-separated list of words that should appear in either
both search result and track title or in neither of the
two.
two. (default: see github)
-s --skip-existing Skip if a track matching file conditions is found in the
output folder or your music library (if provided)
--skip-mode <mode> Sets the way the program checks if a track exists
name: Use only filenames
--skip-mode <mode> name: Use only filenames to check if a track exists
name-precise (default): Use filenames and check conditions
tag: Use file tags (slower)
tag-precise: Use file tags and check file conditions
--music-dir <path> Specify to skip downloading tracks found in a music library
use with --skip-existing
Use with --skip-existing
--skip-not-found Skip searching for tracks that weren't found on Soulseek
during the last run.
--remove-ft Remove "ft." or "feat." and everything after from the
track names before searching.
--remove-brackets Remove text in square brackets from track names before
searching.
track names before searching
--remove-regex <regex> Remove a regex from all track names and artist names
--no-artist-search Perform a search without artist name if nothing was
found. Only use for sources such as youtube or soundcloud
where the "artist" could just be an uploader.
--artist-search Also try to find track by searching for the artist only
--no-regex-search <reg> Perform an additional search without a regex pattern
--no-diacr-search Perform an additional search without diacritics
-d --desperate Equivalent to enabling all additional searches, slower.
--no-diacr-search Also perform a search without diacritics
--no-regex-search <regex> Also perform a search without a regex pattern
--yt-dlp Use yt-dlp to download tracks that weren't found on
Soulseek. yt-dlp must be available from the command line.
--config <path> Specify config file location
--search-timeout <ms> Max search time in ms (default: 6000)
--max-stale-time <ms> Max download time without progress in ms (default: 50000)
--concurrent-processes <num> Max concurrent searches & downloads (default: 2)
--display <option> Changes how searches and downloads are displayed.
single (default): Show transfer state and percentage.
double: Also show a progress bar.
simple: Only printing
--concurrent-downloads <num> Max concurrent searches & downloads (default: 2)
--display <option> Changes how searches and downloads are displayed:
single (default): Show transfer state and percentage
double: Transfer state and a large progress bar
simple: No download bars or changing percentages
--print <option> Only print tracks or results instead of downloading.
--print <option> Print tracks or search results instead of downloading:
tracks: Print all tracks to be downloaded
tracks-full: Print extended information about all tracks
results: Print search results satisfying file conditions

File diff suppressed because it is too large Load diff

View file

@ -85,7 +85,7 @@ public class Spotify
public async Task<List<Track>> GetLikes(int max = int.MaxValue, int offset = 0)
{
if (!loggedIn)
throw new Exception("Can't get liked music, not logged in");
throw new Exception("Can't get liked music as user is not logged in");
List<Track> res = new List<Track>();
int limit = Math.Min(max, 50);

View file

@ -3,7 +3,12 @@ using Google.Apis.Services;
using System.Xml;
using YoutubeExplode;
using System.Text.RegularExpressions;
using YoutubeExplode.Common;
using System.Diagnostics;
using HtmlAgilityPack;
using System.Text;
using System.Threading.Channels;
using System.Collections.Concurrent;
public static class YouTube
{
@ -67,10 +72,10 @@ public static class YouTube
break;
}
if (tracksDict.Count >= 200)
if (tracksDict.Count >= 200 && !Console.IsOutputRedirected)
{
Console.SetCursorPosition(0, Console.CursorTop);
Console.Write(tracks.Count);
Console.Write($"Loaded: {tracks.Count}");
}
playlistItemsRequest.PageToken = playlistItemsResponse.NextPageToken;
@ -93,30 +98,6 @@ public static class YouTube
title = title.Replace("", "-");
var stringsToRemove = new string[] { "(Official music video)", "(Official video)", "(Official audio)",
"(Lyrics)", "(Official)", "(Lyric Video)", "(Official Lyric Video)", "(Official HD Video)",
"(Official 4K Video)", "(Video)", "[HD]", "[4K]", "(Original Mix)", "(Lyric)", "(Music Video)",
"(Visualizer)", "(Audio)", "Official Lyrics" };
foreach (string s in stringsToRemove)
{
var t = title;
title = Regex.Replace(title, Regex.Escape(s), "", RegexOptions.IgnoreCase);
if (t == title)
{
if (s.Contains("["))
{
string s2 = s.Replace("[", "(").Replace("]", ")");
title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
}
else if (s.Contains("("))
{
string s2 = s.Replace("(", "[").Replace(")", "]");
title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
}
}
}
var trackTitle = title.Trim();
trackTitle = Regex.Replace(trackTitle, @"\s+", " ");
var artist = uploader.Trim();
@ -297,4 +278,159 @@ public static class YouTube
var playlist = await youtube.Playlists.GetAsync(url);
return playlist.Id.ToString();
}
/// <summary>
/// Recovers title, uploader and duration of playlist videos that yt-dlp reports
/// without an uploader (the filter used here to pick out deleted/unavailable videos)
/// by reading the oldest Wayback Machine snapshot of each video page.
/// </summary>
public class YouTubeArchiveRetriever
{
    // Number of concurrent consumers that look up snapshots and parse pages.
    private const int WorkerCount = 4;

    // Number of attempts made against the Wayback CDX endpoint per video.
    private const int MaxRequestAttempts = 3;

    private readonly HttpClient _client;

    public YouTubeArchiveRetriever()
    {
        _client = new HttpClient();
        _client.Timeout = TimeSpan.FromSeconds(10);
    }

    /// <summary>
    /// Runs yt-dlp on <paramref name="url"/>, collects the page urls of all entries that
    /// have no uploader, and tries to build a track for each one from its oldest archived page.
    /// </summary>
    /// <param name="url">Playlist url handed to yt-dlp.</param>
    /// <returns>Tracks for every video whose archived page yielded a non-empty title.</returns>
    /// <remarks>
    /// Retrieval is best-effort: a video whose snapshot lookup or page parsing fails is skipped
    /// so that one bad page cannot discard the results gathered for all other videos.
    /// Failing to start yt-dlp still throws.
    /// </remarks>
    public async Task<List<Track>> RetrieveDeleted(string url)
    {
        var deletedVideoUrls = new BlockingCollection<string>();
        var tracks = new ConcurrentBag<Track>();

        var process = new Process()
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = "yt-dlp",
                // ArgumentList passes every entry as exactly one argument, so a url containing
                // spaces or quotes cannot be split into (or smuggle in) additional options.
                ArgumentList =
                {
                    "--ignore-no-formats-error",
                    "--no-warn",
                    "--match-filter",
                    "!uploader",
                    "--print",
                    "webpage_url",
                    url,
                },
                RedirectStandardOutput = true,
                UseShellExecute = false,
                CreateNoWindow = true,
            }
        };

        using (process)
        {
            bool announcedFirst = false;

            process.OutputDataReceived += (sender, e) =>
            {
                // A null line marks the end of the redirected stream. Only at this point is it
                // certain that no further urls will arrive; completing from the Exited event
                // instead could race with output that is still buffered and make Add throw.
                if (e.Data == null)
                {
                    deletedVideoUrls.CompleteAdding();
                    return;
                }

                if (string.IsNullOrWhiteSpace(e.Data))
                    return;

                if (!announcedFirst)
                {
                    Console.WriteLine("Got first video");
                    announcedFirst = true;
                }

                deletedVideoUrls.Add(e.Data);
            };

            process.Start();
            process.BeginOutputReadLine();

            var workers = new List<Task>();
            for (int i = 0; i < WorkerCount; i++)
                workers.Add(Task.Run(() => ProcessDeletedVideos(deletedVideoUrls, tracks)));

            await Task.WhenAll(workers);
            process.WaitForExit();
        }

        Console.WriteLine();
        return tracks.ToList();
    }

    // Consumes video urls until the queue is completed and adds one track per recoverable video.
    private async Task ProcessDeletedVideos(BlockingCollection<string> videoUrls, ConcurrentBag<Track> tracks)
    {
        foreach (var videoUrl in videoUrls.GetConsumingEnumerable())
        {
            try
            {
                var waybackUrl = await GetOldestArchiveUrl(videoUrl);
                if (string.IsNullOrEmpty(waybackUrl))
                    continue;

                var details = await GetVideoDetails(waybackUrl);
                if (string.IsNullOrEmpty(details.title))
                    continue;

                var track = await ParseTrackInfo(details.title, details.uploader, waybackUrl, details.duration, false);
                tracks.Add(track);

                if (!Console.IsOutputRedirected)
                {
                    Console.SetCursorPosition(0, Console.CursorTop);
                    Console.Write($"Deleted videos processed: {tracks.Count}");
                }
            }
            catch (Exception ex)
            {
                // Skip this video only; an unhandled exception here would fault the worker and
                // make Task.WhenAll throw away everything the other workers have collected.
                Debug.WriteLine($"Skipping {videoUrl}: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Asks the Wayback CDX index for the first successful (HTTP 200) capture of <paramref name="url"/>.
    /// </summary>
    /// <returns>The snapshot url, or null when the request keeps failing or no capture is listed.</returns>
    private async Task<string> GetOldestArchiveUrl(string url)
    {
        // The video url carries its own query string ("?v=..."), so it has to be escaped
        // before being embedded as the value of the "url" parameter.
        var query = $"http://web.archive.org/cdx/search/cdx?url={Uri.EscapeDataString(url)}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";

        HttpResponseMessage response = null;
        for (int attempt = 0; attempt < MaxRequestAttempts; attempt++)
        {
            try
            {
                response = await _client.GetAsync(query);
                break;
            }
            catch (Exception ex) when (ex is HttpRequestException || ex is TaskCanceledException)
            {
                // Network failure or timeout: try again until the attempts are used up.
            }
        }

        if (response == null)
            return null;

        using (response)
        {
            if (!response.IsSuccessStatusCode)
                return null;

            var content = await response.Content.ReadAsStringAsync();
            var firstLine = content.Split('\n')
                .Select(line => line.Trim())
                .FirstOrDefault(line => line.Length > 0);
            if (firstLine == null)
                return null;

            // Expected shape is "<timestamp> <original url>"; anything else is treated as no result.
            var parts = firstLine.Split(' ');
            if (parts.Length < 2)
                return null;

            return $"http://web.archive.org/web/{parts[0]}/{parts[1]}";
        }
    }

    /// <summary>
    /// Loads an archived video page and extracts its title, uploader and duration.
    /// </summary>
    /// <param name="url">Url of the archived page.</param>
    /// <returns>
    /// Title and uploader are empty strings when none of the known page layouts match;
    /// duration is in seconds, or -1 when the page has no parsable duration meta tag.
    /// </returns>
    public async Task<(string title, string uploader, int duration)> GetVideoDetails(string url)
    {
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);

        // Several XPath variants are tried in order; the first one yielding text wins.
        var titlePatterns = new[]
        {
            "//h1[@id='video_title']",
            "//meta[@name='title']",
        };

        var usernamePatterns = new[]
        {
            "//div[@id='userInfoDiv']/b/a",
            "//a[contains(@class, 'contributor')]",
            "//a[@id='watch-username']",
            "//a[contains(@class, 'author')]",
            "//div[@class='yt-user-info']/a",
            "//div[@id='upload-info']//yt-formatted-string/a",
            "//span[@itemprop='author']//link[@itemprop='name']",
            "//a[contains(@class, 'yt-user-name')]",
        };

        string getItem(string[] patterns)
        {
            foreach (var pattern in patterns)
            {
                var match = doc.DocumentNode.SelectSingleNode(pattern);
                if (match == null)
                    continue;

                // meta/itemprop elements keep their value in the "content" attribute.
                string value = pattern.StartsWith("//meta") || pattern.Contains("@itemprop")
                    ? match.GetAttributeValue("content", "")
                    : match.InnerText;

                if (!string.IsNullOrEmpty(value))
                    return value;
            }
            return "";
        }

        int duration = -1;
        var durationNode = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
        if (durationNode != null)
        {
            var rawDuration = durationNode.GetAttributeValue("content", "");
            if (!string.IsNullOrWhiteSpace(rawDuration))
            {
                try
                {
                    duration = (int)XmlConvert.ToTimeSpan(rawDuration).TotalSeconds;
                }
                catch (FormatException)
                {
                    // Not an xsd:duration value; keep -1 to signal "unknown".
                }
            }
        }

        return (getItem(titlePatterns), getItem(usernamePatterns), duration);
    }
}
}

View file

@ -19,12 +19,13 @@
<ItemGroup>
<PackageReference Include="Goblinfactory.ProgressBar" Version="1.0.0" />
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945" />
<PackageReference Include="Soulseek" Version="6.1.1" />
<PackageReference Include="SpotifyAPI.Web" Version="7.0.0" />
<PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.0" />
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.63.0.3205" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
<PackageReference Include="Soulseek" Version="6.1.3" />
<PackageReference Include="SpotifyAPI.Web" Version="7.0.2" />
<PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.2" />
<PackageReference Include="TagLibSharp" Version="2.3.0" />
<PackageReference Include="YoutubeExplode" Version="6.2.12" />
<PackageReference Include="YoutubeExplode" Version="6.3.7" />
</ItemGroup>
</Project>