mirror of
https://github.com/fiso64/slsk-batchdl.git
synced 2024-12-22 14:32:40 +00:00
stuff
This commit is contained in:
parent
a76abef429
commit
f35f1e1e6c
5 changed files with 809 additions and 605 deletions
100
README.md
100
README.md
|
@ -2,32 +2,32 @@
|
|||
|
||||
A batch downloader for Soulseek using Soulseek.NET. Accepts CSV files and Spotify or YouTube urls.
|
||||
|
||||
##### Download tracks from a csv file:
|
||||
#### Download tracks from a csv file:
|
||||
```
|
||||
slsk-batchdl -i test.csv
|
||||
```
|
||||
Use `--print tracks` before downloading to check if everything has been parsed correctly. The names of the columns should be: `Artist`, `Title`, `Album`, `Length`. Only the title column is required, but any additional info improves search.
|
||||
|
||||
##### Download spotify likes while skipping existing songs and creating an m3u file:
|
||||
#### Download spotify likes while skipping existing songs:
|
||||
```
|
||||
slsk-batchdl -i spotify-likes --m3u --skip-existing
|
||||
slsk-batchdl -i spotify-likes --skip-existing
|
||||
```
|
||||
You might need to provide a client id and secret when using spotify (e.g. when downloading a private playlist), which you can get here: https://developer.spotify.com/dashboard/applications. Create an app, then select it and add `http://localhost:48721/callback` as a redirect url in the settings.
|
||||
To download private playlists or liked songs you will need to provide a client id and secret, which you can get here https://developer.spotify.com/dashboard/applications. Create an app and add `http://localhost:48721/callback` as a redirect url in its settings.
|
||||
|
||||
##### Download the first 10 songs of a youtube playlist:
|
||||
#### Download youtube playlist (with fallback to yt-dlp), including deleted videos:
|
||||
```
|
||||
slsk-batchdl -n 10 -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
|
||||
slsk-batchdl --get-deleted --yt-dlp -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
|
||||
```
|
||||
To include unavailable videos, you will need to provide an api key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.
|
||||
Playlists are retrieved using the YoutubeExplode library which unfortunately doesn't always return all videos. You can use the official API by providing a key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.
|
||||
|
||||
##### Search & download a specific song, preferring high quality:
|
||||
#### Search & download a specific song:
|
||||
```
|
||||
slsk-batchdl -i "title=MC MENTAL @ HIS BEST,length=242" --pref-format "flac,wav"
|
||||
```
|
||||
|
||||
##### Find an artist's songs which aren't in your library:
|
||||
#### Find an artist's songs which aren't in your library:
|
||||
```
|
||||
slsk-batchdl -i "artist=MC MENTAL" -a --print tracks --skip-existing --music-dir "path\to\music"
|
||||
slsk-batchdl -i "artist=MC MENTAL" --aggregate --print tracks --skip-existing --music-dir "path\to\music"
|
||||
```
|
||||
|
||||
### Options:
|
||||
|
@ -48,47 +48,43 @@ Usage: slsk-batchdl -i <input> [OPTIONS]
|
|||
Title, Album, Length. Only the title column is required, but
|
||||
any extra info improves search results.
|
||||
|
||||
String for the track, album, or artist to search for:
|
||||
Can either be any typical search text like "Artist - Title"
|
||||
or a comma-separated list like "title=Song,artist=Artist"
|
||||
Available fields: title, artist, album, length (in seconds).
|
||||
Name of the track, album, or artist to search for:
|
||||
Can either be any typical search string or a comma-separated
|
||||
list like "title=Song Name,artist=Artist Name,length=215"
|
||||
Allowed properties are: title, artist, album, length (sec)
|
||||
|
||||
Options:
|
||||
--user <username> Soulseek username
|
||||
--pass <password> Soulseek password
|
||||
|
||||
--spotify Input is a spotify url (override automatic parsing)
|
||||
--spotify-id <id> spotify client ID (required for private playlists)
|
||||
--spotify-secret <secret> spotify client secret (required for private playlists)
|
||||
--spotify-id <id> spotify client ID
|
||||
--spotify-secret <secret> spotify client secret
|
||||
|
||||
--youtube Input is a youtube url (override automatic parsing)
|
||||
--youtube-key <key> Youtube data API key
|
||||
--get-deleted Attempt to retrieve titles of deleted videos from wayback
|
||||
machine. Requires yt-dlp.
|
||||
|
||||
--csv Input is a path to a local CSV (override automatic parsing)
|
||||
--time-format <format> Time format in Length column of the csv file (e.g h:m:s.ms
|
||||
for durations like 1:04:35.123). Default: s (seconds)
|
||||
for durations like 1:04:35.123). Default: s
|
||||
--yt-parse Enable if the csv file contains YouTube video titles and
|
||||
channel names; attempt to parse them into proper title and
|
||||
artist. If the csv contains an "ID", "URL", or
|
||||
"Description" column then those will be used for parsing as
|
||||
well.
|
||||
channel names; attempt to parse them into title and artist
|
||||
names.
|
||||
|
||||
--string Input is a search string (override automatic parsing)
|
||||
-a --aggregate Instead of downloading a single track matching the search
|
||||
string, find and download all distinct songs associated with
|
||||
the provided artist, album, or track title. Search string must
|
||||
be a list of properties.
|
||||
-a --aggregate When input is a string: Instead of downloading a single
|
||||
track matching the search string, find and download all
|
||||
distinct songs associated with the provided artist, album,
|
||||
or track title. Input string must be a list of properties.
|
||||
--min-users-aggregate <num> Minimum number of users sharing a track before it is
|
||||
downloaded in aggregate mode. Setting it to 2 or more will
|
||||
significantly reduce false positives, but may introduce false
|
||||
negatives. Default: 1
|
||||
downloaded in aggregate mode. Setting it to higher values
|
||||
will significantly reduce false positives, but may introduce
|
||||
false negatives. Default: 2
|
||||
|
||||
-p --path <path> Where to place downloaded files
|
||||
-f --folder <name> Subfolder name
|
||||
-p --path <path> Download folder
|
||||
-f --folder <name> Subfolder name (default: playlist/csv name)
|
||||
-n --number <maxtracks> Download the first n tracks of a playlist
|
||||
-o --offset <offset> Skip a specified number of tracks
|
||||
--reverse Download tracks in reverse order
|
||||
--remove-from-playlist Remove downloaded tracks from playlist (spotify only)
|
||||
--remove-from-playlist Remove downloaded tracks from playlist (for spotify only)
|
||||
--name-format <format> Name format for downloaded tracks, e.g "{artist} - {title}"
|
||||
--m3u Create an m3u8 playlist file
|
||||
|
||||
|
@ -102,54 +98,50 @@ Options:
|
|||
--banned-users <list> Comma-separated list of users to ignore
|
||||
--danger-words <list> Comma-separated list of words that must appear in either
|
||||
both search result and track title or in neither of the
|
||||
two. Case-insensitive. (default:"mix, edit, dj, cover")
|
||||
two. Case-insensitive. (default:"remix, edit,cover")
|
||||
--pref-format <format> Preferred file format(s), comma-separated (default: mp3)
|
||||
--pref-length-tol <tol> Preferred length tolerance in seconds (default: 3)
|
||||
--pref-length-tol <tol> Preferred length tolerance in seconds (default: 2)
|
||||
--pref-min-bitrate <rate> Preferred minimum bitrate (default: 200)
|
||||
--pref-max-bitrate <rate> Preferred maximum bitrate (default: 2200)
|
||||
--pref-max-samplerate <rate> Preferred maximum sample rate (default: 96000)
|
||||
--pref-strict-title Prefer download if filename contains track title
|
||||
--pref-strict-artist Prefer download if filepath contains track artist
|
||||
--pref-banned-users <list> Comma-separated list of users to deprioritize
|
||||
--pref-danger-words <list> Comma-separated list of words that should appear in either
|
||||
both search result and track title or in neither of the
|
||||
two.
|
||||
two. (default: see github)
|
||||
|
||||
-s --skip-existing Skip if a track matching file conditions is found in the
|
||||
output folder or your music library (if provided)
|
||||
--skip-mode <mode> Sets the way the program checks if a track exists
|
||||
name: Use only filenames
|
||||
--skip-mode <mode> name: Use only filenames to check if a track exists
|
||||
name-precise (default): Use filenames and check conditions
|
||||
tag: Use file tags (slower)
|
||||
tag-precise: Use file tags and check file conditions
|
||||
--music-dir <path> Specify to skip downloading tracks found in a music library
|
||||
use with --skip-existing
|
||||
Use with --skip-existing
|
||||
--skip-not-found Skip searching for tracks that weren't found on Soulseek
|
||||
during the last run.
|
||||
--remove-ft Remove "ft." or "feat." and everything after from the
|
||||
track names before searching.
|
||||
--remove-brackets Remove text in square brackets from track names before
|
||||
searching.
|
||||
track names before searching
|
||||
--remove-regex <regex> Remove a regex from all track names and artist names
|
||||
--no-artist-search Perform a search without artist name if nothing was
|
||||
found. Only use for sources such as youtube or soundcloud
|
||||
where the "artist" could just be an uploader.
|
||||
--artist-search Also try to find track by searching for the artist only
|
||||
--no-regex-search <reg> Perform an additional search without a regex pattern
|
||||
--no-diacr-search Perform an additional search without diacritics
|
||||
-d --desperate Equivalent to enabling all additional searches, slower.
|
||||
--no-diacr-search Also perform a search without diacritics
|
||||
--no-regex-search <regex> Also perform a search without a regex pattern
|
||||
--yt-dlp Use yt-dlp to download tracks that weren't found on
|
||||
Soulseek. yt-dlp must be available from the command line.
|
||||
|
||||
--config <path> Specify config file location
|
||||
--search-timeout <ms> Max search time in ms (default: 6000)
|
||||
--max-stale-time <ms> Max download time without progress in ms (default: 50000)
|
||||
--concurrent-processes <num> Max concurrent searches & downloads (default: 2)
|
||||
--display <option> Changes how searches and downloads are displayed.
|
||||
single (default): Show transfer state and percentage.
|
||||
double: Also show a progress bar.
|
||||
simple: Only printing
|
||||
--concurrent-downloads <num> Max concurrent searches & downloads (default: 2)
|
||||
--display <option> Changes how searches and downloads are displayed:
|
||||
single (default): Show transfer state and percentage
|
||||
double: Transfer state and a large progress bar
|
||||
simple: No download bars or changing percentages
|
||||
|
||||
--print <option> Only print tracks or results instead of downloading.
|
||||
--print <option> Print tracks or search results instead of downloading:
|
||||
tracks: Print all tracks to be downloaded
|
||||
tracks-full: Print extended information about all tracks
|
||||
results: Print search results satisfying file conditions
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -85,7 +85,7 @@ public class Spotify
|
|||
public async Task<List<Track>> GetLikes(int max = int.MaxValue, int offset = 0)
|
||||
{
|
||||
if (!loggedIn)
|
||||
throw new Exception("Can't get liked music, not logged in");
|
||||
throw new Exception("Can't get liked music as user is not logged in");
|
||||
|
||||
List<Track> res = new List<Track>();
|
||||
int limit = Math.Min(max, 50);
|
||||
|
|
|
@ -3,7 +3,12 @@ using Google.Apis.Services;
|
|||
using System.Xml;
|
||||
using YoutubeExplode;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
using YoutubeExplode.Common;
|
||||
using System.Diagnostics;
|
||||
using HtmlAgilityPack;
|
||||
using System.Text;
|
||||
using System.Threading.Channels;
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
public static class YouTube
|
||||
{
|
||||
|
@ -67,10 +72,10 @@ public static class YouTube
|
|||
break;
|
||||
}
|
||||
|
||||
if (tracksDict.Count >= 200)
|
||||
if (tracksDict.Count >= 200 && !Console.IsOutputRedirected)
|
||||
{
|
||||
Console.SetCursorPosition(0, Console.CursorTop);
|
||||
Console.Write(tracks.Count);
|
||||
Console.Write($"Loaded: {tracks.Count}");
|
||||
}
|
||||
|
||||
playlistItemsRequest.PageToken = playlistItemsResponse.NextPageToken;
|
||||
|
@ -93,30 +98,6 @@ public static class YouTube
|
|||
|
||||
title = title.Replace("–", "-");
|
||||
|
||||
var stringsToRemove = new string[] { "(Official music video)", "(Official video)", "(Official audio)",
|
||||
"(Lyrics)", "(Official)", "(Lyric Video)", "(Official Lyric Video)", "(Official HD Video)",
|
||||
"(Official 4K Video)", "(Video)", "[HD]", "[4K]", "(Original Mix)", "(Lyric)", "(Music Video)",
|
||||
"(Visualizer)", "(Audio)", "Official Lyrics" };
|
||||
|
||||
foreach (string s in stringsToRemove)
|
||||
{
|
||||
var t = title;
|
||||
title = Regex.Replace(title, Regex.Escape(s), "", RegexOptions.IgnoreCase);
|
||||
if (t == title)
|
||||
{
|
||||
if (s.Contains("["))
|
||||
{
|
||||
string s2 = s.Replace("[", "(").Replace("]", ")");
|
||||
title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
|
||||
}
|
||||
else if (s.Contains("("))
|
||||
{
|
||||
string s2 = s.Replace("(", "[").Replace(")", "]");
|
||||
title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var trackTitle = title.Trim();
|
||||
trackTitle = Regex.Replace(trackTitle, @"\s+", " ");
|
||||
var artist = uploader.Trim();
|
||||
|
@ -297,4 +278,159 @@ public static class YouTube
|
|||
var playlist = await youtube.Playlists.GetAsync(url);
|
||||
return playlist.Id.ToString();
|
||||
}
|
||||
|
||||
/// <summary>
/// Recovers basic metadata (title, uploader, duration) for playlist entries that
/// yt-dlp reports without an uploader, by scraping the oldest Wayback Machine
/// capture of each video page.
/// </summary>
public class YouTubeArchiveRetriever
{
    // Number of concurrent consumers that query the Wayback Machine.
    private const int WorkerCount = 4;

    // How many times a CDX lookup is attempted before giving up on a video.
    private const int MaxRequestAttempts = 3;

    private readonly HttpClient _client;

    public YouTubeArchiveRetriever()
    {
        _client = new HttpClient { Timeout = TimeSpan.FromSeconds(10) };
    }

    /// <summary>
    /// Runs yt-dlp on <paramref name="url"/>, collects the page urls it prints for
    /// entries matching the "!uploader" filter, and resolves each of them to a
    /// <see cref="Track"/> through the Wayback Machine.
    /// </summary>
    /// <param name="url">Playlist url handed to yt-dlp.</param>
    /// <returns>
    /// The tracks that could be recovered. Order is not defined because the urls
    /// are processed by several workers in parallel.
    /// </returns>
    public async Task<List<Track>> RetrieveDeleted(string url)
    {
        var deletedVideoUrls = new BlockingCollection<string>();
        var tracks = new ConcurrentBag<Track>();

        var startInfo = new ProcessStartInfo
        {
            FileName = "yt-dlp",
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true,
        };

        // ArgumentList passes every entry as exactly one argument, so a url
        // containing spaces or quotes cannot split into or inject extra options.
        var arguments = new[]
        {
            "--ignore-no-formats-error", "--no-warn",
            "--match-filter", "!uploader",
            "--print", "webpage_url",
            url,
        };
        foreach (var argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using (var process = new Process { StartInfo = startInfo })
        {
            bool gotFirst = false;
            process.OutputDataReceived += (sender, e) =>
            {
                // e.Data is null when the stream closes; blank lines carry no url.
                if (string.IsNullOrWhiteSpace(e.Data))
                {
                    return;
                }

                if (!gotFirst)
                {
                    Console.WriteLine("Got first video");
                    gotFirst = true;
                }

                deletedVideoUrls.Add(e.Data.Trim());
            };

            process.Start();
            process.BeginOutputReadLine();

            // The parameterless WaitForExit also waits until all redirected output
            // has been delivered to the handler above, so completing the queue here
            // (rather than in the Exited event) can never race with a late Add.
            var producer = Task.Run(() =>
            {
                try
                {
                    process.WaitForExit();
                }
                finally
                {
                    deletedVideoUrls.CompleteAdding();
                }
            });

            var workers = new List<Task>();
            for (int i = 0; i < WorkerCount; i++)
            {
                workers.Add(Task.Run(async () =>
                {
                    foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
                    {
                        var waybackUrl = await GetOldestArchiveUrl(videoUrl);
                        if (string.IsNullOrEmpty(waybackUrl))
                        {
                            continue;
                        }

                        var details = await GetVideoDetails(waybackUrl);
                        if (string.IsNullOrEmpty(details.title))
                        {
                            continue;
                        }

                        var track = await ParseTrackInfo(details.title, details.uploader, waybackUrl, details.duration, false);
                        tracks.Add(track);

                        if (!Console.IsOutputRedirected)
                        {
                            Console.SetCursorPosition(0, Console.CursorTop);
                            Console.Write($"Deleted videos processed: {tracks.Count}");
                        }
                    }
                }));
            }

            try
            {
                await Task.WhenAll(workers);
            }
            finally
            {
                // Never dispose the process while the producer is still waiting on it.
                await producer;
            }
        }

        Console.WriteLine();
        return tracks.ToList();
    }

    /// <summary>
    /// Asks the Wayback Machine CDX endpoint for the first capture of
    /// <paramref name="url"/> that was archived with status code 200.
    /// </summary>
    /// <returns>The archive url, or null when no capture is found or the request fails.</returns>
    private async Task<string> GetOldestArchiveUrl(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return null;
        }

        // The video url carries its own '?', '=' and '&', so it must be escaped to
        // survive as a single query parameter value.
        var cdxUrl = $"http://web.archive.org/cdx/search/cdx?url={Uri.EscapeDataString(url)}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";

        HttpResponseMessage response = null;
        for (int attempt = 0; attempt < MaxRequestAttempts && response == null; attempt++)
        {
            try
            {
                response = await _client.GetAsync(cdxUrl);
            }
            catch (HttpRequestException)
            {
                // Transient network failure: retry.
            }
            catch (OperationCanceledException)
            {
                // HttpClient reports a timeout as a cancelled task: retry.
            }
        }

        if (response == null)
        {
            return null;
        }

        using (response)
        {
            if (!response.IsSuccessStatusCode)
            {
                return null;
            }

            var content = await response.Content.ReadAsStringAsync();
            var firstLine = content.Split('\n').FirstOrDefault(line => !string.IsNullOrWhiteSpace(line));
            if (firstLine == null)
            {
                return null;
            }

            // Expected shape is "<timestamp> <original url>"; anything else is ignored.
            var parts = firstLine.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                return null;
            }

            return $"http://web.archive.org/web/{parts[0]}/{parts[1]}";
        }
    }

    /// <summary>
    /// Loads an archived video page and extracts title, uploader and duration by
    /// trying a list of XPath patterns in order.
    /// </summary>
    /// <param name="url">Url of the archived page.</param>
    /// <returns>
    /// Title and uploader are empty strings when no pattern matches; duration is
    /// -1 when the page has no parseable duration metadata.
    /// </returns>
    public async Task<(string title, string uploader, int duration)> GetVideoDetails(string url)
    {
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);

        var titlePatterns = new[]
        {
            "//h1[@id='video_title']",
            "//meta[@name='title']",
        };

        var usernamePatterns = new[]
        {
            "//div[@id='userInfoDiv']/b/a",
            "//a[contains(@class, 'contributor')]",
            "//a[@id='watch-username']",
            "//a[contains(@class, 'author')]",
            "//div[@class='yt-user-info']/a",
            "//div[@id='upload-info']//yt-formatted-string/a",
            "//span[@itemprop='author']//link[@itemprop='name']",
            "//a[contains(@class, 'yt-user-name')]",
        };

        // Returns the first non-empty value produced by any pattern, or "".
        string getItem(string[] patterns)
        {
            foreach (var pattern in patterns)
            {
                var match = doc.DocumentNode.SelectSingleNode(pattern);
                if (match == null)
                {
                    continue;
                }

                // Metadata elements keep their value in the "content" attribute,
                // visible elements keep it in their text.
                bool isMetadata = pattern.StartsWith("//meta") || pattern.Contains("@itemprop");
                var value = isMetadata ? match.GetAttributeValue("content", "") : match.InnerText;
                if (!string.IsNullOrEmpty(value))
                {
                    return value;
                }
            }
            return "";
        }

        int duration = -1;
        var durationNode = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
        var durationText = durationNode?.GetAttributeValue("content", "");
        if (!string.IsNullOrEmpty(durationText))
        {
            try
            {
                duration = (int)XmlConvert.ToTimeSpan(durationText).TotalSeconds;
            }
            catch (FormatException)
            {
                // Malformed duration in the archived page: keep -1 instead of
                // discarding the title and uploader that may still be usable.
            }
        }

        return (getItem(titlePatterns), getItem(usernamePatterns), duration);
    }
}
|
||||
}
|
||||
|
|
|
@ -19,12 +19,13 @@
|
|||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Goblinfactory.ProgressBar" Version="1.0.0" />
|
||||
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945" />
|
||||
<PackageReference Include="Soulseek" Version="6.1.1" />
|
||||
<PackageReference Include="SpotifyAPI.Web" Version="7.0.0" />
|
||||
<PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.0" />
|
||||
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.63.0.3205" />
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
|
||||
<PackageReference Include="Soulseek" Version="6.1.3" />
|
||||
<PackageReference Include="SpotifyAPI.Web" Version="7.0.2" />
|
||||
<PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.2" />
|
||||
<PackageReference Include="TagLibSharp" Version="2.3.0" />
|
||||
<PackageReference Include="YoutubeExplode" Version="6.2.12" />
|
||||
<PackageReference Include="YoutubeExplode" Version="6.3.7" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
Loading…
Reference in a new issue