stuff

2024-12-22 22:42:41 +00:00 · 2023-11-15 22:45:51 +01:00 · 2023-11-15 22:45:51 +01:00 · f35f1e1e6c
commit f35f1e1e6c
parent a76abef429
5 changed files with 809 additions and 605 deletions
--- a/README.md
+++ b/README.md
@ -2,32 +2,32 @@
 A batch downloader for Soulseek using Soulseek.NET. Accepts CSV files and Spotify or YouTube urls.
-##### Download tracks from a csv file:
+#### Download tracks from a csv file:
 ```
 slsk-batchdl -i test.csv
 ```  
 Use `--print tracks` before downloading to check if everything has been parsed correctly. The names of the columns should be: `Artist`, `Title`, `Album`, `Length`. Only the title column is required, but any additional info improves search.
-##### Download spotify likes while skipping existing songs and creating an m3u file:
+#### Download spotify likes while skipping existing songs:
 ```
-slsk-batchdl -i spotify-likes --m3u --skip-existing
+slsk-batchdl -i spotify-likes --skip-existing
 ```
-You might need to provide an id and secret when using spotify (e.g when downloading a private playlist), which you can get here https://developer.spotify.com/dashboard/applications. Create an app, then select it and add `http://localhost:48721/callback` as a redirect url in the settings.  
+To download private playlists or liked songs you will need to provide a client id and secret, which you can get here https://developer.spotify.com/dashboard/applications. Create an app and add `http://localhost:48721/callback` as a redirect url in its settings.  
-##### Download the first 10 songs of a youtube playlist:
+#### Download a youtube playlist (with fallback to yt-dlp), including deleted videos:
 ```
-slsk-batchdl -n 10 -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
+slsk-batchdl --get-deleted --yt-dlp -i "https://www.youtube.com/playlist?list=PLI_eFW8NAFzYAXZ5DrU6E6mQ_XfhaLBUX"
 ```
-To include unavailable videos, you will need to provide an api key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.  
+Playlists are retrieved using the YoutubeExplode library which unfortunately doesn't always return all videos. You can use the official API by providing a key with `--youtube-key`. Get it here https://console.cloud.google.com. Create a new project, click "Enable Api" and search for "youtube data", then follow the prompts.  
-##### Search & download a specific song, preferring high quality:
+#### Search & download a specific song:
 ```
 slsk-batchdl -i "title=MC MENTAL @ HIS BEST,length=242" --pref-format "flac,wav"
 ```  
-##### Find an artist's songs which aren't in your library:
+#### Find an artist's songs which aren't in your library:
 ```
-slsk-batchdl -i "artist=MC MENTAL" -a --print tracks --skip-existing --music-dir "path\to\music"
+slsk-batchdl -i "artist=MC MENTAL" --aggregate --print tracks --skip-existing --music-dir "path\to\music"
 ```
 ### Options:
@ -48,47 +48,43 @@ Usage: slsk-batchdl -i <input> [OPTIONS]
                                 Title, Album, Length. Only the title column is required, but
                                 any extra info improves search results.
-                                 String for the track, album, or artist to search for:
+                                 Name of the track, album, or artist to search for:
-                                 Can either be any typical search text like "Artist - Title"
+                                 Can either be any typical search string or a comma-separated
-                                 or a comma-separated list like "title=Song,artist=Artist"
+                                 list like "title=Song Name,artist=Artist Name,length=215"
-                                 Available fields: title, artist, album, length (in seconds).
+                                 Allowed properties are: title, artist, album, length (sec)
 Options:
  --user <username>              Soulseek username
  --pass <password>              Soulseek password
-  --spotify                      Input is a spotify url (override automatic parsing)
+  --spotify-id <id>              spotify client ID
-  --spotify-id <id>              spotify client ID (required for private playlists)
+  --spotify-secret <secret>      spotify client secret
  --spotify-secret <secret>      spotify client secret (required for private playlists)
  --youtube                      Input is a youtube url (override automatic parsing)
  --youtube-key <key>            Youtube data API key
  --get-deleted                  Attempt to retrieve titles of deleted videos from wayback
                                 machine. Requires yt-dlp.
  --csv                          Input is a path to a local CSV (override automatic parsing)
  --time-format <format>         Time format in Length column of the csv file (e.g h:m:s.ms
-                                 for durations like 1:04:35.123). Default: s (seconds)
+                                 for durations like 1:04:35.123). Default: s
  --yt-parse                     Enable if the csv file contains YouTube video titles and
-                                 channel names; attempt to parse them into proper title and
+                                 channel names; attempt to parse them into title and artist
-                                 artist. If the the csv contains an "ID", "URL", or
+                                 names.
                                 "Description" column then those will be used for parsing as
                                 well.
-  --string                       Input is a search string (override automatic parsing)
+  -a --aggregate                 When input is a string: Instead of downloading a single
-  -a --aggregate                 Instead of downloading a single track matching the search
+                                 track matching the search string, find and download all
-                                 string, find and download all distinct songs associated with
+                                 distinct songs associated with the provided artist, album,
-                                 the provided artist, album, or track title. Search string must
+                                 or track title. Input string must be a list of properties.
                                 be a list of properties.
  --min-users-aggregate <num>    Minimum number of users sharing a track before it is
-                                 downloaded in aggregate mode. Setting it to 2 or more will
+                                 downloaded in aggregate mode. Setting it to higher values
-                                 significantly reduce false positives, but may introduce false
+                                 will significantly reduce false positives, but may introduce
-                                 negatives. Default: 1
+                                 false negatives. Default: 2
-  -p --path <path>               Where to place downloaded files
+  -p --path <path>               Download folder
-  -f --folder <name>             Subfolder name
+  -f --folder <name>             Subfolder name (default: playlist/csv name)
  -n --number <maxtracks>        Download the first n tracks of a playlist
  -o --offset <offset>           Skip a specified number of tracks
  --reverse                      Download tracks in reverse order
-  --remove-from-playlist         Remove downloaded tracks from playlist (spotify only)
+  --remove-from-playlist         Remove downloaded tracks from playlist (for spotify only)
  --name-format <format>         Name format for downloaded tracks, e.g "{artist} - {title}"
  --m3u                          Create an m3u8 playlist file
@ -102,54 +98,50 @@ Options:
  --banned-users <list>          Comma-separated list of users to ignore
  --danger-words <list>          Comma-separated list of words that must appear in either
                                 both search result and track title or in neither of the
-                                 two. Case-insensitive. (default:"mix, edit, dj, cover")
+                                 two. Case-insensitive. (default:"remix, edit,cover")
  --pref-format <format>         Preferred file format(s), comma-separated (default: mp3)
-  --pref-length-tol <tol>        Preferred length tolerance in seconds (default: 3)
+  --pref-length-tol <tol>        Preferred length tolerance in seconds (default: 2)
  --pref-min-bitrate <rate>      Preferred minimum bitrate (default: 200)
  --pref-max-bitrate <rate>      Preferred maximum bitrate (default: 2200)
  --pref-max-samplerate <rate>   Preferred maximum sample rate (default: 96000)
  --pref-strict-title            Prefer download if filename contains track title
  --pref-strict-artist           Prefer download if filepath contains track artist
  --pref-banned-users <list>     Comma-separated list of users to deprioritize
  --pref-danger-words <list>     Comma-separated list of words that should appear in either
                                 both search result and track title or in neither of the
-                                 two.
+                                 two. (default: see github)
  -s --skip-existing             Skip if a track matching file conditions is found in the
                                 output folder or your music library (if provided)
-  --skip-mode <mode>             Sets the way the program checks if a track exists
+  --skip-mode <mode>             name: Use only filenames to check if a track exists
                                 name: Use only filenames
                                 name-precise (default): Use filenames and check conditions
                                 tag: Use file tags (slower)
                                 tag-precise: Use file tags and check file conditions
  --music-dir <path>             Specify to skip downloading tracks found in a music library
-                                 use with --skip-existing
+                                 Use with --skip-existing
  --skip-not-found               Skip searching for tracks that weren't found on Soulseek
                                 during the last run.
  --remove-ft                    Remove "ft." or "feat." and everything after from the
-                                 track names before searching.
+                                 track names before searching
-  --remove-brackets              Remove text in square brackets from track names before
+  --remove-regex <regex>         Remove a regex from all track names and artist names
                                 searching.
  --no-artist-search             Perform a search without artist name if nothing was
                                 found. Only use for sources such as youtube or soundcloud
                                 where the "artist" could just be an uploader.
  --artist-search                Also try to find track by searching for the artist only
-  --no-regex-search <reg>        Perform an additional search without a regex pattern
+  --no-diacr-search              Also perform a search without diacritics
-  --no-diacr-search              Perform an additional search without diacritics
+  --no-regex-search <regex>      Also perform a search without a regex pattern
  -d --desperate                 Equivalent to enabling all additional searches, slower.
  --yt-dlp                       Use yt-dlp to download tracks that weren't found on
                                 Soulseek. yt-dlp must be available from the command line.
  --config <path>                Specify config file location
  --search-timeout <ms>          Max search time in ms (default: 6000)
  --max-stale-time <ms>          Max download time without progress in ms (default: 50000)
-  --concurrent-processes <num>   Max concurrent searches & downloads (default: 2)
+  --concurrent-downloads <num>   Max concurrent searches & downloads (default: 2)
-  --display <option>             Changes how searches and downloads are displayed.
+  --display <option>             Changes how searches and downloads are displayed:
-                                 single (default): Show transfer state and percentage.
+                                 single (default): Show transfer state and percentage
-                                 double: Also show a progress bar.
+                                 double: Transfer state and a large progress bar
-                                 simple: Only printing
+                                 simple: No download bars or changing percentages
-  --print <option>               Only print tracks or results instead of downloading.
+  --print <option>               Print tracks or search results instead of downloading:
                                 tracks: Print all tracks to be downloaded
                                 tracks-full: Print extended information about all tracks
                                 results: Print search results satisfying file conditions
--- a/slsk-batchdl/Program.cs
+++ b/slsk-batchdl/Program.cs
--- a/slsk-batchdl/Spotify.cs
+++ b/slsk-batchdl/Spotify.cs
@ -85,7 +85,7 @@ public class Spotify
    public async Task<List<Track>> GetLikes(int max = int.MaxValue, int offset = 0)
    {
        if (!loggedIn)
-            throw new Exception("Can't get liked music, not logged in");
+            throw new Exception("Can't get liked music as user is not logged in");
        List<Track> res = new List<Track>();
        int limit = Math.Min(max, 50);
--- a/slsk-batchdl/YouTube.cs
+++ b/slsk-batchdl/YouTube.cs
@ -3,7 +3,12 @@ using Google.Apis.Services;
 using System.Xml;
 using YoutubeExplode;
 using System.Text.RegularExpressions;
-
+using YoutubeExplode.Common;
 using System.Diagnostics;
 using HtmlAgilityPack;
 using System.Text;
 using System.Threading.Channels;
 using System.Collections.Concurrent;
 public static class YouTube
 {
@ -67,10 +72,10 @@ public static class YouTube
                    break;
            }
-            if (tracksDict.Count >= 200)
+            if (tracksDict.Count >= 200 && !Console.IsOutputRedirected)
            {
                Console.SetCursorPosition(0, Console.CursorTop);
-                Console.Write(tracks.Count);
+                Console.Write($"Loaded: {tracks.Count}");
            }
            playlistItemsRequest.PageToken = playlistItemsResponse.NextPageToken;
@ -93,30 +98,6 @@ public static class YouTube
        title = title.Replace("–", "-");
        var stringsToRemove = new string[] { "(Official music video)", "(Official video)", "(Official audio)",
                    "(Lyrics)", "(Official)", "(Lyric Video)", "(Official Lyric Video)", "(Official HD Video)",
                    "(Official 4K Video)", "(Video)", "[HD]", "[4K]", "(Original Mix)", "(Lyric)", "(Music Video)", 
                    "(Visualizer)", "(Audio)", "Official Lyrics" };
        foreach (string s in stringsToRemove)
        {
            var t = title;
            title = Regex.Replace(title, Regex.Escape(s), "", RegexOptions.IgnoreCase);
            if (t == title)
            {
                if (s.Contains("["))
                {
                    string s2 = s.Replace("[", "(").Replace("]", ")");
                    title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
                }
                else if (s.Contains("("))
                {
                    string s2 = s.Replace("(", "[").Replace(")", "]");
                    title = Regex.Replace(title, Regex.Escape(s2), "", RegexOptions.IgnoreCase);
                }
            }
        }
        var trackTitle = title.Trim();
        trackTitle = Regex.Replace(trackTitle, @"\s+", " ");
        var artist = uploader.Trim();
@ -297,4 +278,159 @@ public static class YouTube
        var playlist = await youtube.Playlists.GetAsync(url);
        return playlist.Id.ToString();
    }
    /// <summary>
    /// Looks up videos that yt-dlp reports without an uploader (treated here as
    /// deleted/unavailable) on the Wayback Machine and extracts title, uploader and
    /// duration from the oldest archived copy of each video page.
    /// </summary>
    public class YouTubeArchiveRetriever
    {
        // One client for all requests made by this retriever; a short timeout keeps
        // unresponsive archive lookups from stalling a worker for long.
        private readonly HttpClient _client;

        /// <summary>Creates a retriever whose HTTP requests time out after 10 seconds.</summary>
        public YouTubeArchiveRetriever()
        {
            _client = new HttpClient();
            _client.Timeout = TimeSpan.FromSeconds(10);
        }

        /// <summary>
        /// Runs yt-dlp on <paramref name="url"/> to list videos that have no uploader,
        /// then resolves each one through the Wayback Machine with four parallel workers.
        /// </summary>
        /// <param name="url">Playlist (or video) url handed to yt-dlp.</param>
        /// <returns>
        /// One track per video for which an archived page with a non-empty title was found.
        /// Videos whose archive lookup fails with a network error or timeout are skipped.
        /// </returns>
        public async Task<List<Track>> RetrieveDeleted(string url)
        {
            var deletedVideoUrls = new BlockingCollection<string>();
            var tracks = new ConcurrentBag<Track>();

            using var process = new Process()
            {
                StartInfo = new ProcessStartInfo
                {
                    FileName = "yt-dlp",
                    // ArgumentList quotes every argument individually, so a url containing
                    // spaces or quotes cannot be split into additional yt-dlp options.
                    ArgumentList =
                    {
                        "--ignore-no-formats-error",
                        "--no-warn",
                        "--match-filter", "!uploader",
                        "--print", "webpage_url",
                        url,
                    },
                    RedirectStandardOutput = true,
                    UseShellExecute = false,
                    CreateNoWindow = true,
                }
            };

            bool ok = false;
            process.OutputDataReceived += (sender, e) =>
            {
                // A null line marks the end of the redirected stream. Completing the queue
                // here (instead of in Process.Exited) guarantees that no line can arrive
                // after CompleteAdding, which would make Add throw.
                if (e.Data == null)
                {
                    deletedVideoUrls.CompleteAdding();
                    return;
                }

                if (string.IsNullOrWhiteSpace(e.Data))
                    return;

                if (!ok) { Console.WriteLine("Got first video"); ok = true; }
                deletedVideoUrls.Add(e.Data.Trim());
            };

            process.Start();
            process.BeginOutputReadLine();

            List<Task> workers = new List<Task>();
            int workerCount = 4;
            for (int i = 0; i < workerCount; i++)
            {
                workers.Add(Task.Run(async () =>
                {
                    foreach (var videoUrl in deletedVideoUrls.GetConsumingEnumerable())
                    {
                        try
                        {
                            var waybackUrl = await GetOldestArchiveUrl(videoUrl);
                            if (string.IsNullOrEmpty(waybackUrl))
                                continue;

                            var details = await GetVideoDetails(waybackUrl);
                            if (string.IsNullOrEmpty(details.title))
                                continue;

                            var track = await ParseTrackInfo(details.title, details.uploader, waybackUrl, details.duration, false);
                            tracks.Add(track);

                            if (!Console.IsOutputRedirected)
                            {
                                Console.SetCursorPosition(0, Console.CursorTop);
                                Console.Write($"Deleted videos processed: {tracks.Count}");
                            }
                        }
                        // Retrieval is best-effort: a network failure or timeout for one
                        // archived page must not abort the remaining videos.
                        catch (HttpRequestException) { }
                        catch (TaskCanceledException) { }
                    }
                }));
            }

            await Task.WhenAll(workers);
            process.WaitForExit();
            // Harmless if already completed by the end-of-stream handler.
            deletedVideoUrls.CompleteAdding();
            Console.WriteLine();
            return tracks.ToList();
        }

        /// <summary>
        /// Queries the Wayback Machine CDX index for the oldest capture of
        /// <paramref name="url"/> that was archived with HTTP status 200.
        /// </summary>
        /// <returns>The archive url of that capture, or null if none was found or the
        /// request failed three times.</returns>
        private async Task<string> GetOldestArchiveUrl(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
                return null;

            // The video url is itself a query-string value, so it must be escaped;
            // otherwise an '&' inside it would be read as a separate CDX parameter.
            var url2 = $"http://web.archive.org/cdx/search/cdx?url={Uri.EscapeDataString(url)}&fl=timestamp,original&filter=statuscode:200&sort=timestamp:asc&limit=1";

            HttpResponseMessage response = null;
            for (int i = 0; i < 3; i++)
            {
                try
                {
                    response = await _client.GetAsync(url2);
                    break;
                }
                // Retry on transport errors and on the client timeout.
                catch (HttpRequestException) { }
                catch (TaskCanceledException) { }
            }

            if (response == null) return null;

            using (response)
            {
                if (!response.IsSuccessStatusCode)
                    return null;

                var content = await response.Content.ReadAsStringAsync();
                var firstLine = content.Split("\n")
                    .Select(line => line.Trim())
                    .FirstOrDefault(line => line.Length > 0);
                if (firstLine == null)
                    return null;

                // Expected shape (from the fl= parameter): "<timestamp> <original url>".
                var parts = firstLine.Split(" ");
                if (parts.Length < 2)
                    return null;

                var timestamp = parts[0];
                var originalUrl = parts[1];
                return $"http://web.archive.org/web/{timestamp}/{originalUrl}";
            }
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts the video title,
        /// uploader name and duration by trying a list of XPath patterns in order.
        /// </summary>
        /// <returns>
        /// Title and uploader are empty strings when no pattern matches; duration is in
        /// seconds, or -1 when the duration meta tag is absent or cannot be parsed.
        /// </returns>
        public async Task<(string title, string uploader, int duration)> GetVideoDetails(string url)
        {
            var web = new HtmlWeb();
            var doc = await web.LoadFromWebAsync(url);

            var titlePatterns = new[]
            {
                "//h1[@id='video_title']",
                "//meta[@name='title']",
            };

            var usernamePatterns = new[]
            {
                "//div[@id='userInfoDiv']/b/a",
                "//a[contains(@class, 'contributor')]",
                "//a[@id='watch-username']",
                "//a[contains(@class, 'author')]",
                "//div[@class='yt-user-info']/a",
                "//div[@id='upload-info']//yt-formatted-string/a",
                "//span[@itemprop='author']//link[@itemprop='name']",
                "//a[contains(@class, 'yt-user-name')]",
            };

            // Returns the first non-empty value produced by any of the patterns.
            string getItem(string[] patterns)
            {
                foreach (var pattern in patterns)
                {
                    var match = doc.DocumentNode.SelectSingleNode(pattern);
                    if (match == null)
                        continue;

                    // meta/itemprop elements carry their value in the "content"
                    // attribute, the remaining patterns in the element text.
                    string res;
                    if (pattern.StartsWith("//meta") || pattern.Contains("@itemprop"))
                        res = match.GetAttributeValue("content", "");
                    else
                        res = match.InnerText;

                    if (!string.IsNullOrEmpty(res)) return res;
                }
                return "";
            }

            int duration = -1;
            var durationNode = doc.DocumentNode.SelectSingleNode("//meta[@itemprop='duration']");
            if (durationNode != null)
            {
                try
                {
                    duration = (int)XmlConvert.ToTimeSpan(durationNode.GetAttributeValue("content", "")).TotalSeconds;
                }
                catch (FormatException)
                {
                    // Missing or malformed duration attribute: keep the -1 sentinel
                    // instead of failing the whole lookup.
                }
            }

            return (getItem(titlePatterns), getItem(usernamePatterns), duration);
        }
    }
 }
--- a/slsk-batchdl/slsk-batchdl.csproj
+++ b/slsk-batchdl/slsk-batchdl.csproj
@ -19,12 +19,13 @@
  <ItemGroup>
    <PackageReference Include="Goblinfactory.ProgressBar" Version="1.0.0" />
-    <PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945" />
+    <PackageReference Include="Google.Apis.YouTube.v3" Version="1.63.0.3205" />
-    <PackageReference Include="Soulseek" Version="6.1.1" />
+    <PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
-    <PackageReference Include="SpotifyAPI.Web" Version="7.0.0" />
+    <PackageReference Include="Soulseek" Version="6.1.3" />
-    <PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.0" />
+    <PackageReference Include="SpotifyAPI.Web" Version="7.0.2" />
    <PackageReference Include="SpotifyAPI.Web.Auth" Version="7.0.2" />
    <PackageReference Include="TagLibSharp" Version="2.3.0" />
-    <PackageReference Include="YoutubeExplode" Version="6.2.12" />
+    <PackageReference Include="YoutubeExplode" Version="6.3.7" />
  </ItemGroup>
 </Project>