njoseph.me Git - nimcoon.git/commitdiff
Remove dependency on Invidious
author: Joseph Nuthalapati <njoseph@riseup.net>
Wed, 5 Aug 2020 02:05:55 +0000 (07:35 +0530)
committer: Joseph Nuthalapati <njoseph@riseup.net>
Wed, 5 Aug 2020 02:05:55 +0000 (07:35 +0530)
YouTube search pages are now scraped directly.

Signed-off-by: Joseph Nuthalapati <njoseph@riseup.net>
src/lib.nim
src/nimcoon.nim
src/types.nim [new file with mode: 0644]
src/youtube.nim [new file with mode: 0644]

diff --git a/src/lib.nim b/src/lib.nim
index 0f6ae154a249a7bf06fc36eaa1675444fb5878f9..e43cfd7033adc2f832b90a67fc248c2d86813006 100644 (file)
@@ -1,5 +1,4 @@
 import
-  htmlparser,
   httpClient,
   json,
   os,
@@ -8,22 +7,12 @@ import
   sequtils,
   std/[terminal],
   strformat,
-  strtabs,
   strutils,
-  sugar,
-  tables,
-  uri,
-  xmltree
+  tables
 
-import config
-
-
-type
-  Options* = Table[string, bool]
-  SearchResult* = tuple[title: string, url: string]
-  SearchResults* = seq[tuple[title: string, url: string]]
-  CommandLineOptions* = tuple[searchQuery: string, options: Options]
-  SelectionRange* = tuple[begin: int, until: int]
+import
+  config,
+  types
 
 
 let
@@ -44,13 +33,6 @@ proc selectMediaPlayer*(): string =
     return availablePlayers[0]
 
 
-proc getYoutubePage*(searchQuery: string): string =
-  let queryParam = encodeUrl(searchQuery)
-  let client = newHttpClient()
-  let response = get(client, &"https://invidious.snopyta.org/search?q={queryParam}")
-  $response.body
-
-
 proc getPeerTubeMagnetLink(url: string): string =
   ## Gets the magnet link of the best possible resolution from PeerTube
   let uuid = url.substr(find(url, PEERTUBE_REGEX) + "videos/watch/".len)
@@ -62,32 +44,13 @@ proc getPeerTubeMagnetLink(url: string): string =
   jsonNode["files"][0]["magnetUri"].getStr()
 
 
-func extractTitlesAndUrls*(html: string): SearchResults =
-  {.noSideEffect.}:
-    # TODO Pick an invidious instance from config. Using YouTube directly for now.
-    parseHtml(html).findAll("a").
-      filter(a => "watch" in a.attrs["href"] and len(a) == 1).
-      map(a => (innerText(a), "https://www.youtube.com" & a.attrs["href"]))
-
-
 proc presentVideoOptions*(searchResults: SearchResults) =
   eraseScreen()
   for index, (title, url) in searchResults:
     styledEcho $index, ". ", styleBright, fgMagenta, title, "\n", resetStyle, fgCyan, "   ", url, "\n"
 
 
-func isPlaylist(url: string): bool =
-  ##[ Identifies if video is part of a playlist.
-      Only YouTube playlists are supported for now. ]##
-  "www.youtube.com" in url and "&list=" in url
-
-
 func buildPlayerArgs(url: string, options: Table[string, bool], player: string): seq[string] =
-  let url =
-    # Playlists are only supported by MPV player. VLC needs a plugin.
-    if isPlaylist(url) and player == "mpv":
-      "https://www.youtube.com/playlist?" & url.split('&')[1]
-    else: url
   let musicOnly = if options["musicOnly"]: "--no-video" else: ""
   let fullScreen = if options["fullScreen"]: "--fullscreen" else: ""
   filterIt([url, musicOnly, fullScreen], it != "")
@@ -129,7 +92,7 @@ func rewriteInvidiousToYouTube(url: string): string =
     if rewriteInvidiousURLs: url.replace("invidio.us", "www.youtube.com") else: url
 
 
-func stripZshEscaping(url: string): string = url.replace("\\", "")
+func stripZshEscaping(url: string): string =
+  ## Returns `url` with every backslash removed.
+  ##
+  ## Zsh can backslash-escape characters such as `?`, `=` and `&` in a URL
+  ## (e.g. `watch\?v\=abc`), so the backslashes sit in the middle of the
+  ## string. `strip` only trims the two ends and would leave them in place,
+  ## hence `replace`.
+  url.replace("\\", "")
 
 
 func sanitizeURL*(url: string): string =
diff --git a/src/nimcoon.nim b/src/nimcoon.nim
index e860ebebfe897beb62c65a0c79e8b77662dd7229..f3b29c6d0a4892df888f3d67127f050cde403b21 100644 (file)
@@ -4,8 +4,11 @@ import
   strutils,
   tables
 
-import config
-import lib
+import
+  config,
+  lib,
+  types,
+  youtube
 
 
 proc parseArguments(): CommandLineOptions =
@@ -52,7 +55,7 @@ proc main() =
       directPlay(sanitizeURL(searchQuery), player, options)
     quit(0)
 
-  let searchResults = extractTitlesAndUrls(getYoutubePage(searchQuery))
+  let searchResults = getSearchResults(searchQuery)
   let numResults = min(limit, len(searchResults))
 
   present(searchResults, options, (0, numResults-1), player)
diff --git a/src/types.nim b/src/types.nim
new file mode 100644 (file)
index 0000000..ee7abc7
--- /dev/null
@@ -0,0 +1,8 @@
+import tables
+
+type
+  # Boolean switches keyed by option name.
+  Options* = Table[string, bool]
+  # A single search hit: its display title and the URL it points to.
+  SearchResult* = tuple[title: string, url: string]
+  # A list of search hits. Defined via `SearchResult` so the two cannot
+  # drift apart; tuples are structural, so this is the same type as before.
+  SearchResults* = seq[SearchResult]
+  # The search query together with the option switches.
+  CommandLineOptions* = tuple[searchQuery: string, options: Options]
+  # A pair of integer bounds, `begin` and `until`.
+  SelectionRange* = tuple[begin: int, until: int]
diff --git a/src/youtube.nim b/src/youtube.nim
new file mode 100644 (file)
index 0000000..fd98d41
--- /dev/null
@@ -0,0 +1,42 @@
+import
+  httpClient,
+  json,
+  strformat,
+  strutils,
+  sequtils,
+  uri
+
+import types
+
+proc getYouTubePage(searchQuery: string): string =
+  ## Downloads the YouTube search results page for `searchQuery` and returns
+  ## the response body as a string.
+  ##
+  ## The query is URL-encoded before it is placed in the `search_query`
+  ## parameter. The HTTP status code is not inspected.
+  let queryParam = encodeUrl(searchQuery)
+  let client = newHttpClient()
+  try:
+    let response = get(client, &"https://www.youtube.com/results?search_query={queryParam}")
+    result = response.body
+  finally:
+    # Release the connection even when the request raises.
+    client.close()
+
+
+proc getSearchResults*(searchQuery: string): SearchResults =
+  ## Searches YouTube for `searchQuery` and returns the videos and playlists
+  ## found in the first result section, in the order they appear.
+  ##
+  ## The results are read from the `ytInitialData` JSON object embedded in
+  ## the search page. Raises `ValueError` when no such assignment is found
+  ## in the page; JSON parsing and the JSON lookups below may also raise.
+  let html = getYouTubePage(searchQuery)
+
+  # The page embeds its data on one line as an assignment to ytInitialData.
+  let candidates = html.split('\n').filterIt(it.contains("ytInitialData"))
+  if candidates.len == 0:
+    raise newException(ValueError,
+      "ytInitialData not found in the YouTube search page")
+
+  # Everything after the first '=' is the JSON payload, ending in ';'.
+  let parts = candidates[0].split('=', maxsplit=1)
+  if parts.len < 2:
+    raise newException(ValueError,
+      "ytInitialData line in the YouTube search page has no assignment")
+  let jsonData = parseJson(parts[1].strip().strip(chars={';'}))
+
+  let videos = jsonData["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"]
+
+  for video in videos:
+    # getStr() yields the decoded text. `$` would yield the JSON literal,
+    # i.e. surrounded by quotes and with inner quotes/backslashes escaped.
+    if video.hasKey("videoRenderer"):
+      let renderer = video["videoRenderer"]
+      let title = renderer["title"]["runs"][0]["text"].getStr()
+      let videoId = renderer["videoId"].getStr()
+      result.add((title: title, url: &"https://www.youtube.com/watch?v={videoId}"))
+
+    elif video.hasKey("playlistRenderer"):
+      let renderer = video["playlistRenderer"]
+      let title = renderer["title"]["simpleText"].getStr()
+      let playlistId = renderer["playlistId"].getStr()
+      result.add((title: title, url: &"https://www.youtube.com/playlist?list={playlistId}"))