From ec39ea48be6a76d3b257d8630a03cc5bc5d78483 Mon Sep 17 00:00:00 2001 From: Brett Spaulding Date: Tue, 13 Aug 2024 11:53:54 -0400 Subject: [PATCH] [FIX] php: Handling for albums section if no link --- .../app/Http/Controllers/ApiController.php | 20 +++-- php/src/app/Models/ArtistQueue.php | 8 +- php/src/app/Models/WebScraper.php | 73 +++++++++++++------ php/src/public/js/app.js | 5 +- php/src/routes/console.php | 1 - python/app.py | 15 ++-- 6 files changed, 83 insertions(+), 39 deletions(-) diff --git a/php/src/app/Http/Controllers/ApiController.php b/php/src/app/Http/Controllers/ApiController.php index 12e8088..4c04b72 100644 --- a/php/src/app/Http/Controllers/ApiController.php +++ b/php/src/app/Http/Controllers/ApiController.php @@ -2,6 +2,7 @@ namespace App\Http\Controllers; +use Illuminate\Support\Facades\Artisan; use App\Models\AlbumQueue; use App\Models\Artist; use App\Models\WebDriver; @@ -56,6 +57,8 @@ class ApiController extends Controller public function queue_artist_run() { + \Log::info('==========================='); + \Log::info('Queue running for Artists..'); Artisan::queue('app:process-artist-queue'); } @@ -79,15 +82,18 @@ class ApiController extends Controller public function queue_waiting() { + \Log::info('==========================='); + \Log::info('Queue running for Albums..'); + $data = array('queue' => false); $queue = AlbumQueue::where('state', 'pending')->first(); - $album = $queue->album; - $artist = $album->artist; + if (!is_null($queue)) { + $album = $queue->album; + $artist = $album->artist; + $queue->state = 'in_progress'; + $queue->save(); + $data = array('queue' => $queue->toArray(), 'album' => $album->toArray(), 'artist' => $artist->toArray()); - \Log::info('======================'); - \Log::info('Queue running for album: ' . $album->name); - $queue->state = 'in_progress'; - $queue->save(); - $data = array('queue' => $queue->toArray(), 'album' => $album->toArray(), 'artist' => $artist->toArray()); + } return json_encode($data); } diff --git a/php/src/app/Models/ArtistQueue.php b/php/src/app/Models/ArtistQueue.php index 0a152e8..ca3f7cb 100644 --- a/php/src/app/Models/ArtistQueue.php +++ b/php/src/app/Models/ArtistQueue.php @@ -24,12 +24,17 @@ class ArtistQueue extends Model return $result; } + public function artist() + { + return $this->belongsTo(Artist::class); + } + public function process_artist() { // Scrape the artist page for image, and album data (image, url, name) $driver = WebDriver::setUp(); - $artist_id = Artist::where('id', $this->artist_id)->get()->first(); + $artist_id = $this->artist; if ($artist_id->count() > 0) { try { $album_count = WebScraper::scrapeAlbums($driver, $artist_id); @@ -38,6 +43,7 @@ class ArtistQueue extends Model } catch (Exception $e) { \Log::warning('Failed to scrape albums: ' . $e->getMessage()); } finally { + $artist_id->change_state('done'); $driver->quit(); } } else { diff --git a/php/src/app/Models/WebScraper.php b/php/src/app/Models/WebScraper.php index 6433c9a..5db760f 100644 --- a/php/src/app/Models/WebScraper.php +++ b/php/src/app/Models/WebScraper.php @@ -101,6 +101,29 @@ class WebScraper return $response; } + public static function processAlbums($albumContainer, $artist) + { + $albumLink = $albumContainer->findElement(WebDriverBy::cssSelector('a')); + $albumHref = $albumLink->getAttribute('href'); + $albumTitle = $albumLink->getAttribute('title'); + $albumThumbnail = $albumLink->findElement(WebDriverBy::cssSelector('img'))->getAttribute('src'); + + // Resize image and save to file, provide path to data + $imageUrl = ImageUrl::modifyGoogleImageUrl($albumThumbnail); + $imageFileUrl = ImageUrl::save_img_url($imageUrl, 'album'); + + $data = [ + 'name' => $albumTitle, + 'artist_id' => $artist->id, + 'thumbnail' => $albumThumbnail, + 'url_remote' => $albumHref, + 'image' => $imageFileUrl, + ]; + $album_id = Album::findOrCreateByName($artist, $albumTitle, $data); + $album_queue = new AlbumQueue(); + $album_queue->enqueue($album_id); + } + /** * Scrape the album data from given artist page, create new album records and queue those records for download * @@ -112,9 +135,11 @@ class WebScraper $driver->get($url); $response = 0; try { - $albumBtn = $driver->findElement(WebDriverBy::xpath('//a[text()="Albums"]')); + \Log::info('Looking for Albums button..'); + $albumBtn = $driver->findElements(WebDriverBy::xpath('//a[text()="Albums"]')); if ($albumBtn) { - $albumBtn->click(); + \Log::info('Clicking on located Albums button..'); + $albumBtn[0]->click(); sleep(3); $itemsContainer = $driver->findElements(WebDriverBy::cssSelector('#items')); foreach ($itemsContainer as $item) { @@ -122,29 +147,33 @@ class WebScraper if ($albumContainers) { foreach ($albumContainers as $albumContainer) { $response += 1; - $albumLink = $albumContainer->findElement(WebDriverBy::cssSelector('a')); - $albumHref = $albumLink->getAttribute('href'); - $albumTitle = $albumLink->getAttribute('title'); - $albumThumbnail = $albumLink->findElement(WebDriverBy::cssSelector('img'))->getAttribute('src'); - - // Resize image and save to file, provide path to data - $imageUrl = ImageUrl::modifyGoogleImageUrl($albumThumbnail); - $imageFileUrl = ImageUrl::save_img_url($imageUrl, 'album'); - - $data = [ - 'name' => $albumTitle, - 'artist_id' => $artist_id->id, - 'thumbnail' => $albumThumbnail, - 'url_remote' => $albumHref, - 'image' => $imageFileUrl, - ]; - $album_id = Album::findOrCreateByName($artist_id, $albumTitle, $data); - - $album_queue = new AlbumQueue(); - $album_queue->enqueue($album_id); + WebScraper::processAlbums($albumContainer, $artist_id); } } } + } else { + \Log::info('Could not locate Albums button'); + + $ytRows = $driver->findElements(WebDriverBy::cssSelector('ytmusic-carousel-shelf-renderer')); + foreach ($ytRows as $ytRow) { + $contentGroup = $ytRow->findElements(WebDriverBy::cssSelector('#content-group')); + foreach ($contentGroup as $group) { + $groupName = $group->getText(); + if ($groupName == 'Albums') { + $itemsContainer = $ytRow->findElements(WebDriverBy::cssSelector('#items')); + foreach ($itemsContainer as $item) { + $albumContainers = $item->findElements(WebDriverBy::cssSelector('ytmusic-two-row-item-renderer')); + if ($albumContainers) { + foreach ($albumContainers as $albumContainer) { + WebScraper::processAlbums($albumContainer, $artist_id); + } + } + } + } + } + + } + } } catch (\Exception $e) { \Log::warning('Failed to scrape albums: ---------'); diff --git a/php/src/public/js/app.js b/php/src/public/js/app.js index 094d6d2..37909ff 100644 --- a/php/src/public/js/app.js +++ b/php/src/public/js/app.js @@ -160,9 +160,10 @@ $(document).ready(function () { { data: 'id', orderable: false, render: (data, type, row) => { let stateDiable = row.state === 'in_progress' ? 'disabled' : ''; - let stateClass = row.state === 'in_progress' ? '' : 'btn-primary'; + let stateClass = row.state === 'done' ? 'btn-success' : 'btn-primary'; let artist_name = row.name; - return `` + let button_icon = row.state === 'done' ? '' : ''; + return `` } } ], diff --git a/php/src/routes/console.php b/php/src/routes/console.php index 45dc913..eb67c83 100644 --- a/php/src/routes/console.php +++ b/php/src/routes/console.php @@ -7,4 +7,3 @@ Artisan::command('inspire', function () { $this->comment(Inspiring::quote()); })->purpose('Display an inspiring quote')->hourly(); -Artisan::command('app:process-artist-queue')->everyMinute(); diff --git a/python/app.py b/python/app.py index 61fcaa2..2e235a5 100644 --- a/python/app.py +++ b/python/app.py @@ -13,25 +13,28 @@ app = Flask(__name__) redis = Redis(host='redis', port=6379) -# def process_artist_queue(): -# requests.get('http://nginx/api/queue/artists/run') -# return +def process_artist_queue(): + print('Running Artist Queue process..') + print('---') + requests.get('http://nginx/api/queue/artists/run') + return def process_album_queue(): print('Running Album Queue Process..') print('---') response = requests.get('http://nginx/api/album/queue') data = response.json() - artist = data.get('artist') - album = data.get('album') + artist = data.get('artist', False) + album = data.get('album', False) queue = data.get('queue') - if artist and album and queue: + if not queue == False and artist and album: result = download_album(album, artist) requests.post('http://nginx/api/album/queue/update/%s' % queue.get('id'), json=result) return cron = BackgroundScheduler({'apscheduler.job_defaults.max_instances': 1}, daemon=True) cron.add_job(process_album_queue, 'interval', minutes=1) +cron.add_job(process_artist_queue, 'interval', minutes=1) cron.start() if __name__ == "__main__":