From 2dfe5429047a3ba8bacc5410c6b913e655385f9e Mon Sep 17 00:00:00 2001 From: Brett Spaulding Date: Mon, 5 Aug 2024 22:02:23 -0400 Subject: [PATCH] [WIP] Artist scraping working --- .../app/Http/Controllers/SearchController.php | 61 ++++++++++++++++--- ...9_add_artist_fields_thumbnail_selected.php | 29 +++++++++ php/src/public/js/app.js | 8 +-- 3 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 php/src/database/migrations/2024_08_06_015329_add_artist_fields_thumbnail_selected.php diff --git a/php/src/app/Http/Controllers/SearchController.php b/php/src/app/Http/Controllers/SearchController.php index 32f530d..d6fd948 100644 --- a/php/src/app/Http/Controllers/SearchController.php +++ b/php/src/app/Http/Controllers/SearchController.php @@ -2,6 +2,7 @@ namespace App\Http\Controllers; +use Facebook\WebDriver\WebDriverExpectedCondition; use Illuminate\Http\Request; use Facebook\WebDriver\Remote\DesiredCapabilities; @@ -12,10 +13,12 @@ use Facebook\WebDriver\WebDriverBy; class SearchController extends Controller { - protected function setUp() { + protected function setUp() + { $host = 'http://selenium-hub:4444'; $capabilities = DesiredCapabilities::chrome(); $chromeOptions = new ChromeOptions(); + // TODO: Add '--headless' back in to arguments $chromeOptions->addArguments(['--no-sandbox', '--disable-dev-shm-usage']); $capabilities->setCapability(ChromeOptions::CAPABILITY_W3C, $chromeOptions); $driver = RemoteWebDriver::create($host, $capabilities); @@ -25,8 +28,6 @@ class SearchController extends Controller public function search_artist(Request $request, string $artist) { - \Log::info($artist); - \Log::info($request); \Log::info('Getting Artist: ' . $artist); // $url = 'https://example.com'; $url = 'https://music.youtube.com/search?q=' . str_replace(' ', '+', $artist); @@ -36,12 +37,58 @@ class SearchController extends Controller // the URL to the local Selenium Server $driver = $this->setUp(); $driver->get($url); - $html = $driver->getPageSource(); - \Log::info($html); - \Log::info('========================================='); + // Click the artist button to force a "structure" of results + $artistBtnXpath = '//a[@title="Show artist results"]'; + $driver->wait(10, 500)->until( + WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::xpath($artistBtnXpath)) + ); + $driver->findElement(WebDriverBy::xpath($artistBtnXpath))->click(); + // Youtube has multiple elements with the same ID (Naughty!). We will give a reasonable analog time to render. + sleep(5); - $driver->quit(); + $contentDivs = $driver->findElements(WebDriverBy::cssSelector('#contents')); + $divCount = 0; + foreach ($contentDivs as $content) { + $divCount += 1; + + $artists = $content->findElements(WebDriverBy::xpath('//ytmusic-responsive-list-item-renderer')); + + if ($artists) { + $resultCap = 6; + $resultIndex = 0; + foreach ($artists as $artist) { + // There are a bunch of elements with no text in them; just a quick and dirty filter + $hasText = $artist->getText(); + if ($hasText) { + $resultIndex += 1; + \Log::info('==================================================================================================================================='); + \Log::info('==================================================================================================================================='); +// \Log::info($artist->getDomProperty('innerHTML')); + + // Artist Data Targeting + $artistThumbnail = $artist->findElement(WebDriverBy::cssSelector('img'))->getAttribute('src'); + $artistLink = $artist->findElements(WebDriverBy::cssSelector('a')); + $artistHref = $artistLink[0]->getAttribute('href'); + $artistName = $artistLink[0]->getAttribute('aria-label'); + + \Log::info($artistName . ': ' . $artistHref); + \Log::info($artistThumbnail); + if($resultCap <= $resultIndex) { + break; + } + + } + } + + if ($divCount === 1) { + break; + } + + } + } + + $driver->quit(); } } diff --git a/php/src/database/migrations/2024_08_06_015329_add_artist_fields_thumbnail_selected.php b/php/src/database/migrations/2024_08_06_015329_add_artist_fields_thumbnail_selected.php new file mode 100644 index 0000000..6043c5b --- /dev/null +++ b/php/src/database/migrations/2024_08_06_015329_add_artist_fields_thumbnail_selected.php @@ -0,0 +1,29 @@ +string('thumbnail')->nullable()->after('name'); + $table->boolean('selected')->default(false)->after('thumbnail'); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('artists', function (Blueprint $table) { + // + }); + } +}; diff --git a/php/src/public/js/app.js b/php/src/public/js/app.js index 8d06134..f8d72d6 100644 --- a/php/src/public/js/app.js +++ b/php/src/public/js/app.js @@ -19,15 +19,11 @@ $('#queue_btn').on('click', () => { }) $('#download_btn').on('click', () => { - console.log('Blocking UI'); loader.fadeIn(300); let artist = $('#search_bar').val(); - // Prevent - $('#search_bar').val(''); let icon = 'error'; let title = 'What the flip?!'; - let text = 'You need to add an artist bro..'; setTimeout(() => { if (artist) { @@ -40,7 +36,7 @@ $('#download_btn').on('click', () => { console.log('==========='); icon = 'success'; title = 'Shazam!'; - proc_notification(icon, title, text); + proc_notification(icon, title, 'Artist found'); loader.fadeOut(700); }, error: (response) => { @@ -53,7 +49,7 @@ $('#download_btn').on('click', () => { }); } else { - proc_notification(icon, title, text); + proc_notification(icon, title, 'You need to add an artist, c\'mon man!'); loader.fadeOut(700); } }, 100);