|
|
|
@ -5,6 +5,7 @@ namespace App\Models;
|
|
|
|
use App\Utils\ImageUrl;
|
|
|
|
use App\Utils\ImageUrl;
|
|
|
|
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
|
|
|
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
|
|
|
use Facebook\WebDriver\WebDriverBy;
|
|
|
|
use Facebook\WebDriver\WebDriverBy;
|
|
|
|
|
|
|
|
use Facebook\WebDriver\WebDriverAction;
|
|
|
|
use Facebook\WebDriver\WebDriverExpectedCondition;
|
|
|
|
use Facebook\WebDriver\WebDriverExpectedCondition;
|
|
|
|
|
|
|
|
|
|
|
|
class WebScraper
|
|
|
|
class WebScraper
|
|
|
|
@ -101,6 +102,29 @@ class WebScraper
|
|
|
|
return $response;
|
|
|
|
return $response;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Turn one scraped album element into an album record and queue it.
 *
 * Reads the link target, title and thumbnail source from the `a` / `img`
 * elements located inside the given container, stores an image derived
 * from the thumbnail through ImageUrl, passes the collected fields to
 * Album::findOrCreateByName() and hands its result to AlbumQueue::addQueue().
 *
 * @param mixed $albumContainer Element exposing findElement(); it is searched for an `a` element.
 * @param mixed $artist         Object exposing an `id` property; also passed to Album::findOrCreateByName().
 *
 * @return mixed Whatever AlbumQueue::addQueue() returns.
 */
public static function processAlbums($albumContainer, $artist)
{
    $anchor = $albumContainer->findElement(WebDriverBy::cssSelector('a'));
    $remoteHref = $anchor->getAttribute('href');
    $title = $anchor->getAttribute('title');

    $thumbnailElement = $anchor->findElement(WebDriverBy::cssSelector('img'));
    $thumbnailSrc = $thumbnailElement->getAttribute('src');

    // Resize the image and save it to a file; the returned path goes into the record.
    // TODO: Check here if the image is a 'gif' and not a URL
    $storedImage = ImageUrl::save_img_url(
        ImageUrl::modifyGoogleImageUrl($thumbnailSrc),
        'album'
    );

    $album_id = Album::findOrCreateByName($artist, $title, [
        'name' => $title,
        'artist_id' => $artist->id,
        'thumbnail' => $thumbnailSrc,
        'url_remote' => $remoteHref,
        'image' => $storedImage,
    ]);

    return AlbumQueue::addQueue($album_id);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Scrape the album data from given artist page, create new album records and queue those records for download
|
|
|
|
* Scrape the album data from given artist page, create new album records and queue those records for download
|
|
|
|
*
|
|
|
|
*
|
|
|
|
@ -111,37 +135,57 @@ class WebScraper
|
|
|
|
$url = 'https://music.youtube.com/' . $artist_id->url_remote;
|
|
|
|
$url = 'https://music.youtube.com/' . $artist_id->url_remote;
|
|
|
|
$driver->get($url);
|
|
|
|
$driver->get($url);
|
|
|
|
$response = 0;
|
|
|
|
$response = 0;
|
|
|
|
|
|
|
|
sleep(3);
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
$albumBtn = $driver->findElement(WebDriverBy::xpath('//a[text()="Albums"]'));
|
|
|
|
$albumBtn = $driver->findElements(WebDriverBy::xpath('//a[text()="Albums"]'));
|
|
|
|
if ($albumBtn) {
|
|
|
|
if ($albumBtn) {
|
|
|
|
$albumBtn->click();
|
|
|
|
$albumBtn[0]->click();
|
|
|
|
sleep(3);
|
|
|
|
sleep(5);
|
|
|
|
$itemsContainer = $driver->findElements(WebDriverBy::cssSelector('#items'));
|
|
|
|
$itemsContainer = $driver->findElements(WebDriverBy::cssSelector('#items'));
|
|
|
|
foreach ($itemsContainer as $item) {
|
|
|
|
foreach ($itemsContainer as $item) {
|
|
|
|
$albumContainers = $item->findElements(WebDriverBy::cssSelector('.ytmusic-grid-renderer'));
|
|
|
|
$albumContainers = $item->findElements(WebDriverBy::cssSelector('.ytmusic-grid-renderer'));
|
|
|
|
if ($albumContainers) {
|
|
|
|
if ($albumContainers) {
|
|
|
|
foreach ($albumContainers as $albumContainer) {
|
|
|
|
foreach ($albumContainers as $albumContainer) {
|
|
|
|
$response += 1;
|
|
|
|
$response += 1;
|
|
|
|
$albumLink = $albumContainer->findElement(WebDriverBy::cssSelector('a'));
|
|
|
|
WebScraper::processAlbums($albumContainer, $artist_id);
|
|
|
|
$albumHref = $albumLink->getAttribute('href');
|
|
|
|
}
|
|
|
|
$albumTitle = $albumLink->getAttribute('title');
|
|
|
|
}
|
|
|
|
$albumThumbnail = $albumLink->findElement(WebDriverBy::cssSelector('img'))->getAttribute('src');
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
// Resize image and save to file, provide path to data
|
|
|
|
$ytRows = $driver->findElements(WebDriverBy::cssSelector('ytmusic-carousel-shelf-renderer'));
|
|
|
|
$imageUrl = ImageUrl::modifyGoogleImageUrl($albumThumbnail);
|
|
|
|
foreach ($ytRows as $ytRow) {
|
|
|
|
$imageFileUrl = ImageUrl::save_img_url($imageUrl, 'album');
|
|
|
|
$contentGroup = $ytRow->findElements(WebDriverBy::cssSelector('#content-group'));
|
|
|
|
|
|
|
|
foreach ($contentGroup as $group) {
|
|
|
|
$data = [
|
|
|
|
$groupName = $group->getText();
|
|
|
|
'name' => $albumTitle,
|
|
|
|
if ($groupName == 'Albums') {
|
|
|
|
'artist_id' => $artist_id->id,
|
|
|
|
// Sometimes we don't have the option to click the albums button to filter
|
|
|
|
'thumbnail' => $albumThumbnail,
|
|
|
|
// Yet, the albums are in a carousel and the images won't load unless they are in view
|
|
|
|
'url_remote' => $albumHref,
|
|
|
|
$caroselNextButton = $driver->findElements(WebDriverBy::cssSelector('#next-items-button'));
|
|
|
|
'image' => $imageFileUrl,
|
|
|
|
try {
|
|
|
|
];
|
|
|
|
if ($caroselNextButton) {
|
|
|
|
$album_id = Album::findOrCreateByName($artist_id, $albumTitle, $data);
|
|
|
|
// Youtube is smart enough to block this without an action
|
|
|
|
|
|
|
|
for ($i = 0; $i <= 3; $i++) {
|
|
|
|
$album_queue = new AlbumQueue();
|
|
|
|
if ($caroselNextButton[0]->isEnabled()) {
|
|
|
|
$album_queue->enqueue($album_id);
|
|
|
|
$action = $driver->action();
|
|
|
|
|
|
|
|
$action->moveToElement($caroselNextButton[0])->click()->perform();
|
|
|
|
|
|
|
|
sleep(5);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
sleep(2);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (\Exception $e) {
|
|
|
|
|
|
|
|
\Log::info($e);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$itemsContainer = $ytRow->findElements(WebDriverBy::cssSelector('#items'));
|
|
|
|
|
|
|
|
foreach ($itemsContainer as $item) {
|
|
|
|
|
|
|
|
$albumContainers = $item->findElements(WebDriverBy::cssSelector('ytmusic-two-row-item-renderer'));
|
|
|
|
|
|
|
|
if ($albumContainers) {
|
|
|
|
|
|
|
|
foreach ($albumContainers as $albumContainer) {
|
|
|
|
|
|
|
|
WebScraper::processAlbums($albumContainer, $artist_id);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|