Compare commits

...

6 Commits

Author SHA1 Message Date
Brett Spaulding 3c11270d4e [FIX] php: YT CDN to local files
1 year ago
Brett Spaulding 5de3803c55 [FIX] php: Another edge case for album info
1 year ago
Brett Spaulding 1eba36d32b [IMP] docker-compose.yml: Back off selenium
1 year ago
Brett Spaulding 2504740bd1 [FIX] Seems like the sleep needed to be increased
1 year ago
Brett Spaulding dce3b47372 [IMP] Extra handling for missing link case
1 year ago
Brett Spaulding ec39ea48be [FIX] php: Handling for albums section if no link
1 year ago

@ -132,7 +132,7 @@ services:
chrome:
image: selenium/node-chrome:nightly
shm_size: 8gb
shm_size: 4gb
networks:
- laravel
depends_on:
@ -141,8 +141,8 @@ services:
- SE_EVENT_BUS_HOST=selenium-hub
- SE_EVENT_BUS_PUBLISH_PORT=4442
- SE_EVENT_BUS_SUBSCRIBE_PORT=4443
- SE_NODE_MAX_SESSIONS=15
- SE_NODE_MAX_SESSION=15
- SE_NODE_MAX_SESSIONS=2
- SE_NODE_MAX_SESSION=2
# edge:
# image: selenium/node-edge:nightly
@ -177,7 +177,7 @@ services:
networks:
- laravel
environment:
JAVA_OPTS: "-Xmx8g -Xms2g"
JAVA_OPTS: "-Xmx4g -Xms2g"
container_name: selenium-hub
ports:
- "4442:4442"

@ -28,10 +28,10 @@ class ProcessArtistQueue extends Command
public function handle()
{
// This queue will prompt the scraping of all artist albums, mark done when complete
$artists = ArtistQueue::where('state', 'pending')->get();
$bar = new ProgressBar($this->output, count($artists));
$artist_queue = ArtistQueue::where('state', 'pending')->get();
$bar = new ProgressBar($this->output, count($artist_queue));
$bar->start();
foreach ($artists as $artist) {
foreach ($artist_queue as $artist) {
$artist->state = 'in_progress';
$artist->save();
$artist->process_artist();

@ -2,6 +2,7 @@
namespace App\Http\Controllers;
use App\Jobs\RunArtistQueue;
use App\Models\AlbumQueue;
use App\Models\Artist;
use App\Models\WebDriver;
@ -23,7 +24,7 @@ class ApiController extends Controller
'name' => $artist->name,
'url_remote' => $artist->url_remote,
'state' => $artist->state,
'thumbnail' => $artist->thumbnail,
'thumbnail' => str_replace('/var/www/html/public', '', $artist->image),
];
}
$response = json_encode(array('data' => $data));
@ -37,15 +38,17 @@ class ApiController extends Controller
foreach ($album_queue as $queue) {
$album = $queue->album;
$artist = $album->artist;
if ($album && $artist) {
$response[] = [
'name' => $album->name,
'artist_id' => $artist->toArray(),
'url_remote' => $album->url_remote,
'thumbnail' => $album->thumbnail,
'image' => $album->image,
'thumbnail' => str_replace('/var/www/html/public', '', $album->image),
'image' => str_replace('/var/www/html/public', '', $album->image),
'state' => $queue->state,
];
}
}
return json_encode($response);
}
@ -56,7 +59,7 @@ class ApiController extends Controller
/**
 * Kick off processing of the pending artist queue.
 *
 * Queues the `app:process-artist-queue` Artisan command and then calls
 * ArtistQueue::run_queue() directly. Nothing is returned to the caller.
 *
 * NOTE(review): these two calls look like the before/after sides of a single
 * change — confirm whether both are really meant to run.
 */
public function queue_artist_run()
{
Artisan::queue('app:process-artist-queue');
ArtistQueue::run_queue();
}
public function search_artist(string $artist)
@ -79,15 +82,16 @@ class ApiController extends Controller
/**
 * Claim the next pending album-queue entry and return it as JSON.
 *
 * The first AlbumQueue row in state `pending` is switched to `in_progress`
 * and returned together with its album and that album's artist. When nothing
 * is pending the payload is `{"queue": false}`.
 *
 * A queue row whose album (or the album's artist) no longer exists is still
 * claimed, but `album` / `artist` are returned as null. Previously such a row
 * caused a null dereference before it could be claimed, so it stayed
 * `pending` and was picked again on every call.
 *
 * @return string|false JSON with `queue` and, when an entry was claimed, `album` and `artist`.
 */
public function queue_waiting()
{
    $queue = AlbumQueue::where('state', 'pending')->first();
    if (is_null($queue)) {
        return json_encode(array('queue' => false));
    }

    // Either relation resolves to null when the related row has been removed.
    $album = $queue->album;
    $artist = $album?->artist;

    \Log::info('======================');
    \Log::info('Queue running for album: ' . ($album?->name ?? 'unknown (album record missing)'));

    $queue->state = 'in_progress';
    $queue->save();

    return json_encode(array(
        'queue' => $queue->toArray(),
        'album' => $album?->toArray(),
        'artist' => $artist?->toArray(),
    ));
}

@ -21,6 +21,11 @@ class Album extends Model
$this->save();
}
/**
 * Return this album's `image` value with every occurrence of the
 * `/var/www/html` segment removed.
 */
public function getAlbumImageLocation()
{
    $serverRoot = '/var/www/html';
    $storedPath = $this->image;

    return str_replace($serverRoot, '', $storedPath);
}
public static function findByArtistTitle(Artist $artist, string $name)
{
return self::where('name', '=', $name)->where('artist_id', '=', $artist->id)->first();

@ -9,18 +9,25 @@ class AlbumQueue extends Model
{
use HasFactory;
/**
 * Create a queue row for an album unless one already exists.
 *
 * Looks up an AlbumQueue row by the album's id; when none is found, this
 * instance is pointed at that album and saved.
 *
 * NOTE(review): two signatures and two versions of the lookup/condition are
 * visible here (one of them additionally requires the album's state to be
 * 'pending'). This looks like both sides of a diff — confirm which is current.
 *
 * @return bool true when a new queue row was saved, false otherwise.
 */
public function enqueue($album_id): bool
public function enqueue($album): bool
{
$result = false;
$album_queued = AlbumQueue::where('album_id', $album_id->id)->first();
if (is_null($album_queued) && $album_id->state === 'pending') {
$this->album_id = $album_id->id;
$album_queued = AlbumQueue::where('album_id', $album->id)->first();
if (is_null($album_queued)) {
$this->album_id = $album->id;
$this->save();
$result = true;
}
return $result;
}
/**
 * Queue an album for download through a fresh AlbumQueue record.
 *
 * @param mixed $album_id Despite the name this is handed straight to enqueue(),
 *                        which reads `->id` from it, so an album object is
 *                        expected (presumably App\Models\Album — confirm
 *                        against callers).
 * @return bool Result of enqueue(): true when a new queue row was saved,
 *              false when nothing was added.
 */
public static function addQueue($album_id): bool
{
    // Report what enqueue() actually did instead of unconditionally claiming success.
    return (new AlbumQueue())->enqueue($album_id);
}
public function album()
{
return $this->belongsTo(Album::class);

@ -30,6 +30,11 @@ class Artist extends Model
return self::where('id', '=', $id)->get();
}
/**
 * Return this artist's `image` value with every occurrence of the
 * `/var/www/html` segment removed.
 */
public function getArtistImageLocation()
{
    $serverRoot = '/var/www/html';
    $storedPath = $this->image;

    return str_replace($serverRoot, '', $storedPath);
}
public static function addArtist(string $name, string $thumbnail, string $url_remote, string $image)
{
$artist = new Artist();

@ -24,12 +24,17 @@ class ArtistQueue extends Model
return $result;
}
/**
 * Relationship: the Artist this queue entry belongs to.
 */
public function artist()
{
    $owner = $this->belongsTo(Artist::class);

    return $owner;
}
public function process_artist()
{
// Scrape the artist page for image, and album data (image, url, name)
$driver = WebDriver::setUp();
$artist_id = Artist::where('id', $this->artist_id)->get()->first();
$artist_id = $this->artist;
if ($artist_id->count() > 0) {
try {
$album_count = WebScraper::scrapeAlbums($driver, $artist_id);
@ -38,6 +43,7 @@ class ArtistQueue extends Model
} catch (Exception $e) {
\Log::warning('Failed to scrape albums: ' . $e->getMessage());
} finally {
$artist_id->change_state('done');
$driver->quit();
}
} else {
@ -45,5 +51,16 @@ class ArtistQueue extends Model
}
}
/**
 * Work through every pending artist-queue entry, one after another.
 *
 * Each entry is saved as `in_progress`, handed to process_artist(), and then
 * saved as `done`.
 */
public static function run_queue()
{
    // Small local helper: persist a state change on one queue entry.
    $mark = static function ($entry, string $state): void {
        $entry->state = $state;
        $entry->save();
    };

    foreach (ArtistQueue::where('state', 'pending')->get() as $entry) {
        $mark($entry, 'in_progress');
        $entry->process_artist();
        $mark($entry, 'done');
    }
}
}

@ -5,6 +5,7 @@ namespace App\Models;
use App\Utils\ImageUrl;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverAction;
use Facebook\WebDriver\WebDriverExpectedCondition;
class WebScraper
@ -101,6 +102,29 @@ class WebScraper
return $response;
}
/**
 * Create (or find) the album shown in one album tile and queue it for download.
 *
 * Reads the link (`href` / `title`) and the cover image `src` from the tile,
 * passes the cover URL through ImageUrl::modifyGoogleImageUrl() and
 * ImageUrl::save_img_url(), then resolves the album with
 * Album::findOrCreateByName() and hands it to AlbumQueue::addQueue().
 *
 * A tile without a link or without a cover image is skipped (returns false)
 * instead of raising a "no such element" error, so one odd tile does not
 * abort the remaining tiles of the scrape.
 *
 * @param mixed $albumContainer Web element wrapping a single album tile.
 * @param mixed $artist         Artist the album belongs to; `->id` is read here and the
 *                              object is passed on to Album::findOrCreateByName().
 * @return bool false when the tile was skipped, otherwise the result of AlbumQueue::addQueue().
 */
public static function processAlbums($albumContainer, $artist)
{
    // findElements() yields an empty array when nothing matches, whereas findElement() throws.
    $links = $albumContainer->findElements(WebDriverBy::cssSelector('a'));
    if (!$links) {
        return false;
    }
    $albumLink = $links[0];

    $covers = $albumLink->findElements(WebDriverBy::cssSelector('img'));
    if (!$covers) {
        return false;
    }

    $albumHref = $albumLink->getAttribute('href');
    $albumTitle = $albumLink->getAttribute('title');
    $albumThumbnail = $covers[0]->getAttribute('src');

    // Resize image and save to file, provide path to data
    $imageUrl = ImageUrl::modifyGoogleImageUrl($albumThumbnail);
    $imageFileUrl = ImageUrl::save_img_url($imageUrl, 'album');

    $data = [
        'name' => $albumTitle,
        'artist_id' => $artist->id,
        'thumbnail' => $albumThumbnail,
        'url_remote' => $albumHref, // TODO: Check here if the image is a 'gif' and not a URL
        'image' => $imageFileUrl,
    ];

    $album = Album::findOrCreateByName($artist, $albumTitle, $data);

    return AlbumQueue::addQueue($album);
}
/**
* Scrape the album data from given artist page, create new album records and queue those records for download
*
@ -111,37 +135,57 @@ class WebScraper
$url = 'https://music.youtube.com/' . $artist_id->url_remote;
$driver->get($url);
$response = 0;
sleep(3);
try {
$albumBtn = $driver->findElement(WebDriverBy::xpath('//a[text()="Albums"]'));
$albumBtn = $driver->findElements(WebDriverBy::xpath('//a[text()="Albums"]'));
if ($albumBtn) {
$albumBtn->click();
sleep(3);
$albumBtn[0]->click();
sleep(5);
$itemsContainer = $driver->findElements(WebDriverBy::cssSelector('#items'));
foreach ($itemsContainer as $item) {
$albumContainers = $item->findElements(WebDriverBy::cssSelector('.ytmusic-grid-renderer'));
if ($albumContainers) {
foreach ($albumContainers as $albumContainer) {
$response += 1;
$albumLink = $albumContainer->findElement(WebDriverBy::cssSelector('a'));
$albumHref = $albumLink->getAttribute('href');
$albumTitle = $albumLink->getAttribute('title');
$albumThumbnail = $albumLink->findElement(WebDriverBy::cssSelector('img'))->getAttribute('src');
// Resize image and save to file, provide path to data
$imageUrl = ImageUrl::modifyGoogleImageUrl($albumThumbnail);
$imageFileUrl = ImageUrl::save_img_url($imageUrl, 'album');
$data = [
'name' => $albumTitle,
'artist_id' => $artist_id->id,
'thumbnail' => $albumThumbnail,
'url_remote' => $albumHref,
'image' => $imageFileUrl,
];
$album_id = Album::findOrCreateByName($artist_id, $albumTitle, $data);
WebScraper::processAlbums($albumContainer, $artist_id);
}
}
}
} else {
$ytRows = $driver->findElements(WebDriverBy::cssSelector('ytmusic-carousel-shelf-renderer'));
foreach ($ytRows as $ytRow) {
$contentGroup = $ytRow->findElements(WebDriverBy::cssSelector('#content-group'));
foreach ($contentGroup as $group) {
$groupName = $group->getText();
if ($groupName == 'Albums') {
// Sometimes we don't have the option to click the albums button to filter
// Yet, the albums are in a carousel and the images won't load unless they are in view
$caroselNextButton = $driver->findElements(WebDriverBy::cssSelector('#next-items-button'));
try {
if ($caroselNextButton) {
// Youtube is smart enough to block this without an action
for ($i = 0; $i <= 3; $i++) {
if ($caroselNextButton[0]->isEnabled()) {
$action = $driver->action();
$action->moveToElement($caroselNextButton[0])->click()->perform();
sleep(5);
}
sleep(2);
}
}
} catch (\Exception $e) {
\Log::info($e);
}
$album_queue = new AlbumQueue();
$album_queue->enqueue($album_id);
$itemsContainer = $ytRow->findElements(WebDriverBy::cssSelector('#items'));
foreach ($itemsContainer as $item) {
$albumContainers = $item->findElements(WebDriverBy::cssSelector('ytmusic-two-row-item-renderer'));
if ($albumContainers) {
foreach ($albumContainers as $albumContainer) {
WebScraper::processAlbums($albumContainer, $artist_id);
}
}
}
}
}
}

@ -13,12 +13,14 @@ function requestQueue() {
}
function template_artist_result(element) {
let image_src = element.image.replace('/var/www/html/public', '');
console.log(image_src);
return `
<div class="card w-100 p-2 mb-2">
<div class="container-fluid">
<div class="row">
<div class="col-3">
<img src="${element.thumbnail}" width="72px" height="72px" style="border-radius: 12px;"/>
<img src="${image_src}" width="72px" height="72px" style="border-radius: 12px;"/>
</div>
<div class="col-9 m-auto">
<h4>${element.name}</h4>
@ -64,7 +66,7 @@ function artist_queue_toggle(element) {
url: `/api/queue/artist/${self.data('artist_id')}`,
success: () => {
proc_notification('success', 'Queued Download', `Artist ${artist_name} Queued for Download!`);
ArtistTable.ajax.reload();
// ArtistTable.ajax.reload();
},
error: (response) => {
console.log(response);
@ -88,39 +90,32 @@ function bind_action_buttons() {
});
$('#download_btn').on('click', () => {
loader.fadeIn(300);
let artist = $('#search_bar').val();
// Send request to server
setTimeout(() => {
if (artist) {
console.log('Sending search request...');
if (artist == '') {
return proc_notification('error', 'Whoopsie!', 'You need to add an artist, c\'mon man!');;
}
loader.fadeIn(300);
$.ajax({
url: `/artist/${artist}`,
success: (response) => {
console.log('Receiving response...');
console.log(response);
console.log('===========');
icon = 'success';
let html = construct_artist_result_html(response);
proc_notification(icon, 'Shazam!', html);
proc_notification('success', 'Shazam!', html);
ArtistTable.ajax.reload();
$('#search_bar').val('');
loader.fadeOut(700);
},
error: (response) => {
console.log('Receiving response...');
console.log(response);
console.log('===========');
proc_notification(icon, 'What the flip?!', response.statusText);
proc_notification('error', 'What the flip?!', response.statusText);
loader.fadeOut(700);
}
});
} else {
proc_notification(icon, 'Whoopsie!', 'You need to add an artist, c\'mon man!');
loader.fadeOut(700);
}
}, 10);
});
@ -160,9 +155,10 @@ $(document).ready(function () {
{
data: 'id', orderable: false, render: (data, type, row) => {
let stateDiable = row.state === 'in_progress' ? 'disabled' : '';
let stateClass = row.state === 'in_progress' ? '' : 'btn-primary';
let stateClass = row.state === 'done' ? 'btn-success' : 'btn-primary';
let artist_name = row.name;
return `<button class="btn ${stateClass}" style="float: right;" data-artist_name="${artist_name}" data-artist_id="${data}" onclick="artist_queue_toggle(this)" ${stateDiable}><i class="las la-cloud-download-alt"></i> Download</button>`
let button_icon = row.state === 'done' ? '<i class="las la-redo-alt"></i>' : '<i class="las la-cloud-download-alt"></i>';
return `<button class="btn ${stateClass}" style="float: right;" data-artist_name="${artist_name}" data-artist_id="${data}" onclick="artist_queue_toggle(this)" ${stateDiable}>${button_icon} Download</button>`
}
}
],

@ -17,7 +17,7 @@
</div>
</template>
<!-- Album Art -->
<img :src="album.thumbnail" class="img-fluid rounded-start"
<img :src="album.image" class="img-fluid rounded-start"
:alt="album.name" style="width: 100%; height: 100%; min-height: 180px;">
</div>

@ -13,7 +13,7 @@
<div id="modal_content">
<div class="card">
<table id="artistsCatalogDatatable">
<table id="artistsCatalogDatatable" class="stripe">
<thead>
<tr>
<th></th>

@ -7,4 +7,3 @@ Artisan::command('inspire', function () {
$this->comment(Inspiring::quote());
})->purpose('Display an inspiring quote')->hourly();
Artisan::command('app:process-artist-queue')->everyMinute();

@ -13,25 +13,28 @@ app = Flask(__name__)
redis = Redis(host='redis', port=6379)
# def process_artist_queue():
# requests.get('http://nginx/api/queue/artists/run')
# return
def process_artist_queue():
    """Print a two-line banner, then GET the artist-queue run endpoint.

    The HTTP response is ignored and the function always returns None.
    """
    for banner_line in ('Running Artist Queue process..', '---'):
        print(banner_line)
    requests.get('http://nginx/api/queue/artists/run')
def process_album_queue():
    """Fetch the next waiting album from the API, download it, and report back.

    GETs ``/api/album/queue``. When the payload carries a queue entry together
    with its album and artist, the album is passed to ``download_album`` and the
    result is POSTed to ``/api/album/queue/update/<queue id>``. In every other
    case (``queue`` is false/absent, or album/artist missing) nothing is
    downloaded. Always returns None.
    """
    # Bound both HTTP calls: without a timeout a stalled connection would leave
    # this job hanging indefinitely.
    timeout_seconds = 60

    print('Running Album Queue Process..')
    print('---')
    response = requests.get('http://nginx/api/album/queue', timeout=timeout_seconds)
    data = response.json()

    queue = data.get('queue')
    album = data.get('album')
    artist = data.get('artist')

    # Plain truthiness covers False, None and empty values alike, so queue.get()
    # below is only reached with a real queue entry.
    if queue and album and artist:
        result = download_album(album, artist)
        requests.post(
            'http://nginx/api/album/queue/update/%s' % queue.get('id'),
            json=result,
            timeout=timeout_seconds,
        )
cron = BackgroundScheduler({'apscheduler.job_defaults.max_instances': 1}, daemon=True)
cron.add_job(process_album_queue, 'interval', minutes=1)
cron.add_job(process_artist_queue, 'interval', minutes=1)
cron.start()
if __name__ == "__main__":

Loading…
Cancel
Save