[IMP] php: Process queue for artists, scraping for albums

refactor_total
Brett Spaulding 1 year ago
parent 0641818c1e
commit 4e35c6acae

@ -19,3 +19,4 @@ yarn-error.log
/.idea
/.vscode
public/images/artist
public/images/album

@ -4,6 +4,7 @@ namespace App\Console\Commands;
use App\Models\ArtistQueue;
use Illuminate\Console\Command;
use Symfony\Component\Console\Helper\ProgressBar;
class ProcessArtistQueue extends Command
{
@ -26,9 +27,12 @@ class ProcessArtistQueue extends Command
*/
public function handle()
{
    // Only queue entries that are still waiting ("pending") are processed.
    // A preceding lookup for "in_progress" entries was overwritten before
    // it was ever used, so it only cost an extra database query.
    $records = ArtistQueue::where('state', 'pending')->get();

    // Report progress on the command's output, one step per queue entry.
    $bar = new ProgressBar($this->output, count($records));
    $bar->start();

    foreach ($records as $record) {
        $record->process_artist();
        $bar->advance();
    }

    // Complete the bar so the final state is rendered once the loop is done.
    $bar->finish();
}
}

@ -4,8 +4,51 @@ namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Mockery\Exception;
class Album extends Model
{
    use HasFactory;

    /**
     * Set the album's state and persist the record.
     *
     * @param string $state One of "pending", "in_progress" or "done".
     *
     * @throws Exception When $state is not one of the allowed values.
     *                   NOTE(review): the import visible in this file is
     *                   Mockery\Exception, which belongs to a mocking library;
     *                   confirm it is installed outside of the test environment.
     */
    public function change_state(string $state): void
    {
        $available_states = ['pending', 'in_progress', 'done'];
        if (!in_array($state, $available_states, true)) {
            throw new Exception('Invalid state');
        }

        $this->state = $state;
        $this->save();
    }

    /**
     * Look up albums whose name equals the given value.
     *
     * @param mixed $name Name to match exactly.
     *
     * @return \Illuminate\Database\Eloquent\Collection Matching albums (possibly empty).
     */
    public static function findByName($name)
    {
        return self::where('name', '=', $name)->get();
    }

    /**
     * Look up albums by primary key.
     *
     * @param mixed $id Value compared against the `id` column.
     *
     * @return \Illuminate\Database\Eloquent\Collection Matching albums (possibly empty).
     */
    public static function findById($id)
    {
        return self::where('id', '=', $id)->get();
    }

    /**
     * Create and save a new album record.
     *
     * @param string $name       Album name.
     * @param string $thumbnail  Value stored in the `thumbnail` attribute.
     * @param string $url_remote Value stored in the `url_remote` attribute.
     * @param string $image      Value stored in the `image` attribute.
     * @param mixed  $artist_id  Value stored in the `artist_id` attribute.
     *
     * @return self The saved album.
     */
    public static function addAlbum(string $name, string $thumbnail, string $url_remote, string $image, $artist_id): self
    {
        $album = new Album();
        $album->name = $name;
        $album->artist_id = $artist_id;
        $album->url_remote = $url_remote;
        $album->thumbnail = $thumbnail;
        $album->image = $image;
        $album->save();

        return $album;
    }

    /**
     * Return the first album with the given name, creating one from $data
     * when no such album exists and $data is not empty.
     *
     * @param string $name Name used for the lookup.
     * @param array  $data Attributes for the new album; the keys `name`,
     *                     `thumbnail`, `url_remote`, `image` and `artist_id`
     *                     are read. Note that the new record is named after
     *                     $data['name'], not $name.
     *
     * @return self|null The existing or newly created album, or null when
     *                   nothing was found and $data is empty.
     */
    public static function findOrCreateByName(string $name, array $data = []): ?self
    {
        $album = self::findByName($name)->first();
        if (!$album && $data) {
            $album = self::addAlbum(
                $data['name'],
                $data['thumbnail'],
                $data['url_remote'],
                $data['image'],
                $data['artist_id'],
            );
        }

        return $album;
    }
}

@ -9,12 +9,19 @@ class AlbumQueue extends Model
{
use HasFactory;
public function enqueue()
public function enqueue(int $id): bool
{
    // Add albums to queue for download.
    //
    // Returns true when the album exists, is in the "pending" state and has
    // been attached to this queue entry; false otherwise.
    $album = Album::findById($id)->first();

    // first() yields null for an unknown id. Check for that explicitly
    // instead of calling a method on the (possibly missing) record.
    if ($album === null || $album->state !== 'pending') {
        return false;
    }

    $this->album_id = $album->id;
    $this->save();

    return true;
}
public function process_queue()
public function process_album()
{
// Placeholder: no processing is implemented yet.
// Open design question: either Python pings Laravel to process the queue,
// or Laravel sends the data to Python for processing.
}

@ -29,9 +29,9 @@ class ArtistQueue extends Model
{
// Scrape the artist page for image, and album data (image, url, name)
$driver = WebDriver::setUp();
$artist_id = Artist::where('id', $this->artist_id)->get();
$artist_id = Artist::where('id', $this->artist_id)->get()->first();
if ($artist_id->count() > 0) {
$response = WebScraper::scrapeAlbums($driver, $artist_id);
} else {
throw new Exception('The Artist ID provided to the queue does not exist.');
}

@ -25,7 +25,7 @@ class WebScraper
// Resize image and save to file, provide path to data
$imageUrl = ImageUrl::modifyGoogleImageUrl($artistThumbnail);
$imageFileUrl = ImageUrl::save_img_url($imageUrl);
$imageFileUrl = ImageUrl::save_img_url($imageUrl, 'artist');
$data = [
'name' => $artistName,
@ -76,7 +76,7 @@ class WebScraper
// Resize image and save to file, provide path to data
$imageUrl = ImageUrl::modifyGoogleImageUrl($artistThumbnail);
$imageFileUrl = ImageUrl::save_img_url($imageUrl);
$imageFileUrl = ImageUrl::save_img_url($imageUrl, 'artist');
// Create if we don't have it yet
$data = [
@ -100,4 +100,50 @@ class WebScraper
return $response;
}
/**
 * Scrape the album data from given artist page, create new album records and queue those records for download.
 *
 * @param mixed $driver    Web driver used to load the page and query its elements.
 * @param mixed $artist_id Artist record; its `url_remote` and `id` properties are read.
 *
 * @return array The response array. Nothing is added to it at present, so it is always empty.
 */
public static function scrapeAlbums($driver, $artist_id): array
{
    $url = 'https://music.youtube.com/' . $artist_id->url_remote;
    $driver->get($url);
    $response = [];

    // findElements() returns an empty list when nothing matches, whereas
    // findElement() throws. Using the plural form makes the "no Albums link"
    // case a regular early return instead of an exception.
    $albumBtns = $driver->findElements(WebDriverBy::xpath('//a[text()="Albums"]'));
    if (!$albumBtns) {
        return $response;
    }

    $albumBtns[0]->click();
    // NOTE(review): fixed pause, presumably to let the albums view render
    // after the click; confirm whether an explicit wait would be preferable.
    sleep(3);

    $itemsContainer = $driver->findElements(WebDriverBy::cssSelector('#items'));
    foreach ($itemsContainer as $item) {
        $albumContainers = $item->findElements(WebDriverBy::cssSelector('.ytmusic-grid-renderer'));
        foreach ($albumContainers as $albumContainer) {
            $albumLink = $albumContainer->findElement(WebDriverBy::cssSelector('a'));
            $albumHref = $albumLink->getAttribute('href');
            $albumTitle = $albumLink->getAttribute('title');
            $albumThumbnail = $albumLink->findElement(WebDriverBy::cssSelector('img'))->getAttribute('src');

            // Resize image and save to file, provide path to data
            $imageUrl = ImageUrl::modifyGoogleImageUrl($albumThumbnail);
            $imageFileUrl = ImageUrl::save_img_url($imageUrl, 'album');

            $data = [
                'name' => $albumTitle,
                'artist_id' => $artist_id->id,
                'thumbnail' => $albumThumbnail,
                'url_remote' => $albumHref,
                'image' => $imageFileUrl,
            ];

            // Reuse an album that already exists under this title, otherwise
            // create it, then hand it to the download queue.
            $album = Album::findOrCreateByName($albumTitle, $data);
            $album_queue = new AlbumQueue();
            $album_queue->enqueue($album->id);
        }
    }

    return $response;
}
}

@ -27,15 +27,17 @@ class ImageUrl
* Save an image from a Google Image URL to the local filesystem.
*
* @param string $url The modified URL of the image to download.
* @param string $type The public directory to save to, needs to be either 'artist' or 'album'.
* @return string The path to the saved image file, or empty string if the file already exists.
*/
public static function save_img_url(string $url): string
public static function save_img_url(string $url, string $type): string
{
// Get the filename from the URL
$filename = basename($url);
// Create a directory for the images (if it doesn't exist)
$imagesDir = public_path('images/artist');
$imagesDir = public_path('images/' . $type);
if (!is_dir($imagesDir)) {
mkdir($imagesDir, 0777, true);
}

@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds a nullable string column `thumbnail` to `albums`, placed after `name`.
     */
    public function up(): void
    {
        Schema::table('albums', function (Blueprint $table) {
            $table->string('thumbnail')->nullable()->after('name');
        });
    }

    /**
     * Reverse the migrations.
     *
     * Removes the `thumbnail` column again so a rollback restores the
     * previous table layout.
     */
    public function down(): void
    {
        Schema::table('albums', function (Blueprint $table) {
            $table->dropColumn('thumbnail');
        });
    }
};

@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Apply the migration.
     *
     * Redefines the existing `albums.artist_id` column as a nullable
     * foreign id constrained to the `artists` table.
     */
    public function up(): void
    {
        Schema::table('albums', function (Blueprint $albums) {
            $artistColumn = $albums->foreignId('artist_id');
            // change() marks this as a modification of the existing column.
            $artistColumn->change()->nullable();
            $artistColumn->constrained('artists');
        });
    }

    /**
     * Roll the migration back.
     *
     * The callback is empty, so rolling back does not undo the change.
     */
    public function down(): void
    {
        Schema::table('albums', function (Blueprint $albums) {
            //
        });
    }
};

@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration {
    /**
     * Run the migrations.
     *
     * Drops the foreign key on `albums.artist_id` and then the column itself.
     */
    public function up(): void
    {
        Schema::table('albums', function (Blueprint $table) {
            $table->dropForeign(['artist_id']);
            $table->dropColumn('artist_id');
        });
    }

    /**
     * Reverse the migrations.
     *
     * Re-creates `artist_id` as a foreign id constrained to `artists`.
     * The column is restored as nullable so that rows already present in
     * `albums` remain valid.
     * NOTE(review): confirm this matches the column's definition before it
     * was dropped.
     */
    public function down(): void
    {
        Schema::table('albums', function (Blueprint $table) {
            $table->foreignId('artist_id')->nullable()->constrained('artists');
        });
    }
};

@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds `albums.artist_id` as a foreign id constrained to `artists`,
     * with deletes cascading to the albums.
     */
    public function up(): void
    {
        Schema::table('albums', function (Blueprint $table) {
            $table->foreignId('artist_id')->constrained('artists')->onDelete('cascade');
        });
    }

    /**
     * Reverse the migrations.
     *
     * Drops the foreign key first and then the column, so a rollback
     * removes everything up() added.
     */
    public function down(): void
    {
        Schema::table('albums', function (Blueprint $table) {
            $table->dropForeign(['artist_id']);
            $table->dropColumn('artist_id');
        });
    }
};
Loading…
Cancel
Save