[IMP] php: refactor webscraper, such organized.

refactor_total
Brett Spaulding 1 year ago
parent b57edf05f4
commit 0641818c1e

@ -0,0 +1,34 @@
<?php
namespace App\Console\Commands;
use App\Models\ArtistQueue;
use Illuminate\Console\Command;
class ProcessArtistQueue extends Command
{
    /**
     * Name and signature used to invoke this console command.
     *
     * @var string
     */
    protected $signature = 'app:process-artist-queue';

    /**
     * Description of the console command.
     *
     * @var string
     */
    protected $description = 'Runs queue responsible for scraping artist pages for album data, then queuing the albums';

    /**
     * Execute the console command.
     *
     * Loads every ArtistQueue row whose state is "in_progress" and walks the
     * result set. No per-record work is performed yet.
     */
    public function handle()
    {
        $inProgressQuery = ArtistQueue::where('state', 'in_progress');

        foreach ($inProgressQuery->get() as $queuedArtist) {
            // Placeholder: nothing is done with the queued record yet.
        }
    }
}

@ -3,7 +3,10 @@
namespace App\Http\Controllers;
use App\Models\Artist;
use App\Models\WebDriver;
use App\Models\WebScraper;
use Illuminate\Http\Request;
use App\Models\ArtistQueue;
class ApiController extends Controller
{
@ -22,13 +25,31 @@ class ApiController extends Controller
'thumbnail' => $artist->thumbnail,
];
}
$response = json_encode( array('data' => $data));
$response = json_encode(array('data' => $data));
return $response;
}
/**
 * Queue a single artist for scraping.
 *
 * @param mixed       $id          Artist identifier supplied by the caller (the {id} route segment).
 * @param ArtistQueue $artistQueue Queue model instance the enqueue is delegated to.
 * @return bool Whatever ArtistQueue::enqueue() reports for this artist.
 */
public function queue_artist($id, ArtistQueue $artistQueue): bool
{
    // Single call only: invoking enqueue() twice would attempt to queue the artist twice.
    return $artistQueue->enqueue($id);
}
/**
 * Search YouTube Music for an artist and return the scraped result as JSON.
 *
 * Tries the multi-artist result list first; if that scrape throws, falls
 * back to scraping the single artist card.
 *
 * @param string $artist Free-text artist name to search for.
 * @return \Illuminate\Http\JsonResponse
 */
public function search_artist(string $artist)
{
    // urlencode() still turns spaces into '+', but also escapes '&', '#', '+', etc.
    // that would otherwise truncate or alter the search query.
    $url = 'https://music.youtube.com/search?q=' . urlencode($artist);
    $driver = WebDriver::setUp();

    try {
        // Navigation sits inside the try so the session is quit even if the page load fails.
        $driver->get($url);

        // Add handling for no artist button; Some artists searches don't have this option (Ex The Black Dahlia Murder)
        try {
            $response = WebScraper::scrapeArtists($driver);
        } catch (\Exception) {
            \Log::warning('Could not get list of artists, attempting to get single artist card..');
            $response = WebScraper::scrapeArtist($driver);
        }
    } finally {
        $driver->quit();
    }

    return response()->json($response);
}
}

@ -4,16 +4,32 @@ namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Mockery\Exception;
class Artist extends Model
{
use HasFactory;
/**
 * Set the artist's state and persist the model.
 *
 * @param string $state One of "pending", "in_progress" or "done".
 * @throws \InvalidArgumentException If $state is not one of the allowed values.
 */
public function change_state(string $state)
{
    $available_states = ['pending', 'in_progress', 'done'];

    if (!in_array($state, $available_states, true)) {
        // Use an SPL exception here: the file-level `Exception` import resolves to
        // Mockery's class, which belongs to a mocking library rather than runtime code.
        throw new \InvalidArgumentException('Invalid state');
    }

    $this->state = $state;
    $this->save();
}
/**
 * Fetch the artists whose name equals the given value.
 *
 * @param mixed $name Name to match exactly.
 * @return mixed The result of running the query with get().
 */
public static function findByName($name)
{
    $matchingByName = self::where('name', '=', $name);

    return $matchingByName->get();
}
/**
 * Fetch the artists whose id equals the given value.
 *
 * @param mixed $id Identifier to match exactly.
 * @return mixed The result of running the query with get().
 */
public static function findById($id)
{
    $matchingById = self::where('id', '=', $id);

    return $matchingById->get();
}
public static function addArtist(string $name, string $thumbnail, string $url_remote, string $image)
{
$artist = new Artist();

@ -4,21 +4,37 @@ namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Mockery\Exception;
class ArtistQueue extends Model
{
use HasFactory;
/**
 * Queue an artist for processing and mark it as in progress.
 *
 * @param mixed $id Identifier of the artist to queue.
 * @return bool True when the artist was queued; false when no such artist
 *              exists or it is already in progress.
 */
public function enqueue($id): bool
{
    // first() yields null when nothing matched, so test for that directly
    // instead of calling count() on the (possibly null) model.
    $artist = Artist::findById($id)->first();

    // Artists that are 'done' can be run through the queue again. Prevent in progress, though.
    if ($artist === null || $artist->state === 'in_progress') {
        return false;
    }

    $this->artist_id = $artist->id;
    $this->save();
    $artist->change_state('in_progress');

    return true;
}
/**
 * Scrape the artist page for image, and album data (image, url, name).
 *
 * The scraping step itself is not implemented yet; this method currently
 * validates the queued artist and opens/closes a browser session.
 *
 * @throws \RuntimeException If artist_id does not match an existing Artist.
 */
public function process_artist()
{
    $artist = Artist::where('id', $this->artist_id)->first();
    if ($artist === null) {
        throw new \RuntimeException('The Artist ID provided to the queue does not exist.');
    }

    // Start the browser only after validation so an invalid row never spawns a session.
    $driver = WebDriver::setUp();
    try {
        // TODO: scrape the artist page using $driver.
    } finally {
        // Always release the browser session, even if scraping throws.
        $driver->quit();
    }
}

@ -20,4 +20,5 @@ class WebDriver extends Model
$driver->manage()->window()->maximize();
return $driver;
}
}

@ -1,35 +1,20 @@
<?php
namespace App\Http\Controllers;
namespace App\Models;
use App\Models\Artist;
use App\Models\WebDriver;
use App\Utils\ImageUrl;
use Facebook\WebDriver\WebDriverExpectedCondition;
use Illuminate\Http\Request;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\WebDriverBy;
use Illuminate\Support\Facades\App;
use Nette\Utils\Image;
use Facebook\WebDriver\WebDriverExpectedCondition;
class SearchController extends Controller
class WebScraper
{
/**
* The default Artist data to be returned from this controller.
*
* @return array<string, string>
*/
public $defaultArtistData = ['id', 'name', 'thumbnail', 'url_remote'];
/**
* Fallback scrape option for the youtube music search page; in some cases there are no additional artists available
*
* @return RemoteWebDriver
*/
protected function scrapeArtist($driver)
public static function scrapeArtist($driver)
{
$response = [];
$artistContainer = $driver->findElement(WebDriverBy::cssSelector('.main-card-content-container'));
@ -42,10 +27,6 @@ class SearchController extends Controller
$imageUrl = ImageUrl::modifyGoogleImageUrl($artistThumbnail);
$imageFileUrl = ImageUrl::save_img_url($imageUrl);
\Log::info('============================');
\Log::info($imageUrl);
\Log::info($imageFileUrl);
$data = [
'name' => $artistName,
'thumbnail' => $artistThumbnail,
@ -62,7 +43,7 @@ class SearchController extends Controller
*
* @return RemoteWebDriver
*/
protected function scrapeArtists($driver)
public static function scrapeArtists($driver)
{
$response = [];
// Click the artist button to force a "structure" of results
@ -97,10 +78,6 @@ class SearchController extends Controller
$imageUrl = ImageUrl::modifyGoogleImageUrl($artistThumbnail);
$imageFileUrl = ImageUrl::save_img_url($imageUrl);
\Log::info('============================');
\Log::info($imageUrl);
\Log::info($imageFileUrl);
// Create if we don't have it yet
$data = [
'name' => $artistName,
@ -123,23 +100,4 @@ class SearchController extends Controller
return $response;
}
public function search_artist(string $artist)
{
$url = 'https://music.youtube.com/search?q=' . str_replace(' ', '+', $artist);
$driver = WebDriver::setUp();
$driver->get($url);
// Add handling for no artist button; Some artists searches don't have this option (Ex The Black Dahlia Murder)
try {
$response = $this->scrapeArtists($driver);
} catch (\Exception) {
\Log::warning('Could not get list of artists, attempting to get single artist card..');
$response = $this->scrapeArtist($driver);
} finally {
$driver->quit();
}
return response()->json($response);
}
}

@ -20,7 +20,7 @@ function template_artist_result(element) {
}
function construct_artist_result_html(artist_list) {
let html = '<h3>Found Artist!</h3>';
let html = '<h3>Found Artist</h3>';
let index = 0;
if (artist_list.length > 1) {
artist_list.forEach((element) => {
@ -53,7 +53,7 @@ function artist_queue_toggle(element) {
let artist_name = self.data('artist_name');
self.prop('disabled', true)
$.ajax({
url: `/api/artist/queue/${self.data('artist_id')}`,
url: `/api/queue/artist/${self.data('artist_id')}`,
success: () => {
proc_notification('success', 'Queued Download', `Artist ${artist_name} Queued for Download!`);
},
@ -65,57 +65,56 @@ function artist_queue_toggle(element) {
})
}
/**
 * Attach the click handlers for the settings, catalog, queue and download buttons.
 *
 * The download button sends the search-bar value to `/artist/{name}` and shows
 * the outcome through proc_notification().
 */
function bind_action_buttons() {
    $('#settings_btn').on('click', () => {
        $('#modalSettings').modal('toggle');
    });

    $('#catalog_btn').on('click', () => {
        $('#modalCatalog').modal('toggle');
    });

    $('#queue_btn').on('click', () => {
        appModal.modal('toggle');
    });

    $('#download_btn').on('click', () => {
        loader.fadeIn(300);
        let artist = $('#search_bar').val();
        // Declared locally so the callbacks below never read or create an undeclared global.
        let icon = 'error';

        // Send request to server
        setTimeout(() => {
            if (artist) {
                console.log('Sending search request...');
                $.ajax({
                    // Encode the name so characters like "?", "#" or "&" stay inside the path segment.
                    url: `/artist/${encodeURIComponent(artist)}`,
                    success: (response) => {
                        console.log('Receiving response...');
                        console.log(response);
                        console.log('===========');
                        icon = 'success';
                        let html = construct_artist_result_html(response);
                        proc_notification(icon, 'Shazam!', html);
                        $('#search_bar').val('');
                        loader.fadeOut(700);
                    },
                    error: (response) => {
                        console.log('Receiving response...');
                        console.log(response);
                        console.log('===========');
                        proc_notification(icon, 'What the flip?!', response.statusText);
                        loader.fadeOut(700);
                    }
                });
            } else {
                proc_notification(icon, 'Whoopsie!', 'You need to add an artist, c\'mon man!');
                loader.fadeOut(700);
            }
        }, 10);
    });
}
document.addEventListener('alpine:init', () => {
console.log('Alpine:init');
@ -134,6 +133,9 @@ document.addEventListener('alpine:init', () => {
});
$(document).ready(function () {
bind_action_buttons();
//Datatable for 'Catalog' menu
let ArtistTable = $('#artistsCatalogDatatable').DataTable({
ajax: '/api/artists',
@ -163,8 +165,8 @@ $(document).ready(function () {
],
});
// Polling for table update
const getArtistTableInterval = setInterval(function() {
table.ajax.reload();
const getArtistTableInterval = setInterval(function () {
ArtistTable.ajax.reload();
}, 5000);
});

@ -1,14 +1,16 @@
<?php
use App\Http\Controllers\ApiController;
use App\Http\Controllers\SearchController;
use Illuminate\Support\Facades\Route;
// Main page
Route::get('/', function () {
    return view('pages.main');
});

// Search for an artist by name
Route::get('/artist/{artist}', [ApiController::class, 'search_artist'])->name('api.search.artist');

// Get all artists
Route::get('api/artists/', [ApiController::class, 'get_artists'])->name('api.artist');

// Queue single artist
Route::get('api/queue/artist/{id}', [ApiController::class, 'queue_artist'])->name('api.artist.queue');

Loading…
Cancel
Save