|
|
|
|
@ -7,6 +7,7 @@ use App\Services\Crawl\Contracts\CrawlerAdapterInterface;
|
|
|
|
|
use App\Services\Crawl\CrawlAuthorParser;
|
|
|
|
|
use App\Services\Crawl\CrawlItemDto;
|
|
|
|
|
use App\Services\Crawl\CrawlKeywordParser;
|
|
|
|
|
use Illuminate\Http\Client\Response;
|
|
|
|
|
use Illuminate\Support\Facades\Http;
|
|
|
|
|
use Illuminate\Support\Str;
|
|
|
|
|
|
|
|
|
|
@ -83,15 +84,16 @@ class FacultyListHtmlAdapter implements CrawlerAdapterInterface
|
|
|
|
|
foreach ($pending as $externalId => $item) {
|
|
|
|
|
$pool->as($externalId)
|
|
|
|
|
->timeout($timeout)
|
|
|
|
|
->connectTimeout(min(10, $timeout))
|
|
|
|
|
->retry(1, 500, fn ($exception) => $exception instanceof \Illuminate\Http\Client\ConnectionException, throw: false)
|
|
|
|
|
->withHeaders($headers)
|
|
|
|
|
->get($item->canonicalUrl);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
foreach ($pending as $externalId => $item) {
|
|
|
|
|
$response = $responses[$externalId] ?? null;
|
|
|
|
|
if ($response && $response->successful()) {
|
|
|
|
|
$body = (string) $response->body();
|
|
|
|
|
$body = $this->responseBodyFromPoolResult($responses[$externalId] ?? null);
|
|
|
|
|
if ($body !== null) {
|
|
|
|
|
$email = $this->extractEmailFromProfileHtml($body);
|
|
|
|
|
if ($email) {
|
|
|
|
|
$item = $this->applyEmailToItem($item, $email);
|
|
|
|
|
@ -105,6 +107,15 @@ class FacultyListHtmlAdapter implements CrawlerAdapterInterface
|
|
|
|
|
return $enriched;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Pull the body text out of a single pool result.
 *
 * @param mixed $result One entry from the pooled responses (the caller passes null when the entry is missing).
 * @return string|null The body cast to string when $result is a successful Response; null otherwise.
 */
protected function responseBodyFromPoolResult(mixed $result): ?string
{
    // Anything that is not a Response object has no body to read.
    if (! $result instanceof Response) {
        return null;
    }

    // Only responses reporting success contribute a body.
    return $result->successful() ? (string) $result->body() : null;
}
|
|
|
|
|
|
|
|
|
|
protected function itemHasEmail(CrawlItemDto $item): bool
|
|
|
|
|
{
|
|
|
|
|
$lead = $item->extra['lead_author'] ?? null;
|
|
|
|
|
|