You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
2.2 KiB
77 lines
2.2 KiB
<?php
|
|
|
|
namespace Tests\Unit;
|
|
|
|
use App\Models\CrawlSource;
|
|
use App\Services\Crawl\Adapters\ArxivApiAdapter;
|
|
use App\Services\Crawl\ArxivAbsEnricher;
|
|
use App\Services\Crawl\ArxivRequestGate;
|
|
use Illuminate\Support\Facades\Http;
|
|
use Tests\TestCase;
|
|
|
|
/**
 * Exercises ArxivApiAdapter::fetch() against faked HTTP responses.
 *
 * Both arXiv hosts used by this test are registered with Http::fake(), and
 * the abstract enricher is replaced by a pass-through stub, so the
 * assertions only observe what the adapter itself extracts.
 */
class ArxivApiAdapterTest extends TestCase
{
    /**
     * Papers are taken from the category listing page when the export API
     * response carries no usable entries.
     *
     * The export.arxiv.org fake answers with the plain-text body
     * "Rate exceeded." (HTTP 200), while arxiv.org/list/cs/new serves the
     * two-entry fixture from sampleListHtml(). The test expects both fixture
     * entries back, in document order, with the first one tagged as coming
     * from "list_html".
     */
    public function test_fetches_papers_from_category_list_html_when_api_empty(): void
    {
        Http::fake([
            'export.arxiv.org/*' => Http::response('Rate exceeded.', 200),
            'arxiv.org/list/cs/new*' => Http::response($this->sampleListHtml(), 200),
        ]);

        // A stub rather than a mock: no call expectations are verified here,
        // the enricher simply hands the items back unchanged.
        $enricher = $this->createStub(ArxivAbsEnricher::class);
        $enricher->method('enrichMany')->willReturnCallback(fn (array $items) => $items);

        $adapter = new ArxivApiAdapter(
            // NOTE(review): 0 presumably disables the gate's delay between
            // requests — confirm against ArxivRequestGate.
            new ArxivRequestGate(0),
            $enricher,
        );
        $source = new CrawlSource([
            'adapter_code' => 'arxiv_api',
            'target_type' => 'paper',
        ]);

        $items = $adapter->fetch('https://arxiv.org/list/cs/new', $source, [
            'max_results' => 5,
            'max_pages' => 1,
            'skip_imported' => false,
        ]);

        $this->assertCount(2, $items);

        $this->assertSame('arxiv:2606.23690', $items[0]->externalId);
        $this->assertSame('Sample CS Paper One', $items[0]->title);
        $this->assertSame('list_html', $items[0]->extra['source']);

        // Also pin the second fixture entry so a parser that repeats or
        // garbles later entries cannot pass on the count alone.
        $this->assertSame('arxiv:2606.23691', $items[1]->externalId);
        $this->assertSame('Sample CS Paper Two', $items[1]->title);
    }

    /**
     * Minimal listing-page fixture: a <dl> holding two <dt>/<dd> pairs
     * (ids 2606.23690 and 2606.23691), each with a title block, a single
     * author link and an abstract paragraph.
     *
     * The closing nowdoc marker is indented, so PHP strips that indentation
     * and the returned markup has no leading whitespace on any line.
     */
    protected function sampleListHtml(): string
    {
        return <<<'HTML'
        <dl>
        <dt>
        <a href ="/abs/2606.23690" title="Abstract" id="2606.23690">arXiv:2606.23690</a>
        </dt>
        <dd>
        <div class='meta'>
        <div class='list-title mathjax'><span class='descriptor'>Title:</span>
        Sample CS Paper One
        </div>
        <div class='list-authors'><a href="#">Alice Author</a></div>
        <p class='mathjax'>Abstract one.</p>
        </div>
        </dd>
        <dt>
        <a href ="/abs/2606.23691" title="Abstract" id="2606.23691">arXiv:2606.23691</a>
        </dt>
        <dd>
        <div class='meta'>
        <div class='list-title mathjax'><span class='descriptor'>Title:</span>
        Sample CS Paper Two
        </div>
        <div class='list-authors'><a href="#">Bob Author</a></div>
        <p class='mathjax'>Abstract two.</p>
        </div>
        </dd>
        </dl>
        HTML;
    }
}
|