|
|
<?php
|
|
|
|
|
|
namespace Tests\Unit;
|
|
|
|
|
|
use App\Models\CrawlAddress;
|
|
|
use App\Models\CrawlSource;
|
|
|
use App\Services\Crawl\CrawlSourceResolver;
|
|
|
use Illuminate\Foundation\Testing\RefreshDatabase;
|
|
|
use Tests\TestCase;
|
|
|
|
|
|
/**
 * Exercises CrawlSourceResolver::resolve() against CrawlSource rows stored in
 * the test database.
 *
 * Every scenario seeds two enabled "teacher" sources: a generic HTML adapter
 * whose match_domains is ['*'], and a dedicated adapter whose match_domains is
 * ['ai.sjtu.edu.cn']. The tests then check which one the resolver returns for
 * the AI center URL.
 */
class CrawlSourceResolverTest extends TestCase
{
    use RefreshDatabase;

    /** URL passed to the resolver (and stored on the fixtures) in every test. */
    private const AI_CENTER_URL = 'https://ai.sjtu.edu.cn/center';

    /** Target type shared by all fixtures and resolver calls. */
    private const TARGET_TYPE = 'teacher';

    /** Adapter code of the dedicated source the resolver is expected to pick. */
    private const AI_ADAPTER_CODE = 'ai_sjtu_research_center_api';

    /**
     * With only the two sources present (no CrawlAddress rows), resolving the
     * AI center URL must yield the dedicated research-center adapter.
     */
    public function test_resolves_ai_sjtu_center_to_research_center_api(): void
    {
        $this->createWildcardSource();
        $this->createAiCenterSource();

        $source = app(CrawlSourceResolver::class)->resolve(self::AI_CENTER_URL, self::TARGET_TYPE);

        $this->assertNotNull($source);
        $this->assertSame(self::AI_ADAPTER_CODE, $source->adapter_code);
    }

    /**
     * When a CrawlAddress row for the URL references the dedicated source, the
     * resolver must return that source and not the ['*'] one.
     *
     * NOTE(review): the bound source also lists the URL's host in match_domains,
     * so this scenario may pass through domain matching alone. Consider binding
     * a source that would not otherwise match to isolate the address-binding
     * path - confirm against the resolver implementation.
     */
    public function test_prefers_crawl_address_bound_source_over_wildcard(): void
    {
        $wildcard = $this->createWildcardSource();
        $aiSource = $this->createAiCenterSource();

        CrawlAddress::query()->create([
            'target_type' => self::TARGET_TYPE,
            'name' => '交大 AI 中心',
            'request_url' => self::AI_CENTER_URL,
            'crawl_source_id' => $aiSource->id,
            'status' => 1,
        ]);

        $source = app(CrawlSourceResolver::class)->resolve(self::AI_CENTER_URL, self::TARGET_TYPE);

        // Assert non-null first, as the other test does, so a missing result
        // fails with a clear message before any property is read.
        $this->assertNotNull($source);
        $this->assertSame(self::AI_ADAPTER_CODE, $source->adapter_code);
        $this->assertNotSame($wildcard->id, $source->id);
    }

    /**
     * Persists the generic faculty-list HTML source whose match_domains is ['*'].
     *
     * @return CrawlSource the newly created row
     */
    private function createWildcardSource(): CrawlSource
    {
        return CrawlSource::query()->create([
            'name' => '师资列表页(通用 HTML)',
            'target_type' => self::TARGET_TYPE,
            'adapter_code' => 'faculty_list_html',
            'entry_url' => 'https://',
            'match_domains' => ['*'],
            'status' => 1,
            'sort' => 30,
        ]);
    }

    /**
     * Persists the dedicated source restricted to the ai.sjtu.edu.cn domain.
     *
     * @return CrawlSource the newly created row
     */
    private function createAiCenterSource(): CrawlSource
    {
        return CrawlSource::query()->create([
            'name' => '交大人工智能研究院研究中心',
            'target_type' => self::TARGET_TYPE,
            'adapter_code' => self::AI_ADAPTER_CODE,
            'entry_url' => self::AI_CENTER_URL,
            'match_domains' => ['ai.sjtu.edu.cn'],
            'status' => 1,
            'sort' => 25,
        ]);
    }
}
|