You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

81 lines
2.7 KiB

1 day ago
<?php
namespace Tests\Unit;
use App\Models\CrawlAddress;
1 day ago
use App\Models\CrawlSource;
use App\Services\Crawl\CrawlSourceResolver;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Tests\TestCase;
/**
 * Tests for CrawlSourceResolver::resolve() against the SJTU AI institute
 * "center" URL when both a wildcard source and a domain-specific source exist.
 *
 * Each test runs with the RefreshDatabase trait and inserts its own fixtures.
 */
class CrawlSourceResolverTest extends TestCase
{
    use RefreshDatabase;

    /** URL passed to resolve() in every test; also the AI source's entry_url. */
    private const AI_CENTER_URL = 'https://ai.sjtu.edu.cn/center';

    /** adapter_code of the domain-specific AI-center source. */
    private const AI_CENTER_ADAPTER = 'ai_sjtu_research_center_api';

    /**
     * With a wildcard source and an ai.sjtu.edu.cn source both present,
     * resolving the AI-center URL for target type "teacher" must yield the
     * source whose adapter_code is the research-center API adapter.
     */
    public function test_resolves_ai_sjtu_center_to_research_center_api(): void
    {
        // Creation order matches the original test: wildcard first, then AI center.
        $this->createWildcardSource();
        $this->createAiCenterSource();

        $source = app(CrawlSourceResolver::class)->resolve(self::AI_CENTER_URL, 'teacher');

        $this->assertNotNull($source);
        $this->assertSame(self::AI_CENTER_ADAPTER, $source->adapter_code);
    }

    /**
     * With a CrawlAddress row for the AI-center URL pointing at the AI source
     * (via crawl_source_id), the resolved source must be the AI source and
     * must not be the wildcard source.
     *
     * NOTE(review): the same two sources already resolve to the AI adapter in
     * the test above without any CrawlAddress, so this test would still pass
     * if the address binding were ignored. Consider binding the address to a
     * source that would not otherwise win — confirm against the resolver.
     */
    public function test_prefers_crawl_address_bound_source_over_wildcard(): void
    {
        $wildcard = $this->createWildcardSource();
        $aiSource = $this->createAiCenterSource();

        CrawlAddress::query()->create([
            'target_type' => 'teacher',
            'name' => '交大 AI 中心',
            'request_url' => self::AI_CENTER_URL,
            'crawl_source_id' => $aiSource->id,
            'status' => 1,
        ]);

        $source = app(CrawlSourceResolver::class)->resolve(self::AI_CENTER_URL, 'teacher');

        // Fail with a clear message instead of comparing against null below.
        $this->assertNotNull($source);
        $this->assertSame(self::AI_CENTER_ADAPTER, $source->adapter_code);
        $this->assertNotSame($wildcard->id, $source->id);
    }

    /**
     * Inserts the generic faculty-list source whose match_domains is ['*'].
     *
     * @return CrawlSource the record returned by create()
     */
    private function createWildcardSource(): CrawlSource
    {
        return CrawlSource::query()->create([
            'name' => '师资列表页(通用 HTML',
            'target_type' => 'teacher',
            'adapter_code' => 'faculty_list_html',
            'entry_url' => 'https://',
            'match_domains' => ['*'],
            'status' => 1,
            'sort' => 30,
        ]);
    }

    /**
     * Inserts the source restricted to the ai.sjtu.edu.cn domain.
     *
     * @return CrawlSource the record returned by create()
     */
    private function createAiCenterSource(): CrawlSource
    {
        return CrawlSource::query()->create([
            'name' => '交大人工智能研究院研究中心',
            'target_type' => 'teacher',
            'adapter_code' => self::AI_CENTER_ADAPTER,
            'entry_url' => self::AI_CENTER_URL,
            'match_domains' => ['ai.sjtu.edu.cn'],
            'status' => 1,
            'sort' => 25,
        ]);
    }
}