You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

114 lines
3.7 KiB

1 week ago
<?php
namespace Database\Seeders;
use App\Models\CrawlSource;
use Illuminate\Database\Seeder;
/**
 * Seeds the built-in crawl sources: arXiv (papers), 投资界 / pedaily
 * (industry news) and a generic faculty-list HTML page (teachers).
 */
class CrawlSourcesSeeder extends Seeder
{
    /**
     * Upsert every built-in crawl source and report completion on the console.
     *
     * Rows are matched on (adapter_code, target_type) via updateOrCreate(), so
     * running the seeder again refreshes the existing rows rather than adding
     * new ones.
     */
    public function run(): void
    {
        foreach ($this->sources() as [$identity, $values]) {
            CrawlSource::query()->updateOrCreate($identity, $values);
        }

        // $this->command is null when the seeder is not invoked from Artisan.
        $this->command?->info('采集源 arXiv / 投资界 / 师资列表 已写入。');
    }

    /**
     * Definitions of the crawl sources, in the order they are written.
     *
     * Each entry is a pair: the attributes used to look the row up, followed
     * by the attributes to write onto it.
     *
     * @return list<array{0: array<string, string>, 1: array<string, mixed>}>
     */
    private function sources(): array
    {
        return [
            [
                ['adapter_code' => 'arxiv_api', 'target_type' => 'paper'],
                [
                    'name' => 'arXiv',
                    'entry_url' => 'https://arxiv.org/',
                    'match_domains' => ['arxiv.org', 'export.arxiv.org'],
                    'config' => ['api_base' => 'https://export.arxiv.org/api/query'],
                    'param_schema' => $this->paramSchema(
                        '搜索关键词',
                        true,
                        // Fixed: the example was fused onto the instruction
                        // text; now separated the same way as the news source.
                        '多个关键词用逗号或换行分隔,如:graph neural, AI',
                        20,
                        50,
                    ),
                    'status' => 1,
                    'sort' => 10,
                ],
            ],
            [
                ['adapter_code' => 'pedaily_html', 'target_type' => 'industry_news'],
                [
                    'name' => '投资界',
                    'entry_url' => 'https://www.pedaily.cn/all/',
                    'match_domains' => ['pedaily.cn', 'www.pedaily.cn'],
                    'config' => [],
                    'param_schema' => $this->paramSchema(
                        '搜索关键词',
                        true,
                        '多个关键词用空格、逗号或换行分隔,如:融资 科创板 AI',
                        30,
                        50,
                    ),
                    'status' => 1,
                    'sort' => 20,
                ],
            ],
            [
                ['adapter_code' => 'faculty_list_html', 'target_type' => 'teacher'],
                [
                    // Fixed: the closing parenthesis was missing.
                    'name' => '师资列表页(通用 HTML)',
                    // NOTE(review): bare scheme and '*' look like "any site"
                    // placeholders for a generic adapter — confirm with the
                    // code that consumes entry_url / match_domains.
                    'entry_url' => 'https://',
                    'match_domains' => ['*'],
                    'config' => [],
                    'param_schema' => $this->paramSchema(
                        '搜索关键词(选填)',
                        false,
                        '留空则抓取页面内全部邮箱条目;填写则在姓名/院系/研究方向等文本中过滤',
                        30,
                        100,
                    ),
                    'status' => 1,
                    'sort' => 30,
                ],
            ],
        ];
    }

    /**
     * Build the two-field parameter schema shared by all sources: a `keyword`
     * textarea and a `max_results` number whose minimum is always 1.
     *
     * @param string $keywordLabel       Label of the keyword field.
     * @param bool   $keywordRequired    Whether the keyword field is mandatory.
     * @param string $keywordPlaceholder Placeholder text of the keyword field.
     * @param int    $defaultResults     Default value of `max_results`.
     * @param int    $maxResults         Upper bound of `max_results`.
     *
     * @return array{fields: list<array<string, mixed>>}
     */
    private function paramSchema(
        string $keywordLabel,
        bool $keywordRequired,
        string $keywordPlaceholder,
        int $defaultResults,
        int $maxResults,
    ): array {
        return [
            'fields' => [
                [
                    'key' => 'keyword',
                    'type' => 'textarea',
                    'label' => $keywordLabel,
                    'required' => $keywordRequired,
                    'placeholder' => $keywordPlaceholder,
                ],
                [
                    'key' => 'max_results',
                    'type' => 'number',
                    'label' => '条数上限',
                    'default' => $defaultResults,
                    'min' => 1,
                    'max' => $maxResults,
                ],
            ],
        ];
    }
}