|
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
|
|
namespace Database\Seeders;
|
|
|
|
|
|
|
|
|
|
|
|
use App\Models\CrawlSource;
|
|
|
|
|
|
use Illuminate\Database\Seeder;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Seeds the built-in crawl sources: arXiv (papers), 投资界 / pedaily (industry
 * news) and a generic faculty-list HTML source (teachers).
 *
 * Each source is written with updateOrCreate(), matched on the
 * (adapter_code, target_type) pair, so running the seeder again updates the
 * existing rows instead of inserting new ones for the same pair.
 */
class CrawlSourcesSeeder extends Seeder
{
    /**
     * Upsert every built-in crawl source and report completion on the console.
     *
     * Sources are written in the order they are declared below
     * (sort 10, 20, 30).
     */
    public function run(): void
    {
        // Each entry: 'match' = lookup attributes, 'values' = attributes to write.
        // NOTE(review): 'status' => 1 presumably means "enabled" — confirm against the model.
        $sources = [
            [
                'match' => ['adapter_code' => 'arxiv_api', 'target_type' => 'paper'],
                'values' => [
                    'name' => 'arXiv',
                    'entry_url' => 'https://arxiv.org/',
                    'match_domains' => ['arxiv.org', 'export.arxiv.org'],
                    'config' => ['api_base' => 'https://export.arxiv.org/api/query'],
                    'param_schema' => $this->paramSchema(
                        keywordLabel: '搜索关键词',
                        keywordRequired: true,
                        keywordPlaceholder: '多个关键词用逗号或换行分隔,如:graph neural, AI',
                        defaultMaxResults: 20,
                        maxResultsLimit: 50,
                    ),
                    'status' => 1,
                    'sort' => 10,
                ],
            ],
            [
                'match' => ['adapter_code' => 'pedaily_html', 'target_type' => 'industry_news'],
                'values' => [
                    'name' => '投资界',
                    'entry_url' => 'https://www.pedaily.cn/all/',
                    'match_domains' => ['pedaily.cn', 'www.pedaily.cn'],
                    'config' => [],
                    'param_schema' => $this->paramSchema(
                        keywordLabel: '搜索关键词',
                        keywordRequired: true,
                        keywordPlaceholder: '多个关键词用空格、逗号或换行分隔,如:融资 科创板 AI',
                        defaultMaxResults: 30,
                        maxResultsLimit: 50,
                    ),
                    'status' => 1,
                    'sort' => 20,
                ],
            ],
            [
                'match' => ['adapter_code' => 'faculty_list_html', 'target_type' => 'teacher'],
                'values' => [
                    'name' => '师资列表页(通用 HTML)',
                    // Bare scheme only: no concrete entry URL is seeded for this source.
                    'entry_url' => 'https://',
                    // NOTE(review): '*' looks like a match-any-domain wildcard — confirm with the matcher.
                    'match_domains' => ['*'],
                    'config' => [],
                    'param_schema' => $this->paramSchema(
                        keywordLabel: '搜索关键词(选填)',
                        keywordRequired: false,
                        keywordPlaceholder: '留空则抓取页面内全部邮箱条目;填写则在姓名/院系/研究方向等文本中过滤',
                        defaultMaxResults: 30,
                        maxResultsLimit: 100,
                    ),
                    'status' => 1,
                    'sort' => 30,
                ],
            ],
        ];

        foreach ($sources as $source) {
            CrawlSource::query()->updateOrCreate($source['match'], $source['values']);
        }

        // $this->command may be unset, hence the nullsafe call.
        $this->command?->info('采集源 arXiv / 投资界 / 师资列表 已写入。');
    }

    /**
     * Build the two-field parameter schema shared by all seeded sources:
     * a "keyword" textarea followed by a "max_results" number input.
     *
     * Key order inside each field is kept stable so the stored structure is
     * the same as when the arrays were written out by hand.
     *
     * @param string $keywordLabel       Label shown for the keyword field.
     * @param bool   $keywordRequired    Whether the keyword field is required.
     * @param string $keywordPlaceholder Placeholder text for the keyword field.
     * @param int    $defaultMaxResults  Default value of the max_results field.
     * @param int    $maxResultsLimit    Upper bound ('max') of the max_results field.
     *
     * @return array{fields: list<array<string, mixed>>}
     */
    private function paramSchema(
        string $keywordLabel,
        bool $keywordRequired,
        string $keywordPlaceholder,
        int $defaultMaxResults,
        int $maxResultsLimit,
    ): array {
        return [
            'fields' => [
                [
                    'key' => 'keyword',
                    'type' => 'textarea',
                    'label' => $keywordLabel,
                    'required' => $keywordRequired,
                    'placeholder' => $keywordPlaceholder,
                ],
                [
                    'key' => 'max_results',
                    'type' => 'number',
                    'label' => '条数上限',
                    'default' => $defaultMaxResults,
                    'min' => 1,
                    'max' => $maxResultsLimit,
                ],
            ],
        ];
    }
}
|