[ [ 'key' => 'keyword', 'type' => 'textarea', 'label' => '搜索关键词(选填)', 'required' => false, 'placeholder' => '多个关键词用逗号或换行分隔,如:graph neural, AI', ], [ 'key' => 'max_results', 'type' => 'number', 'label' => '条数上限', 'default' => 20, 'min' => 1, 'max' => 50, ], ], ]; CrawlSource::query()->updateOrCreate( ['adapter_code' => 'arxiv_api', 'target_type' => 'paper'], [ 'name' => 'arXiv', 'entry_url' => 'https://arxiv.org/', 'match_domains' => ['arxiv.org', 'export.arxiv.org'], 'config' => ['api_base' => 'https://export.arxiv.org/api/query'], 'param_schema' => $paperSchema, 'status' => 1, 'sort' => 10, ] ); $newsSchema = [ 'fields' => [ [ 'key' => 'keyword', 'type' => 'textarea', 'label' => '搜索关键词(选填)', 'required' => false, 'placeholder' => '多个关键词用空格、逗号或换行分隔,如:融资 科创板 AI', ], [ 'key' => 'max_pages', 'type' => 'number', 'label' => '抓取页数', 'default' => 5, 'min' => 1, 'max' => 50, 'placeholder' => '列表分页时连续抓取多页', ], [ 'key' => 'max_results', 'type' => 'number', 'label' => '条数上限', 'default' => 30, 'min' => 1, 'max' => 50, ], ], ]; CrawlSource::query()->updateOrCreate( ['adapter_code' => 'pedaily_html', 'target_type' => 'industry_news'], [ 'name' => '投资界', 'entry_url' => 'https://www.pedaily.cn/all/', 'match_domains' => ['pedaily.cn', 'www.pedaily.cn', '*.pedaily.cn'], 'config' => [], 'param_schema' => $newsSchema, 'status' => 1, 'sort' => 20, ] ); CrawlSource::query()->updateOrCreate( ['adapter_code' => 'generic_news_html', 'target_type' => 'industry_news'], [ 'name' => '通用资讯 HTML', 'entry_url' => 'https://', 'match_domains' => ['*'], 'config' => [], 'param_schema' => $newsSchema, 'status' => 1, 'sort' => 100, ] ); CrawlSource::query()->updateOrCreate( ['adapter_code' => 'generic_paper_html', 'target_type' => 'paper'], [ 'name' => '通用论文 HTML', 'entry_url' => 'https://', 'match_domains' => ['*'], 'config' => [], 'param_schema' => $paperSchema, 'status' => 1, 'sort' => 100, ] ); $teacherSchema = [ 'fields' => [ [ 'key' => 'keyword', 'type' => 'textarea', 'label' => '搜索关键词(选填)', 'required' => false, 'placeholder' => '多个关键词用空格、逗号或换行分隔', ], [ 'key' => 'max_pages', 'type' => 'number', 'label' => '抓取页数', 'default' => 5, 'min' => 1, 'max' => 50, 'placeholder' => '列表分页时连续抓取多页', ], [ 'key' => 'max_results', 'type' => 'number', 'label' => '条数上限', 'default' => 200, 'min' => 1, 'max' => 500, ], ], ]; CrawlSource::query()->updateOrCreate( ['adapter_code' => 'faculty_list_html', 'target_type' => 'teacher'], [ 'name' => '师资列表页(通用 HTML)', 'entry_url' => 'https://', 'match_domains' => ['*'], 'config' => [], 'param_schema' => $teacherSchema, 'status' => 1, 'sort' => 30, ] ); $this->command?->info('采集源 arXiv / 投资界 / 通用 HTML / 师资列表 已写入。'); } }