From 9050460fc4aca031efed44fb71bf6b9d7ffdf23a Mon Sep 17 00:00:00 2001 From: lion <120344285@qq.com> Date: Mon, 22 Jun 2026 18:12:52 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api/admin/assets.ts | 1 + src/api/admin/crawl-addresses.ts | 72 ++++ src/views/assets/crawler/index.vue | 133 ++++++- src/views/system/crawl-addresses/index.vue | 341 ++++++++++++++++++ .../components/TeacherDetailDialog.vue | 13 + 5 files changed, 547 insertions(+), 13 deletions(-) create mode 100644 src/api/admin/crawl-addresses.ts create mode 100644 src/views/system/crawl-addresses/index.vue diff --git a/src/api/admin/assets.ts b/src/api/admin/assets.ts index 9505663..ff14216 100644 --- a/src/api/admin/assets.ts +++ b/src/api/admin/assets.ts @@ -213,6 +213,7 @@ export async function triggerCrawlJob(payload: { params?: Record teacher_defaults?: { university_id?: number + department?: string city?: string research_direction_ids?: number[] } diff --git a/src/api/admin/crawl-addresses.ts b/src/api/admin/crawl-addresses.ts new file mode 100644 index 0000000..9c6f3e4 --- /dev/null +++ b/src/api/admin/crawl-addresses.ts @@ -0,0 +1,72 @@ +import { http } from '@/utils/http' +import type { ApiBody, Paginated } from '@/api/types' + +export type CrawlAddressTargetType = 'paper' | 'industry_news' | 'teacher' + +export interface CrawlAddressRow { + id: number + target_type: CrawlAddressTargetType + name: string + request_url: string + keyword?: string | null + category_dict_item_id?: number | null + category_label?: string | null + university_id?: number | null + university_name?: string | null + department?: string | null + sort: number + status: number + created_at?: string | null +} + +export async function fetchCrawlAddresses(params: Record) { + const { data } = await http.get>>('/admin/v1/crawl-addresses', { + params, + }) + return data.data +} + +export async function fetchCrawlAddressOptions(target_type?: CrawlAddressTargetType) { + const { data } = await http.get>( + '/admin/v1/crawl-addresses/options', + { params: target_type ? { target_type } : undefined }, + ) + return data.data.items +} + +export async function createCrawlAddress(payload: { + target_type: CrawlAddressTargetType + name: string + request_url: string + keyword?: string | null + category_dict_item_id?: number | null + university_id?: number | null + department?: string | null + sort?: number + status: number +}) { + const { data } = await http.post>('/admin/v1/crawl-addresses', payload) + return data.data +} + +export async function updateCrawlAddress( + id: number, + payload: Partial<{ + target_type: CrawlAddressTargetType + name: string + request_url: string + keyword?: string | null + category_dict_item_id?: number | null + university_id?: number | null + sort: number + status: number + }>, +) { + const { data } = await http.put>(`/admin/v1/crawl-addresses/${id}`, payload) + return data.data +} + +export async function deleteCrawlAddress(id: number) { + const { data } = await http.delete>(`/admin/v1/crawl-addresses/${id}`) + return data +} diff --git a/src/views/assets/crawler/index.vue b/src/views/assets/crawler/index.vue index 4427f05..e2e815d 100644 --- a/src/views/assets/crawler/index.vue +++ b/src/views/assets/crawler/index.vue @@ -18,6 +18,10 @@ import { type WeeklyBriefRow, } from '@/api/admin/assets' import { fetchDictByCode } from '@/api/admin/dict' +import { + fetchCrawlAddressOptions, + type CrawlAddressRow, +} from '@/api/admin/crawl-addresses' import { ElMessage } from 'element-plus' type TargetType = 'paper' | 'industry_news' | 'teacher' @@ -40,6 +44,12 @@ const teacherLeadItems = ref([]) const teacherItems = ref([]) const resultLoading = ref(false) const newsCategoryOptions = ref([]) +const crawlAddressOptions = ref([]) +const selectedCrawlDefaults = ref<{ + category_dict_item_id?: number + university_id?: number + department?: string +}>({}) const briefLoading = ref(false) const briefGenerating = ref(false) const briefItems = ref([]) @@ -62,13 +72,22 @@ const BUILTIN_PARAM_FIELDS: Record<'paper' | 'industry_news', CrawlParamField[]> required: false, placeholder: '多个关键词用逗号或换行分隔,如:graph neural, AI', }, + { + key: 'max_pages', + type: 'number', + label: '抓取页数', + default: 1, + min: 1, + max: 20, + placeholder: 'arXiv 按提交时间分页,每页 50 条', + }, { key: 'max_results', type: 'number', label: '条数上限', - default: 20, + default: 50, min: 1, - max: 50, + max: 200, }, ], industry_news: [ @@ -220,6 +239,38 @@ function normalizeRequestUrl(url: string): string { return u } +function syncFromCrawlAddress(url: string, options?: { fillKeyword?: boolean }) { + const normalized = normalizeRequestUrl(url) + const matched = crawlAddressOptions.value.find( + (a) => normalizeRequestUrl(a.request_url) === normalized, + ) + if (matched) { + if (options?.fillKeyword && matched.keyword) { + form.value.params.keyword = matched.keyword + } + selectedCrawlDefaults.value = {} + if (matched.category_dict_item_id) { + selectedCrawlDefaults.value.category_dict_item_id = matched.category_dict_item_id + } + if (matched.university_id) { + selectedCrawlDefaults.value.university_id = matched.university_id + } + if (matched.department) { + selectedCrawlDefaults.value.department = matched.department + } + } else { + selectedCrawlDefaults.value = {} + } +} + +async function loadCrawlAddresses(type: TargetType) { + try { + crawlAddressOptions.value = await fetchCrawlAddressOptions(type) + } catch { + crawlAddressOptions.value = [] + } +} + function clearCrawlResult() { lastResult.value = null resultDialog.value = false @@ -228,7 +279,7 @@ function clearCrawlResult() { teacherItems.value = [] } -function resetPage() { +async function resetPage() { clearCrawlResult() previewTab.value = 'paper' submitting.value = false @@ -239,8 +290,10 @@ function resetPage() { } resolved.value = null resolvedRequestUrl.value = null + selectedCrawlDefaults.value = {} paramFields.value = [] ensureBuiltinParams('paper') + await loadCrawlAddresses('paper') void initResolve() } @@ -313,7 +366,15 @@ async function ensureResolved(): Promise { return !!resolved.value } -async function onUrlBlur() { +async function onRequestUrlChange() { + if (form.value.request_url.trim()) { + const matched = crawlAddressOptions.value.some( + (a) => normalizeRequestUrl(a.request_url) === normalizeRequestUrl(form.value.request_url), + ) + syncFromCrawlAddress(form.value.request_url, { fillKeyword: matched }) + } else { + selectedCrawlDefaults.value = {} + } await initResolve() } @@ -323,13 +384,15 @@ watch( clearCrawlResult() resolved.value = null resolvedRequestUrl.value = null + selectedCrawlDefaults.value = {} paramFields.value = [] form.value.request_url = defaultUrl(type) if (type === 'teacher') { ensureTeacherParams() - } else if (type === 'paper' || type === 'industry_news') { + } else if (type === 'paper' || type === 'industry_news') { applyBuiltinParamFields(type, false) } + void loadCrawlAddresses(type) void initResolve() }, ) @@ -341,11 +404,33 @@ async function onSubmit() { } submitting.value = true try { - lastResult.value = await triggerCrawlJob({ + syncFromCrawlAddress(form.value.request_url) + const payload: Parameters[0] = { target_type: form.value.target_type, request_url: form.value.request_url, params: form.value.params, - }) + } + if ( + form.value.target_type === 'industry_news' && + selectedCrawlDefaults.value.category_dict_item_id + ) { + payload.news_defaults = { + category_dict_item_id: selectedCrawlDefaults.value.category_dict_item_id, + } + } + if (form.value.target_type === 'teacher') { + const teacherDefaults: NonNullable[0]['teacher_defaults']> = {} + if (selectedCrawlDefaults.value.university_id) { + teacherDefaults.university_id = selectedCrawlDefaults.value.university_id + } + if (selectedCrawlDefaults.value.department) { + teacherDefaults.department = selectedCrawlDefaults.value.department + } + if (Object.keys(teacherDefaults).length > 0) { + payload.teacher_defaults = teacherDefaults + } + } + lastResult.value = await triggerCrawlJob(payload) ElMessage.success(lastResult.value.result_summary || `抓取完成,已入库 ${lastResult.value.items_imported ?? 0} 条`) } finally { submitting.value = false @@ -500,12 +585,23 @@ usePageLoad(async () => { - + filterable + allow-create + default-first-option + clearable + placeholder="从爬虫地址选择或输入 URL" + class="crawl-url-select" + @change="onRequestUrlChange" + > + +

正在识别采集源…

已识别采集源:{{ resolved.source_name }} @@ -525,6 +621,14 @@ usePageLoad(async () => { " /> + + +

arXiv 按提交时间倒序,每页约 50 条;增大页数可抓取更早论文

+
{ @@ -777,6 +881,9 @@ usePageLoad(async () => { diff --git a/src/views/teachers/components/TeacherDetailDialog.vue b/src/views/teachers/components/TeacherDetailDialog.vue index d55e4a0..4178c1b 100644 --- a/src/views/teachers/components/TeacherDetailDialog.vue +++ b/src/views/teachers/components/TeacherDetailDialog.vue @@ -140,6 +140,18 @@ async function refreshDirectionOptions() { : filters.research_directions } +function mergeTeacherDirectionOptions(detail: { + research_directions?: { id: number; name: string }[] +}) { + const known = new Set(directionOptions.value.map((d) => d.id)) + for (const d of detail.research_directions || []) { + if (!known.has(d.id)) { + directionOptions.value.push({ id: d.id, name: d.name }) + known.add(d.id) + } + } +} + function openUniversityCreate() { universityForm.value = { name: '', @@ -197,6 +209,7 @@ async function load() { const detail = await fetchTeacher(props.teacherId) teacher.value = detail loadedStarId.value = detail.star_level_dict_item_id ?? null + mergeTeacherDirectionOptions(detail) form.value = { name: detail.name || '', university_id: detail.university_id ?? undefined,