master
lion 3 days ago
parent 031d9e57f6
commit 9050460fc4

@ -213,6 +213,7 @@ export async function triggerCrawlJob(payload: {
params?: Record<string, unknown>
teacher_defaults?: {
university_id?: number
department?: string
city?: string
research_direction_ids?: number[]
}

@ -0,0 +1,72 @@
import { http } from '@/utils/http'
import type { ApiBody, Paginated } from '@/api/types'
/** The kinds of crawl target a saved crawl address can be registered for. */
export type CrawlAddressTargetType = 'paper' | 'industry_news' | 'teacher'
/**
 * One saved crawl address as returned by the `/admin/v1/crawl-addresses` endpoints.
 *
 * Optional fields may be absent or `null`; consumers in the admin UI treat both
 * the same way (falling back to an empty value or a dash).
 */
export interface CrawlAddressRow {
id: number
// Which crawl target this address is registered for.
target_type: CrawlAddressTargetType
// Human-readable name; used as the option label when picking an address.
name: string
// URL handed to the crawler when this address is selected.
request_url: string
// Preset keyword text (the edit form hints at comma/newline separated values).
keyword?: string | null
// News category reference and its display label — presumably only set for `industry_news`; verify against the backend.
category_dict_item_id?: number | null
category_label?: string | null
// University reference, its display name and department — presumably only set for `teacher`; verify against the backend.
university_id?: number | null
university_name?: string | null
department?: string | null
// Ordering value shown in the list; how the server sorts by it is not visible here.
sort: number
// The admin list renders 1 as enabled and any other value as disabled.
status: number
created_at?: string | null
}
/**
 * Fetches a page of crawl addresses from the admin API.
 *
 * @param params Query parameters passed through as the request's `params` option.
 * @returns The `data` member of the response envelope (a paginated list of rows).
 */
export async function fetchCrawlAddresses(params: Record<string, unknown>) {
  const response = await http.get<ApiBody<Paginated<CrawlAddressRow>>>(
    '/admin/v1/crawl-addresses',
    { params },
  )
  return response.data.data
}
/**
 * Fetches the crawl addresses offered as selectable options.
 *
 * @param target_type When given, sent as a `target_type` query parameter;
 *   when omitted, no query parameters are sent at all.
 * @returns The `items` array from the response envelope's `data`.
 */
export async function fetchCrawlAddressOptions(target_type?: CrawlAddressTargetType) {
  const query = target_type ? { target_type } : undefined
  const response = await http.get<ApiBody<{ items: CrawlAddressRow[] }>>(
    '/admin/v1/crawl-addresses/options',
    { params: query },
  )
  return response.data.data.items
}
/**
 * Creates a crawl address by POSTing the payload to the admin API.
 *
 * @param payload Fields of the new address; `target_type`, `name`, `request_url`
 *   and `status` are required, the rest are optional.
 * @returns The `data` member of the response envelope (the created row).
 */
export async function createCrawlAddress(payload: {
  target_type: CrawlAddressTargetType
  name: string
  request_url: string
  keyword?: string | null
  category_dict_item_id?: number | null
  university_id?: number | null
  department?: string | null
  sort?: number
  status: number
}) {
  const response = await http.post<ApiBody<CrawlAddressRow>>(
    '/admin/v1/crawl-addresses',
    payload,
  )
  return response.data.data
}
/**
 * Updates an existing crawl address by PUTting a partial payload to the admin API.
 *
 * The accepted fields mirror `createCrawlAddress`, including `department`, so
 * that the same form payload can be sent to either function with full type
 * checking.
 *
 * @param id Identifier of the address to update (interpolated into the URL).
 * @param payload Any subset of the editable fields.
 * @returns The `data` member of the response envelope (the updated row).
 */
export async function updateCrawlAddress(
  id: number,
  payload: Partial<{
    target_type: CrawlAddressTargetType
    name: string
    request_url: string
    keyword?: string | null
    category_dict_item_id?: number | null
    university_id?: number | null
    department?: string | null
    sort: number
    status: number
  }>,
) {
  const { data } = await http.put<ApiBody<CrawlAddressRow>>(`/admin/v1/crawl-addresses/${id}`, payload)
  return data.data
}
/**
 * Deletes a crawl address.
 *
 * @param id Identifier of the address to delete (interpolated into the URL).
 * @returns The whole response envelope, not just its `data` member.
 */
export async function deleteCrawlAddress(id: number) {
  const response = await http.delete<ApiBody<null>>(`/admin/v1/crawl-addresses/${id}`)
  return response.data
}

@ -18,6 +18,10 @@ import {
type WeeklyBriefRow,
} from '@/api/admin/assets'
import { fetchDictByCode } from '@/api/admin/dict'
import {
fetchCrawlAddressOptions,
type CrawlAddressRow,
} from '@/api/admin/crawl-addresses'
import { ElMessage } from 'element-plus'
type TargetType = 'paper' | 'industry_news' | 'teacher'
@ -40,6 +44,12 @@ const teacherLeadItems = ref<CrawlJobItemRow[]>([])
const teacherItems = ref<CrawlJobItemRow[]>([])
const resultLoading = ref(false)
const newsCategoryOptions = ref<NewsCategoryOpt[]>([])
const crawlAddressOptions = ref<CrawlAddressRow[]>([])
const selectedCrawlDefaults = ref<{
category_dict_item_id?: number
university_id?: number
department?: string
}>({})
const briefLoading = ref(false)
const briefGenerating = ref(false)
const briefItems = ref<WeeklyBriefRow[]>([])
@ -62,13 +72,22 @@ const BUILTIN_PARAM_FIELDS: Record<'paper' | 'industry_news', CrawlParamField[]>
required: false,
placeholder: '多个关键词用逗号或换行分隔graph neural, AI',
},
{
key: 'max_pages',
type: 'number',
label: '抓取页数',
default: 1,
min: 1,
max: 20,
placeholder: 'arXiv 按提交时间分页,每页 50 条',
},
{
key: 'max_results',
type: 'number',
label: '条数上限',
default: 20,
default: 50,
min: 1,
max: 50,
max: 200,
},
],
industry_news: [
@ -220,6 +239,38 @@ function normalizeRequestUrl(url: string): string {
return u
}
/**
 * Looks up the saved crawl address whose normalized URL equals `url` and
 * mirrors its defaults into `selectedCrawlDefaults`.
 *
 * The defaults are always reset first; only truthy values from the matched
 * address are copied over. When nothing matches, the defaults stay empty.
 *
 * @param url URL to look up; compared after `normalizeRequestUrl`.
 * @param options `fillKeyword`: also copy the matched address's keyword into
 *   `form.params.keyword` (only when that keyword is non-empty).
 */
function syncFromCrawlAddress(url: string, options?: { fillKeyword?: boolean }) {
  const wanted = normalizeRequestUrl(url)
  const address = crawlAddressOptions.value.find(
    (candidate) => normalizeRequestUrl(candidate.request_url) === wanted,
  )

  if (!address) {
    selectedCrawlDefaults.value = {}
    return
  }

  if (options?.fillKeyword && address.keyword) {
    form.value.params.keyword = address.keyword
  }

  selectedCrawlDefaults.value = {}
  const defaults = selectedCrawlDefaults.value
  const { category_dict_item_id, university_id, department } = address
  if (category_dict_item_id) {
    defaults.category_dict_item_id = category_dict_item_id
  }
  if (university_id) {
    defaults.university_id = university_id
  }
  if (department) {
    defaults.department = department
  }
}
// Monotonic counter identifying the most recent `loadCrawlAddresses` call.
let crawlAddressRequestSeq = 0

/**
 * Loads the saved crawl addresses for `type` into `crawlAddressOptions`.
 *
 * Loading is best-effort: a failed request leaves the option list empty
 * instead of raising. Because callers may fire this without awaiting it,
 * responses can arrive out of order; only the result of the latest call is
 * applied, so an older, slower response cannot overwrite newer options.
 *
 * @param type Target type whose addresses should be listed.
 */
async function loadCrawlAddresses(type: TargetType) {
  const requestId = ++crawlAddressRequestSeq
  try {
    const options = await fetchCrawlAddressOptions(type)
    if (requestId === crawlAddressRequestSeq) {
      crawlAddressOptions.value = options
    }
  } catch {
    // Best-effort: fall back to an empty list, but never on behalf of a stale request.
    if (requestId === crawlAddressRequestSeq) {
      crawlAddressOptions.value = []
    }
  }
}
function clearCrawlResult() {
lastResult.value = null
resultDialog.value = false
@ -228,7 +279,7 @@ function clearCrawlResult() {
teacherItems.value = []
}
function resetPage() {
async function resetPage() {
clearCrawlResult()
previewTab.value = 'paper'
submitting.value = false
@ -239,8 +290,10 @@ function resetPage() {
}
resolved.value = null
resolvedRequestUrl.value = null
selectedCrawlDefaults.value = {}
paramFields.value = []
ensureBuiltinParams('paper')
await loadCrawlAddresses('paper')
void initResolve()
}
@ -313,7 +366,15 @@ async function ensureResolved(): Promise<boolean> {
return !!resolved.value
}
async function onUrlBlur() {
/**
 * Change handler for the target-address select.
 *
 * Applies the defaults (and keyword) of the saved crawl address matching the
 * chosen URL, or clears the defaults when the field is empty, then re-runs
 * source resolution.
 */
async function onRequestUrlChange() {
  // The bound select is clearable; depending on the Element Plus version a
  // clear can set the model to undefined/null rather than ''. Normalise it so
  // the string handling below (and in later readers of the form) cannot throw.
  if (form.value.request_url == null) {
    form.value.request_url = ''
  }
  const url = form.value.request_url
  if (url.trim()) {
    // syncFromCrawlAddress only fills the keyword when an address matches, so
    // no separate pre-check of the option list is needed.
    syncFromCrawlAddress(url, { fillKeyword: true })
  } else {
    selectedCrawlDefaults.value = {}
  }
  await initResolve()
}
@ -323,13 +384,15 @@ watch(
clearCrawlResult()
resolved.value = null
resolvedRequestUrl.value = null
selectedCrawlDefaults.value = {}
paramFields.value = []
form.value.request_url = defaultUrl(type)
if (type === 'teacher') {
ensureTeacherParams()
} else if (type === 'paper' || type === 'industry_news') {
} else if (type === 'paper' || type === 'industry_news') {
applyBuiltinParamFields(type, false)
}
void loadCrawlAddresses(type)
void initResolve()
},
)
@ -341,11 +404,33 @@ async function onSubmit() {
}
submitting.value = true
try {
lastResult.value = await triggerCrawlJob({
syncFromCrawlAddress(form.value.request_url)
const payload: Parameters<typeof triggerCrawlJob>[0] = {
target_type: form.value.target_type,
request_url: form.value.request_url,
params: form.value.params,
})
}
if (
form.value.target_type === 'industry_news' &&
selectedCrawlDefaults.value.category_dict_item_id
) {
payload.news_defaults = {
category_dict_item_id: selectedCrawlDefaults.value.category_dict_item_id,
}
}
if (form.value.target_type === 'teacher') {
const teacherDefaults: NonNullable<Parameters<typeof triggerCrawlJob>[0]['teacher_defaults']> = {}
if (selectedCrawlDefaults.value.university_id) {
teacherDefaults.university_id = selectedCrawlDefaults.value.university_id
}
if (selectedCrawlDefaults.value.department) {
teacherDefaults.department = selectedCrawlDefaults.value.department
}
if (Object.keys(teacherDefaults).length > 0) {
payload.teacher_defaults = teacherDefaults
}
}
lastResult.value = await triggerCrawlJob(payload)
ElMessage.success(lastResult.value.result_summary || `抓取完成,已入库 ${lastResult.value.items_imported ?? 0}`)
} finally {
submitting.value = false
@ -500,12 +585,23 @@ usePageLoad(async () => {
</el-form-item>
<el-form-item label="目标地址" required>
<el-input
<el-select
v-model="form.request_url"
type="url"
placeholder="https:// 资讯/论文列表页或单篇详情页"
@blur="onUrlBlur"
/>
filterable
allow-create
default-first-option
clearable
placeholder="从爬虫地址选择或输入 URL"
class="crawl-url-select"
@change="onRequestUrlChange"
>
<el-option
v-for="addr in crawlAddressOptions"
:key="addr.id"
:label="addr.name"
:value="addr.request_url"
/>
</el-select>
<p v-if="resolving" class="crawl-resolve-hint"></p>
<p v-else-if="resolved?.source_name" class="crawl-resolve-hint">
已识别采集源{{ resolved.source_name }}
@ -525,6 +621,14 @@ usePageLoad(async () => {
"
/>
</el-form-item>
<el-form-item v-if="form.target_type === 'paper'" label="抓取页数">
<el-input-number
v-model="form.params.max_pages as number"
:min="1"
:max="20"
/>
<p class="crawl-resolve-hint">arXiv 按提交时间倒序每页约 50 增大页数可抓取更早论文</p>
</el-form-item>
<el-form-item v-if="form.target_type === 'industry_news'" label="抓取页数">
<el-input-number
v-model="form.params.max_pages as number"
@ -536,7 +640,7 @@ usePageLoad(async () => {
<el-input-number
v-model="form.params.max_results as number"
:min="1"
:max="50"
:max="form.target_type === 'paper' ? 200 : 50"
/>
</el-form-item>
</template>
@ -777,6 +881,9 @@ usePageLoad(async () => {
</template>
<style scoped>
.crawl-url-select {
width: 100%;
}
.crawler-form {
max-width: 640px;
}

@ -0,0 +1,341 @@
<script setup lang="ts">
import PageTitle from '@/components/PageTitle.vue'
import { computed, ref } from 'vue'
import { usePageLoad } from '@/composables/usePageLoad'
import {
fetchCrawlAddresses,
createCrawlAddress,
updateCrawlAddress,
deleteCrawlAddress,
type CrawlAddressRow,
type CrawlAddressTargetType,
} from '@/api/admin/crawl-addresses'
import { fetchDictByCode } from '@/api/admin/dict'
import { fetchUniversities } from '@/api/admin/teachers'
import { enabledStatusClass } from '@/utils/admin-list'
import { ElMessage, ElMessageBox } from 'element-plus'
/** Display label for each crawl target type, keyed by the `target_type` value. */
const TARGET_TYPE_LABELS: Record<CrawlAddressTargetType, string> = {
paper: '论文 → 论文库',
industry_news: '行业资讯 → 资讯管理',
teacher: '老师库 → 老师库',
}
// List state: loading flag, rows of the current page, and pagination metadata
// (replaced by the server's `meta` after each load).
const loading = ref(false)
const items = ref<CrawlAddressRow[]>([])
const meta = ref({ current_page: 1, per_page: 20, total: 0 })
// Filter inputs and the page number sent with the next list request.
const keyword = ref('')
const filterTargetType = ref<CrawlAddressTargetType | ''>('')
const page = ref(1)
// Option lists for the dialog's selects; filled lazily when the dialog opens.
const newsCategoryOptions = ref<{ id: number; label: string }[]>([])
const universityOptions = ref<{ id: number; name: string }[]>([])
// Dialog visibility and the row being edited (null while creating).
const dialog = ref(false)
const editing = ref<CrawlAddressRow | null>(null)
// Editable copy of one crawl address; text fields use '' rather than null.
const form = ref({
target_type: 'paper' as CrawlAddressTargetType,
name: '',
request_url: '',
keyword: '',
category_dict_item_id: null as number | null,
university_id: null as number | null,
department: '',
sort: 0,
status: 1,
})
// The category field applies to industry news only; university/department to teachers only.
const showCategoryField = computed(() => form.value.target_type === 'industry_news')
const showUniversityField = computed(() => form.value.target_type === 'teacher')
/**
 * Returns the display label for a target type, falling back to the raw type
 * value when no (non-empty) label is registered for it.
 */
function targetTypeLabel(type: CrawlAddressTargetType) {
  const label = TARGET_TYPE_LABELS[type]
  if (label) {
    return label
  }
  return type
}
/**
 * Lazily loads the option lists used by the edit dialog (news categories and
 * universities).
 *
 * Each list is fetched only while it is still empty, and each fetch is
 * best-effort: a failure leaves that list empty without affecting the other.
 * The two requests are independent, so they are issued in parallel.
 */
async function loadOptionsForForm() {
  const loadNewsCategories = async () => {
    if (newsCategoryOptions.value.length > 0) {
      return
    }
    try {
      const res = await fetchDictByCode('news_category')
      newsCategoryOptions.value = res.items.map((o) => ({ id: o.id, label: o.label }))
    } catch {
      // Best-effort: the category select simply stays empty.
    }
  }

  const loadUniversities = async () => {
    if (universityOptions.value.length > 0) {
      return
    }
    try {
      const res = await fetchUniversities({ page: 1, page_size: 500, simple: 1 })
      universityOptions.value = res.items.map((u) => ({ id: u.id, name: u.name }))
    } catch {
      // Best-effort: the university select simply stays empty.
    }
  }

  await Promise.all([loadNewsCategories(), loadUniversities()])
}
/**
 * Fetches the current page of crawl addresses using the active filters and
 * stores the returned rows and pagination metadata.
 *
 * Empty filter values are sent as `undefined`. The loading flag is raised for
 * the duration of the request and always lowered afterwards, even on failure;
 * request errors propagate to the caller.
 */
async function load() {
  loading.value = true
  const query = {
    page: page.value,
    page_size: meta.value.per_page,
    keyword: keyword.value || undefined,
    target_type: filterTargetType.value || undefined,
  }
  try {
    const { items: rows, meta: pageMeta } = await fetchCrawlAddresses(query)
    items.value = rows
    meta.value = pageMeta
  } finally {
    loading.value = false
  }
}
/**
 * Clears the form fields that do not apply to the given target type:
 * the news category unless the type is `industry_news`, and the university
 * and department unless the type is `teacher`.
 */
function resetFormFieldsByType(type: CrawlAddressTargetType) {
  const draft = form.value
  if (type !== 'industry_news') {
    draft.category_dict_item_id = null
  }
  if (type === 'teacher') {
    return
  }
  draft.university_id = null
  draft.department = ''
}
/**
 * Opens the dialog in "create" mode: no row is being edited, the form is
 * reset to its blank defaults, and the select options are loaded in the
 * background.
 */
function openCreate() {
  const blankForm = {
    target_type: 'paper' as const,
    name: '',
    request_url: '',
    keyword: '',
    category_dict_item_id: null,
    university_id: null,
    department: '',
    sort: 0,
    status: 1,
  }
  editing.value = null
  form.value = blankForm
  dialog.value = true
  void loadOptionsForForm()
}
/**
 * Opens the dialog in "edit" mode for `row`: remembers the row, copies its
 * values into the form (missing text becomes '', missing ids become null),
 * and loads the select options in the background.
 */
function openEdit(row: CrawlAddressRow) {
  const { target_type, name, request_url, sort, status } = row
  editing.value = row
  form.value = {
    target_type,
    name,
    request_url,
    keyword: row.keyword || '',
    category_dict_item_id: row.category_dict_item_id ?? null,
    university_id: row.university_id ?? null,
    department: row.department || '',
    sort,
    status,
  }
  dialog.value = true
  void loadOptionsForForm()
}
/**
 * Validates the dialog form and creates or updates the crawl address.
 *
 * Name and target URL are required (after trimming); a warning is shown and
 * nothing is sent when either is blank. Fields that do not apply to the
 * selected target type are sent as `null`. On success the dialog closes and
 * the list reloads. API errors propagate to the caller.
 */
async function save() {
  const name = form.value.name.trim()
  if (!name) {
    ElMessage.warning('请填写地址名称')
    return
  }
  const requestUrl = form.value.request_url.trim()
  if (!requestUrl) {
    ElMessage.warning('请填写目标地址')
    return
  }
  const payload = {
    target_type: form.value.target_type,
    name,
    request_url: requestUrl,
    keyword: form.value.keyword.trim() || null,
    category_dict_item_id: showCategoryField.value ? form.value.category_dict_item_id : null,
    university_id: showUniversityField.value ? form.value.university_id : null,
    department: showUniversityField.value ? form.value.department.trim() || null : null,
    // The number input can leave the model empty (null/undefined) when the
    // user clears it; fall back to 0 so a numeric sort is always sent.
    sort: form.value.sort ?? 0,
    status: form.value.status,
  }
  if (editing.value) {
    await updateCrawlAddress(editing.value.id, payload)
  } else {
    await createCrawlAddress(payload)
  }
  ElMessage.success('已保存')
  dialog.value = false
  await load()
}
/**
 * Asks for confirmation and, if confirmed, deletes the given crawl address
 * and reloads the list.
 *
 * Dismissing the confirmation is a normal outcome, not an error: the confirm
 * promise rejects in that case, so the rejection is absorbed here and nothing
 * is deleted. Errors from the delete request itself still propagate.
 */
async function remove(row: CrawlAddressRow) {
  try {
    await ElMessageBox.confirm(`确定删除爬虫地址「${row.name}」?`, '提示', { type: 'warning' })
  } catch {
    // User cancelled or closed the dialog.
    return
  }
  await deleteCrawlAddress(row.id)
  ElMessage.success('已删除')
  await load()
}
/** Jumps back to the first page and reloads the list with the current filters. */
function search() {
page.value = 1
// Fire-and-forget: the reload is not awaited by the caller.
void load()
}
/**
 * Clears both filters, returns to the first page, and reloads the list
 * without waiting for the request to finish.
 */
function resetFilters() {
  page.value = 1
  filterTargetType.value = ''
  keyword.value = ''
  void load()
}
/** Change handler for the dialog's target-type radios: clears fields that do not apply to the new type. */
function onFormTargetTypeChange(type: CrawlAddressTargetType) {
resetFormFieldsByType(type)
}
usePageLoad(load)
</script>
<template>
<div class="list-page">
<div class="page-header">
<PageTitle />
<el-button type="primary" size="small" class="btn-create" @click="openCreate"></el-button>
</div>
<el-card shadow="never" class="admin-list-card">
<div class="list-filter-bar">
<el-select
v-model="filterTargetType"
clearable
placeholder="入库类型"
class="filter-select"
@change="search"
>
<el-option label="论文 → 论文库" value="paper" />
<el-option label="行业资讯 → 资讯管理" value="industry_news" />
<el-option label="老师库 → 老师库" value="teacher" />
</el-select>
<el-input
v-model="keyword"
placeholder="名称 / 地址 / 关键词"
clearable
class="filter-search"
@keyup.enter="search"
/>
<el-button type="primary" @click="search"></el-button>
<el-button @click="resetFilters"></el-button>
</div>
<el-table v-loading="loading" :data="items" row-key="id">
<el-table-column label="入库类型" width="160">
<template #default="{ row }">
{{ targetTypeLabel(row.target_type) }}
</template>
</el-table-column>
<el-table-column prop="name" label="地址名称" min-width="140" />
<el-table-column prop="request_url" label="目标地址" min-width="220" show-overflow-tooltip />
<el-table-column prop="keyword" label="关键词" min-width="120" show-overflow-tooltip />
<el-table-column label="资讯分类" width="120" show-overflow-tooltip>
<template #default="{ row }">
{{ row.category_label || '—' }}
</template>
</el-table-column>
<el-table-column label="高校" width="140" show-overflow-tooltip>
<template #default="{ row }">
{{ row.university_name || '—' }}
</template>
</el-table-column>
<el-table-column label="学院" width="160" show-overflow-tooltip>
<template #default="{ row }">
{{ row.department || '—' }}
</template>
</el-table-column>
<el-table-column prop="sort" label="排序" width="80" align="center" />
<el-table-column label="状态" width="90" align="center">
<template #default="{ row }">
<span class="status-badge" :class="enabledStatusClass(row.status)">
{{ row.status === 1 ? '启用' : '停用' }}
</span>
</template>
</el-table-column>
<el-table-column label="操作" width="160" fixed="right">
<template #default="{ row }">
<div class="table-row-actions">
<el-button class="btn-action-primary" @click="openEdit(row)"></el-button>
<el-button class="btn-action-brand" @click="remove(row)"></el-button>
</div>
</template>
</el-table-column>
</el-table>
<div class="list-pager">
  <!-- page-size must match the page_size sent by load(); without it the pager
       falls back to its own default and computes the wrong number of pages. -->
  <el-pagination
    v-model:current-page="page"
    :page-size="meta.per_page"
    layout="total, prev, pager, next"
    :total="meta.total"
    @current-change="load"
  />
</div>
</el-card>
<el-dialog v-model="dialog" :title="editing ? '编辑爬虫地址' : '新增爬虫地址'" width="560px">
<el-form label-width="100px">
<el-form-item label="入库类型" required>
<el-radio-group v-model="form.target_type" @change="onFormTargetTypeChange">
<el-radio label="paper">论文</el-radio>
<el-radio label="industry_news">行业资讯</el-radio>
<el-radio label="teacher">老师库</el-radio>
</el-radio-group>
</el-form-item>
<el-form-item label="地址名称" required>
<el-input v-model="form.name" maxlength="128" show-word-limit />
</el-form-item>
<el-form-item label="目标地址" required>
<el-input v-model="form.request_url" type="url" placeholder="https://" />
</el-form-item>
<el-form-item label="关键词">
<el-input
v-model="form.keyword"
type="textarea"
:rows="2"
placeholder="多个关键词用逗号或换行分隔"
/>
</el-form-item>
<el-form-item v-if="showCategoryField" label="资讯分类">
<el-select v-model="form.category_dict_item_id" clearable filterable placeholder="请选择">
<el-option
v-for="opt in newsCategoryOptions"
:key="opt.id"
:label="opt.label"
:value="opt.id"
/>
</el-select>
</el-form-item>
<el-form-item v-if="showUniversityField" label="高校">
<el-select v-model="form.university_id" clearable filterable placeholder="请选择">
<el-option
v-for="opt in universityOptions"
:key="opt.id"
:label="opt.name"
:value="opt.id"
/>
</el-select>
</el-form-item>
<el-form-item v-if="showUniversityField" label="学院">
<el-input v-model="form.department" maxlength="128" placeholder="如:电子信息与电气工程学院" />
</el-form-item>
<el-form-item label="排序">
<el-input-number v-model="form.sort" :min="0" />
</el-form-item>
<el-form-item label="状态">
<el-radio-group v-model="form.status">
<el-radio :label="1">启用</el-radio>
<el-radio :label="0">停用</el-radio>
</el-radio-group>
</el-form-item>
</el-form>
<template #footer>
<el-button @click="dialog = false">取消</el-button>
<el-button type="primary" @click="save"></el-button>
</template>
</el-dialog>
</div>
</template>
<style scoped>
.filter-select {
width: 180px;
}
</style>

@ -140,6 +140,18 @@ async function refreshDirectionOptions() {
: filters.research_directions
}
/**
 * Appends the teacher's research directions to `directionOptions` when they
 * are not already listed, so every direction on the loaded teacher has a
 * matching option. Existing options are left untouched, and duplicate ids
 * within `detail` are added only once.
 *
 * @param detail Teacher detail; a missing `research_directions` is treated as empty.
 */
function mergeTeacherDirectionOptions(detail: {
  research_directions?: { id: number; name: string }[]
}) {
  const seenIds = new Set(directionOptions.value.map((option) => option.id))
  const incoming = detail.research_directions || []
  incoming.forEach((direction) => {
    if (seenIds.has(direction.id)) {
      return
    }
    seenIds.add(direction.id)
    directionOptions.value.push({ id: direction.id, name: direction.name })
  })
}
function openUniversityCreate() {
universityForm.value = {
name: '',
@ -197,6 +209,7 @@ async function load() {
const detail = await fetchTeacher(props.teacherId)
teacher.value = detail
loadedStarId.value = detail.star_level_dict_item_id ?? null
mergeTeacherDirectionOptions(detail)
form.value = {
name: detail.name || '',
university_id: detail.university_id ?? undefined,

Loading…
Cancel
Save