|
|
|
|
@ -47,12 +47,13 @@
|
|
|
|
|
|
|
|
|
|
<!-- "Pages to crawl" field; rendered only for the paper, industry_news and teacher target types. -->
<view v-if="form.target_type === 'paper' || form.target_type === 'industry_news' || form.target_type === 'teacher'" class="field">
  <text class="form-label">抓取页数</text>
  <!-- The input is disabled while isAiSjtuResearchCenter is true (see the matching hint below). -->
  <input v-model.number="maxPages" class="input" type="number" :disabled="isAiSjtuResearchCenter" />
  <text v-if="form.target_type === 'paper'" class="hint">arXiv 按提交时间倒序,每页约 50 条;增大页数可抓取更早论文</text>
  <text v-if="form.target_type === 'paper'" class="hint">已入库论文自动跳过,不计入条数上限</text>
  <text v-else-if="form.target_type === 'industry_news'" class="hint">虎嗅、投资界、清科等列表页建议 3~5 页;正文将自动补全入库</text>
  <text v-else-if="isAiSjtuResearchCenter" class="hint">交大 AI 研究院研究中心为 API 一次性拉取,无需分页</text>
  <text v-else-if="form.target_type === 'teacher'" class="hint">多页列表(如 Sudy CMS、博山 CMS、交大 tsites)请适当增大页数</text>
  <!--
    This hint starts its own v-if chain on purpose: as a v-else-if placed after the
    teacher branch above it could never render, because that branch already matches
    every teacher case that reaches it.
  -->
  <text v-if="form.target_type === 'teacher' && !isAiSjtuResearchCenter" class="hint">大批量抓取时仅部分老师会访问主页补邮箱,避免请求超时</text>
</view>
|
|
|
|
|
|
|
|
|
|
<view class="field">
|
|
|
|
|
@ -60,6 +61,7 @@
|
|
|
|
|
<input v-model.number="maxResults" class="input" type="number" />
|
|
|
|
|
<text v-if="form.target_type === 'paper'" class="hint">论文最多 200 条</text>
|
|
|
|
|
<text v-else-if="form.target_type === 'teacher'" class="hint">师资列表最多 500 条</text>
|
|
|
|
|
<text v-if="isAiSjtuResearchCenter" class="hint">将抓取各研究中心「研究团队」成员,含邮箱、电话、研究方向</text>
|
|
|
|
|
<text v-else-if="form.target_type === 'industry_news'" class="hint">资讯最多 50 条;同 URL 已入库将跳过(不重写正文,空正文需先删旧记录再重抓)</text>
|
|
|
|
|
<text v-if="form.target_type === 'industry_news'" class="hint">来源将使用爬虫地址名称(如交大要闻),不会填「通用资讯 HTML」</text>
|
|
|
|
|
</view>
|
|
|
|
|
@ -74,6 +76,9 @@
|
|
|
|
|
<text v-if="lastResult.adapter_code" class="result-line">适配器:{{ formatAdapterLabel(lastResult.adapter_code) }}</text>
|
|
|
|
|
<text v-if="lastResult.result_summary" class="result-line">{{ lastResult.result_summary }}</text>
|
|
|
|
|
<text v-else class="result-line">已入库 {{ lastResult.items_imported ?? 0 }} 条</text>
|
|
|
|
|
<text v-if="lastResult.teacher_duplicates_skipped" class="result-line hint-line">
|
|
|
|
|
跳过 {{ lastResult.teacher_duplicates_skipped }} 位:老师库中已有相同邮箱,或同校同院系同名老师
|
|
|
|
|
</text>
|
|
|
|
|
<text v-if="lastResult.items_fetched" class="result-line">共抓取 {{ lastResult.items_fetched }} 条</text>
|
|
|
|
|
</view>
|
|
|
|
|
</view>
|
|
|
|
|
@ -118,14 +123,20 @@ const ADAPTER_LABELS: Record<string, string> = {
|
|
|
|
|
faculty_list_html: '师资 HTML',
|
|
|
|
|
generic_news_html: '通用资讯',
|
|
|
|
|
arxiv_api: 'arXiv API',
|
|
|
|
|
ai_sjtu_research_center_api: '交大 AI 研究中心',
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Turns an adapter code into the label shown in the UI.
 *
 * @param code Adapter code to look up in ADAPTER_LABELS.
 * @returns The mapped label, or the raw code when the lookup yields nothing
 *          (a missing or empty entry).
 */
function formatAdapterLabel(code: string) {
  const label = ADAPTER_LABELS[code]
  if (label) {
    return label
  }
  return code
}
|
|
|
|
|
|
|
|
|
|
/** True while the currently resolved adapter code is 'ai_sjtu_research_center_api'. */
const isAiSjtuResearchCenter = computed(() => {
  const adapterCode = resolvedAdapter.value
  return adapterCode === 'ai_sjtu_research_center_api'
})
|
|
|
|
|
const lastResult = ref<Awaited<ReturnType<typeof crawlerApi.submit>> | null>(null)
|
|
|
|
|
const addressOptions = ref<CrawlAddressOption[]>([])
|
|
|
|
|
const addressIndex = ref(0)
|
|
|
|
|
const selectedCrawlAddressId = ref<number | null>(null)
|
|
|
|
|
const crawlDefaults = ref<{
|
|
|
|
|
category_dict_item_id?: number
|
|
|
|
|
category_label?: string
|
|
|
|
|
@ -133,6 +144,7 @@ const crawlDefaults = ref<{
|
|
|
|
|
university_id?: number
|
|
|
|
|
university_name?: string
|
|
|
|
|
department?: string
|
|
|
|
|
adapter_code?: string
|
|
|
|
|
}>({})
|
|
|
|
|
|
|
|
|
|
const addressLabels = computed(() => [
|
|
|
|
|
@ -161,6 +173,9 @@ const selectedAddressHint = computed(() => {
|
|
|
|
|
if (crawlDefaults.value.university_name) {
|
|
|
|
|
parts.push(`默认高校:${crawlDefaults.value.university_name}`)
|
|
|
|
|
}
|
|
|
|
|
if (crawlDefaults.value.adapter_code) {
|
|
|
|
|
parts.push(`采集适配器:${formatAdapterLabel(crawlDefaults.value.adapter_code)}`)
|
|
|
|
|
}
|
|
|
|
|
return parts.length > 0 ? parts.join(';') : ''
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
@ -214,6 +229,7 @@ async function loadAddresses() {
|
|
|
|
|
addressOptions.value = []
|
|
|
|
|
}
|
|
|
|
|
addressIndex.value = 0
|
|
|
|
|
selectedCrawlAddressId.value = null
|
|
|
|
|
crawlDefaults.value = {}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@ -235,6 +251,9 @@ function applyCrawlDefaults(addr: CrawlAddressOption) {
|
|
|
|
|
if (addr.department) {
|
|
|
|
|
crawlDefaults.value.department = addr.department
|
|
|
|
|
}
|
|
|
|
|
if (addr.adapter_code) {
|
|
|
|
|
crawlDefaults.value.adapter_code = addr.adapter_code
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function syncFromCrawlAddress(url: string, options?: { fillKeyword?: boolean }) {
|
|
|
|
|
@ -244,10 +263,12 @@ function syncFromCrawlAddress(url: string, options?: { fillKeyword?: boolean })
|
|
|
|
|
)
|
|
|
|
|
if (!matched) {
|
|
|
|
|
addressIndex.value = 0
|
|
|
|
|
selectedCrawlAddressId.value = null
|
|
|
|
|
crawlDefaults.value = {}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
addressIndex.value = addressOptions.value.indexOf(matched) + 1
|
|
|
|
|
selectedCrawlAddressId.value = matched.id
|
|
|
|
|
if (options?.fillKeyword && matched.keyword) {
|
|
|
|
|
keyword.value = matched.keyword
|
|
|
|
|
}
|
|
|
|
|
@ -291,11 +312,13 @@ function onTypeChange(event: UniHelper.PickerChangeEvent) {
|
|
|
|
|
function onAddressPick(event: UniHelper.PickerChangeEvent) {
|
|
|
|
|
addressIndex.value = Number(event.detail.value)
|
|
|
|
|
if (addressIndex.value <= 0) {
|
|
|
|
|
selectedCrawlAddressId.value = null
|
|
|
|
|
crawlDefaults.value = {}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
const addr = addressOptions.value[addressIndex.value - 1]
|
|
|
|
|
if (!addr) return
|
|
|
|
|
selectedCrawlAddressId.value = addr.id
|
|
|
|
|
form.request_url = addr.request_url
|
|
|
|
|
if (addr.keyword) {
|
|
|
|
|
keyword.value = addr.keyword
|
|
|
|
|
@ -311,6 +334,7 @@ async function onUrlBlur() {
|
|
|
|
|
resolvedAdapter.value = ''
|
|
|
|
|
resolvedUrl.value = ''
|
|
|
|
|
addressIndex.value = 0
|
|
|
|
|
selectedCrawlAddressId.value = null
|
|
|
|
|
crawlDefaults.value = {}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
@ -327,10 +351,15 @@ async function onUrlBlur() {
|
|
|
|
|
const res = await crawlerApi.resolveUrl({
|
|
|
|
|
request_url: normalized,
|
|
|
|
|
target_type: form.target_type,
|
|
|
|
|
crawl_address_id: selectedCrawlAddressId.value ?? undefined,
|
|
|
|
|
})
|
|
|
|
|
resolvedName.value = res.source_name
|
|
|
|
|
resolvedAdapter.value = res.adapter_code || ''
|
|
|
|
|
resolvedUrl.value = normalized
|
|
|
|
|
if (res.adapter_code === 'ai_sjtu_research_center_api') {
|
|
|
|
|
maxPages.value = 1
|
|
|
|
|
maxResults.value = Math.max(maxResults.value, 200)
|
|
|
|
|
}
|
|
|
|
|
} catch (error) {
|
|
|
|
|
resolvedName.value = ''
|
|
|
|
|
resolvedAdapter.value = ''
|
|
|
|
|
@ -371,24 +400,24 @@ function buildParams(): Record<string, unknown> {
|
|
|
|
|
return params
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the detailed success message for a finished crawl.
 *
 * @param result Value resolved by crawlerApi.submit.
 * @returns The server-provided result_summary when it is non-empty; otherwise
 *          a message derived from target_type and the import counters.
 */
function buildSuccessMessage(result: Awaited<ReturnType<typeof crawlerApi.submit>>): string {
  if (result.result_summary) {
    return result.result_summary
  }
  if (result.target_type === 'paper') {
    // papers_imported takes precedence; items_imported is the fallback counter.
    const papers = result.papers_imported ?? result.items_imported ?? 0
    const leads = result.teacher_leads_imported ?? 0
    return `已入库 ${papers} 篇论文、${leads} 位作者`
  }
  if (result.target_type === 'industry_news') {
    const news = result.items_imported ?? 0
    return `已入库 ${news} 条资讯`
  }
  if (result.target_type === 'teacher') {
    const teachers = result.items_imported ?? 0
    return `已入库 ${teachers} 位老师`
  }
  return `已入库 ${result.items_imported ?? 0} 条`
}

/**
 * Builds the compact message used as the toast title after a crawl.
 *
 * Unlike buildSuccessMessage it never uses result_summary and only reports
 * counters, falling back to a generic "done" text for unknown target types.
 *
 * @param result Value resolved by crawlerApi.submit.
 * @returns A short, type-specific status string.
 */
function buildToastMessage(result: Awaited<ReturnType<typeof crawlerApi.submit>>): string {
  if (result.target_type === 'teacher') {
    const imported = result.items_imported ?? 0
    const skipped = result.teacher_duplicates_skipped ?? 0
    // Mention skipped duplicates only when there were any.
    if (skipped > 0) {
      return `已入库${imported}位,跳过${skipped}位`
    }
    return `已入库${imported}位老师`
  }
  if (result.target_type === 'paper') {
    const papers = result.papers_imported ?? result.items_imported ?? 0
    return `已入库${papers}篇论文`
  }
  if (result.target_type === 'industry_news') {
    const news = result.items_imported ?? 0
    return `已入库${news}条资讯`
  }
  return '抓取完成'
}
|
|
|
|
|
|
|
|
|
|
function resolveNewsSourceName(url: string): string {
|
|
|
|
|
@ -428,6 +457,9 @@ async function submit() {
|
|
|
|
|
request_url: normalizedUrl,
|
|
|
|
|
params: buildParams(),
|
|
|
|
|
}
|
|
|
|
|
if (selectedCrawlAddressId.value) {
|
|
|
|
|
payload.crawl_address_id = selectedCrawlAddressId.value
|
|
|
|
|
}
|
|
|
|
|
if (form.target_type === 'industry_news') {
|
|
|
|
|
const newsDefaults = buildNewsDefaults(normalizedUrl)
|
|
|
|
|
if (newsDefaults) {
|
|
|
|
|
@ -448,8 +480,9 @@ async function submit() {
|
|
|
|
|
}
|
|
|
|
|
lastResult.value = await crawlerApi.submit(payload)
|
|
|
|
|
uni.showToast({
|
|
|
|
|
title: buildSuccessMessage(lastResult.value),
|
|
|
|
|
icon: 'success',
|
|
|
|
|
title: buildToastMessage(lastResult.value),
|
|
|
|
|
icon: 'none',
|
|
|
|
|
duration: 2500,
|
|
|
|
|
})
|
|
|
|
|
} catch (error) {
|
|
|
|
|
uni.showToast({
|
|
|
|
|
@ -540,6 +573,11 @@ async function submit() {
|
|
|
|
|
font-size: 28rpx;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Disabled state of .input elements: light grey background with grey text. */
.input[disabled] {
|
|
|
|
|
background: #f3f4f6;
|
|
|
|
|
color: #9ca3af;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.textarea {
|
|
|
|
|
min-height: 240rpx;
|
|
|
|
|
padding: 24rpx;
|
|
|
|
|
@ -590,6 +628,11 @@ async function submit() {
|
|
|
|
|
line-height: 1.5;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Grey 24rpx text for elements carrying the hint-line class (used together with result-line in the template). */
.hint-line {
|
|
|
|
|
color: #6b7280;
|
|
|
|
|
font-size: 24rpx;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.result-summary {
|
|
|
|
|
margin-top: 8rpx;
|
|
|
|
|
line-height: 1.6;
|
|
|
|
|
|