You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

417 lines
14 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
namespace App\Services\Brief;
use App\Models\News;
use App\Models\Paper;
use App\Models\Teacher;
use App\Models\WeeklyBrief;
use Carbon\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Str;
class WeeklyBriefService
{
/**
 * Substrings that route a news item into the "lab" group in groupNews().
 * Checked third, after RANKING_KEYWORDS and COOP_KEYWORDS.
 *
 * @var list<string>
 */
protected const LAB_KEYWORDS = ['实验室', '研究院', '人工智能学院', 'PI', '入职', '加入', '团队', '学院', '研究中心'];
/**
 * Substrings that route a news item into the "coop" group in groupNews().
 * Checked second, after RANKING_KEYWORDS.
 *
 * @var list<string>
 */
protected const COOP_KEYWORDS = ['校企', '合作', '联合实验室', '联合', '入驻', '签约', '成立'];
/**
 * Substrings that route a news item into the "tech" group in groupNews().
 * Checked last; items matching none of the four lists land in "other".
 *
 * @var list<string>
 */
protected const TECH_KEYWORDS = ['突破', '发布', '开源', '模型', '大模型', 'Token', '智能体', '算力', 'DeepSeek', 'Kimi', 'Qwen'];
/**
 * Substrings that route a news item into the "ranking" group in groupNews().
 * Checked first, so it wins over every other keyword list.
 *
 * @var list<string>
 */
protected const RANKING_KEYWORDS = ['排名', 'CSRankings', '软科', 'AIRankings', '学科实力', '上榜', '登顶'];
/**
 * Resolve the Monday-to-Sunday window that ends on the most recent Sunday.
 *
 * The reference moment (default: now) is converted to Asia/Shanghai and
 * snapped to the start of its day. If that day is a Sunday the window ends
 * on it; otherwise it ends on the closest earlier Sunday.
 *
 * @return array{0: Carbon, 1: Carbon} [Monday at start of day, Sunday at end of day]
 */
public function resolvePreviousWeek(?Carbon $reference = null): array
{
    $anchor = ($reference ?? now())->copy()->timezone('Asia/Shanghai')->startOfDay();

    $closingSunday = $anchor->isSunday()
        ? $anchor->copy()
        : $anchor->copy()->previous(Carbon::SUNDAY);

    return [
        $closingSunday->copy()->startOfWeek(Carbon::MONDAY),
        $closingSunday->copy()->endOfDay(),
    ];
}
/**
 * Build the weekly AI brief for a date window and persist it.
 *
 * Both bounds are normalised to Asia/Shanghai (start of day / end of day)
 * before papers, news and teachers are loaded. The Markdown document has
 * five numbered sections, then a list of every distinct source URL that was
 * linked, then a generation-time footer.
 *
 * @param Carbon   $weekStart   First day of the window.
 * @param Carbon   $weekEnd     Last day of the window.
 * @param int|null $adminUserId Stored on the brief as admin_user_id.
 * @param bool     $replace     When true, briefs already stored for the same
 *                              week_start/week_end dates are deleted and the
 *                              new one is inserted in a single transaction.
 *
 * @return WeeklyBrief The newly created brief.
 */
public function generate(Carbon $weekStart, Carbon $weekEnd, ?int $adminUserId = null, bool $replace = true): WeeklyBrief
{
    $weekStart = $weekStart->copy()->timezone('Asia/Shanghai')->startOfDay();
    $weekEnd = $weekEnd->copy()->timezone('Asia/Shanghai')->endOfDay();

    $papers = $this->fetchPapers($weekStart, $weekEnd);
    $newsItems = $this->fetchNews($weekStart, $weekEnd);
    $teachers = $this->fetchTeachers($weekStart, $weekEnd);

    [
        'lab' => $labNews,
        'coop' => $coopNews,
        'tech' => $techNews,
        'ranking' => $rankingNews,
        'other' => $otherNews,
    ] = $this->groupNews($newsItems);

    // Eloquent serialises date attributes with format() and performs no
    // timezone conversion, so the value written to generated_at must stay in
    // the application's own timezone. The Shanghai copy is for display only.
    $storedAt = now();
    $generatedAt = $storedAt->copy()->timezone('Asia/Shanghai');

    $titleDate = $this->formatChineseDate($generatedAt);
    $periodMonth = $weekStart->format('Y年n月');
    $title = "中国高校AI科技成果简报 | {$titleDate}";

    $lines = [];
    $lines[] = "# {$title}";
    $lines[] = '';
    $lines[] = '> 本简报汇总'.$periodMonth.'国内重点高校在人工智能领域的最新研究论文、技术突破、校企合作项目及实验室动态。';
    $lines[] = '';
    $lines[] = '---';
    $lines[] = '';

    // $refIndex and $references are threaded by reference through every
    // renderer so that source links are collected across all sections.
    $refIndex = 1;
    $references = [];

    // Section 1: papers first, then news that matched no keyword group.
    $lines[] = '## 一、重要AI论文与研究成果';
    $lines[] = '';
    if ($papers->isEmpty() && $otherNews->isEmpty()) {
        $lines[] = '_本周暂无相关论文入库。_';
        $lines[] = '';
    } else {
        $index = 1;
        foreach ($papers as $paper) {
            $lines = array_merge($lines, $this->renderPaperEntry($index, $paper, $refIndex, $references));
            $index++;
        }
        foreach ($otherNews as $item) {
            $lines = array_merge($lines, $this->renderNewsEntry($index, $item, $refIndex, $references, '核心成果'));
            $index++;
        }
    }

    // Sections 2-5 share one layout; only the lab section lists teachers.
    $newsSections = [
        ['## 二、高校AI实验室与研究院动态', $labNews, $teachers],
        ['## 三、校企合作项目与产业落地', $coopNews, collect()],
        ['## 四、国内AI领域突破性技术进展', $techNews, collect()],
        ['## 五、高校排名与学科实力', $rankingNews, collect()],
    ];
    foreach ($newsSections as [$heading, $sectionNews, $sectionTeachers]) {
        $lines[] = '---';
        $lines[] = '';
        $lines[] = $heading;
        $lines[] = '';
        $lines = array_merge($lines, $this->renderNewsSection($sectionNews, $sectionTeachers, $refIndex, $references, 1));
    }

    $lines[] = '---';
    $lines[] = '';
    $lines[] = '## 参考来源汇总';
    $lines[] = '';
    foreach ($references as $i => $ref) {
        $num = $i + 1;
        $lines[] = "{$num}. {$ref['label']} {$ref['url']}";
    }
    if ($references === []) {
        $lines[] = '_本周暂无外部来源链接。_';
    }
    $lines[] = '';
    $lines[] = '---';
    $lines[] = '';
    $lines[] = '*简报生成时间:'.$generatedAt->format('Y年n月j日 H:i').' (Asia/Shanghai)*';

    $markdown = implode("\n", $lines);

    $stats = [
        'papers_count' => $papers->count(),
        'news_count' => $newsItems->count(),
        'teachers_count' => $teachers->count(),
        'references_count' => count($references),
        'sections' => [
            'papers' => $papers->count(),
            'lab' => $labNews->count() + $teachers->count(),
            'coop' => $coopNews->count(),
            'tech' => $techNews->count(),
            'ranking' => $rankingNews->count(),
            'other' => $otherNews->count(),
        ],
    ];

    $attributes = [
        'week_start' => $weekStart->toDateString(),
        'week_end' => $weekEnd->toDateString(),
        'title' => $title,
        'markdown' => $markdown,
        'stats_json' => $stats,
        'admin_user_id' => $adminUserId,
        'generated_at' => $storedAt,
    ];

    if (! $replace) {
        return WeeklyBrief::query()->create($attributes);
    }

    // Delete and insert atomically: if the insert fails, the previously
    // stored brief for this week must survive.
    return (new WeeklyBrief())->getConnection()->transaction(
        static function () use ($attributes): WeeklyBrief {
            WeeklyBrief::query()
                ->whereDate('week_start', $attributes['week_start'])
                ->whereDate('week_end', $attributes['week_end'])
                ->delete();

            return WeeklyBrief::query()->create($attributes);
        }
    );
}
/**
 * Load up to 30 crawled papers whose rows were created inside the window.
 *
 * Only papers with a crawl_job_id are considered. Results are ordered by
 * published_at, then id, both descending.
 *
 * @return Collection<int, Paper>
 */
protected function fetchPapers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // The query builder binds DateTime values via format() with no timezone
    // conversion. Re-express the bounds in PHP's default timezone so they are
    // compared against created_at on the same clock.
    // NOTE(review): assumes created_at is written in the default timezone
    // (Laravel's app.timezone), which is the framework default - confirm.
    $storageTimezone = date_default_timezone_get();
    $from = $weekStart->copy()->setTimezone($storageTimezone);
    $until = $weekEnd->copy()->setTimezone($storageTimezone);

    return Paper::query()
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', [$from, $until])
        ->orderByDesc('published_at')
        ->orderByDesc('id')
        ->limit(30)
        ->get();
}
/**
 * Load up to 50 crawled news items whose rows were created inside the window.
 *
 * Only news with a crawl_job_id is considered; the categoryItem relation is
 * eager-loaded. Results are ordered by published_at, then id, both descending.
 *
 * @return Collection<int, News>
 */
protected function fetchNews(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // The query builder binds DateTime values via format() with no timezone
    // conversion. Re-express the bounds in PHP's default timezone so they are
    // compared against created_at on the same clock.
    // NOTE(review): assumes created_at is written in the default timezone
    // (Laravel's app.timezone), which is the framework default - confirm.
    $storageTimezone = date_default_timezone_get();
    $from = $weekStart->copy()->setTimezone($storageTimezone);
    $until = $weekEnd->copy()->setTimezone($storageTimezone);

    return News::query()
        ->with('categoryItem')
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', [$from, $until])
        ->orderByDesc('published_at')
        ->orderByDesc('id')
        ->limit(50)
        ->get();
}
/**
 * Load up to 20 teachers whose rows were created inside the window.
 *
 * A teacher qualifies only when at least one of bio, department or title is
 * non-null. The university relation is eager-loaded; newest ids come first.
 *
 * @return Collection<int, Teacher>
 */
protected function fetchTeachers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // The query builder binds DateTime values via format() with no timezone
    // conversion. Re-express the bounds in PHP's default timezone so they are
    // compared against created_at on the same clock.
    // NOTE(review): assumes created_at is written in the default timezone
    // (Laravel's app.timezone), which is the framework default - confirm.
    $storageTimezone = date_default_timezone_get();
    $from = $weekStart->copy()->setTimezone($storageTimezone);
    $until = $weekEnd->copy()->setTimezone($storageTimezone);

    return Teacher::query()
        ->with('university')
        ->whereBetween('created_at', [$from, $until])
        ->where(function ($q) {
            // Grouped so the OR conditions do not escape the date filter.
            $q->whereNotNull('bio')
                ->orWhereNotNull('department')
                ->orWhereNotNull('title');
        })
        ->orderByDesc('id')
        ->limit(20)
        ->get();
}
/**
 * Sort news items into the five brief sections by keyword.
 *
 * Title and summary are joined and tested against the keyword lists in a
 * fixed order: ranking, coop, lab, tech. The first list that matches claims
 * the item; an item that matches nothing goes to "other".
 *
 * @param Collection<int, News> $newsItems
 * @return array{lab: Collection, coop: Collection, tech: Collection, ranking: Collection, other: Collection}
 */
protected function groupNews(Collection $newsItems): array
{
    $buckets = [
        'lab' => collect(),
        'coop' => collect(),
        'tech' => collect(),
        'ranking' => collect(),
        'other' => collect(),
    ];

    foreach ($newsItems as $news) {
        $haystack = $news->title.' '.($news->summary ?? '');

        // match(true) evaluates its arms top to bottom and stops at the first hit.
        $target = match (true) {
            $this->matchesKeywords($haystack, self::RANKING_KEYWORDS) => 'ranking',
            $this->matchesKeywords($haystack, self::COOP_KEYWORDS) => 'coop',
            $this->matchesKeywords($haystack, self::LAB_KEYWORDS) => 'lab',
            $this->matchesKeywords($haystack, self::TECH_KEYWORDS) => 'tech',
            default => 'other',
        };

        $buckets[$target]->push($news);
    }

    return $buckets;
}
/**
 * Report whether the text contains at least one of the given keywords.
 *
 * The comparison is a case-sensitive substring test; empty keywords never match.
 *
 * @param list<string> $keywords
 */
protected function matchesKeywords(string $text, array $keywords): bool
{
    // Str::contains() accepts a list of needles, skips empty ones and
    // returns true on the first needle found in the haystack.
    return Str::contains($text, $keywords);
}
/**
 * Render one paper as a Markdown sub-heading followed by bullet lines.
 *
 * Emits, in order: heading ("school | title"), publication date, summary
 * (max 280 chars), authors (max 120 chars), source link, and a blank line.
 * Optional bullets are skipped when the paper has no value for them.
 *
 * @param int   $index      Number shown in the heading.
 * @param Paper $paper      Paper to render.
 * @param int   $refIndex   Reference counter, advanced by appendReference().
 * @param list<array{label: string, url: string}> $references Collected source links, appended to by reference.
 * @return list<string>
 */
protected function renderPaperEntry(int $index, Paper $paper, int &$refIndex, array &$references): array
{
    // Fall back to the generic label for whitespace-only names too.
    $school = trim((string) $paper->school_name);
    if ($school === '') {
        $school = '国内高校';
    }

    // Crawled titles can contain hard line breaks, which would split the
    // Markdown heading; collapse every whitespace run into a single space.
    $rawTitle = (string) $paper->title;
    $title = trim(preg_replace('/\s+/u', ' ', $rawTitle) ?? $rawTitle);

    $lines = [];
    $lines[] = "### {$index}. {$school} | {$title}";

    if ($paper->published_at) {
        // Shown in Asia/Shanghai, matching how news entries print their date.
        $lines[] = '- **发表时间**'.$paper->published_at->copy()->timezone('Asia/Shanghai')->format('Y年n月j日');
    }

    $summary = $this->cleanText($paper->summary);
    if ($summary !== '') {
        $lines[] = '- **核心成果**'.$this->truncate($summary, 280);
    }

    if ($paper->authors) {
        $lines[] = '- **研究团队**'.$this->truncate($this->cleanText($paper->authors), 120);
    }

    $source = $this->appendReference($paper->source_site ?: '论文来源', $paper->url, $refIndex, $references);
    $lines[] = '- **来源**'.$source;
    $lines[] = '';

    return $lines;
}
/**
 * Render one news item as a Markdown sub-heading followed by bullet lines.
 *
 * Emits, in order: heading (title), date in Asia/Shanghai, summary (max 280
 * chars) under $summaryLabel, source media, source link, and a blank line.
 * Optional bullets are skipped when the item has no value for them.
 *
 * @param int    $index        Number shown in the heading.
 * @param News   $news         News item to render.
 * @param int    $refIndex     Reference counter, advanced by appendReference().
 * @param list<array{label: string, url: string}> $references Collected source links, appended to by reference.
 * @param string $summaryLabel Bold label placed before the summary text.
 * @return list<string>
 */
protected function renderNewsEntry(int $index, News $news, int &$refIndex, array &$references, string $summaryLabel = '核心内容'): array
{
    // Crawled titles can contain hard line breaks, which would split the
    // Markdown heading; collapse every whitespace run into a single space.
    $rawTitle = (string) $news->title;
    $headline = trim(preg_replace('/\s+/u', ' ', $rawTitle) ?? $rawTitle);

    $lines = [];
    $lines[] = "### {$index}. {$headline}";

    if ($news->published_at) {
        // copy() so the timezone switch never touches the attribute instance.
        $lines[] = '- **时间**'.$news->published_at->copy()->timezone('Asia/Shanghai')->format('Y年n月j日');
    }

    $summary = $this->cleanText($news->summary);
    if ($summary !== '') {
        $lines[] = "- **{$summaryLabel}**".$this->truncate($summary, 280);
    }

    if ($news->source) {
        $lines[] = '- **来源媒体**'.$this->cleanText($news->source);
    }

    // Link label preference: source_site, then source, then a generic label.
    $label = $news->source_site ?: ($news->source ?: '资讯来源');
    $source = $this->appendReference($label, $news->source_url ?: $news->cover_url, $refIndex, $references);
    $lines[] = '- **来源**'.$source;
    $lines[] = '';

    return $lines;
}
/**
 * Render the body of one news section: news entries first, then teachers.
 *
 * Entries are numbered consecutively from $startIndex across both lists.
 * When both lists are empty a single placeholder line is returned instead.
 *
 * @param Collection<int, News> $newsItems
 * @param Collection<int, Teacher> $teachers
 * @param list<array{label: string, url: string}> $references
 * @return list<string>
 */
protected function renderNewsSection(Collection $newsItems, Collection $teachers, int &$refIndex, array &$references, int $startIndex): array
{
    if ($newsItems->isEmpty() && $teachers->isEmpty()) {
        return ['_本周暂无相关动态。_', ''];
    }

    $output = [];
    $number = $startIndex;

    foreach ($newsItems as $news) {
        $entry = $this->renderNewsEntry($number, $news, $refIndex, $references);
        $output = [...$output, ...$entry];
        $number++;
    }

    foreach ($teachers as $teacher) {
        // School label preference: related university, free-text field, generic label.
        $school = $teacher->university?->name;
        if (! $school) {
            $school = $teacher->university_text;
        }
        if (! $school) {
            $school = '国内高校';
        }

        $output[] = "### {$number}. {$school} | {$teacher->name}";

        // A null value means the teacher has nothing for that bullet.
        $bullets = [
            '- **职称**' => $teacher->title ? $this->cleanText($teacher->title) : null,
            '- **院系**' => $teacher->department ? $this->cleanText($teacher->department) : null,
            '- **简介**' => $teacher->bio ? $this->truncate($this->cleanText($teacher->bio), 220) : null,
        ];
        foreach ($bullets as $prefix => $value) {
            if ($value !== null) {
                $output[] = $prefix.$value;
            }
        }

        $output[] = '';
        $number++;
    }

    return $output;
}
/**
 * Turn a source label and URL into inline Markdown and record the URL once.
 *
 * Without a usable http(s) URL the cleaned label is returned as plain text
 * and nothing is recorded. Otherwise a Markdown link is returned, and the URL
 * is appended to $references unless an entry with the same URL already exists.
 *
 * @param string      $label      Display text; falls back to '来源' when empty after cleaning.
 * @param string|null $url        Candidate link target.
 * @param int         $refIndex   Incremented each time a new reference is recorded.
 * @param list<array{label: string, url: string}> $references Collected source links, appended to by reference.
 * @return string Markdown link, or the plain label when there is no valid URL.
 */
protected function appendReference(string $label, ?string $url, int &$refIndex, array &$references): string
{
    $label = $this->cleanText($label) ?: '来源';
    $url = trim((string) $url);
    if ($url === '' || ! preg_match('#^https?://#i', $url)) {
        return $label;
    }

    // Crawled labels and URLs can contain characters that terminate a
    // Markdown link early: escape brackets in the text and percent-encode
    // spaces and parentheses in the destination.
    $link = sprintf(
        '[%s](%s)',
        str_replace(['[', ']'], ['\\[', '\\]'], $label),
        str_replace([' ', '(', ')'], ['%20', '%28', '%29'], $url)
    );

    // The reference list keeps the original URL and is de-duplicated on it.
    if (! in_array($url, array_column($references, 'url'), true)) {
        $references[] = ['label' => $label.' - '.$this->briefTitleFromUrl($url), 'url' => $url];
        $refIndex++;
    }

    return $link;
}
/**
 * Derive a short label for a URL: its host name, or the URL itself when no
 * host can be parsed, limited to 48 with no trailing marker.
 */
protected function briefTitleFromUrl(string $url): string
{
    $host = parse_url($url, PHP_URL_HOST);
    if (! $host) {
        $host = $url;
    }

    return Str::limit($host, 48, '');
}
/**
 * Format a date as year, month and day with Chinese unit suffixes and no
 * zero padding on month or day.
 */
protected function formatChineseDate(Carbon $date): string
{
    $pattern = 'Y年n月j日';

    return $date->format($pattern);
}
/**
 * Reduce a possibly-HTML string to single-line plain text.
 *
 * Tags are stripped, every run of whitespace becomes one space, and the
 * result is trimmed. Null or blank input yields an empty string.
 *
 * @param string|null $text Raw text, possibly containing markup.
 * @return string Cleaned text; empty when there is nothing to show.
 */
protected function cleanText(?string $text): string
{
    if ($text === null || trim($text) === '') {
        return '';
    }

    $plain = strip_tags($text);
    $collapsed = preg_replace('/\s+/u', ' ', $plain);

    if ($collapsed === null) {
        // The /u modifier makes preg_replace() fail on malformed UTF-8.
        // Replace the invalid bytes and retry instead of dropping the text.
        $collapsed = preg_replace('/\s+/u', ' ', mb_scrub($plain, 'UTF-8')) ?? '';
    }

    return trim($collapsed);
}
/**
 * Shorten text to at most $limit characters, ending with an ellipsis.
 *
 * Length is counted in characters, not bytes. Text that already fits is
 * returned unchanged; longer text is cut to $limit - 1 characters plus '…'.
 *
 * @param string $text  Text to shorten.
 * @param int    $limit Maximum number of characters in the result.
 * @return string The shortened text, or '' when $limit is zero or negative.
 */
protected function truncate(string $text, int $limit): string
{
    // A non-positive limit would give mb_substr() a negative length, which
    // keeps almost the whole string instead of cutting it.
    if ($limit <= 0) {
        return '';
    }

    if (mb_strlen($text) <= $limit) {
        return $text;
    }

    return mb_substr($text, 0, $limit - 1).'…';
}
}