You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

417 lines
14 KiB

2 days ago
<?php
namespace App\Services\Brief;
use App\Models\News;
use App\Models\Paper;
use App\Models\Teacher;
use App\Models\WeeklyBrief;
use Carbon\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Str;
class WeeklyBriefService
{
/**
 * Substrings that route a news item into the "lab / institute" section.
 *
 * Matching is a case-sensitive substring test (see matchesKeywords()), so a short
 * entry such as 'PI' also matches inside longer words like "API".
 * groupNews() checks this list third, after the ranking and cooperation lists.
 * NOTE(review): '学院' and '团队' are very broad, and '人工智能学院' is already covered
 * by '学院' — confirm this is intended.
 *
 * @var list<string>
 */
protected const LAB_KEYWORDS = ['实验室', '研究院', '人工智能学院', 'PI', '入职', '加入', '团队', '学院', '研究中心'];
/**
 * Substrings that route a news item into the "university–industry cooperation" section.
 *
 * groupNews() checks this list second, before LAB_KEYWORDS, so an item containing
 * '联合实验室' lands here rather than in the lab section.
 *
 * @var list<string>
 */
protected const COOP_KEYWORDS = ['校企', '合作', '联合实验室', '联合', '入驻', '签约', '成立'];
/**
 * Substrings that route a news item into the "technical breakthroughs" section.
 *
 * groupNews() checks this list last; product names are matched case-sensitively.
 *
 * @var list<string>
 */
protected const TECH_KEYWORDS = ['突破', '发布', '开源', '模型', '大模型', 'Token', '智能体', '算力', 'DeepSeek', 'Kimi', 'Qwen'];
/**
 * Substrings that route a news item into the "rankings" section.
 *
 * groupNews() checks this list first, so it wins over every other category.
 *
 * @var list<string>
 */
protected const RANKING_KEYWORDS = ['排名', 'CSRankings', '软科', 'AIRankings', '学科实力', '上榜', '登顶'];
/**
 * Work out the Monday-to-Sunday window a brief should cover.
 *
 * The reference moment (default: now) is moved to Asia/Shanghai and floored to midnight.
 * The window closes on the most recent Sunday — the reference day itself when that day
 * is a Sunday — and opens on the Monday of the same week.
 *
 * @return array{0: Carbon, 1: Carbon} [Monday start-of-day, Sunday end-of-day] in Asia/Shanghai
 */
public function resolvePreviousWeek(?Carbon $reference = null): array
{
    $anchor = ($reference ?? now())->copy()->timezone('Asia/Shanghai')->startOfDay();

    // A Sunday reference closes its own week; any other day looks back to the last Sunday.
    $closingSunday = $anchor->isSunday()
        ? $anchor->copy()
        : $anchor->copy()->previous(Carbon::SUNDAY);

    return [
        $closingSunday->copy()->startOfWeek(Carbon::MONDAY),
        $closingSunday->copy()->endOfDay(),
    ];
}
/**
 * Build the Markdown brief for one week and persist it as a WeeklyBrief row.
 *
 * The window bounds are normalised to Asia/Shanghai (start of day / end of day), the
 * week's papers, news and teachers are loaded, news is split into sections by keyword,
 * and the document is assembled as five numbered sections plus a reference list.
 *
 * Persistence runs inside a single database transaction so that, when $replace is true,
 * a failed insert cannot leave the week without any brief (the delete is rolled back).
 *
 * @param Carbon   $weekStart   first day of the covered week (any timezone; not mutated)
 * @param Carbon   $weekEnd     last day of the covered week (any timezone; not mutated)
 * @param int|null $adminUserId stored on the row as admin_user_id
 * @param bool     $replace     when true, existing briefs for the same week are deleted first
 *
 * @return WeeklyBrief the newly created row
 */
public function generate(Carbon $weekStart, Carbon $weekEnd, ?int $adminUserId = null, bool $replace = true): WeeklyBrief
{
    $weekStart = $weekStart->copy()->timezone('Asia/Shanghai')->startOfDay();
    $weekEnd = $weekEnd->copy()->timezone('Asia/Shanghai')->endOfDay();

    $papers = $this->fetchPapers($weekStart, $weekEnd);
    $newsItems = $this->fetchNews($weekStart, $weekEnd);
    $teachers = $this->fetchTeachers($weekStart, $weekEnd);

    $groupedNews = $this->groupNews($newsItems);
    $labNews = $groupedNews['lab'];
    $coopNews = $groupedNews['coop'];
    $techNews = $groupedNews['tech'];
    $rankingNews = $groupedNews['ranking'];
    $otherNews = $groupedNews['other'];

    $generatedAt = now()->timezone('Asia/Shanghai');
    $titleDate = $this->formatChineseDate($generatedAt);
    $periodMonth = $weekStart->format('Y年n月');
    $title = "中国高校AI科技成果简报 | {$titleDate}";

    // $refIndex is only a counter maintained by appendReference(); the printed numbering
    // below is derived from each reference's position in $references.
    $refIndex = 1;
    $references = [];

    $lines = [
        "# {$title}",
        '',
        '> 本简报汇总'.$periodMonth.'国内重点高校在人工智能领域的最新研究论文、技术突破、校企合作项目及实验室动态。',
        '',
        '---',
        '',
        '## 一、重要AI论文与研究成果',
        '',
    ];

    // Section 1: papers first, then news that matched no keyword category.
    if ($papers->isEmpty() && $otherNews->isEmpty()) {
        $lines[] = '_本周暂无相关论文入库。_';
        $lines[] = '';
    } else {
        $index = 1;
        foreach ($papers as $paper) {
            $lines = array_merge($lines, $this->renderPaperEntry($index, $paper, $refIndex, $references));
            $index++;
        }
        foreach ($otherNews as $item) {
            $lines = array_merge($lines, $this->renderNewsEntry($index, $item, $refIndex, $references, '核心成果'));
            $index++;
        }
    }

    // Sections 2-5 share one layout; only the lab section also lists teachers.
    $newsSections = [
        ['## 二、高校AI实验室与研究院动态', $labNews, $teachers],
        ['## 三、校企合作项目与产业落地', $coopNews, collect()],
        ['## 四、国内AI领域突破性技术进展', $techNews, collect()],
        ['## 五、高校排名与学科实力', $rankingNews, collect()],
    ];
    foreach ($newsSections as [$heading, $sectionNews, $sectionTeachers]) {
        $lines[] = '---';
        $lines[] = '';
        $lines[] = $heading;
        $lines[] = '';
        $lines = array_merge(
            $lines,
            $this->renderNewsSection($sectionNews, $sectionTeachers, $refIndex, $references, 1)
        );
    }

    $lines[] = '---';
    $lines[] = '';
    $lines[] = '## 参考来源汇总';
    $lines[] = '';
    foreach ($references as $i => $ref) {
        $num = $i + 1;
        $lines[] = "{$num}. {$ref['label']} {$ref['url']}";
    }
    if ($references === []) {
        $lines[] = '_本周暂无外部来源链接。_';
    }
    $lines[] = '';
    $lines[] = '---';
    $lines[] = '';
    $lines[] = '*简报生成时间:'.$generatedAt->format('Y年n月j日 H:i').' (Asia/Shanghai)*';

    $markdown = implode("\n", $lines);

    $stats = [
        'papers_count' => $papers->count(),
        'news_count' => $newsItems->count(),
        'teachers_count' => $teachers->count(),
        'references_count' => count($references),
        'sections' => [
            'papers' => $papers->count(),
            'lab' => $labNews->count() + $teachers->count(),
            'coop' => $coopNews->count(),
            'tech' => $techNews->count(),
            'ranking' => $rankingNews->count(),
            'other' => $otherNews->count(),
        ],
    ];

    $attributes = [
        'week_start' => $weekStart->toDateString(),
        'week_end' => $weekEnd->toDateString(),
        'title' => $title,
        'markdown' => $markdown,
        // NOTE(review): presumably cast to array/json on the model — confirm.
        'stats_json' => $stats,
        'admin_user_id' => $adminUserId,
        // Eloquent serialises a Carbon value with format() in the value's own timezone,
        // so hand it over in PHP's default timezone (the one Eloquent's own timestamps
        // are written in) rather than as a Shanghai wall-clock time.
        'generated_at' => $generatedAt->copy()->timezone(date_default_timezone_get()),
    ];

    // Delete + insert must succeed or fail together, otherwise a failed insert would
    // silently destroy the previous brief for this week.
    return (new WeeklyBrief())->getConnection()->transaction(
        static function () use ($attributes, $replace): WeeklyBrief {
            if ($replace) {
                WeeklyBrief::query()
                    ->whereDate('week_start', $attributes['week_start'])
                    ->whereDate('week_end', $attributes['week_end'])
                    ->delete();
            }

            return WeeklyBrief::query()->create($attributes);
        }
    );
}
/**
 * Load up to 30 crawled papers whose rows were created inside the given window,
 * newest publication first (ties broken by id, descending).
 *
 * Only rows with a non-null crawl_job_id are considered.
 *
 * @return Collection<int, Paper>
 */
protected function fetchPapers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // Query bindings are rendered with format() and no timezone shift, so the bounds
    // must be expressed in the timezone the stored timestamps use. This assumes
    // created_at is maintained by Eloquent's default timestamps, which are generated
    // in PHP's default timezone.
    $storageTz = date_default_timezone_get();
    $from = $weekStart->copy()->timezone($storageTz);
    $to = $weekEnd->copy()->timezone($storageTz);

    return Paper::query()
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', [$from, $to])
        ->orderByDesc('published_at')
        ->orderByDesc('id')
        ->limit(30)
        ->get();
}
/**
 * Load up to 50 crawled news items whose rows were created inside the given window,
 * newest publication first (ties broken by id, descending), with the categoryItem
 * relation eager-loaded.
 *
 * Only rows with a non-null crawl_job_id are considered.
 *
 * @return Collection<int, News>
 */
protected function fetchNews(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // Query bindings are rendered with format() and no timezone shift, so the bounds
    // must be expressed in the timezone the stored timestamps use. This assumes
    // created_at is maintained by Eloquent's default timestamps, which are generated
    // in PHP's default timezone.
    $storageTz = date_default_timezone_get();
    $from = $weekStart->copy()->timezone($storageTz);
    $to = $weekEnd->copy()->timezone($storageTz);

    return News::query()
        ->with('categoryItem')
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', [$from, $to])
        ->orderByDesc('published_at')
        ->orderByDesc('id')
        ->limit(50)
        ->get();
}
/**
 * Load up to 20 teachers whose rows were created inside the given window, newest id
 * first, with the university relation eager-loaded.
 *
 * A teacher is included only when at least one of bio, department or title is non-null.
 *
 * @return Collection<int, Teacher>
 */
protected function fetchTeachers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // Query bindings are rendered with format() and no timezone shift, so the bounds
    // must be expressed in the timezone the stored timestamps use. This assumes
    // created_at is maintained by Eloquent's default timestamps, which are generated
    // in PHP's default timezone.
    $storageTz = date_default_timezone_get();
    $from = $weekStart->copy()->timezone($storageTz);
    $to = $weekEnd->copy()->timezone($storageTz);

    return Teacher::query()
        ->with('university')
        ->whereBetween('created_at', [$from, $to])
        // Grouped so the OR conditions do not escape the created_at constraint.
        ->where(static function ($q) {
            $q->whereNotNull('bio')
                ->orWhereNotNull('department')
                ->orWhereNotNull('title');
        })
        ->orderByDesc('id')
        ->limit(20)
        ->get();
}
/**
 * Split news items into the brief's sections by keyword.
 *
 * Each item's title and summary are joined with a space and tested against the keyword
 * lists in a fixed priority: ranking, cooperation, lab, tech. The first list that
 * matches decides the bucket; items matching none go to "other". Every item ends up in
 * exactly one bucket and keeps its original relative order.
 *
 * @param Collection<int, News> $newsItems
 * @return array{lab: Collection, coop: Collection, tech: Collection, ranking: Collection, other: Collection}
 */
protected function groupNews(Collection $newsItems): array
{
    $buckets = [
        'lab' => collect(),
        'coop' => collect(),
        'tech' => collect(),
        'ranking' => collect(),
        'other' => collect(),
    ];

    foreach ($newsItems as $news) {
        $haystack = $news->title.' '.($news->summary ?? '');

        // Arms are evaluated top to bottom and stop at the first match.
        $bucket = match (true) {
            $this->matchesKeywords($haystack, self::RANKING_KEYWORDS) => 'ranking',
            $this->matchesKeywords($haystack, self::COOP_KEYWORDS) => 'coop',
            $this->matchesKeywords($haystack, self::LAB_KEYWORDS) => 'lab',
            $this->matchesKeywords($haystack, self::TECH_KEYWORDS) => 'tech',
            default => 'other',
        };

        $buckets[$bucket]->push($news);
    }

    return $buckets;
}
/**
 * Tell whether the text contains at least one of the given keywords.
 *
 * The check is a case-sensitive substring search; empty keywords are ignored.
 *
 * @param list<string> $keywords
 */
protected function matchesKeywords(string $text, array $keywords): bool
{
    foreach ($keywords as $needle) {
        // An empty needle is skipped rather than treated as a match.
        if ($needle === '') {
            continue;
        }

        if (Str::contains($text, $needle)) {
            return true;
        }
    }

    return false;
}
/**
 * Render one paper as a level-3 Markdown heading followed by bullet lines.
 *
 * The heading is "<school> | <title>". Publication date, summary (max 280 chars) and
 * authors (max 120 chars) are emitted only when present. The source bullet is produced
 * by appendReference(), which may also register the paper URL in $references.
 *
 * @param int    $index      ordinal shown in the heading
 * @param Paper  $paper      paper to render
 * @param int    $refIndex   reference counter, passed through to appendReference()
 * @param list<array{label: string, url: string}> $references collected references (appended to)
 * @return list<string> Markdown lines, ending with an empty separator line
 */
protected function renderPaperEntry(int $index, Paper $paper, int &$refIndex, array &$references): array
{
    // Trim before applying the fallback so a whitespace-only school name cannot
    // produce a heading that starts with " | ".
    $school = trim((string) $paper->school_name);
    if ($school === '') {
        $school = '国内高校';
    }

    // A raw title with line breaks or markup would break the single-line heading, so it
    // is normalised like the other text fields; the raw value is kept if cleaning
    // leaves nothing.
    $rawTitle = trim((string) $paper->title);
    $title = $this->cleanText($rawTitle);
    if ($title === '') {
        $title = $rawTitle;
    }

    $lines = [];
    $lines[] = "### {$index}. {$school} | {$title}";

    if ($paper->published_at) {
        // NOTE(review): unlike renderNewsEntry(), no Asia/Shanghai conversion is applied
        // here — presumably published_at is date-only for papers; confirm.
        $lines[] = '- **发表时间**'.$paper->published_at->format('Y年n月j日');
    }

    $summary = $this->cleanText($paper->summary);
    if ($summary !== '') {
        $lines[] = '- **核心成果**'.$this->truncate($summary, 280);
    }

    if ($paper->authors) {
        $lines[] = '- **研究团队**'.$this->truncate($this->cleanText($paper->authors), 120);
    }

    $source = $this->appendReference($paper->source_site ?: '论文来源', $paper->url, $refIndex, $references);
    $lines[] = '- **来源**'.$source;
    $lines[] = '';

    return $lines;
}
/**
 * Render one news item as a level-3 Markdown heading followed by bullet lines.
 *
 * Publication date (shown in Asia/Shanghai), summary (max 280 chars) and source media
 * are emitted only when present. The source bullet is produced by appendReference(),
 * which may also register the item's URL in $references.
 *
 * @param int    $index        ordinal shown in the heading
 * @param News   $news         news item to render
 * @param int    $refIndex     reference counter, passed through to appendReference()
 * @param list<array{label: string, url: string}> $references collected references (appended to)
 * @param string $summaryLabel bold label used for the summary bullet
 * @return list<string> Markdown lines, ending with an empty separator line
 */
protected function renderNewsEntry(int $index, News $news, int &$refIndex, array &$references, string $summaryLabel = '核心内容'): array
{
    // A raw title with line breaks or markup would break the single-line heading, so it
    // is normalised like the other text fields; the raw value is kept if cleaning
    // leaves nothing.
    $rawTitle = trim((string) $news->title);
    $headline = $this->cleanText($rawTitle);
    if ($headline === '') {
        $headline = $rawTitle;
    }

    $lines = [];
    $lines[] = "### {$index}. {$headline}";

    if ($news->published_at) {
        $lines[] = '- **时间**'.$news->published_at->timezone('Asia/Shanghai')->format('Y年n月j日');
    }

    $summary = $this->cleanText($news->summary);
    if ($summary !== '') {
        $lines[] = "- **{$summaryLabel}**".$this->truncate($summary, 280);
    }

    // Decide on the cleaned value so a whitespace-only source does not emit an empty bullet.
    $media = $news->source ? $this->cleanText($news->source) : '';
    if ($media !== '') {
        $lines[] = '- **来源媒体**'.$media;
    }

    $label = $news->source_site ?: ($news->source ?: '资讯来源');
    // NOTE(review): falls back to cover_url when source_url is empty, so a cover image
    // may end up listed as the reference link — confirm this is intended.
    $source = $this->appendReference($label, $news->source_url ?: $news->cover_url, $refIndex, $references);
    $lines[] = '- **来源**'.$source;
    $lines[] = '';

    return $lines;
}
/**
 * Render the body of one section: its news entries followed by its teacher entries.
 *
 * Entries are numbered consecutively from $startIndex across both lists. When both
 * lists are empty a single "nothing this week" placeholder is returned instead.
 *
 * @param Collection<int, News> $newsItems
 * @param Collection<int, Teacher> $teachers
 * @param list<array{label: string, url: string}> $references
 * @return list<string>
 */
protected function renderNewsSection(Collection $newsItems, Collection $teachers, int &$refIndex, array &$references, int $startIndex): array
{
    if ($newsItems->isEmpty() && $teachers->isEmpty()) {
        return ['_本周暂无相关动态。_', ''];
    }

    $output = [];
    $ordinal = $startIndex;

    foreach ($newsItems as $news) {
        $entry = $this->renderNewsEntry($ordinal, $news, $refIndex, $references);
        array_push($output, ...$entry);
        $ordinal++;
    }

    foreach ($teachers as $teacher) {
        // Affiliation: related university name, then the free-text field, then a generic label.
        $affiliation = $teacher->university?->name;
        if (! $affiliation) {
            $affiliation = $teacher->university_text;
        }
        if (! $affiliation) {
            $affiliation = '国内高校';
        }

        $output[] = "### {$ordinal}. {$affiliation} | {$teacher->name}";

        if ($teacher->title) {
            $output[] = '- **职称**'.$this->cleanText($teacher->title);
        }
        if ($teacher->department) {
            $output[] = '- **院系**'.$this->cleanText($teacher->department);
        }
        if ($teacher->bio) {
            $bio = $this->cleanText($teacher->bio);
            $output[] = '- **简介**'.$this->truncate($bio, 220);
        }

        $output[] = '';
        $ordinal++;
    }

    return $output;
}
/**
 * Turn a label/URL pair into inline Markdown and record the URL as a reference.
 *
 * Without a usable http(s) URL the cleaned label is returned as plain text and nothing
 * is recorded. Otherwise a Markdown link is returned; the URL is added to $references
 * (and $refIndex is incremented) only the first time that exact URL is seen.
 *
 * @param list<array{label: string, url: string}> $references
 */
protected function appendReference(string $label, ?string $url, int &$refIndex, array &$references): string
{
    $text = $this->cleanText($label) ?: '来源';
    $href = trim((string) $url);

    $isHttpUrl = $href !== '' && preg_match('#^https?://#i', $href);
    if (! $isHttpUrl) {
        return $text;
    }

    $alreadyListed = in_array($href, array_column($references, 'url'), true);
    if (! $alreadyListed) {
        $references[] = [
            'label' => $text.' - '.$this->briefTitleFromUrl($href),
            'url' => $href,
        ];
        $refIndex++;
    }

    return "[{$text}]({$href})";
}
/**
 * Derive a short display title for a URL: its host name, or the whole URL when no host
 * can be parsed, limited to 48 with no trailing ellipsis.
 */
protected function briefTitleFromUrl(string $url): string
{
    $host = parse_url($url, PHP_URL_HOST);
    if (! $host) {
        $host = $url;
    }

    return Str::limit($host, 48, '');
}
/**
 * Format a date as "<year>年<month>月<day>日", month and day without leading zeros.
 */
protected function formatChineseDate(Carbon $date): string
{
    $pattern = 'Y年n月j日';

    return $date->format($pattern);
}
/**
 * Normalise free text for single-line Markdown output.
 *
 * HTML/PHP tags are stripped, every run of whitespace collapses to one space, and the
 * result is trimmed. Null or blank input yields an empty string.
 *
 * Input that is not valid UTF-8 makes the Unicode-aware regex fail; instead of silently
 * discarding the whole text, invalid byte sequences are scrubbed and the collapse is
 * retried.
 */
protected function cleanText(?string $text): string
{
    if ($text === null || trim($text) === '') {
        return '';
    }

    $stripped = strip_tags($text);
    $collapsed = preg_replace('/\s+/u', ' ', $stripped);

    if ($collapsed === null) {
        // preg_replace() returns null on malformed UTF-8; repair the string and retry.
        $collapsed = preg_replace('/\s+/u', ' ', mb_scrub($stripped, 'UTF-8')) ?? '';
    }

    return trim($collapsed);
}
/**
 * Shorten text to at most $limit characters (multibyte-aware).
 *
 * Text that already fits is returned unchanged. Longer text is cut to $limit - 1
 * characters and an ellipsis ("…") is appended, so the result is exactly $limit
 * characters long. A limit of zero or less leaves no room for any character and
 * yields an empty string.
 */
protected function truncate(string $text, int $limit): string
{
    if (mb_strlen($text) <= $limit) {
        return $text;
    }

    // Without this guard a non-positive limit would reach mb_substr() as a negative
    // length, which trims from the end and returns almost the entire text.
    if ($limit <= 0) {
        return '';
    }

    return mb_substr($text, 0, $limit - 1).'…';
}
}