You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

665 lines
24 KiB

1 week ago
<?php
namespace App\Services\Brief;
use App\Models\News;
use App\Models\Paper;
use App\Models\Teacher;
use App\Models\WeeklyBrief;
use Carbon\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Str;
class WeeklyBriefService
{
4 days ago
/**
 * Research tracks recognised by the brief: track key => display label.
 *
 * The keys are used to classify papers (they are matched against crawl URLs)
 * and the labels are printed in the generated Markdown.
 *
 * @var array<string, string>
 */
protected const TRACKS = [
'physics' => 'Physics',
'math' => 'Mathematics',
'cs' => 'Computer Science',
'eess' => 'Electrical Engineering and Systems Science',
];
1 week ago
/** @var list<string> */
4 days ago
// Commercialisation-related keywords (Chinese and English); a paper whose
// title or summary contains any of them receives a score bonus in scorePaper().
protected const HIGH_VALUE_KEYWORDS = [
'产业化', '量产', '中试', '小试', '落地', '商用', '专利', '转化', '示范', '试点',
'prototype', 'commercial', 'deployment', 'industrial', 'pilot', 'scale-up',
];
1 week ago
/** @var list<string> */
4 days ago
// Industry-academia cooperation keywords; generate() keeps only the news items
// whose title or summary contains at least one of them.
protected const COOP_KEYWORDS = [
'校企', '产学研', '联合研究', '联合实验室', '产业资本', '投资', '融资', '合作',
'签约', '落地', '试点', '示范', '转化', '专利', '孵化',
];
1 week ago
/**
 * Resolve the bounds of the week immediately before the reference date's week.
 *
 * @param Carbon|null $reference Date to count back from; the current time is used when omitted.
 * @return array{0: Carbon, 1: Carbon} Week start and week end.
 */
public function resolvePreviousWeek(?Carbon $reference = null): array
{
    // "Previous week" is simply the week one step back from the current one.
    $previousWeekOffset = 1;

    return $this->resolveWeekByOffset($previousWeekOffset, $reference);
}
/**
 * Resolve the start and end of the week lying `$offset` weeks before the reference date.
 *
 * Weeks run Monday to Sunday and are computed in the Asia/Shanghai timezone.
 * For offset 0 (the current week) the end is the end of the reference day
 * rather than the coming Sunday.
 *
 * @param int         $offset    Number of weeks to go back; 0 means the reference date's own week.
 * @param Carbon|null $reference Reference date; the current time is used when omitted.
 * @return array{0: Carbon, 1: Carbon} Week start and week end.
 */
public function resolveWeekByOffset(int $offset = 0, ?Carbon $reference = null): array
{
    $base = $reference ?? now();
    $today = $base->copy()->timezone('Asia/Shanghai')->startOfDay();
    $monday = $today->copy()->startOfWeek(Carbon::MONDAY)->subWeeks($offset);

    // The current week is cut off at "today"; earlier weeks run through Sunday.
    $lastMoment = $offset === 0
        ? $today->copy()->endOfDay()
        : $monday->copy()->endOfWeek(Carbon::SUNDAY)->endOfDay();

    return [$monday, $lastMoment];
}
4 days ago
/**
 * List selectable weeks, starting with the current week and going backwards.
 *
 * @param int         $count     Number of weeks to list.
 * @param Carbon|null $reference Reference date passed on to resolveWeekByOffset().
 * @return list<array{offset:int,label:string,week_start:string,week_end:string}>
 */
public function weekOptions(int $count = 8, ?Carbon $reference = null): array
{
    $choices = [];
    $offset = 0;

    while ($offset < $count) {
        [$from, $to] = $this->resolveWeekByOffset($offset, $reference);

        $caption = match ($offset) {
            0 => '本周(截至今日)',
            1 => '上周',
            default => "前{$offset}周",
        };

        $choices[] = [
            'offset' => $offset,
            'label' => $caption,
            'week_start' => $from->toDateString(),
            'week_end' => $to->toDateString(),
        ];

        $offset++;
    }

    return $choices;
}
1 week ago
/**
 * Generate the weekly brief for the given period and store it as a WeeklyBrief.
 *
 * Collects the papers, news items and teachers of the period, scores and
 * classifies the papers, renders a five-section Markdown report followed by
 * a reference list, and saves the Markdown together with summary statistics.
 *
 * @param Carbon   $weekStart   First day of the period (normalised to start of day, Asia/Shanghai).
 * @param Carbon   $weekEnd     Last day of the period (normalised to end of day, Asia/Shanghai).
 * @param int|null $adminUserId Stored on the brief as admin_user_id.
 * @param bool     $replace     When true, briefs already stored for the same period are deleted first.
 * @return WeeklyBrief The newly created brief.
 */
public function generate(Carbon $weekStart, Carbon $weekEnd, ?int $adminUserId = null, bool $replace = true): WeeklyBrief
{
    $weekStart = $weekStart->copy()->timezone('Asia/Shanghai')->startOfDay();
    $weekEnd = $weekEnd->copy()->timezone('Asia/Shanghai')->endOfDay();

    $papers = $this->fetchPapers($weekStart, $weekEnd);
    $newsItems = $this->fetchNews($weekStart, $weekEnd);
    $teachers = $this->fetchTeachers($weekStart, $weekEnd);

    // Attach track, score and maturity to every paper, best score first.
    $scoredPapers = $papers->map(fn (Paper $paper) => [
        'paper' => $paper,
        'track' => $this->resolvePaperTrack($paper),
        'score' => $this->scorePaper($paper),
        'maturity' => $this->assessMaturity($paper),
    ])->sortByDesc('score')->values();
    $trackCounts = $scoredPapers->groupBy('track')->map->count();
    $highValuePapers = $scoredPapers->filter(fn ($row) => $row['score'] >= 60)->values();
    $topTracks = $trackCounts->sortDesc()->take(3)->keys()->all();

    $generatedAt = now()->timezone('Asia/Shanghai');
    $titleDate = $this->formatChineseDate($generatedAt);
    $periodLabel = $weekStart->format('Y年n月j日').' - '.$weekEnd->format('Y年n月j日');
    $title = "高校科技成果周报 | {$titleDate}";

    $lines = [];
    $lines[] = "# {$title}";
    $lines[] = '';
    $lines[] = "> 统计周期:{$periodLabel}(Asia/Shanghai)";
    $lines[] = '';
    $lines[] = '---';
    $lines[] = '';

    // Both are filled by appendReference() (passed by reference) while rendering.
    $refIndex = 1;
    $references = [];

    // ---- Section 1: overview ------------------------------------------------
    $lines[] = '## 一、总览';
    $lines[] = '';
    $lines[] = '### 1、赛道热度拐点';
    $lines[] = '';
    if ($topTracks === []) {
        $lines[] = '_本周暂无足够论文数据识别赛道拐点。_';
    } else {
        foreach ($topTracks as $trackKey) {
            $label = self::TRACKS[$trackKey] ?? $trackKey;
            $count = (int) ($trackCounts[$trackKey] ?? 0);
            $lines[] = "- **{$label}**:本周入库 {$count} 篇,技术突破相对集中";
        }
    }
    $lines[] = '';
    $lines[] = '### 2、核心突破';
    $lines[] = '';
    $coreBreakthroughs = $highValuePapers->take(5);
    if ($coreBreakthroughs->isEmpty()) {
        $lines[] = '_本周暂无可落地、可商业化的关键技术进展。_';
    } else {
        foreach ($coreBreakthroughs as $row) {
            /** @var Paper $paper */
            $paper = $row['paper'];
            $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
            // Separate title, track and innovation text so they do not run together.
            $lines[] = '- **'.$this->truncate($paper->title, 120).'**('.$trackLabel.'):'
                .$this->truncate($this->extractInnovation($paper), 160);
        }
    }
    $lines[] = '';
    $lines[] = '### 3、项目机会';
    $lines[] = '';
    $projectHints = $this->buildProjectOpportunities($scoredPapers, $teachers);
    if ($projectHints === []) {
        $lines[] = '_本周暂无新增值得跟踪的高校创业团队或可转化实验室成果。_';
    } else {
        foreach ($projectHints as $hint) {
            $lines[] = '- '.$hint;
        }
    }
    $lines[] = '';
    $lines[] = '### 4、数据总结';
    $lines[] = '';
    $total = $papers->count();
    $highValueCount = $highValuePapers->count();
    $lines[] = "- 本周有效检索论文总量:**{$total}** 篇";
    $lines[] = "- 高价值产业级论文数量:**{$highValueCount}** 篇";
    $lines[] = '- 各赛道分布占比:'.$this->formatTrackDistribution($trackCounts, $total);
    $lines[] = '';
    $lines[] = '---';
    $lines[] = '';

    // ---- Section 2: breakthroughs per track ---------------------------------
    $lines[] = '## 二、技术突破盘点';
    $lines[] = '';
    foreach (self::TRACKS as $trackKey => $trackLabel) {
        $trackPapers = $scoredPapers->where('track', $trackKey)->take(5)->values();
        $lines[] = "### {$trackLabel}";
        $lines[] = '';
        if ($trackPapers->isEmpty()) {
            $lines[] = '_本周该赛道暂无论文入库。_';
            $lines[] = '';
            continue;
        }
        $lines[] = '**1、技术突破**';
        $lines[] = '';
        $idx = 1;
        foreach ($trackPapers as $row) {
            /** @var Paper $paper */
            $paper = $row['paper'];
            $lines[] = "{$idx}. **{$paper->title}**";
            $lines[] = '   - 核心创新点:'.$this->extractInnovation($paper);
            if ($paper->authors) {
                $lines[] = '   - 作者团队:'.$this->truncate($this->cleanText($paper->authors), 100);
            }
            $source = $this->appendReference($paper->source_site ?: '论文来源', $paper->url, $refIndex, $references);
            $lines[] = '   - 来源:'.$source;
            $lines[] = '';
            $idx++;
        }
        $lines[] = '**2、产业化成熟度判定**';
        $lines[] = '';
        $maturityGroups = $trackPapers->groupBy('maturity');
        foreach (['可直接量产落地', '中试优化', '小试可行', '实验室原理验证'] as $level) {
            $count = $maturityGroups->get($level)?->count() ?? 0;
            if ($count > 0) {
                $lines[] = "- {$level}:{$count} 篇";
            }
        }
        $lines[] = '';
        $lines[] = '**3、商业价值**';
        $lines[] = '';
        // The commercial-value note is derived from the track's best-scored paper.
        $sample = $trackPapers->first();
        /** @var Paper|null $samplePaper */
        $samplePaper = $sample['paper'] ?? null;
        if ($samplePaper) {
            $lines[] = $this->assessCommercialValue($samplePaper);
        } else {
            $lines[] = '_待进一步评估。_';
        }
        $lines[] = '';
    }
    $lines[] = '---';
    $lines[] = '';

    // ---- Section 3: top ten papers ------------------------------------------
    $lines[] = '## 三、产业级重磅论文';
    $lines[] = '';
    $lines[] = '筛选本周具备投资影响力的核心论文前 10 篇:';
    $lines[] = '';
    $topTen = $scoredPapers->take(10);
    if ($topTen->isEmpty()) {
        $lines[] = '_本周暂无符合条件的论文。_';
    } else {
        $index = 1;
        foreach ($topTen as $row) {
            /** @var Paper $paper */
            $paper = $row['paper'];
            $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
            $lines[] = "### {$index}. {$paper->title}";
            $lines[] = '- **发表期刊/平台**:'.($paper->source_site ?: 'arXiv / 预印本');
            $lines[] = '- **作者团队**:'.($this->cleanText($paper->authors) ?: '待补充');
            $lines[] = '- **所属高校/实验室**:'.($paper->school_name ?: '待关联');
            if ($paper->published_at) {
                $lines[] = '- **发表时间**:'.$paper->published_at->format('Y年n月j日');
            }
            $lines[] = '- **所属赛道**:'.$trackLabel;
            $lines[] = '- **可转化落地可能性**:'.$this->assessConversionPotential($row['score'], $row['maturity']);
            $source = $this->appendReference('论文链接', $paper->url, $refIndex, $references);
            $lines[] = '- **来源**:'.$source;
            $lines[] = '';
            $index++;
        }
    }
    $lines[] = '---';
    $lines[] = '';

    // ---- Section 4: potential projects --------------------------------------
    $lines[] = '## 四、潜在项目挖掘';
    $lines[] = '';
    $lines = array_merge($lines, $this->renderPotentialProjects($scoredPapers, $teachers));
    $lines[] = '---';
    $lines[] = '';

    // ---- Section 5: cooperation / technology-transfer news -------------------
    $lines[] = '## 五、专利&产学研联动动态';
    $lines[] = '';
    $coopNews = $newsItems->filter(
        fn (News $n) => $this->matchesKeywords($n->title.' '.($n->summary ?? ''), self::COOP_KEYWORDS)
    );
    if ($coopNews->isEmpty()) {
        $lines[] = '_本周暂无产学研合作或技术转化公开动态。_';
    } else {
        $index = 1;
        foreach ($coopNews->take(12) as $news) {
            $lines = array_merge($lines, $this->renderCoopNewsEntry($index, $news, $refIndex, $references));
            $index++;
        }
    }
    $lines[] = '---';
    $lines[] = '';

    // ---- Reference list and footer ------------------------------------------
    $lines[] = '## 参考来源汇总';
    $lines[] = '';
    foreach ($references as $i => $ref) {
        $num = $i + 1;
        $lines[] = "{$num}. {$ref['label']} {$ref['url']}";
    }
    if ($references === []) {
        $lines[] = '_本周暂无外部来源链接。_';
    }
    $lines[] = '';
    $lines[] = '---';
    $lines[] = '';
    $lines[] = '*简报生成时间:'.$generatedAt->format('Y年n月j日 H:i').' (Asia/Shanghai)*';
    $markdown = implode("\n", $lines);

    $stats = [
        'papers_count' => $papers->count(),
        'high_value_papers_count' => $highValueCount,
        'news_count' => $newsItems->count(),
        'teachers_count' => $teachers->count(),
        'references_count' => count($references),
        'track_distribution' => $trackCounts->all(),
        'sections' => [
            'overview' => 1,
            'breakthrough' => $scoredPapers->count(),
            'top_papers' => $topTen->count(),
            // Section 4 lists up to 8 teachers followed by up to 5 papers.
            'projects' => min(8, $teachers->count()) + $scoredPapers->take(5)->count(),
            'coop' => $coopNews->count(),
        ],
    ];

    $attributes = [
        'week_start' => $weekStart->toDateString(),
        'week_end' => $weekEnd->toDateString(),
        'title' => $title,
        'markdown' => $markdown,
        'stats_json' => $stats,
        'admin_user_id' => $adminUserId,
        'generated_at' => $generatedAt,
    ];

    // Delete and insert atomically so a failed insert cannot wipe the previous brief.
    return (new WeeklyBrief())->getConnection()->transaction(
        function () use ($replace, $attributes): WeeklyBrief {
            if ($replace) {
                WeeklyBrief::query()
                    ->whereDate('week_start', $attributes['week_start'])
                    ->whereDate('week_end', $attributes['week_end'])
                    ->delete();
            }

            return WeeklyBrief::query()->create($attributes);
        }
    );
}
/**
 * Load the crawled papers created inside the period, newest publication first (at most 200).
 *
 * @param Carbon $weekStart Inclusive lower bound on created_at.
 * @param Carbon $weekEnd   Inclusive upper bound on created_at.
 * @return Collection<int, Paper>
 */
protected function fetchPapers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // Date bindings are formatted in the Carbon instance's own timezone without
    // conversion, so express the bounds in PHP's default timezone first.
    // NOTE(review): assumes created_at is stored in that timezone — confirm.
    $storageTimezone = date_default_timezone_get();
    $from = $weekStart->copy()->setTimezone($storageTimezone);
    $until = $weekEnd->copy()->setTimezone($storageTimezone);

    return Paper::query()
        ->with('crawlJob')
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', [$from, $until])
        ->orderByDesc('published_at')
        ->orderByDesc('id')
        ->limit(200)
        ->get();
}
/**
 * Load the crawled news items created inside the period, newest publication first (at most 80).
 *
 * @param Carbon $weekStart Inclusive lower bound on created_at.
 * @param Carbon $weekEnd   Inclusive upper bound on created_at.
 * @return Collection<int, News>
 */
protected function fetchNews(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // Date bindings are formatted in the Carbon instance's own timezone without
    // conversion, so express the bounds in PHP's default timezone first.
    // NOTE(review): assumes created_at is stored in that timezone — confirm.
    $storageTimezone = date_default_timezone_get();
    $from = $weekStart->copy()->setTimezone($storageTimezone);
    $until = $weekEnd->copy()->setTimezone($storageTimezone);

    return News::query()
        ->with('categoryItem')
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', [$from, $until])
        ->orderByDesc('published_at')
        ->orderByDesc('id')
        ->limit(80)
        ->get();
}
/**
 * Load the teachers created inside the period, newest first (at most 30).
 *
 * @param Carbon $weekStart Inclusive lower bound on created_at.
 * @param Carbon $weekEnd   Inclusive upper bound on created_at.
 * @return Collection<int, Teacher>
 */
protected function fetchTeachers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    // Date bindings are formatted in the Carbon instance's own timezone without
    // conversion, so express the bounds in PHP's default timezone first.
    // NOTE(review): assumes created_at is stored in that timezone — confirm.
    $storageTimezone = date_default_timezone_get();
    $from = $weekStart->copy()->setTimezone($storageTimezone);
    $until = $weekEnd->copy()->setTimezone($storageTimezone);

    return Teacher::query()
        ->with(['university', 'researchDirections'])
        ->whereBetween('created_at', [$from, $until])
        ->orderByDesc('id')
        ->limit(30)
        ->get();
}
4 days ago
/**
 * Decide which track a paper belongs to.
 *
 * The crawl job's request URL is checked first for a "/list/<track>" or
 * "category/<track>" / "categories/<track>" segment. If that gives no answer,
 * the title, summary and paper URL are scanned for a few indicative words.
 * Papers that match nothing are assigned to "cs".
 *
 * @return string One of the keys of self::TRACKS.
 */
protected function resolvePaperTrack(Paper $paper): string
{
    $url = strtolower((string) ($paper->crawlJob?->request_url ?? ''));

    foreach (array_keys(self::TRACKS) as $track) {
        $segment = preg_quote($track, '~');

        // '~' is the delimiter because the pattern contains a literal '#';
        // with '#' as delimiter that character would end the pattern early.
        if (preg_match('~/list/'.$segment.'(?:/|$|[?&#])~', $url) === 1) {
            return $track;
        }
        if (preg_match('~categories?/'.$segment.'~', $url) === 1) {
            return $track;
        }
    }

    // Fallback: keyword heuristics over the paper's own text and URL.
    $haystack = strtolower($paper->title.' '.($paper->summary ?? '').' '.($paper->url ?? ''));
    if (str_contains($haystack, 'physics') || str_contains($haystack, 'quantum')) {
        return 'physics';
    }
    if (str_contains($haystack, 'math')) {
        return 'math';
    }
    if (str_contains($haystack, 'eess') || str_contains($haystack, 'electrical')) {
        return 'eess';
    }

    return 'cs';
}
/**
 * Score a paper from 0 to 100 based on how complete and commercially relevant its metadata is.
 *
 * Every paper starts at 40 and earns fixed bonuses for having a school name,
 * authors, a publication date, a high-value keyword in its title/summary,
 * and a cleaned summary of at least 120 characters.
 */
protected function scorePaper(Paper $paper): int
{
    $abstract = $paper->summary ?? '';
    $searchable = $paper->title.' '.$abstract;

    // Each entry pairs a bonus with the condition that earns it.
    $bonuses = [
        [10, (bool) $paper->school_name],
        [8, (bool) $paper->authors],
        [5, (bool) $paper->published_at],
        [20, $this->matchesKeywords($searchable, self::HIGH_VALUE_KEYWORDS)],
        [7, mb_strlen($this->cleanText($paper->summary)) >= 120],
    ];

    $total = 40;
    foreach ($bonuses as [$points, $earned]) {
        if ($earned) {
            $total += $points;
        }
    }

    return min(100, $total);
}
/**
 * Classify a paper's industrialisation maturity from keywords in its title and summary.
 *
 * Levels are checked from most to least mature and the first level with a
 * matching keyword wins; papers matching none are labelled as lab-stage.
 */
protected function assessMaturity(Paper $paper): string
{
    $searchable = $paper->title.' '.($paper->summary ?? '');

    // Ordered from most mature to least mature.
    $signalsByLevel = [
        '可直接量产落地' => ['量产', '部署', '商用', '落地应用', 'production', 'deployed'],
        '中试优化' => ['中试', 'pilot plant', 'scale-up', '示范线'],
        '小试可行' => ['小试', 'prototype', '样机', '验证平台'],
    ];

    foreach ($signalsByLevel as $level => $signals) {
        if ($this->matchesKeywords($searchable, $signals)) {
            return $level;
        }
    }

    return '实验室原理验证';
}
/**
 * Produce a short description of a paper's innovation.
 *
 * Uses the cleaned summary (cut to 220 characters) when there is one,
 * otherwise a generic sentence built around the paper title.
 */
protected function extractInnovation(Paper $paper): string
{
    $abstract = $this->cleanText($paper->summary);

    if ($abstract === '') {
        $shortTitle = $this->truncate($paper->title, 80);

        return '围绕「'.$shortTitle.'」提出方法或系统层面的创新,具备进一步工程化验证价值。';
    }

    return $this->truncate($abstract, 220);
}
/**
 * Render the Markdown bullet(s) describing a paper's commercial value.
 *
 * Without a summary a single generic bullet is returned; otherwise three
 * newline-separated bullets are built from the summary and the school name.
 */
protected function assessCommercialValue(Paper $paper): string
{
    $abstract = $this->cleanText($paper->summary);

    if ($abstract === '') {
        return '- 降本/提效空间与下游应用场景需结合实验数据进一步量化;建议关注头部团队复现与工程化进度。';
    }

    $downstream = $paper->school_name
        ? $paper->school_name.'相关方向'
        : '待结合赛道进一步拆解';

    $bullets = [
        '- 降本提效:'.$this->truncate($abstract, 120),
        '- 替代现有方案:需对照行业 baseline 评估性能/成本拐点',
        '- 下游场景:'.$downstream,
    ];

    return implode("\n", $bullets);
}
/**
 * Translate a paper's score and maturity level into a high / medium / low conversion rating.
 *
 * @param int    $score    Paper score as produced by scorePaper().
 * @param string $maturity Maturity label as produced by assessMaturity().
 */
protected function assessConversionPotential(int $score, string $maturity): string
{
    $nearProduction = in_array($maturity, ['可直接量产落地', '中试优化'], true);

    // Arms are evaluated top to bottom; the first true condition wins.
    return match (true) {
        $score >= 75 && $nearProduction => '高(接近工程化/产业化窗口)',
        $score >= 60 => '中(具备跟踪价值,需验证复现与场景)',
        default => '低(偏基础研究,可作为技术雷达储备)',
    };
}
1 week ago
/**
 * Format the share of papers per track, e.g. "Physics 40%(4篇)、Mathematics 10%(1篇)".
 *
 * Every track of self::TRACKS is listed, including those with no papers.
 *
 * @param Collection<string, int> $trackCounts Number of papers keyed by track.
 * @param int                     $total       Total number of papers; percentages are relative to it.
 * @return string The formatted distribution, or a "no data" notice when $total is not positive.
 */
protected function formatTrackDistribution(Collection $trackCounts, int $total): string
{
    if ($total <= 0) {
        return '暂无数据';
    }

    $parts = [];
    foreach (self::TRACKS as $key => $label) {
        $count = (int) ($trackCounts[$key] ?? 0);
        $percent = (int) round($count / $total * 100);
        // Balanced parentheses around the absolute count.
        $parts[] = "{$label} {$percent}%({$count}篇)";
    }

    // Separate the tracks so adjacent entries do not run into each other.
    return implode('、', $parts);
}
/**
 * Build the one-line "project opportunity" hints for the overview section.
 *
 * Up to five newly added teachers are listed first, followed by those of the
 * three best-scored papers whose score is at least 55.
 *
 * @param Collection<int, array{paper: Paper, track: string, score: int, maturity: string}> $scoredPapers
 * @param Collection<int, Teacher> $teachers
 * @return list<string> Markdown fragments without the leading bullet.
 */
protected function buildProjectOpportunities(Collection $scoredPapers, Collection $teachers): array
{
    $hints = [];

    foreach ($teachers->take(5) as $teacher) {
        $uni = $teacher->university?->name ?: $teacher->university_text ?: '国内高校';
        $dirs = $teacher->researchDirections->pluck('name')->join('、') ?: '细分方向待补充';
        $hints[] = "新增团队:**{$uni} · {$teacher->name}**({$dirs}),建议纳入长期跟踪池";
    }

    foreach ($scoredPapers->take(3) as $row) {
        if ($row['score'] < 55) {
            continue;
        }

        /** @var Paper $paper */
        $paper = $row['paper'];
        $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
        $hints[] = "可转化成果:**{$paper->title}**({$trackLabel}),成熟度:{$row['maturity']}";
    }

    return $hints;
}
/**
 * Render the "potential projects" section: up to eight teachers followed by up to five papers.
 *
 * Entries are numbered continuously across both groups.
 *
 * @param Collection<int, array{paper: Paper, track: string, score: int, maturity: string}> $scoredPapers
 * @param Collection<int, Teacher> $teachers
 * @return list<string> Markdown lines.
 */
protected function renderPotentialProjects(Collection $scoredPapers, Collection $teachers): array
{
    $lines = [];

    if ($teachers->isEmpty() && $scoredPapers->isEmpty()) {
        $lines[] = '_本周暂无高辨识度核心团队或高价值成果。_';
        $lines[] = '';

        return $lines;
    }

    $index = 1;

    foreach ($teachers->take(8) as $teacher) {
        $uni = $teacher->university?->name ?: $teacher->university_text ?: '国内高校';
        $dirs = $teacher->researchDirections->pluck('name')->join('、') ?: '待补充';
        $background = $teacher->bio
            ? $this->truncate($this->cleanText($teacher->bio), 180)
            : ($teacher->title ?: '职称/履历待补充');

        // Only mention the import date when the record actually has one.
        $importedOn = $teacher->created_at?->format('Y-m-d');
        $progress = $importedOn !== null
            ? $importedOn.' 入库,建议结合论文/专利进一步核验'
            : '建议结合论文/专利进一步核验';

        $lines[] = "### {$index}. {$uni} · {$teacher->name}";
        $lines[] = '- **团队背景**:'.$background;
        $lines[] = '- **技术稀缺性**:长期深耕「'.$dirs.'」,可对比行业现有方案评估差异化';
        $lines[] = '- **技术进度**:'.$progress;
        $lines[] = '';
        $index++;
    }

    foreach ($scoredPapers->take(5) as $row) {
        /** @var Paper $paper */
        $paper = $row['paper'];
        // Fall back to the raw key for an unknown track, like the other label lookups in this class.
        $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];

        $lines[] = "### {$index}. {$paper->title}";
        $lines[] = '- **团队背景**:'.($this->cleanText($paper->authors) ?: '作者信息待补充');
        $lines[] = '- **技术稀缺性**:'.$trackLabel.' 赛道,'.$this->assessConversionPotential($row['score'], $row['maturity']);
        $lines[] = '- **技术进度**:'.$row['maturity'];
        $lines[] = '';
        $index++;
    }

    return $lines;
}
/**
 * Render one news item of the cooperation / technology-transfer section.
 *
 * @param int  $index     Number shown in the entry heading.
 * @param News $news      News item to render.
 * @param int  $refIndex  Running reference counter, updated by appendReference().
 * @param list<array{label: string, url: string}> $references Collected references; the item's source may be appended.
 * @return list<string> Markdown lines.
 */
protected function renderCoopNewsEntry(int $index, News $news, int &$refIndex, array &$references): array
{
    $lines = [];
    $lines[] = "### {$index}. {$news->title}";

    $summary = $this->cleanText($news->summary);
    if ($summary !== '') {
        $lines[] = '- **合作/转化进展**:'.$this->truncate($summary, 240);
    }

    // Derive cooperation signals from keywords in the title and summary.
    $text = $news->title.' '.$summary;
    $flags = [];
    if ($this->matchesKeywords($text, ['头部企业', '联合研究', '联合实验室'])) {
        $flags[] = '与头部企业联合研究';
    }
    if ($this->matchesKeywords($text, ['投资', '融资', '产业资本', '基金'])) {
        $flags[] = '产业资本介入';
    }
    if ($this->matchesKeywords($text, ['试点', '示范', '落地', '应用'])) {
        $flags[] = '落地试点应用';
    }
    if ($flags !== []) {
        // Separate the signals so several of them stay readable.
        $lines[] = '- **产学研信号**:'.implode('、', $flags);
    }

    if ($news->published_at) {
        $lines[] = '- **时间**:'.$news->published_at->timezone('Asia/Shanghai')->format('Y年n月j日');
    }

    $label = $news->source_site ?: ($news->source ?: '资讯来源');
    $source = $this->appendReference($label, $news->source_url ?: $news->cover_url, $refIndex, $references);
    $lines[] = '- **来源**:'.$source;
    $lines[] = '';

    return $lines;
}
/**
 * Tell whether the text contains at least one of the keywords.
 *
 * Matching ignores letter case, so lowercase English keywords such as
 * "prototype" also match "Prototype" in a title. Empty keywords are skipped.
 *
 * @param string       $text     Text to search.
 * @param list<string> $keywords Keywords to look for.
 */
protected function matchesKeywords(string $text, array $keywords): bool
{
    foreach ($keywords as $keyword) {
        if ($keyword === '') {
            continue;
        }

        if (mb_stripos($text, $keyword) !== false) {
            return true;
        }
    }

    return false;
}
/**
 * Turn a source label and URL into a Markdown link and record the URL in the reference list.
 *
 * When the URL is empty or not http(s), only the cleaned label is returned
 * and nothing is recorded. A URL already present in the list is not recorded again.
 *
 * @param string      $label      Link text; falls back to a generic label when empty after cleaning.
 * @param string|null $url        Target URL.
 * @param int         $refIndex   Running counter, incremented for every newly recorded URL.
 * @param list<array{label: string, url: string}> $references Collected references.
 * @return string Markdown link, or the plain label when there is no usable URL.
 */
protected function appendReference(string $label, ?string $url, int &$refIndex, array &$references): string
{
    $cleanLabel = $this->cleanText($label) ?: '来源';
    $link = trim((string) $url);

    $isHttpLink = $link !== '' && preg_match('#^https?://#i', $link);
    if (! $isHttpLink) {
        return $cleanLabel;
    }

    $alreadyListed = in_array($link, array_column($references, 'url'), true);
    if (! $alreadyListed) {
        $references[] = [
            'label' => $cleanLabel.' - '.$this->briefTitleFromUrl($link),
            'url' => $link,
        ];
        $refIndex++;
    }

    return "[{$cleanLabel}]({$link})";
}
/**
 * Derive a short display title from a URL: its host, or the URL itself when
 * no host can be parsed, limited to 48 characters via Str::limit().
 */
protected function briefTitleFromUrl(string $url): string
{
    $host = parse_url($url, PHP_URL_HOST);

    if (! $host) {
        $host = $url;
    }

    return Str::limit($host, 48, '');
}
/**
 * Format a date in Chinese notation: year, month and day without leading zeros.
 */
protected function formatChineseDate(Carbon $date): string
{
    $pattern = 'Y年n月j日';

    return $date->format($pattern);
}
/**
 * Normalise free text: strip HTML tags, collapse whitespace runs into single
 * spaces and trim the result. Null or blank input yields an empty string.
 */
protected function cleanText(?string $text): string
{
    $raw = $text ?? '';
    if (trim($raw) === '') {
        return '';
    }

    $withoutTags = strip_tags($raw);
    // preg_replace() returns null on failure; treat that as empty text.
    $collapsed = preg_replace('/\s+/u', ' ', $withoutTags);

    return trim($collapsed ?? '');
}
/**
 * Shorten text to at most `$limit` characters, ending with an ellipsis when it was cut.
 *
 * Length is counted in characters, not bytes. A limit of zero or less yields
 * an empty string.
 *
 * @param string $text  Text to shorten.
 * @param int    $limit Maximum length of the result, ellipsis included.
 */
protected function truncate(string $text, int $limit): string
{
    // Without this guard mb_substr() would receive a negative length and
    // return nearly the whole text.
    if ($limit <= 0) {
        return '';
    }

    if (mb_strlen($text) <= $limit) {
        return $text;
    }

    return mb_substr($text, 0, $limit - 1).'…';
}
}