You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

694 lines
26 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
namespace App\Services\Brief;
use App\Models\News;
use App\Models\Paper;
use App\Models\Teacher;
use App\Models\WeeklyBrief;
use Carbon\Carbon;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
class WeeklyBriefService
{
/**
 * Paper tracks covered by the brief: short track code => label printed in the report.
 * The codes are also the strings resolvePaperTrack() looks for in crawl-job URLs.
 *
 * @var array<string, string>
 */
protected const TRACKS = [
'physics' => 'Physics',
'math' => 'Mathematics',
'cs' => 'Computer Science',
'eess' => 'Electrical Engineering and Systems Science',
];
/**
 * Keywords (Chinese and English) whose presence in a paper's title or summary
 * earns the keyword bonus in scorePaper().
 *
 * @var list<string>
 */
protected const HIGH_VALUE_KEYWORDS = [
'产业化', '量产', '中试', '小试', '落地', '商用', '专利', '转化', '示范', '试点',
'prototype', 'commercial', 'deployment', 'industrial', 'pilot', 'scale-up',
];
/**
 * Keywords used by generate() to select news items (title + summary) for the
 * cooperation / technology-transfer section.
 *
 * @var list<string>
 */
protected const COOP_KEYWORDS = [
'校企', '产学研', '联合研究', '联合实验室', '产业资本', '投资', '融资', '合作',
'签约', '落地', '试点', '示范', '转化', '专利', '孵化',
];
/**
 * @param WeeklyBriefDocxWriter|null $docxWriter Writer used to produce the DOCX file;
 *                                               a default instance is created when none is given.
 */
public function __construct(
    protected ?WeeklyBriefDocxWriter $docxWriter = null,
) {
    if ($this->docxWriter === null) {
        $this->docxWriter = new WeeklyBriefDocxWriter();
    }
}
/**
 * Resolve the week that lies one week before the reference date's week.
 *
 * @param Carbon|null $reference Reference moment; the current time is used when null.
 *
 * @return array{0: Carbon, 1: Carbon} Week start and week end.
 */
public function resolvePreviousWeek(?Carbon $reference = null): array
{
    $previousWeek = $this->resolveWeekByOffset(1, $reference);

    return $previousWeek;
}
/**
 * Resolve the start and end of the week lying $offset weeks before the reference date.
 *
 * All calculations happen in the Asia/Shanghai timezone and weeks start on Monday.
 * For offset 0 the period ends at the end of the reference day; for any other
 * offset it ends at the end of that week's Sunday.
 *
 * @param int         $offset    Number of weeks to go back from the reference week.
 * @param Carbon|null $reference Reference moment; the current time is used when null.
 *
 * @return array{0: Carbon, 1: Carbon} Week start and week end.
 */
public function resolveWeekByOffset(int $offset = 0, ?Carbon $reference = null): array
{
    $today = ($reference ?? now())->copy()->timezone('Asia/Shanghai')->startOfDay();
    $monday = $today->copy()->startOfWeek(Carbon::MONDAY)->subWeeks($offset);

    $lastMoment = $offset === 0
        ? $today->copy()->endOfDay()
        : $monday->copy()->endOfWeek(Carbon::SUNDAY)->endOfDay();

    return [$monday, $lastMoment];
}
/**
 * Build the list of selectable weeks, newest first, starting with the current week.
 *
 * @param int         $count     Number of weeks to list; a value <= 0 yields an empty list.
 * @param Carbon|null $reference Reference moment forwarded to resolveWeekByOffset().
 *
 * @return list<array{offset:int,label:string,week_start:string,week_end:string}>
 */
public function weekOptions(int $count = 8, ?Carbon $reference = null): array
{
    $options = [];
    for ($offset = 0; $offset < $count; $offset++) {
        [$start, $end] = $this->resolveWeekByOffset($offset, $reference);
        $options[] = [
            'offset' => $offset,
            // Older weeks previously got a bare number as label; spell out "N weeks ago"
            // so every entry reads as a phrase like the first two.
            'label' => match ($offset) {
                0 => '本周(截至今日)',
                1 => '上周',
                default => "{$offset} 周前",
            },
            'week_start' => $start->toDateString(),
            'week_end' => $end->toDateString(),
        ];
    }

    return $options;
}
/**
 * Build, render and persist the weekly brief for the given period.
 *
 * Loads the papers, news and teachers created inside the period, scores the papers,
 * assembles the five report sections plus a reference list, writes the DOCX file
 * through the injected writer and stores a WeeklyBrief row.
 *
 * @param Carbon   $weekStart   First day of the period (normalised to start of day, Asia/Shanghai).
 * @param Carbon   $weekEnd     Last day of the period (normalised to end of day, Asia/Shanghai).
 * @param int|null $adminUserId Stored in the admin_user_id column of the new brief.
 * @param bool     $replace     When true, existing briefs for the same period and their DOCX
 *                              files are deleted before the new row is created.
 *
 * @return WeeklyBrief The newly created record.
 */
public function generate(Carbon $weekStart, Carbon $weekEnd, ?int $adminUserId = null, bool $replace = true): WeeklyBrief
{
    $weekStart = $weekStart->copy()->timezone('Asia/Shanghai')->startOfDay();
    $weekEnd = $weekEnd->copy()->timezone('Asia/Shanghai')->endOfDay();

    // --- Source data -------------------------------------------------------
    $papers = $this->fetchPapers($weekStart, $weekEnd);
    $newsItems = $this->fetchNews($weekStart, $weekEnd);
    $teachers = $this->fetchTeachers($weekStart, $weekEnd);
    $papersTotal = $this->countPapers($weekStart, $weekEnd);
    $newsTotal = $this->countNews($weekStart, $weekEnd);
    $teachersTotal = $this->countTeachers($weekStart, $weekEnd);

    // Each row bundles a paper with its derived track, score and maturity; best score first.
    $scoredPapers = $papers->map(fn (Paper $paper) => [
        'paper' => $paper,
        'track' => $this->resolvePaperTrack($paper),
        'score' => $this->scorePaper($paper),
        'maturity' => $this->assessMaturity($paper),
    ])->sortByDesc('score')->values();

    $trackCounts = $scoredPapers->groupBy('track')->map->count();
    $highValuePapers = $scoredPapers->filter(fn ($row) => $row['score'] >= 60)->values();
    $topTracks = $trackCounts->sortDesc()->take(3)->keys()->all();
    $topTen = $scoredPapers->take(10);

    $generatedAt = now()->timezone('Asia/Shanghai');
    $titleDate = $this->formatChineseDate($generatedAt);
    $periodLabel = $this->formatChineseDate($weekStart).' - '.$this->formatChineseDate($weekEnd);
    $title = "高校科技成果周报 | {$titleDate}";

    $builder = new WeeklyBriefContentBuilder;
    $references = [];
    $refIndex = 1;

    // Fix: the timezone used to be glued directly onto the period label.
    $builder->h1($title)
        ->paragraph("统计周期:{$periodLabel}(Asia/Shanghai)")
        ->spacer();

    // --- Section 1: overview ----------------------------------------------
    $builder->h2('一、总览')->spacer();

    $builder->h3('1、赛道热度拐点')->spacer();
    if ($topTracks === []) {
        $builder->paragraph('本周暂无足够论文数据识别赛道拐点。');
    } else {
        foreach ($topTracks as $trackKey) {
            $label = self::TRACKS[$trackKey] ?? $trackKey;
            $count = (int) ($trackCounts[$trackKey] ?? 0);
            $builder->bullet("{$label}:本周入库 {$count} 篇,技术突破相对集中");
        }
    }
    $builder->spacer();

    $builder->h3('2、核心突破')->spacer();
    $coreBreakthroughs = $highValuePapers->take(5);
    if ($coreBreakthroughs->isEmpty()) {
        $builder->paragraph('本周暂无可落地、可商业化的关键技术进展。');
    } else {
        foreach ($coreBreakthroughs as $row) {
            /** @var Paper $paper */
            $paper = $row['paper'];
            $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
            // Fix: title, track and innovation were concatenated with empty strings and
            // ran together; render them as "title(track):innovation".
            $builder->bullet(
                $this->truncate($paper->title, 120)
                .'('.$trackLabel.'):'
                .$this->truncate($this->extractInnovation($paper), 160)
            );
        }
    }
    $builder->spacer();

    $builder->h3('3、项目机会')->spacer();
    $projectHints = $this->buildProjectOpportunities($scoredPapers, $teachers);
    if ($projectHints === []) {
        $builder->paragraph('本周暂无新增值得跟踪的高校创业团队或可转化实验室成果。');
    } else {
        foreach ($projectHints as $hint) {
            $builder->bullet($hint);
        }
    }
    $builder->spacer();

    $builder->h3('4、数据总结')->spacer();
    $highValueCount = $highValuePapers->count();
    $builder->bullet("本周有效检索论文总量:{$papersTotal}");
    $builder->bullet("高价值产业级论文数量:{$highValueCount}");
    $builder->bullet('各赛道分布占比:'.$this->formatTrackDistribution($trackCounts, max(1, $scoredPapers->count())));
    $builder->bullet("本周入库资讯总量:{$newsTotal}");
    $builder->spacer();

    // --- Section 2: per-track breakthroughs --------------------------------
    $builder->h2('二、技术突破盘点')->spacer();
    foreach (self::TRACKS as $trackKey => $trackLabel) {
        $trackAll = $scoredPapers->where('track', $trackKey)->values();
        $trackPapers = $trackAll->take(5);
        $builder->h3($trackLabel)->spacer();
        if ($trackPapers->isEmpty()) {
            $builder->paragraph('本周该赛道暂无论文入库。')->spacer();

            continue;
        }

        $builder->paragraph('1、技术突破')->spacer();
        $idx = 1;
        foreach ($trackPapers as $row) {
            /** @var Paper $paper */
            $paper = $row['paper'];
            $builder->paragraph("{$idx}. {$paper->title}");
            $builder->bullet('核心创新点:'.$this->extractInnovation($paper));
            if ($paper->authors) {
                $builder->bullet('作者团队:'.$this->truncate($this->cleanText($paper->authors), 100));
            }
            $this->appendReferenceBlock($builder, $paper->source_site ?: '论文来源', $paper->url, $refIndex, $references);
            $builder->spacer();
            $idx++;
        }

        $builder->paragraph('2、产业化成熟度判定')->spacer();
        // Maturity statistics cover every paper of the track, not only the five listed above.
        $maturityGroups = $trackAll->groupBy('maturity');
        foreach (['可直接量产落地', '中试优化', '小试可行', '实验室原理验证'] as $level) {
            $count = $maturityGroups->get($level)?->count() ?? 0;
            if ($count > 0) {
                // Fix: level name and count were printed back to back with no separator.
                $builder->bullet("{$level}:{$count} 篇");
            }
        }
        $builder->spacer();

        $builder->paragraph('3、商业价值')->spacer();
        $samplePaper = $trackPapers->first()['paper'] ?? null;
        $builder->paragraph($samplePaper ? $this->assessCommercialValue($samplePaper) : '待进一步评估。');
        $builder->spacer();
    }

    // --- Section 3: top ten papers ----------------------------------------
    $builder->h2('三、产业级重磅论文')->spacer();
    $builder->paragraph('筛选本周具备投资影响力的核心论文前 10 篇:')->spacer();
    if ($topTen->isEmpty()) {
        $builder->paragraph('本周暂无符合条件的论文。');
    } else {
        $index = 1;
        foreach ($topTen as $row) {
            /** @var Paper $paper */
            $paper = $row['paper'];
            $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
            $builder->h3("{$index}. {$paper->title}");
            $builder->bullet('发表期刊/平台:'.($paper->source_site ?: 'arXiv / 预印本'));
            $builder->bullet('作者团队:'.($this->cleanText($paper->authors) ?: '待补充'));
            $builder->bullet('所属高校/实验室:'.($paper->school_name ?: '待关联'));
            if ($paper->published_at) {
                $builder->bullet('发表时间:'.$paper->published_at->format('Y年n月j日'));
            }
            $builder->bullet('所属赛道:'.$trackLabel);
            $builder->bullet('可转化落地可能性:'.$this->assessConversionPotential($row['score'], $row['maturity']));
            $this->appendReferenceBlock($builder, '论文链接', $paper->url, $refIndex, $references);
            $builder->spacer();
            $index++;
        }
    }

    // --- Section 4: potential projects ------------------------------------
    $builder->h2('四、潜在项目挖掘')->spacer();
    $this->renderPotentialProjects($builder, $scoredPapers, $teachers);

    // --- Section 5: cooperation / technology-transfer news ------------------
    $builder->h2('五、专利&产学研联动动态')->spacer();
    $coopNews = $newsItems->filter(fn (News $n) => $this->matchesKeywords($n->title.' '.($n->summary ?? ''), self::COOP_KEYWORDS));
    if ($coopNews->isEmpty()) {
        $builder->paragraph('本周暂无产学研合作或技术转化公开动态。');
    } else {
        $index = 1;
        foreach ($coopNews->take(20) as $news) {
            $this->renderCoopNewsEntry($builder, $index, $news, $refIndex, $references);
            $index++;
        }
    }

    // --- Reference list -----------------------------------------------------
    $builder->spacer()->h2('参考来源汇总')->spacer();
    if ($references === []) {
        $builder->paragraph('本周暂无外部来源链接。');
    } else {
        foreach ($references as $i => $ref) {
            $num = $i + 1;
            $builder->paragraph("{$num}. {$ref['label']}");
            if ($ref['url'] !== '') {
                $builder->link($ref['url'], $ref['url']);
            }
        }
    }
    $builder->spacer()->paragraph('简报生成时间:'.$generatedAt->format('Y年n月j日 H:i').' (Asia/Shanghai)');

    // --- Render the DOCX ----------------------------------------------------
    $docxRelativePath = sprintf(
        'weekly-briefs/brief_%s_%s_%s.docx',
        $weekStart->format('Ymd'),
        $weekEnd->format('Ymd'),
        $generatedAt->format('His')
    );
    // NOTE(review): the file is written below storage_path('app/') while cleanup goes through
    // Storage::disk('local'); confirm both resolve to the same root in this application.
    $docxAbsolutePath = storage_path('app/'.$docxRelativePath);
    $plainText = $builder->toPlainText();
    $this->docxWriter->write($builder->blocks(), $docxAbsolutePath);

    $stats = [
        'papers_count' => $papersTotal,
        'papers_analyzed' => $scoredPapers->count(),
        'high_value_papers_count' => $highValueCount,
        'news_count' => $newsTotal,
        'news_analyzed' => $newsItems->count(),
        'teachers_count' => $teachersTotal,
        'references_count' => count($references),
        'track_distribution' => $trackCounts->all(),
        'sections' => [
            'overview' => 1,
            'breakthrough' => $scoredPapers->count(),
            'top_papers' => $topTen->count(),
            'projects' => min(8, $teachers->count()) + min(5, $scoredPapers->count()),
            'coop' => $coopNews->count(),
        ],
    ];

    // --- Replace earlier briefs for the same period -------------------------
    if ($replace) {
        $previousBriefs = static fn () => WeeklyBrief::query()
            ->whereDate('week_start', $weekStart->toDateString())
            ->whereDate('week_end', $weekEnd->toDateString());

        $previousBriefs()->get()->each(function (WeeklyBrief $old) use ($docxRelativePath): void {
            // Fix: the file name only carries the time of day, so regenerating the same week at
            // the same H:i:s yields the path of an earlier brief. Never delete the file that
            // was just written.
            if ($old->docx_path && $old->docx_path !== $docxRelativePath) {
                Storage::disk('local')->delete($old->docx_path);
            }
        });

        $previousBriefs()->delete();
    }

    return WeeklyBrief::query()->create([
        'week_start' => $weekStart->toDateString(),
        'week_end' => $weekEnd->toDateString(),
        'title' => $title,
        'markdown' => $plainText,
        'docx_path' => $docxRelativePath,
        'stats_json' => $stats,
        'admin_user_id' => $adminUserId,
        'generated_at' => $generatedAt,
    ]);
}
/**
 * Base query for papers that have a crawl_job_id and whose created_at lies inside the period.
 */
protected function paperQueryInRange(Carbon $weekStart, Carbon $weekEnd): Builder
{
    $period = [$weekStart, $weekEnd];

    return Paper::query()
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', $period);
}
/**
 * Base query for news items that have a crawl_job_id and whose created_at lies inside the period.
 */
protected function newsQueryInRange(Carbon $weekStart, Carbon $weekEnd): Builder
{
    $period = [$weekStart, $weekEnd];

    return News::query()
        ->whereNotNull('crawl_job_id')
        ->whereBetween('created_at', $period);
}
/**
 * Number of papers matched by paperQueryInRange() for the period.
 */
protected function countPapers(Carbon $weekStart, Carbon $weekEnd): int
{
    $total = $this->paperQueryInRange($weekStart, $weekEnd)->count();

    return (int) $total;
}
/**
 * Number of news items matched by newsQueryInRange() for the period.
 */
protected function countNews(Carbon $weekStart, Carbon $weekEnd): int
{
    $total = $this->newsQueryInRange($weekStart, $weekEnd)->count();

    return (int) $total;
}
/**
 * Number of teachers whose created_at lies inside the period.
 */
protected function countTeachers(Carbon $weekStart, Carbon $weekEnd): int
{
    $query = Teacher::query()->whereBetween('created_at', [$weekStart, $weekEnd]);

    return (int) $query->count();
}
/**
 * Load every paper of the period with its crawl job, newest publication first
 * (ties broken by descending id).
 *
 * @return Collection<int, Paper>
 */
protected function fetchPapers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    $query = $this->paperQueryInRange($weekStart, $weekEnd)->with('crawlJob');

    $query->orderBy('published_at', 'desc')->orderBy('id', 'desc');

    return $query->get();
}
/**
 * Load every news item of the period with its category item, newest publication first
 * (ties broken by descending id).
 *
 * @return Collection<int, News>
 */
protected function fetchNews(Carbon $weekStart, Carbon $weekEnd): Collection
{
    $query = $this->newsQueryInRange($weekStart, $weekEnd)->with('categoryItem');

    $query->orderBy('published_at', 'desc')->orderBy('id', 'desc');

    return $query->get();
}
/**
 * Load at most 50 teachers created inside the period (highest id first), together
 * with their university and research directions.
 *
 * @return Collection<int, Teacher>
 */
protected function fetchTeachers(Carbon $weekStart, Carbon $weekEnd): Collection
{
    $query = Teacher::query()->with(['university', 'researchDirections']);

    $query->whereBetween('created_at', [$weekStart, $weekEnd])
        ->orderBy('id', 'desc')
        ->limit(50);

    return $query->get();
}
/**
 * Determine which track a paper belongs to.
 *
 * The crawl job's request URL is checked first for a "/list/<code>" or
 * "categories/<code>" fragment; if none matches, the lower-cased title, summary
 * and paper URL are scanned for a few keywords. Falls back to 'cs'.
 *
 * @return string One of the keys of self::TRACKS.
 */
protected function resolvePaperTrack(Paper $paper): string
{
    $requestUrl = strtolower((string) ($paper->crawlJob?->request_url ?? ''));

    foreach (self::TRACKS as $code => $unusedLabel) {
        // "/list/<code>/" always contains "/list/<code>", so a single check covers both forms.
        $inListUrl = str_contains($requestUrl, '/list/'.$code);
        if ($inListUrl || str_contains($requestUrl, 'categories/'.$code)) {
            return $code;
        }
    }

    $text = strtolower($paper->title.' '.($paper->summary ?? '').' '.($paper->url ?? ''));

    return match (true) {
        str_contains($text, 'physics'), str_contains($text, 'quantum') => 'physics',
        str_contains($text, 'math') => 'math',
        str_contains($text, 'eess'), str_contains($text, 'electrical') => 'eess',
        default => 'cs',
    };
}
/**
 * Score a paper from 40 upwards (capped at 100) based on metadata completeness,
 * high-value keywords in title/summary and the length of the cleaned summary.
 */
protected function scorePaper(Paper $paper): int
{
    $body = $paper->title.' '.($paper->summary ?? '');

    $points = 40;
    $points += $paper->school_name ? 10 : 0;
    $points += $paper->authors ? 8 : 0;
    $points += $paper->published_at ? 5 : 0;
    $points += $this->matchesKeywords($body, self::HIGH_VALUE_KEYWORDS) ? 20 : 0;
    $points += mb_strlen($this->cleanText($paper->summary)) >= 120 ? 7 : 0;

    return min(100, $points);
}
/**
 * Classify a paper's industrialisation maturity from keywords in its title and summary.
 *
 * Levels are tested from most to least mature; the first level with a matching
 * keyword wins, and the lowest level is returned when nothing matches.
 */
protected function assessMaturity(Paper $paper): string
{
    $text = $paper->title.' '.($paper->summary ?? '');

    $ladder = [
        '可直接量产落地' => ['量产', '部署', '商用', '落地应用', 'production', 'deployed'],
        '中试优化' => ['中试', 'pilot plant', 'scale-up', '示范线'],
        '小试可行' => ['小试', 'prototype', '样机', '验证平台'],
    ];

    foreach ($ladder as $level => $keywords) {
        if ($this->matchesKeywords($text, $keywords)) {
            return $level;
        }
    }

    return '实验室原理验证';
}
/**
 * One-line innovation statement: the cleaned summary cut to 220 characters, or a
 * generic sentence built around the title when the paper has no summary.
 */
protected function extractInnovation(Paper $paper): string
{
    $abstract = $this->cleanText($paper->summary);

    if ($abstract === '') {
        $shortTitle = $this->truncate($paper->title, 80);

        return '围绕「'.$shortTitle.'」提出方法或系统层面的创新,具备进一步工程化验证价值。';
    }

    return $this->truncate($abstract, 220);
}
/**
 * Commercial-value paragraph for a paper: built from the cleaned summary (cut to
 * 120 characters) and the school name, or a generic note when there is no summary.
 */
protected function assessCommercialValue(Paper $paper): string
{
    $abstract = $this->cleanText($paper->summary);

    if ($abstract === '') {
        return '降本/提效空间与下游应用场景需结合实验数据进一步量化;建议关注头部团队复现与工程化进度。';
    }

    $excerpt = $this->truncate($abstract, 120);
    $scenario = $paper->school_name ?: '待结合赛道进一步拆解';

    return '降本提效:'.$excerpt.';替代现有方案需对照行业 baseline 评估;下游场景:'.$scenario;
}
/**
 * Rate how likely a paper is to be converted into a product.
 *
 * High requires a score of at least 75 together with one of the two most mature
 * levels; medium requires a score of at least 60; everything else is low.
 */
protected function assessConversionPotential(int $score, string $maturity): string
{
    $nearIndustrial = in_array($maturity, ['可直接量产落地', '中试优化'], true);

    return match (true) {
        $score >= 75 && $nearIndustrial => '高(接近工程化/产业化窗口)',
        $score >= 60 => '中(具备跟踪价值,需验证复现与场景)',
        default => '低(偏基础研究,可作为技术雷达储备)',
    };
}
/**
 * Render the share of each track as "Label P%(N篇)" entries joined by ";".
 *
 * Every track of self::TRACKS is listed, including those with zero papers.
 *
 * @param Collection<string, int> $trackCounts Paper count per track code.
 * @param int                     $total       Denominator for the percentages.
 *
 * @return string The formatted distribution, or a "no data" note when $total is not positive.
 */
protected function formatTrackDistribution(Collection $trackCounts, int $total): string
{
    if ($total <= 0) {
        return '暂无数据';
    }

    $parts = [];
    foreach (self::TRACKS as $key => $label) {
        $count = (int) ($trackCounts[$key] ?? 0);
        $percent = (int) round($count / $total * 100);
        // Fix: the opening parenthesis was missing, leaving an unmatched ")" in the output.
        $parts[] = "{$label} {$percent}%({$count}篇)";
    }

    // Fix: entries were joined with an empty string and ran into each other.
    return implode(';', $parts);
}
/**
 * Build the bullet texts for the "project opportunities" overview block.
 *
 * Produces one hint for each of the first five teachers and one hint for each of
 * the first three scored papers whose score is at least 55.
 *
 * @param Collection<int, array{paper: Paper, track: string, score: int, maturity: string}> $scoredPapers
 * @param Collection<int, Teacher> $teachers
 *
 * @return list<string>
 */
protected function buildProjectOpportunities(Collection $scoredPapers, Collection $teachers): array
{
    $hints = [];

    foreach ($teachers->take(5) as $teacher) {
        $uni = $teacher->university?->name ?: $teacher->university_text ?: '国内高校';
        $dirs = $teacher->researchDirections->pluck('name')->join('、') ?: '细分方向待补充';
        // Fix: the opening parenthesis before the research directions was missing.
        $hints[] = "新增团队:{$uni} · {$teacher->name}({$dirs}),建议纳入长期跟踪池";
    }

    foreach ($scoredPapers->take(3) as $row) {
        if ($row['score'] < 55) {
            continue;
        }

        /** @var Paper $paper */
        $paper = $row['paper'];
        $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
        // Fix: the opening parenthesis before the track label was missing.
        $hints[] = "可转化成果:{$paper->title}({$trackLabel}),成熟度:{$row['maturity']}";
    }

    return $hints;
}
/**
 * Render the "potential projects" section: up to eight teacher entries followed by
 * up to five paper entries, numbered consecutively.
 *
 * @param WeeklyBriefContentBuilder $builder Receives the headings and bullets.
 * @param Collection<int, array{paper: Paper, track: string, score: int, maturity: string}> $scoredPapers
 * @param Collection<int, Teacher> $teachers
 */
protected function renderPotentialProjects(
    WeeklyBriefContentBuilder $builder,
    Collection $scoredPapers,
    Collection $teachers,
): void {
    if ($teachers->isEmpty() && $scoredPapers->isEmpty()) {
        $builder->paragraph('本周暂无高辨识度核心团队或高价值成果。')->spacer();

        return;
    }

    $index = 1;

    foreach ($teachers->take(8) as $teacher) {
        $uni = $teacher->university?->name ?: $teacher->university_text ?: '国内高校';
        $dirs = $teacher->researchDirections->pluck('name')->join('、') ?: '待补充';
        $builder->h3("{$index}. {$uni} · {$teacher->name}");
        $builder->bullet('团队背景:'.($teacher->bio ? $this->truncate($this->cleanText($teacher->bio), 180) : ($teacher->title ?: '职称/履历待补充')));
        $builder->bullet('技术稀缺性:长期深耕「'.$dirs.'」,可对比行业现有方案评估差异化');
        $builder->bullet('技术进度:'.$teacher->created_at?->format('Y-m-d').' 入库,建议结合论文/专利进一步核验');
        $builder->spacer();
        $index++;
    }

    foreach ($scoredPapers->take(5) as $row) {
        /** @var Paper $paper */
        $paper = $row['paper'];
        // Fix: use the same fallback as every other track lookup in this class so an
        // unknown track code prints as-is instead of raising an undefined-key warning.
        $trackLabel = self::TRACKS[$row['track']] ?? $row['track'];
        $builder->h3("{$index}. {$paper->title}");
        $builder->bullet('团队背景:'.($this->cleanText($paper->authors) ?: '作者信息待补充'));
        $builder->bullet('技术稀缺性:'.$trackLabel.' 赛道,'.$this->assessConversionPotential($row['score'], $row['maturity']));
        $builder->bullet('技术进度:'.$row['maturity']);
        $builder->spacer();
        $index++;
    }
}
/**
 * Render one news item of the cooperation / technology-transfer section.
 *
 * Emits a heading, an optional summary bullet, an optional bullet listing the
 * detected cooperation signals, an optional date bullet and the source block.
 *
 * @param WeeklyBriefContentBuilder $builder  Receives the rendered blocks.
 * @param int                       $index    Number printed in the heading.
 * @param News                      $news     News item to render.
 * @param int                       $refIndex Running reference counter, passed on to appendReferenceBlock().
 * @param list<array{label: string, url: string}> $references Collected references, extended in place.
 */
protected function renderCoopNewsEntry(
    WeeklyBriefContentBuilder $builder,
    int $index,
    News $news,
    int &$refIndex,
    array &$references,
): void {
    $builder->h3("{$index}. {$news->title}");

    $summary = $this->cleanText($news->summary);
    if ($summary !== '') {
        $builder->bullet('合作/转化进展:'.$this->truncate($summary, 240));
    }

    $text = $news->title.' '.$summary;
    $signals = [
        '与头部企业联合研究' => ['头部企业', '联合研究', '联合实验室'],
        '产业资本介入' => ['投资', '融资', '产业资本', '基金'],
        '落地试点应用' => ['试点', '示范', '落地', '应用'],
    ];
    $flags = [];
    foreach ($signals as $flag => $keywords) {
        if ($this->matchesKeywords($text, $keywords)) {
            $flags[] = $flag;
        }
    }
    if ($flags !== []) {
        // Fix: flags were joined with an empty string and became one unreadable run of
        // text; separate them with the enumeration comma used elsewhere in the report.
        $builder->bullet('产学研信号:'.implode('、', $flags));
    }

    if ($news->published_at) {
        $builder->bullet('时间:'.$news->published_at->timezone('Asia/Shanghai')->format('Y年n月j日'));
    }

    $label = $news->source_site ?: ($news->source ?: '资讯来源');
    $this->appendReferenceBlock($builder, $label, $news->source_url ?: $news->cover_url, $refIndex, $references);
    $builder->spacer();
}
/**
 * Emit the "source" bullet for an entry and, when the URL is an http(s) link, a
 * link block plus an entry in the collected reference list.
 *
 * A URL already present in $references is linked again but not recorded twice,
 * and $refIndex is only advanced when a new reference is recorded.
 *
 * @param WeeklyBriefContentBuilder $builder  Receives the bullet and link blocks.
 * @param string                    $label    Source label; falls back to a generic label when empty after cleaning.
 * @param string|null               $url      Candidate source URL.
 * @param int                       $refIndex Running reference counter.
 * @param list<array{label: string, url: string}> $references Collected references, extended in place.
 */
protected function appendReferenceBlock(
    WeeklyBriefContentBuilder $builder,
    string $label,
    ?string $url,
    int &$refIndex,
    array &$references,
): void {
    $label = $this->cleanText($label) ?: '来源';
    $url = trim((string) $url);
    $isHttpLink = $url !== '' && preg_match('~^https?://~i', $url);

    // The source bullet is printed in every case.
    $builder->bullet('来源:'.$label);

    if (! $isHttpLink) {
        return;
    }

    $alreadyListed = in_array($url, array_column($references, 'url'), true);
    if (! $alreadyListed) {
        $references[] = ['label' => $label.' - '.$this->briefTitleFromUrl($url), 'url' => $url];
        $refIndex++;
    }

    $builder->link($label, $url);
}
/**
 * Tell whether the text contains at least one of the keywords, ignoring letter case.
 *
 * Matching used to be case-sensitive, so lower-case English keywords such as
 * "prototype" or "scale-up" missed capitalised words in titles ("Prototype",
 * "Scale-Up"). Chinese keywords are unaffected by the case folding.
 *
 * @param string       $text     Text to search.
 * @param list<string> $keywords Keywords to look for; empty strings are ignored.
 *
 * @return bool True as soon as one keyword is found as a substring.
 */
protected function matchesKeywords(string $text, array $keywords): bool
{
    $haystack = mb_strtolower($text);

    foreach ($keywords as $keyword) {
        if ($keyword !== '' && str_contains($haystack, mb_strtolower($keyword))) {
            return true;
        }
    }

    return false;
}
/**
 * Short display title for a URL: its host (or the whole URL when no host can be
 * parsed), cut to 48 characters without an ellipsis.
 */
protected function briefTitleFromUrl(string $url): string
{
    $host = parse_url($url, PHP_URL_HOST);
    if (! $host) {
        $host = $url;
    }

    return Str::limit($host, 48, '');
}
/**
 * Format a date in Chinese notation without leading zeros for month and day.
 */
protected function formatChineseDate(Carbon $date): string
{
    $pattern = 'Y年n月j日';

    return $date->format($pattern);
}
/**
 * Normalise free text: strip HTML tags, collapse every run of whitespace into a
 * single space and trim the result. Null or blank input yields an empty string.
 */
protected function cleanText(?string $text): string
{
    if ($text === null) {
        return '';
    }

    if (trim($text) === '') {
        return '';
    }

    $withoutTags = strip_tags($text);
    // preg_replace() returns null on failure; that case is mapped to an empty string.
    $collapsed = preg_replace('/\s+/u', ' ', $withoutTags);

    return trim($collapsed ?? '');
}
/**
 * Shorten text to at most $limit characters (multibyte-aware), replacing the tail
 * with a single ellipsis character when something had to be cut.
 *
 * @param string $text  Text to shorten.
 * @param int    $limit Maximum length in characters, ellipsis included.
 *
 * @return string The original text when it fits, otherwise the shortened text;
 *                an empty string when $limit is not positive.
 */
protected function truncate(string $text, int $limit): string
{
    // Fix: with a non-positive limit, mb_substr() received a negative length and
    // returned almost the entire string instead of nothing.
    if ($limit <= 0) {
        return '';
    }

    if (mb_strlen($text) <= $limit) {
        return $text;
    }

    return mb_substr($text, 0, $limit - 1).'…';
}
}