You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
szkp-map-service/app/Support/StudyTourDeclarationParser.php

919 lines
30 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
namespace App\Support;
use App\Models\DictItem;
use App\Models\Venue;
use Illuminate\Support\Collection;
class StudyTourDeclarationParser
{
/**
 * Heading text that opens each section of the declaration form, keyed by the
 * internal section name. splitSections() treats a line that starts with one of
 * these markers as the beginning of that section.
 */
private const SECTION_MARKERS = [
'basic' => '一、线路基本情况',
'intro' => '二、线路简介',
'route' => '三、线路规划',
'courses' => '四、研学课程',
'fee' => '五、线路收费标准',
'impl' => '六、线路计划实施情况',
];
/**
 * Parse a declaration file into a normalized payload plus warnings.
 *
 * Files with the "doc" extension are delegated to parseDocFile(); for every
 * other extension the text is obtained from DocTextExtractor::extract() and
 * passed to parseText().
 *
 * @param string $path      Path of the file to parse.
 * @param string $extension File extension; a leading dot and letter case are ignored.
 * @return array{parsed: array<string, mixed>, warnings: array<int, string>}
 */
public static function parseFile(string $path, string $extension): array
{
    $normalizedExtension = strtolower(ltrim($extension, '.'));

    return $normalizedExtension === 'doc'
        ? self::parseDocFile($path)
        : self::parseText(DocTextExtractor::extract($path, $normalizedExtension));
}
/**
 * Parse a legacy .doc declaration by trying every text candidate the extractor
 * offers and keeping the parse result with the highest quality score.
 *
 * @param string $path Path of the .doc file.
 * @return array{parsed: array<string, mixed>, warnings: array<int, string>}
 *
 * @throws \RuntimeException When the extractor yields no text candidate at all.
 */
private static function parseDocFile(string $path): array
{
    $candidates = DocTextExtractor::extractDocCandidates($path);
    if ($candidates === []) {
        throw new \RuntimeException('无法解析 .doc 申报表,请检查文件是否损坏,或另存为 .docx 后重试');
    }

    $bestResult = null;
    $bestScore = null;
    foreach ($candidates as $text) {
        $result = self::parseText($text);
        $score = self::scoreParsedResult($result);
        // Scores can be negative (garbled-text penalties), so the first candidate
        // must always be accepted and later ones compared against it; a fixed
        // starting threshold would ignore every candidate that scores below it.
        if ($bestScore === null || $score > $bestScore) {
            $bestScore = $score;
            $bestResult = $result;
        }
    }

    // $candidates is non-empty here, so the loop assigned $bestResult at least once.
    return $bestResult;
}
/**
 * Heuristically rate how plausible a parse result is; higher is better.
 *
 * Points are awarded for populated fields, usable route items, well-formed
 * courses and readable rich-text sections, and deducted for garbled or
 * suspicious content. The fractional rich-text bonus is accumulated as a float
 * and the grand total is truncated to int only once, at the end.
 *
 * @param array{parsed: array<string, mixed>, warnings: array<int, string>} $result
 */
private static function scoreParsedResult(array $result): int
{
    $data = $result['parsed'] ?? [];
    $total = 0;

    // Flat bonus for each scalar field that is not blank.
    $fieldBonuses = [
        'name' => 50,
        'org_name' => 20,
        'suitable_count' => 5,
        'duration' => 5,
    ];
    foreach ($fieldBonuses as $field => $bonus) {
        if (trim((string) ($data[$field] ?? '')) !== '') {
            $total += $bonus;
        }
    }

    // Per-entry bonus for the list-valued fields.
    $total += 3 * count($data['seasons'] ?? []);
    $total += 3 * count($data['grade_levels'] ?? []);
    $total += 8 * count($data['venue_items'] ?? []);

    // Route plans: garbled items are penalised, usable ones rewarded per group.
    foreach ($data['route_plans'] ?? [] as $plan) {
        $usable = 0;
        foreach ($plan['items'] ?? [] as $entry) {
            $when = trim((string) ($entry['time'] ?? ''));
            $what = trim((string) ($entry['activity'] ?? ''));
            $where = trim((string) ($entry['location'] ?? ''));
            if ($what === '' && $when === '') {
                continue;
            }

            $garbled = self::isGarbledLine($what)
                || self::isGarbledLine($when)
                || self::isGarbledLine($where);
            if ($garbled) {
                $total -= 30;
            } else {
                $usable++;
            }
        }

        if ($usable > 0) {
            $total += 10 + 6 * $usable;
        }
    }

    // Courses: a base bonus per course, then a quality adjustment per course name.
    $courseList = $data['courses'] ?? [];
    $total += 5 * count($courseList);
    foreach ($courseList as $course) {
        $title = trim((string) ($course['name'] ?? ''));
        $body = trim((string) ($course['content'] ?? ''));

        $total += match (true) {
            // Missing name, or a name that merely repeats the content.
            $title === '' || $title === $body => -20,
            // Names starting with one of these prefixes are penalised.
            preg_match('/^(课程\d+|运用|文化馆|走进|寻找|领略|窑烤|泡泡剧场|萤火虫知识)/u', $title) === 1 => -20,
            // Names longer than 24 characters are penalised.
            mb_strlen($title) > 24 => -15,
            default => 18,
        };
    }

    // Rich-text sections: heavy penalty when garbled, otherwise a length-capped bonus.
    foreach (['intro_html', 'fee_html', 'implementation_html'] as $htmlKey) {
        $visible = trim(strip_tags((string) ($data[$htmlKey] ?? '')));
        if ($visible === '') {
            continue;
        }

        if (self::containsGarbledText($visible)) {
            $total -= 120;
            continue;
        }

        $total += 12 + min(mb_strlen($visible), 120) / 20;
    }

    return (int) $total;
}
/**
 * Parse the plain text of a declaration form.
 *
 * The text is split into lines and sections, each section is handed to its
 * dedicated parser, and the combined result is passed through
 * StudyTourPayload::normalizeIncoming(). Warnings are de-duplicated while
 * keeping their first-seen order.
 *
 * @return array{parsed: array<string, mixed>, warnings: array<int, string>}
 */
public static function parseText(string $text): array
{
    $sections = self::splitSections(self::splitLines($text));
    $linesOf = static fn (string $key): array => $sections[$key] ?? [];

    $basic = self::parseBasicSection($linesOf('basic'));
    $introText = self::joinSectionLines($linesOf('intro'));
    $feeText = self::joinSectionLines($linesOf('fee'));
    $implText = self::joinSectionLines($linesOf('impl'));
    $routePlans = self::parseRouteSection($linesOf('route'));
    $courses = self::parseCoursesSection($linesOf('courses'), $routePlans);

    // Venue matching appends its own warnings first; the generic ones follow.
    $warnings = [];
    $venueRaw = (string) ($basic['venue_raw'] ?? '');
    $venueItems = self::matchVenueItems($venueRaw, $warnings)['items'];

    if ($basic['name'] === '') {
        $warnings[] = '未识别到线路名称,请手动填写';
    }
    if ($venueItems === []) {
        $warnings[] = '未识别到线路点位/场馆,请手动添加';
    }

    $payload = [
        'name' => $basic['name'],
        'org_name' => $basic['org_name'],
        'seasons' => $basic['seasons'],
        'suitable_count' => $basic['suitable_count'],
        'grade_levels' => $basic['grade_levels'],
        'duration' => $basic['duration'],
        'contact_person' => $basic['contact_person'],
        'contact_phones' => $basic['contact_phones'],
        'venue_items' => $venueItems,
        'intro_html' => self::plainTextToHtml($introText),
        'route_plans' => $routePlans,
        'courses' => $courses,
        'fee_html' => self::plainTextToHtml($feeText),
        'implementation_html' => self::plainTextToHtml($implText),
        'tags' => [],
        'cover_image' => '',
        'sort' => 0,
        'is_on_shelf' => true,
    ];

    return [
        'parsed' => StudyTourPayload::normalizeIncoming($payload),
        'warnings' => array_values(array_unique($warnings)),
    ];
}
/**
 * Split raw text into trimmed, non-empty lines, discarding lines that
 * isGarbledLine() classifies as garbled.
 *
 * CRLF, CR and form-feed characters are all treated as line breaks.
 *
 * @return array<int, string>
 */
private static function splitLines(string $text): array
{
    $text = str_replace(["\r\n", "\r", "\f"], "\n", $text);

    // explode() is byte-safe. A /u regex split would fail as a whole (and yield
    // no lines at all) if the text contained a single invalid UTF-8 sequence;
    // this way only the affected lines are left for the garbled-line filter.
    $lines = [];
    foreach (explode("\n", $text) as $part) {
        $line = trim($part);
        if ($line === '' || self::isGarbledLine($line)) {
            continue;
        }
        $lines[] = $line;
    }

    return $lines;
}
/**
 * Distribute lines over the sections declared in SECTION_MARKERS.
 *
 * A line that starts with a section marker switches the active section and is
 * itself dropped; lines seen before the first marker are ignored. Every section
 * key is present in the result, even when it received no lines.
 *
 * @param array<int, string> $lines
 * @return array<string, array<int, string>>
 */
private static function splitSections(array $lines): array
{
    $sections = [];
    foreach (array_keys(self::SECTION_MARKERS) as $sectionKey) {
        $sections[$sectionKey] = [];
    }

    $active = null;
    foreach ($lines as $text) {
        $hit = null;
        foreach (self::SECTION_MARKERS as $sectionKey => $heading) {
            // str_starts_with() is also true when the line equals the heading.
            if (str_starts_with($text, $heading)) {
                $hit = $sectionKey;
                break;
            }
        }

        if ($hit !== null) {
            $active = $hit;
        } elseif ($active !== null) {
            $sections[$active][] = $text;
        }
    }

    return $sections;
}
/**
 * Extract the "basic information" fields from the lines of section one.
 *
 * The lines are read as a flat sequence in which a label line is followed by
 * its value lines. The season and grade rows are recognised by prefix and
 * parsed as checkbox lists; all other labels must match a line exactly.
 *
 * @param array<int, string> $lines
 * @return array<string, mixed> Keys: org_name, name, seasons, venue_raw,
 *                              suitable_count, grade_levels, duration,
 *                              contact_person, contact_phones.
 */
private static function parseBasicSection(array $lines): array
{
    $fields = [
        'org_name' => '',
        'name' => '',
        'seasons' => [],
        'venue_raw' => '',
        'suitable_count' => '',
        'grade_levels' => [],
        'duration' => '',
        'contact_person' => '',
        'contact_phones' => '',
    ];
    $labels = [
        '组织单位名称' => 'org_name',
        '线路名称' => 'name',
        '线路点位' => 'venue_raw',
        '适宜人数' => 'suitable_count',
        '研学时长' => 'duration',
        '线路联络人' => 'contact_person',
        '咨询电话' => 'contact_phones',
    ];

    // A value run continues until the end of input, a known label, or the start
    // of the season / grade row. The same rule is used for every field so that a
    // checkbox row can never swallow the row that directly follows it.
    $isValueLine = static fn (int $index): bool => ($lines[$index] ?? '') !== ''
        && ! self::isBasicLabelLine($lines[$index])
        && ! str_starts_with($lines[$index], '对应季节')
        && ! str_starts_with($lines[$index], '适配学段');

    $seasonBuffer = [];
    $gradeBuffer = [];
    $total = count($lines);
    for ($i = 0; $i < $total; $i++) {
        $line = $lines[$i];
        if ($line === '') {
            continue;
        }

        if (str_starts_with($line, '对应季节')) {
            $seasonBuffer[] = $line;
            // The "multiple choice" hint counts as a label line, so step over it
            // explicitly before collecting the checkbox options.
            if (($lines[$i + 1] ?? '') === '(可多选)') {
                $i++;
            }
            while ($isValueLine($i + 1)) {
                $seasonBuffer[] = $lines[++$i];
            }
            $fields['seasons'] = self::parseSeasons(implode(' ', $seasonBuffer));
            continue;
        }

        if (str_starts_with($line, '适配学段')) {
            $gradeBuffer[] = $line;
            if (($lines[$i + 1] ?? '') === '(可多选)') {
                $i++;
            }
            while ($isValueLine($i + 1)) {
                $gradeBuffer[] = $lines[++$i];
            }
            $fields['grade_levels'] = self::parseGrades(implode(' ', $gradeBuffer));
            continue;
        }

        $key = $labels[$line] ?? null;
        if ($key === null) {
            continue;
        }

        $valueLines = [];
        while ($isValueLine($i + 1)) {
            $valueLines[] = $lines[++$i];
        }
        $fields[$key] = trim(implode("\n", $valueLines));
    }

    $fields['suitable_count'] = self::normalizeBlankPlaceholder($fields['suitable_count']);
    $fields['duration'] = self::normalizeDuration($fields['duration']);
    $fields['contact_phones'] = StudyTourPayload::normalizeContactPhones($fields['contact_phones']);
    foreach (['org_name', 'name', 'contact_person', 'venue_raw'] as $key) {
        $fields[$key] = StudyTourPayload::compactText((string) $fields[$key]);
    }

    return $fields;
}
/**
 * Tell whether a line is exactly one of the basic-section labels or the
 * "multiple choice" hint, i.e. a line that ends the value of the previous field.
 */
private static function isBasicLabelLine(string $line): bool
{
    $terminators = [
        '(可多选)',
        '组织单位名称',
        '线路名称',
        '线路点位',
        '适宜人数',
        '研学时长',
        '线路联络人',
        '咨询电话',
    ];

    return in_array($line, $terminators, true);
}
/**
 * Collect the season codes whose label is directly preceded by a checked-box
 * symbol in the raw text, then keep only those accepted by filterDictValues()
 * for the "study_tour_season" dictionary.
 *
 * @return array<int, string>
 */
private static function parseSeasons(string $raw): array
{
    $codesByLabel = [
        '春季' => 'spring',
        '夏季' => 'summer',
        '秋季' => 'autumn',
        '冬季' => 'winter',
    ];

    $checked = array_filter(
        $codesByLabel,
        static fn (string $label): bool => preg_match('/(?:[☑✅✔]|■)\s*'.preg_quote($label, '/').'/u', $raw) === 1,
        ARRAY_FILTER_USE_KEY
    );

    return self::filterDictValues('study_tour_season', array_values($checked));
}
/**
 * Collect the grade-level codes whose label is directly preceded by a
 * checked-box symbol in the raw text, then keep only those accepted by
 * filterDictValues() for the "study_tour_grade_level" dictionary.
 *
 * @return array<int, string>
 */
private static function parseGrades(string $raw): array
{
    $codesByLabel = [
        '幼儿园' => 'kindergarten',
        '小学' => 'primary',
        '初中' => 'junior',
        '高中' => 'high',
        '全学段' => 'all',
    ];

    $checked = array_filter(
        $codesByLabel,
        static fn (string $label): bool => preg_match('/(?:[☑✅✔]|■)\s*'.preg_quote($label, '/').'/u', $raw) === 1,
        ARRAY_FILTER_USE_KEY
    );

    return self::filterDictValues('study_tour_grade_level', array_values($checked));
}
/**
 * Keep only the values that exist as active items of the given dictionary type.
 * The order of $values is preserved and the result is re-indexed.
 *
 * @param array<int, string> $values
 * @return array<int, string>
 */
private static function filterDictValues(string $dictType, array $values): array
{
    $activeItems = DictItem::query()
        ->where('dict_type', $dictType)
        ->where('is_active', true);
    $allowedValues = $activeItems->pluck('item_value')->all();

    $kept = array_intersect($values, $allowedValues);

    return array_values($kept);
}
/**
 * Compact the text and strip runs of underscores (fill-in-the-blank
 * placeholders), compacting once more afterwards.
 */
private static function normalizeBlankPlaceholder(string $raw): string
{
    $compact = StudyTourPayload::compactText($raw);
    $withoutBlanks = preg_replace('/_+/u', '', $compact);

    // preg_replace() yields null on failure; fall back to the compacted text.
    return StudyTourPayload::compactText($withoutBlanks ?? $compact);
}
/**
 * Normalize the duration value: strip underscore placeholders, fold line
 * breaks into spaces and compact the result. Blank input stays blank.
 */
private static function normalizeDuration(string $raw): string
{
    $multiline = StudyTourPayload::compactMultilineText($raw);
    if ($multiline === '') {
        return '';
    }

    $withoutBlanks = preg_replace('/_+/u', '', $multiline) ?? $multiline;
    $singleLine = str_replace("\n", ' ', $withoutBlanks);

    return StudyTourPayload::compactText($singleLine);
}
/**
 * Parse the route-planning section into groups of itinerary items.
 *
 * Parsing starts after the last table-header cell found in the section. A line
 * recognised by isRouteDateLabel() opens a new group; inside a group a time line
 * starts an item whose activity and location are read from the following lines.
 * Lines without a time either fill the empty location of the previous item or
 * become an activity-only item. Items with no location inherit the most recent
 * location seen earlier in the same group. The result is passed through
 * StudyTourPayload::normalizeRoutePlans().
 *
 * @param array<int, string> $lines
 * @return array<int, array{date_label: string, items: array<int, array{time: string, activity: string, location: string}>}>
 */
private static function parseRouteSection(array $lines): array
{
// Skip everything up to and including the last table-header cell.
$start = 0;
foreach ($lines as $idx => $line) {
if (in_array($line, ['日期', '时间', '行程安排', '地点'], true)) {
$start = $idx + 1;
}
}
$groups = [];
// Index of the group currently being filled; null until the first date label.
$currentIndex = null;
for ($i = $start; $i < count($lines); $i++) {
$line = $lines[$i];
if ($line === '') {
continue;
}
if (self::isRouteDateLabel($line)) {
$groups[] = [
'date_label' => StudyTourPayload::compactText($line),
'items' => [],
];
$currentIndex = count($groups) - 1;
continue;
}
// Content before the first date label cannot be attached to any group.
if ($currentIndex === null) {
continue;
}
// Stop at garbled content or at a line that looks like the heading of a later section.
if (self::isGarbledLine($line) || preg_match('/^[四五六]、/u', $line)) {
break;
}
if (! self::isTimeLine($line)) {
if (self::isRouteTableHeaderLine($line)) {
continue;
}
// A location-like line fills the previous item's location when that is still empty.
$itemCount = count($groups[$currentIndex]['items']);
if ($itemCount > 0 && self::isLikelyRouteLocation($line)) {
$lastIndex = $itemCount - 1;
if ($groups[$currentIndex]['items'][$lastIndex]['location'] === '') {
$groups[$currentIndex]['items'][$lastIndex]['location'] = StudyTourPayload::compactText($line);
continue;
}
}
// Anything that does not look like a location becomes an activity without a time;
// location-like lines that could not be attached above are dropped.
if (! self::isLikelyRouteLocation($line)) {
$groups[$currentIndex]['items'][] = [
'time' => '',
'activity' => StudyTourPayload::compactText($line),
'location' => '',
];
}
continue;
}
// Time line: the next line, unless it is a time, date label or header, is the activity.
$time = $line;
$activity = '';
$location = '';
if ($i + 1 < count($lines) && ($lines[$i + 1] ?? '') !== '' && ! self::isTimeLine($lines[$i + 1]) && ! self::isRouteDateLabel($lines[$i + 1]) && ! self::isRouteTableHeaderLine($lines[$i + 1])) {
$activity = $lines[++$i];
}
// Skip blank lines between the activity and a possible location.
while ($i + 1 < count($lines) && ($lines[$i + 1] ?? '') === '') {
$i++;
}
// The following line is taken as the location when it looks like one, or at least
// does not look like another activity.
if ($i + 1 < count($lines) && ($lines[$i + 1] ?? '') !== '' && ! self::isTimeLine($lines[$i + 1]) && ! self::isRouteDateLabel($lines[$i + 1]) && ! self::isRouteTableHeaderLine($lines[$i + 1])) {
$candidate = $lines[$i + 1];
if (self::isLikelyRouteLocation($candidate) || ! self::looksLikeRouteActivity($candidate)) {
$location = $lines[++$i];
}
}
$groups[$currentIndex]['items'][] = [
'time' => StudyTourPayload::compactText($time),
'activity' => StudyTourPayload::compactText($activity),
'location' => StudyTourPayload::compactText($location),
];
}
// Forward-fill: an item without a location inherits the last location seen in its group.
foreach ($groups as &$group) {
$lastLocation = '';
foreach ($group['items'] as &$item) {
if ($item['location'] !== '') {
$lastLocation = $item['location'];
continue;
}
if ($lastLocation !== '') {
$item['location'] = $lastLocation;
}
}
// Break the references so later writes to $item / $group cannot alter the arrays.
unset($item);
}
unset($group);
return StudyTourPayload::normalizeRoutePlans($groups);
}
/**
 * Parse the courses section.
 *
 * After the last table-header cell, every line consisting only of digits is
 * read as a serial number followed by a name line and a content line. When
 * that yields nothing, parseCoursesFromContentLines() is used as a fallback.
 * The result is passed through StudyTourPayload::normalizeCourses().
 *
 * @param array<int, string> $lines
 * @param array<int, array{date_label: string, items: array<int, array{time: string, activity: string, location: string}>}> $routePlans
 * @return array<int, array{sort: int, name: string, content: string}>
 */
private static function parseCoursesSection(array $lines, array $routePlans = []): array
{
    // Begin right after the last header cell of the course table.
    $start = 0;
    foreach ($lines as $position => $text) {
        if (in_array($text, ['序号', '课程名称', '课程内容'], true)) {
            $start = $position + 1;
        }
    }

    $courses = [];
    $total = count($lines);
    $cursor = $start;
    while ($cursor < $total) {
        $serial = $lines[$cursor];
        if ($serial === '' || preg_match('/^\d+$/', $serial) !== 1) {
            $cursor++;
            continue;
        }

        $name = StudyTourPayload::compactText((string) ($lines[$cursor + 1] ?? ''));
        $content = StudyTourPayload::compactText((string) ($lines[$cursor + 2] ?? ''));
        if ($name !== '' || $content !== '') {
            $courses[] = [
                'sort' => count($courses) + 1,
                'name' => $name,
                'content' => $content,
            ];
        }

        // The serial number and the two lines after it are consumed either way.
        $cursor += 3;
    }

    if ($courses === []) {
        $courses = self::parseCoursesFromContentLines($lines, $start, $routePlans);
    }

    return StudyTourPayload::normalizeCourses($courses);
}
/**
 * Fallback course parser for sections without serial-number rows.
 *
 * Strategies, in order: (1) alternating name/content lines, (2) pairing each
 * content line with a candidate activity name from the route plans when the
 * counts match, (3) per line, splitting "name: content" at the first colon or
 * using the line itself (or a generated placeholder) as the name.
 *
 * @param array<int, string> $lines
 * @param int $start Index of the first line to consider.
 * @param array<int, array{date_label: string, items: array<int, array{time: string, activity: string, location: string}>}> $routePlans
 * @return array<int, array{sort: int, name: string, content: string}>
 */
private static function parseCoursesFromContentLines(array $lines, int $start, array $routePlans): array
{
    $contentLines = [];
    $total = count($lines);
    for ($i = $start; $i < $total; $i++) {
        $line = $lines[$i];
        if ($line === '' || in_array($line, ['序号', '课程名称', '课程内容'], true)) {
            continue;
        }
        // A line that looks like the heading of a later section ends the course list.
        if (preg_match('/^[五六]、/u', $line)) {
            break;
        }
        $contentLines[] = StudyTourPayload::compactText($line);
    }
    if ($contentLines === []) {
        return [];
    }

    $pairedCourses = self::parseCoursesFromNameContentPairs($contentLines);
    if ($pairedCourses !== []) {
        return $pairedCourses;
    }

    $activityNames = self::candidateCourseNamesFromRoutes($routePlans);
    if (count($activityNames) === count($contentLines)) {
        $courses = [];
        foreach ($contentLines as $idx => $content) {
            $courses[] = [
                'sort' => $idx + 1,
                'name' => $activityNames[$idx],
                'content' => $content,
            ];
        }

        return $courses;
    }

    $courses = [];
    foreach ($contentLines as $idx => $line) {
        // Accept both the ASCII colon and the full-width colon (U+FF1A), which
        // is the usual separator in Chinese-language documents.
        if (preg_match('/^(.{2,30}?)[:\x{FF1A}]\s*(.+)$/u', $line, $matches)) {
            $courses[] = [
                'sort' => $idx + 1,
                'name' => StudyTourPayload::compactText($matches[1]),
                'content' => StudyTourPayload::compactText($matches[2]),
            ];
            continue;
        }
        $courses[] = [
            'sort' => $idx + 1,
            // Short lines double as their own name; longer ones get a placeholder.
            'name' => mb_strlen($line) <= 20 ? $line : ('课程'.($idx + 1)),
            'content' => $line,
        ];
    }

    return $courses;
}
/**
 * Interpret the lines as alternating course name / course content pairs.
 *
 * Returns an empty list unless the number of lines is even and every pair has a
 * non-empty name of at most 30 characters and a non-empty content that is at
 * least as long as the name.
 *
 * @param array<int, string> $lines
 * @return array<int, array{sort: int, name: string, content: string}>
 */
private static function parseCoursesFromNameContentPairs(array $lines): array
{
    $lineCount = count($lines);
    if ($lineCount < 2 || $lineCount % 2 !== 0) {
        return [];
    }

    $courses = [];
    foreach (array_chunk($lines, 2) as $position => [$rawName, $rawContent]) {
        $name = StudyTourPayload::compactText($rawName);
        $content = StudyTourPayload::compactText($rawContent);

        $implausible = $name === ''
            || $content === ''
            || mb_strlen($name) > 30
            || mb_strlen($content) < mb_strlen($name);
        if ($implausible) {
            // One bad pair invalidates the whole interpretation.
            return [];
        }

        $courses[] = [
            'sort' => $position + 1,
            'name' => $name,
            'content' => $content,
        ];
    }

    return $courses;
}
/**
 * List the distinct, non-empty route activities that are not matched by
 * isRouteMealOrCeremony(), in order of first appearance.
 *
 * @param array<int, array{date_label: string, items: array<int, array{time: string, activity: string, location: string}>}> $routePlans
 * @return array<int, string>
 */
private static function candidateCourseNamesFromRoutes(array $routePlans): array
{
    $unique = [];
    foreach ($routePlans as $plan) {
        foreach ($plan['items'] ?? [] as $entry) {
            $label = StudyTourPayload::compactText((string) ($entry['activity'] ?? ''));

            $skip = $label === ''
                || self::isRouteMealOrCeremony($label)
                || in_array($label, $unique, true);
            if (! $skip) {
                $unique[] = $label;
            }
        }
    }

    return $unique;
}
/**
 * Split the raw venue text into tokens and map each token to a system venue
 * or, when nothing matches, to a custom venue entry.
 *
 * @param string $raw Raw text of the venue field.
 * @param array<int, string> $warnings Receives one warning per token that had to be added as a custom venue.
 * @return array{items: array<int, array<string, mixed>>, warnings: array<int, string>}
 *         The "warnings" key of the return value is always empty; warnings are reported through $warnings.
 */
private static function matchVenueItems(string $raw, array &$warnings): array
{
    $raw = trim($raw);
    if ($raw === '') {
        return ['items' => [], 'warnings' => []];
    }

    // Separators: plus, enumeration comma, comma, slash, pipe, semicolon and
    // newline, including the full-width forms of plus (U+FF0B), comma (U+FF0C)
    // and semicolon (U+FF1B) that Chinese-language documents normally use.
    $parts = preg_split('#[+\x{FF0B}、,\x{FF0C}/|;\x{FF1B}\n]+#u', $raw) ?: [];
    $parts = array_values(array_filter(array_map(static fn ($p) => trim($p), $parts), static fn ($p) => $p !== ''));
    if ($parts === []) {
        $parts = [$raw];
    }

    /** @var Collection<int, Venue> $venues */
    $venues = Venue::query()->orderBy('sort')->orderBy('id')->get(['id', 'name']);

    $items = [];
    // Venues already assigned to an earlier token are not matched again.
    $usedVenueIds = [];
    foreach ($parts as $part) {
        $part = self::cleanVenueToken($part);
        if ($part === '') {
            continue;
        }

        $match = self::findVenueMatch($part, $venues, $usedVenueIds);
        if ($match !== null) {
            $items[] = ['type' => 'system', 'venue_id' => $match->id];
            $usedVenueIds[] = $match->id;
            continue;
        }

        $items[] = ['type' => 'custom', 'name' => $part];
        $warnings[] = "场馆「{$part}」未在系统中匹配,已作为自定义场馆添加";
    }

    return ['items' => $items, 'warnings' => []];
}
/**
 * Find the venue that best matches a token.
 *
 * An exact match on the normalized name wins. Otherwise the venue whose
 * normalized name contains the token, or is contained in it, is chosen; among
 * several such venues the one with the longest name is preferred. Venues whose
 * id is listed in $usedVenueIds are never returned.
 *
 * @param Collection<int, Venue> $venues
 * @param array<int, int> $usedVenueIds
 * @return Venue|null The matched venue, or null when nothing matches.
 */
private static function findVenueMatch(string $token, Collection $venues, array $usedVenueIds): ?Venue
{
    $tokenNorm = self::normalizeVenueName($token);
    // An empty needle is "contained" in every string, so without this guard a
    // token made only of ignorable characters would match an arbitrary venue.
    if ($tokenNorm === '') {
        return null;
    }

    $exact = $venues->first(function (Venue $v) use ($tokenNorm, $usedVenueIds) {
        if (in_array($v->id, $usedVenueIds, true)) {
            return false;
        }

        return self::normalizeVenueName((string) $v->name) === $tokenNorm;
    });
    if ($exact !== null) {
        return $exact;
    }

    return $venues->filter(function (Venue $v) use ($tokenNorm, $usedVenueIds) {
        if (in_array($v->id, $usedVenueIds, true)) {
            return false;
        }
        $nameNorm = self::normalizeVenueName((string) $v->name);

        return $nameNorm !== '' && (str_contains($nameNorm, $tokenNorm) || str_contains($tokenNorm, $nameNorm));
    })->sortByDesc(fn (Venue $v) => mb_strlen((string) $v->name))->first();
}
/**
 * Reduce a venue token to the bare venue name: parenthetical notes (ASCII or
 * full-width parentheses) and a trailing "等" ("etc.") are removed.
 */
private static function cleanVenueToken(string $token): string
{
    $token = StudyTourPayload::compactText($token);
    // Strip parenthetical notes first so that a trailing "等" sitting in front of
    // a note (e.g. "…等(…)") is exposed and removed as well. U+FF08 / U+FF09 are
    // the full-width parentheses used in Chinese-language documents.
    $token = preg_replace('/[(\x{FF08}].*[)\x{FF09}]/u', '', $token) ?? $token;
    $token = StudyTourPayload::compactText($token);
    $token = preg_replace('/等$/u', '', $token) ?? $token;

    return StudyTourPayload::compactText($token);
}
/**
 * Normalize a venue name for comparison: lower-case it and remove all
 * whitespace and space separators (including the ideographic space U+3000 and
 * the no-break space U+00A0) as well as the middle dot and bullet characters.
 */
private static function normalizeVenueName(string $name): string
{
    $lower = mb_strtolower(trim($name));

    // \s covers ASCII whitespace, \p{Z} every Unicode separator. preg_replace()
    // yields null on failure (e.g. invalid UTF-8); fall back to the lower-cased name.
    return preg_replace('/[\s\p{Z}·•]+/u', '', $lower) ?? $lower;
}
/**
 * Tell whether a line opens a new route group: it is not a time line and starts
 * with a "route N" / "day N" label or a part-of-day word.
 */
private static function isRouteDateLabel(string $line): bool
{
    $labelPattern = '/^(线路[一二三四五六七八九十百零\d]+|第[一二三四五六七八九十百零\d]+天|上午|中午|下午|晚上)/u';

    return ! self::isTimeLine($line) && preg_match($labelPattern, $line) === 1;
}
/**
 * Tell whether a line starts with a clock time such as "9:30" or "09:30".
 */
private static function isTimeLine(string $line): bool
{
    $startsWithClockTime = preg_match('/^\d{1,2}:\d{2}/', $line);

    return $startsWithClockTime === 1;
}
/**
 * Tell whether a line is exactly one of the header cells of the route table.
 */
private static function isRouteTableHeaderLine(string $line): bool
{
    return match ($line) {
        '日期', '时间', '行程安排', '地点' => true,
        default => false,
    };
}
/**
 * Tell whether an activity text contains one of the keywords listed in the
 * pattern below (meals, check-in, rest, ceremonies, free time and the like).
 */
private static function isRouteMealOrCeremony(string $activity): bool
{
    $keywordPattern = '/(办理入住|民宿早餐|午休|结营仪式|领取伴手礼|欢迎晚宴|夜探|午餐|晚餐|野火饭|鸡汤|江村饭店|自由活动)/u';

    return preg_match($keywordPattern, $activity) === 1;
}
/**
 * Tell whether a line looks like a place name: it contains one of the venue
 * keywords in the pattern below and is not matched by isRouteMealOrCeremony().
 */
private static function isLikelyRouteLocation(string $line): bool
{
    $placePattern = '/(博物馆|文化园|科技馆|湿地|故居|纪念馆|风情园|蚕桑|丝绸|活动中心|营地|基地|有限公司|酒店|民宿)/u';

    return ! self::isRouteMealOrCeremony($line) && preg_match($placePattern, $line) === 1;
}
/**
 * Tell whether a line contains one of the activity keywords in the pattern below.
 */
private static function looksLikeRouteActivity(string $line): bool
{
    $activityPattern = '/(体验|探秘|制作|参观|采摘|仪式|晚宴|午餐|晚餐|早餐|午休|活动|课程|王国|有趣|小夜灯|缫丝|挂件|面包)/u';

    return preg_match($activityPattern, $line) === 1;
}
/**
 * Heuristically decide whether a line is extraction garbage rather than text
 * of the declaration form.
 *
 * Empty lines, pure numbers, lines starting with a clock time and
 * phone-number-like lines are never garbled. Otherwise a line is garbled when
 * it contains C1 control characters or one of the blacklisted characters, when
 * it has no CJK ideograph at all, or when fewer than 45% of its characters are
 * CJK ideographs, ASCII letters/digits or a few common punctuation marks.
 */
private static function isGarbledLine(string $line): bool
{
    if ($line === '') {
        return false;
    }
    if (preg_match('/^\d+$/', $line)) {
        return false;
    }
    if (preg_match('/^\d{1,2}:\d{2}/', $line)) {
        return false;
    }
    // Phone numbers and similar numeric values ("0512-65221234", "138 1234 5678",
    // several numbers separated by "/" or "、") contain no CJK characters and
    // would otherwise be discarded by the no-CJK rule below.
    if (preg_match('/^[+(]?\d[\d\s\-()\/,;、.]*$/u', $line)) {
        return false;
    }
    // C1 control characters.
    if (preg_match('/[\x{0080}-\x{009F}]/u', $line)) {
        return true;
    }
    // NOTE(review): the next two character sets presumably list characters that
    // show up when binary .doc content is mis-decoded — confirm with the author.
    if (preg_match('/[ᘀ-᛿]/u', $line)) {
        return true;
    }
    if (preg_match('/[漀愀脈摫欀䡒⡯␖]/u', $line)) {
        return true;
    }

    $cjk = preg_match_all('/[\x{4e00}-\x{9fff}]/u', $line) ?: 0;
    $len = mb_strlen($line);
    // Three or more characters without a single CJK ideograph.
    if ($cjk === 0 && $len >= 3) {
        return true;
    }
    // Very short lines are judged by the presence of CJK alone.
    if ($len < 6) {
        return $cjk === 0;
    }

    $readable = preg_match_all('/[\x{4e00}-\x{9fff}0-9A-Za-z。、\-\s\/&]/u', $line) ?: 0;

    return ($readable / max($len, 1)) < 0.45;
}
/**
 * Tell whether at least one line of the text (split on any line break) is
 * classified as garbled by isGarbledLine().
 */
private static function containsGarbledText(string $text): bool
{
    $rows = preg_split('/\R/u', $text) ?: [];
    foreach ($rows as $row) {
        $candidate = trim((string) $row);
        if (self::isGarbledLine($candidate)) {
            return true;
        }
    }

    return false;
}
/**
 * Join the lines of a free-text section into paragraphs.
 *
 * Consecutive lines form one paragraph (joined by "\n"); an empty line ends the
 * current paragraph. The first garbled line ends the section: everything from
 * that line on is ignored. Paragraphs are joined by a blank line.
 *
 * @param array<int, string> $lines
 */
private static function joinSectionLines(array $lines): string
{
    $paragraphs = [];
    $pending = [];

    foreach ($lines as $text) {
        if (self::isGarbledLine($text)) {
            // Stop reading; the pending paragraph is flushed after the loop.
            break;
        }
        if ($text !== '') {
            $pending[] = $text;
            continue;
        }
        if ($pending !== []) {
            $paragraphs[] = trim(implode("\n", $pending));
            $pending = [];
        }
    }

    if ($pending !== []) {
        $paragraphs[] = trim(implode("\n", $pending));
    }

    $nonEmpty = array_filter($paragraphs, fn ($paragraph) => $paragraph !== '');

    return trim(implode("\n\n", $nonEmpty));
}
/**
 * Convert plain text to HTML: each block separated by one or more blank lines
 * becomes a <p> element, single line breaks inside a block become <br>, and
 * all text is HTML-escaped. Blank input yields an empty string.
 */
private static function plainTextToHtml(string $text): string
{
    $normalized = StudyTourPayload::compactMultilineText($text);
    if ($normalized === '') {
        return '';
    }

    $blocks = preg_split("/\n{2,}/u", $normalized) ?: [$normalized];

    $html = '';
    foreach ($blocks as $block) {
        $clean = StudyTourPayload::compactMultilineText($block);
        if ($clean !== '') {
            $safe = htmlspecialchars($clean, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $html .= '<p>'.nl2br($safe, false).'</p>';
        }
    }

    return $html;
}
}