|
|
|
|
@ -814,9 +814,12 @@ class StudyTourDeclarationParser
|
|
|
|
|
if ($line === '') {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (! self::isValidImplementationLine($line)) {
|
|
|
|
|
if (self::isGarbledLine($line) || self::isWordMetadataLine($line)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (preg_match('/^\(?实施场次[、,].*等\)?$/u', $line)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
$kept[] = $line;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@ -837,17 +840,33 @@ class StudyTourDeclarationParser
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (preg_match('/^\d{1,2}月\d{1,2}日/u', $line)) {
|
|
|
|
|
if (preg_match('/^\(?实施场次[、,].*等\)?$/u', $line)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (preg_match('/(?:\d{1,2}月\d{1,2}(?:-\d{1,2})?日|\d{4}年\d{1,2}月\d{1,2}日)/u', $line)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (! preg_match('/[月日场活动安排如下夏令营研学实施时间共]/u', $line)) {
|
|
|
|
|
return false;
|
|
|
|
|
if (preg_match('/第[一二三四五六七八九十百零\d]+[场期次]/u', $line)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (preg_match('/^(说明|备注)[::]/u', $line)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (preg_match('/[共场次].*(夏令营|研学|活动)/u', $line) || preg_match('/时间安排如下/u', $line)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$cjk = preg_match_all('/[\x{4e00}-\x{9fff}]/u', $line) ?: 0;
|
|
|
|
|
$readable = preg_match_all('/[\x{4e00}-\x{9fff}0-9A-Za-z,。、:;()\-\s\/&“”《》—–,.\:]/u', $line) ?: 0;
|
|
|
|
|
$len = mb_strlen($line);
|
|
|
|
|
|
|
|
|
|
return $cjk >= 4 && ($cjk / max(mb_strlen($line), 1)) >= 0.45;
|
|
|
|
|
return $cjk >= 4
|
|
|
|
|
&& preg_match('/[月日场活动实施时间说明预约成团]/u', $line)
|
|
|
|
|
&& ($readable / max($len, 1)) >= 0.45;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static function isWordMetadataLine(string $line): bool
|
|
|
|
|
@ -932,7 +951,7 @@ class StudyTourDeclarationParser
|
|
|
|
|
return $cjk < 2;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$readable = preg_match_all('/[\x{4e00}-\x{9fff}0-9A-Za-z,。、:;()\-\s\/&“”]/u', $line) ?: 0;
|
|
|
|
|
$readable = preg_match_all('/[\x{4e00}-\x{9fff}0-9A-Za-z,。、:;()\-\s\/&“”《》—–,.\:]/u', $line) ?: 0;
|
|
|
|
|
|
|
|
|
|
return ($readable / max($len, 1)) < 0.45;
|
|
|
|
|
}
|
|
|
|
|
|