研学线路

master
lion 2 days ago
parent 0c95acf29e
commit 4e18dedf8b

@ -3,6 +3,13 @@
namespace App\Support;
use Illuminate\Support\Str;
use PhpOffice\PhpWord\Element\AbstractContainer;
use PhpOffice\PhpWord\Element\Cell;
use PhpOffice\PhpWord\Element\Row;
use PhpOffice\PhpWord\Element\Table;
use PhpOffice\PhpWord\Element\Text;
use PhpOffice\PhpWord\Element\TextBreak;
use PhpOffice\PhpWord\IOFactory;
use RuntimeException;
use Symfony\Component\Process\Process;
use ZipArchive;
@ -83,6 +90,11 @@ class DocTextExtractor
public static function extractDoc(string $path): string
{
$phpWordText = self::extractDocViaPhpWord($path);
if (is_string($phpWordText) && trim($phpWordText) !== '') {
return $phpWordText;
}
$textutil = self::resolveBinary('textutil');
if ($textutil !== null) {
$out = self::runCommand([$textutil, '-convert', 'txt', '-stdout', $path], 45);
@ -121,7 +133,96 @@ class DocTextExtractor
@rmdir($tmpDir);
}
throw new RuntimeException('无法解析 .doc 文件,请安装 LibreOfficesoffice或改用 .docx 格式');
throw new RuntimeException('无法解析 .doc 申报表,请检查文件是否损坏,或另存为 .docx 后重试');
}
/**
 * Try to read a legacy .doc file with PhpWord's "MsDoc" reader and return its text.
 *
 * Every top-level element of every section is flattened to text, split on
 * line breaks, passed through normalizeLine(), and the non-empty results are
 * combined with joinLines().
 *
 * @param string $path Path of the .doc file to load.
 *
 * @return string|null The joined text, or null when loading throws or the
 *                     result is blank after trimming.
 */
private static function extractDocViaPhpWord(string $path): ?string
{
    try {
        $document = IOFactory::load($path, 'MsDoc');
    } catch (\Throwable) {
        // Best effort only: any loader failure is reported to the caller as null.
        return null;
    }

    $collected = [];
    foreach ($document->getSections() as $section) {
        foreach ($section->getElements() as $element) {
            $raw = self::extractPhpWordElementText($element);
            if ($raw !== '') {
                // preg_split() yields false on a PCRE error; treat that as "no lines".
                $pieces = preg_split('/\R/u', $raw);
                foreach (($pieces ?: []) as $piece) {
                    $normalized = self::normalizeLine((string) $piece);
                    if ($normalized === '') {
                        continue;
                    }
                    $collected[] = $normalized;
                }
            }
        }
    }

    $joined = self::joinLines($collected);
    if (trim($joined) === '') {
        return null;
    }

    return $joined;
}
/**
 * Recursively flatten a PhpWord element into plain text.
 *
 * Handling, checked in this order:
 *  - Text: its text, cast to string.
 *  - TextBreak: a single "\n".
 *  - Table: every Row is rendered with extractPhpWordRowText(); empty rows
 *    are dropped and the rest are joined with "\n".
 *  - AbstractContainer: the text of all children, concatenated with no separator.
 *  - Any other object exposing getText(): the string it returns. When
 *    getText() returns another object instead (PhpWord's Title may wrap a
 *    TextRun), that object is flattened recursively rather than discarded.
 *
 * @param object $element Element (or nested element) to flatten.
 *
 * @return string The extracted text, or '' when nothing textual is found.
 */
private static function extractPhpWordElementText(object $element): string
{
    if ($element instanceof Text) {
        return (string) $element->getText();
    }

    if ($element instanceof TextBreak) {
        return "\n";
    }

    if ($element instanceof Table) {
        $rowTexts = [];
        foreach ($element->getRows() as $row) {
            if ($row instanceof Row) {
                $rowTexts[] = self::extractPhpWordRowText($row);
            }
        }

        return implode("\n", array_filter($rowTexts, static fn ($rowText) => $rowText !== ''));
    }

    if ($element instanceof AbstractContainer) {
        $combined = '';
        foreach ($element->getElements() as $child) {
            $combined .= self::extractPhpWordElementText($child);
        }

        return $combined;
    }

    if (method_exists($element, 'getText')) {
        $text = $element->getText();
        if (is_string($text)) {
            return $text;
        }
        // getText() may hand back a nested element (e.g. a TextRun) instead of a
        // string; descend into it. The identity check guards against an element
        // that returns itself, which would otherwise recurse forever.
        if (is_object($text) && $text !== $element) {
            return self::extractPhpWordElementText($text);
        }
    }

    return '';
}
/**
 * Render one table row as text, one line per non-empty cell.
 *
 * Each Cell is flattened with extractPhpWordCellText() and passed through
 * normalizeLine(); cells that end up as '' are skipped and the remainder are
 * joined with "\n".
 *
 * @param Row $row Table row to render.
 *
 * @return string The row's cell texts joined by newlines ('' when all cells are empty).
 */
private static function extractPhpWordRowText(Row $row): string
{
    $lines = [];
    foreach ($row->getCells() as $cell) {
        if (!($cell instanceof Cell)) {
            continue;
        }

        $normalized = self::normalizeLine(self::extractPhpWordCellText($cell));
        if ($normalized !== '') {
            $lines[] = $normalized;
        }
    }

    return implode("\n", $lines);
}
/**
 * Flatten a table cell into a single normalized line.
 *
 * The text of every element in the cell is concatenated with no separator
 * and the result is passed through normalizeLine().
 *
 * @param Cell $cell Table cell to flatten.
 *
 * @return string The value returned by normalizeLine() for the concatenated text.
 */
private static function extractPhpWordCellText(Cell $cell): string
{
    $buffer = '';
    foreach ($cell->getElements() as $child) {
        $buffer .= self::extractPhpWordElementText($child);
    }

    return self::normalizeLine($buffer);
}
private static function textFromNode(\DOMXPath $xpath, \DOMNode $node): string

@ -11,7 +11,8 @@
"laravel/sanctum": "^3.3",
"laravel/tinker": "^2.8",
"overtrue/pinyin": "^5.0",
"phpoffice/phpspreadsheet": "^5.3"
"phpoffice/phpspreadsheet": "^5.3",
"phpoffice/phpword": "^1.3"
},
"require-dev": {
"fakerphp/faker": "^1.9.1",

175
composer.lock generated

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "2fd1262c3117ca0367f247bc6998ea9b",
"content-hash": "1a85fb70206d510a41d0911211e84521",
"packages": [
{
"name": "brick/math",
@ -2938,6 +2938,64 @@
],
"time": "2025-03-16T02:16:27+00:00"
},
{
"name": "phpoffice/math",
"version": "0.2.0",
"source": {
"type": "git",
"url": "https://github.com/PHPOffice/Math.git",
"reference": "fc2eb6d1a61b058d5dac77197059db30ee3c8329"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/PHPOffice/Math/zipball/fc2eb6d1a61b058d5dac77197059db30ee3c8329",
"reference": "fc2eb6d1a61b058d5dac77197059db30ee3c8329",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"ext-dom": "*",
"ext-xml": "*",
"php": "^7.1|^8.0"
},
"require-dev": {
"phpstan/phpstan": "^0.12.88 || ^1.0.0",
"phpunit/phpunit": "^7.0 || ^9.0"
},
"type": "library",
"autoload": {
"psr-4": {
"PhpOffice\\Math\\": "src/Math/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Progi1984",
"homepage": "https://lefevre.dev"
}
],
"description": "Math - Manipulate Math Formula",
"homepage": "https://phpoffice.github.io/Math/",
"keywords": [
"MathML",
"officemathml",
"php"
],
"support": {
"issues": "https://github.com/PHPOffice/Math/issues",
"source": "https://github.com/PHPOffice/Math/tree/0.2.0"
},
"time": "2024-08-12T07:30:45+00:00"
},
{
"name": "phpoffice/phpspreadsheet",
"version": "5.3.0",
@ -3050,6 +3108,121 @@
},
"time": "2025-11-24T15:47:10+00:00"
},
{
"name": "phpoffice/phpword",
"version": "1.3.0",
"source": {
"type": "git",
"url": "https://github.com/PHPOffice/PHPWord.git",
"reference": "8392134ce4b5dba65130ba956231a1602b848b7f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/PHPOffice/PHPWord/zipball/8392134ce4b5dba65130ba956231a1602b848b7f",
"reference": "8392134ce4b5dba65130ba956231a1602b848b7f",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"ext-dom": "*",
"ext-json": "*",
"ext-xml": "*",
"php": "^7.1|^8.0",
"phpoffice/math": "^0.2"
},
"require-dev": {
"dompdf/dompdf": "^2.0",
"ext-gd": "*",
"ext-libxml": "*",
"ext-zip": "*",
"friendsofphp/php-cs-fixer": "^3.3",
"mpdf/mpdf": "^8.1",
"phpmd/phpmd": "^2.13",
"phpstan/phpstan-phpunit": "@stable",
"phpunit/phpunit": ">=7.0",
"symfony/process": "^4.4 || ^5.0",
"tecnickcom/tcpdf": "^6.5"
},
"suggest": {
"dompdf/dompdf": "Allows writing PDF",
"ext-gd2": "Allows adding images",
"ext-xmlwriter": "Allows writing OOXML and ODF",
"ext-xsl": "Allows applying XSL style sheet to headers, to main document part, and to footers of an OOXML template",
"ext-zip": "Allows writing OOXML and ODF"
},
"type": "library",
"autoload": {
"psr-4": {
"PhpOffice\\PhpWord\\": "src/PhpWord"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"LGPL-3.0"
],
"authors": [
{
"name": "Mark Baker"
},
{
"name": "Gabriel Bull",
"email": "me@gabrielbull.com",
"homepage": "http://gabrielbull.com/"
},
{
"name": "Franck Lefevre",
"homepage": "https://rootslabs.net/blog/"
},
{
"name": "Ivan Lanin",
"homepage": "http://ivan.lanin.org"
},
{
"name": "Roman Syroeshko",
"homepage": "http://ru.linkedin.com/pub/roman-syroeshko/34/a53/994/"
},
{
"name": "Antoine de Troostembergh"
}
],
"description": "PHPWord - A pure PHP library for reading and writing word processing documents (OOXML, ODF, RTF, HTML, PDF)",
"homepage": "https://phpoffice.github.io/PHPWord/",
"keywords": [
"ISO IEC 29500",
"OOXML",
"Office Open XML",
"OpenDocument",
"OpenXML",
"PhpOffice",
"PhpWord",
"Rich Text Format",
"WordprocessingML",
"doc",
"docx",
"html",
"odf",
"odt",
"office",
"pdf",
"php",
"reader",
"rtf",
"template",
"template processor",
"word",
"writer"
],
"support": {
"issues": "https://github.com/PHPOffice/PHPWord/issues",
"source": "https://github.com/PHPOffice/PHPWord/tree/1.3.0"
},
"time": "2024-08-30T18:03:42+00:00"
},
{
"name": "phpoption/phpoption",
"version": "1.9.3",

Loading…
Cancel
Save