|
|
|
|
@ -83,20 +83,22 @@ class DocTextExtractor
|
|
|
|
|
|
|
|
|
|
public static function extractDoc(string $path): string
|
|
|
|
|
{
|
|
|
|
|
if (self::commandExists('textutil')) {
|
|
|
|
|
$out = self::runCommand(['textutil', '-convert', 'txt', '-stdout', $path], 45);
|
|
|
|
|
$textutil = self::resolveBinary('textutil');
|
|
|
|
|
if ($textutil !== null) {
|
|
|
|
|
$out = self::runCommand([$textutil, '-convert', 'txt', '-stdout', $path], 45);
|
|
|
|
|
if (is_string($out) && trim($out) !== '') {
|
|
|
|
|
return self::normalizeText($out);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (self::commandExists('soffice')) {
|
|
|
|
|
$soffice = self::resolveBinary('soffice');
|
|
|
|
|
if ($soffice !== null) {
|
|
|
|
|
$tmpDir = sys_get_temp_dir().'/study-tour-doc-'.Str::random(8);
|
|
|
|
|
if (! @mkdir($tmpDir, 0700, true) && ! is_dir($tmpDir)) {
|
|
|
|
|
throw new RuntimeException('无法创建临时目录');
|
|
|
|
|
}
|
|
|
|
|
self::runCommand([
|
|
|
|
|
'soffice',
|
|
|
|
|
$soffice,
|
|
|
|
|
'--headless',
|
|
|
|
|
'--convert-to',
|
|
|
|
|
'txt:Text',
|
|
|
|
|
@ -119,7 +121,7 @@ class DocTextExtractor
|
|
|
|
|
@rmdir($tmpDir);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
throw new RuntimeException('无法解析 .doc 文件,请安装 textutil 或 LibreOffice,或改用 .docx 格式');
|
|
|
|
|
throw new RuntimeException('无法解析 .doc 文件,请安装 LibreOffice(soffice),或改用 .docx 格式');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static function textFromNode(\DOMXPath $xpath, \DOMNode $node): string
|
|
|
|
|
@ -158,11 +160,55 @@ class DocTextExtractor
|
|
|
|
|
return trim($text);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static function commandExists(string $command): bool
|
|
|
|
|
/**
 * Locate an executable for the given command name.
 *
 * The bare command name is looked up in the PATH directories first. If it is not
 * found there, a short list of known absolute install locations for that command
 * is probed in order.
 *
 * @param string $command Command name; 'soffice' and 'textutil' have extra fallback locations.
 *
 * @return string|null The bare command name when it is found on PATH, otherwise the
 *                     first fallback location that is an executable file, or null
 *                     when nothing executable was found.
 */
private static function resolveBinary(string $command): ?string
{
    // A PATH hit returns the bare name, not the resolved absolute path.
    if (self::isExecutableOnPath($command)) {
        return $command;
    }

    // Absolute locations to try when the command is not reachable through PATH.
    $fallbacks = [];
    if ($command === 'soffice') {
        $fallbacks = [
            '/usr/bin/soffice',
            '/usr/local/bin/soffice',
            '/opt/libreoffice/program/soffice',
            '/usr/lib/libreoffice/program/soffice',
        ];
    } elseif ($command === 'textutil') {
        $fallbacks = ['/usr/bin/textutil'];
    }

    foreach ($fallbacks as $candidate) {
        if (is_file($candidate) && is_executable($candidate)) {
            return $candidate;
        }
    }

    return null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Check whether a command name resolves to an executable file in a PATH directory.
 *
 * PATH is read from the environment; when it is unset or empty the list
 * '/usr/local/bin:/usr/bin:/bin' is used instead. The value is split on ':' and
 * blank entries are skipped.
 *
 * @param string $command Bare command name to look for.
 *
 * @return bool True when some "<dir>/<command>" is a regular file and executable,
 *              false otherwise.
 */
private static function isExecutableOnPath(string $command): bool
{
    $pathEnv = getenv('PATH');
    if (! is_string($pathEnv) || $pathEnv === '') {
        $pathEnv = '/usr/local/bin:/usr/bin:/bin';
    }

    foreach (explode(':', $pathEnv) as $dir) {
        $dir = trim($dir);
        if ($dir === '') {
            continue;
        }

        // Normalise a trailing slash so the joined path has exactly one separator.
        $full = rtrim($dir, '/').'/'.$command;
        if (is_file($full) && is_executable($full)) {
            return true;
        }
    }

    // No PATH directory contains an executable with this name.
    return false;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|