> */
    protected array $cache = []; // filled lazily by addresses(), keyed by target type

    /**
     * Resolve a source name by exact match on the normalized request URL.
     *
     * Both the given URL and each address's request_url are passed through
     * normalizeUrl(), so only scheme, host and path (without trailing
     * slashes) take part in the comparison.
     *
     * @param string|null $url        Request URL to look up.
     * @param string      $targetType Value matched against the target_type column.
     *
     * @return string|null Name of the first matching address, or null when
     *                     the URL is blank or nothing matches.
     */
    public function resolveByRequestUrl(?string $url, string $targetType = 'industry_news'): ?string
    {
        $normalized = $this->normalizeUrl($url);
        if ($normalized === '') {
            return null;
        }

        return $this->addresses($targetType)
            ->first(fn (CrawlAddress $address) => $this->normalizeUrl($address->request_url) === $normalized)
            ?->name;
    }

    /**
     * Resolve a source name from an article URL by host, then by path prefix.
     *
     * Addresses whose request_url host equals the article host
     * (case-insensitively) are candidates. A single candidate wins outright.
     * With several candidates, the one whose listPathPrefix() is the longest
     * prefix of the article path wins; on equal length the earlier address
     * (in addresses() order) is kept.
     *
     * @param string|null $sourceUrl  Article URL; it must carry a scheme,
     *                                otherwise parse_url() reports no host.
     * @param string      $targetType Value matched against the target_type column.
     *
     * @return string|null Matching address name, or null when the URL has no
     *                     host, no address shares the host, or several share
     *                     it and none has a matching path prefix.
     */
    public function resolveBySourceUrl(?string $sourceUrl, string $targetType = 'industry_news'): ?string
    {
        $host = strtolower((string) parse_url((string) $sourceUrl, PHP_URL_HOST));
        if ($host === '') {
            return null;
        }

        $matches = $this->addresses($targetType)
            ->filter(static function (CrawlAddress $address) use ($host): bool {
                // Cast before parsing: a row with a null request_url must just
                // fail to match instead of raising a deprecation / TypeError.
                $addressHost = strtolower((string) parse_url((string) $address->request_url, PHP_URL_HOST));

                // $host is known to be non-empty here, so equality alone
                // also rules out addresses without a host.
                return $addressHost === $host;
            })
            ->values();

        if ($matches->isEmpty()) {
            return null;
        }

        if ($matches->count() === 1) {
            return $matches->first()->name;
        }

        $path = (string) (parse_url((string) $sourceUrl, PHP_URL_PATH) ?: '/');
        $bestName = null;
        $bestLength = 0;

        foreach ($matches as $address) {
            $prefix = $this->listPathPrefix((string) $address->request_url);
            $length = strlen($prefix);

            // Strict ">" keeps the first address when two prefixes tie.
            if ($prefix !== '' && $length > $bestLength && str_starts_with($path, $prefix)) {
                $bestName = $address->name;
                $bestLength = $length;
            }
        }

        return $bestName;
    }

    /**
     * Resolve a source name for a news item.
     *
     * Tries the crawl job's request URL first and falls back to the
     * article's own URL only when that yields nothing.
     *
     * @param string|null $jobRequestUrl    URL passed to resolveByRequestUrl().
     * @param string|null $articleSourceUrl URL passed to resolveBySourceUrl().
     * @param string      $targetType       Value matched against the target_type column.
     *
     * @return string|null Resolved name, or null when neither lookup matches.
     */
    public function resolveForNews(?string $jobRequestUrl, ?string $articleSourceUrl, string $targetType = 'industry_news'): ?string
    {
        return $this->resolveByRequestUrl($jobRequestUrl, $targetType)
            ?? $this->resolveBySourceUrl($articleSourceUrl, $targetType);
    }

    /**
     * Public entry point to the URL normalization used for request-URL matching.
     *
     * @param string|null $url URL to normalize.
     *
     * @return string Normalized URL, or '' for null / blank input.
     */
    public function normalizeRequestUrl(?string $url): string
    {
        return $this->normalizeUrl($url);
    }

    /**
     * Fixed list of two source names.
     *
     * NOTE(review): judging by the method name these are placeholder names
     * written by generic adapters rather than real sources - confirm against
     * the callers.
     *
     * @return list<string>
     */
    public function genericAdapterSourceNames(): array
    {
        return [
            '通用资讯 HTML',
            '爬虫采集',
        ];
    }

    /**
     * Load the crawl addresses for a target type, memoized per instance.
     *
     * Selects rows with the given target_type and status = 1 (presumably
     * "enabled" - confirm against the model), ordered by sort and then name.
     * The query runs once per target type; later calls return the cached
     * collection.
     *
     * @param string $targetType Value matched against the target_type column.
     *
     * @return Collection<int, CrawlAddress>
     */
    protected function addresses(string $targetType): Collection
    {
        if (! isset($this->cache[$targetType])) {
            $this->cache[$targetType] = CrawlAddress::query()
                ->where('target_type', $targetType)
                ->where('status', 1)
                ->orderBy('sort')
                ->orderBy('name')
                ->get();
        }

        return $this->cache[$targetType];
    }

    /**
     * Reduce a URL to a canonical "scheme://host/path" form for comparison.
     *
     * - Null / blank input yields ''.
     * - "https://" is prepended when the input has no http(s) scheme.
     * - Scheme and host are lower-cased; the path keeps its case.
     * - Trailing slashes are removed from the path; an empty path becomes "/".
     * - Port, credentials, query string and fragment are not included.
     * - If no host can be parsed, the trimmed input (scheme possibly
     *   prepended) is returned with trailing slashes removed.
     *
     * @param string|null $url URL to normalize.
     *
     * @return string Normalized URL, or '' for null / blank input.
     */
    protected function normalizeUrl(?string $url): string
    {
        $trimmed = trim((string) $url);
        if ($trimmed === '') {
            return '';
        }

        if (! preg_match('#^https?://#i', $trimmed)) {
            $trimmed = 'https://'.$trimmed;
        }

        $parts = parse_url($trimmed);
        if (! is_array($parts) || empty($parts['host'])) {
            return rtrim($trimmed, '/');
        }

        $scheme = strtolower((string) ($parts['scheme'] ?? 'https'));
        $host = strtolower((string) $parts['host']);

        // A missing, empty or slash-only path all collapse to "/".
        $path = rtrim((string) ($parts['path'] ?? ''), '/') ?: '/';

        return $scheme.'://'.$host.$path;
    }

    /**
     * Derive the directory-like path prefix of a URL.
     *
     * Trailing slashes are removed, then the last path segment is cut off
     * while the slash before it is kept, e.g. "/news/list.html" and
     * "/news/list/" both give "/news/", and "/news/" gives "/". A missing
     * path or "/" gives "/". A path containing no slash at all (possible
     * for scheme-less input) is returned unchanged.
     *
     * @param string $url URL whose path prefix is wanted.
     *
     * @return string Path prefix as described above.
     */
    protected function listPathPrefix(string $url): string
    {
        $path = (string) (parse_url($url, PHP_URL_PATH) ?: '/');
        if ($path === '/') {
            return '/';
        }

        $path = rtrim($path, '/');
        $lastSlash = strrpos($path, '/');
        if ($lastSlash === false) {
            return $path;
        }

        return substr($path, 0, $lastSlash + 1);
    }
}