This commit is contained in:
zhu
2026-05-06 14:04:05 +08:00
parent d78d70bde0
commit 40df507300
17 changed files with 691 additions and 163 deletions

View File

@@ -12,7 +12,7 @@ const isPanelOpen = ref(false);
let timer: number | undefined;
// Show the timer button in the bottom-right corner of the page only while the task is running or paused.
const isVisible = computed(() => crawlState.value?.status === 'running');
const isVisible = computed(() => crawlState.value ? ['running', 'paused'].includes(crawlState.value.status) : false);
// 内容脚本挂载后立即同步一次状态,并开始每秒刷新计时和任务进度。
onMounted(() => {
@@ -85,6 +85,14 @@ function getStepText(status: string): string {
return textMap[status] ?? status;
}
/**
 * Asks the background script to resume a paused crawl, then re-reads the
 * crawl state once the request has settled.
 */
async function handleResumeCrawl(): Promise<void> {
  const resumeRequest = { action: 'RESUME_CRAWL' };
  await sendBackgroundMessage(resumeRequest);
  await refreshCrawlState();
}
/**
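* Sends a message to the background script; outside an extension environment it returns an empty success response so that local pages do not raise errors.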
*/
@@ -114,15 +122,20 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
<ol class="dianshan-crawl-timeline">
<li v-for="(step, index) in crawlState.steps" :key="step.uniqueKey" :class="`is-${step.status}`">
<span class="dianshan-crawl-dot"></span>
<div class="dianshan-crawl-step">
<strong>{{ index + 1 }}. {{ step.name }}</strong>
<em>{{ getStepText(step.status) }}</em>
<small v-if="step.message">{{ step.message }}</small>
</div>
</li>
</ol>
</section>
</div>
<div class="dianshan-crawl-step">
<strong>{{ index + 1 }}. {{ step.name }}</strong>
<em>{{ getStepText(step.status) }}</em>
<small v-if="step.message">{{ step.message }}</small>
</div>
</li>
</ol>
<div v-if="crawlState.status === 'paused' && crawlState.pause" class="dianshan-crawl-pause">
<p>{{ crawlState.pause.message }}</p>
<button type="button" @click="handleResumeCrawl">我已处理继续</button>
</div>
</section>
</div>
</template>
<style scoped>
@@ -223,6 +236,35 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
color: #b91c1c;
}
/* Pause notice box: amber-bordered card stacked below the step timeline. */
.dianshan-crawl-pause {
display: grid;
gap: 8px;
margin-top: 12px;
padding: 10px;
border: 1px solid #f59e0b;
border-radius: 8px;
background: #fffbeb;
}
/* Pause message text inside the notice box. */
.dianshan-crawl-pause p {
margin: 0;
color: #92400e;
font-size: 12px;
line-height: 1.5;
}
/* Full-width green button inside the notice box (used for the resume action). */
.dianshan-crawl-pause button {
width: 100%;
border: 0;
border-radius: 6px;
padding: 8px 10px;
color: #ffffff;
background: #059669;
cursor: pointer;
font-size: 12px;
font-weight: 700;
}
.is-running .dianshan-crawl-dot,
.is-success .dianshan-crawl-dot {
background: #10b981;

View File

@@ -1,5 +1,6 @@
import { createApp } from 'vue';
import { createApp } from 'vue';
import App from './App.vue';
import { setupPageRunner } from './pageRunner';
/**
* 将内容脚本应用挂载到页面中。
@@ -9,17 +10,15 @@ function mountApp() {
return;
}
// 内容脚本在宿主页面中的根容器。
// 用于避免污染业务页面结构。
const container = document.createElement('div');
container.id = 'dianshan-crx-root';
// Vue 应用实际挂载的节点。
const appRoot = document.createElement('div');
container.appendChild(appRoot);
document.body.appendChild(container);
createApp(App).mount(appRoot);
setupPageRunner();
}
if (document.readyState === 'loading') {

207
src/content/pageRunner.ts Normal file
View File

@@ -0,0 +1,207 @@
import { processFields, type DomScrapeResult } from '@/background/domScraper';
import type { CrawlPauseInfo, PlatformFieldConfig } from '@/types';
/**
 * Message asking the page runner to scrape the current page.
 */
interface ScrapeStepMessage {
action: 'SCRAPE_STEP';
payload: {
// Field configurations handed to `processFields` together with `document.body`.
fields: PlatformFieldConfig[];
// CSS selector that must be visible and stable before scraping starts.
checkSelector: string;
};
}
/**
 * Message asking the page runner to report whether the page currently needs
 * manual handling; it carries no payload.
 */
interface CheckInterruptMessage {
action: 'CHECK_INTERRUPT';
}
/** Every message the page runner understands, discriminated by `action`. */
type PageRunnerMessage = ScrapeStepMessage | CheckInterruptMessage;
/**
 * Response sent back for a page runner message.
 */
interface PageRunnerResponse {
// True when the requested action completed; false when it was interrupted or rejected.
ok: boolean;
// Result of `processFields` for a successful SCRAPE_STEP.
data?: DomScrapeResult | null;
// Pause details when the page needs manual handling or is not ready.
interrupt?: CrawlPauseInfo;
// Error text, set when the action is not recognised.
error?: string;
}
/**
 * Registers the page runner so the background script can trigger interrupt
 * detection and DOM scraping inside the target page.
 *
 * The listener always answers asynchronously. If the handler rejects (for
 * example because a selector is invalid or scraping throws), the failure is
 * converted into an `{ ok: false, error }` response so the sender is not left
 * waiting on a channel that will never be answered.
 */
export function setupPageRunner(): void {
  chrome.runtime.onMessage.addListener((message: PageRunnerMessage, _sender, sendResponse) => {
    void handlePageRunnerMessage(message)
      .catch(
        (error: unknown): PageRunnerResponse => ({
          ok: false,
          error: error instanceof Error ? error.message : String(error),
        }),
      )
      .then(sendResponse);
    // Returning true keeps the message channel open for the asynchronous response.
    return true;
  });
}
/**
 * Handles a page-execution message sent by the background script.
 *
 * - `CHECK_INTERRUPT` reports whether the page currently needs manual handling.
 * - `SCRAPE_STEP` checks for an interrupt, waits for `payload.checkSelector`
 *   to be visible and stable, then runs `processFields` over `document.body`.
 *
 * @param message Message received from the background script.
 * @returns `ok: true` with the result, or `ok: false` with an `interrupt`
 *   (manual handling needed / page not ready) or an `error` (unknown action).
 */
async function handlePageRunnerMessage(message: PageRunnerMessage): Promise<PageRunnerResponse> {
  // Upper bound for waiting on the readiness selector.
  const readyTimeoutMs = 18000;

  if (message.action === 'CHECK_INTERRUPT') {
    return { ok: true, interrupt: detectPageInterrupt() };
  }

  if (message.action === 'SCRAPE_STEP') {
    const interrupt = detectPageInterrupt();
    if (interrupt) {
      return { ok: false, interrupt };
    }

    const readyElement = await waitForStableSelector(message.payload.checkSelector, readyTimeoutMs);
    if (!readyElement) {
      // The page may have switched to a captcha, login or error view while we
      // were waiting; report that specific reason instead of a generic one.
      const lateInterrupt = detectPageInterrupt();
      if (lateInterrupt) {
        return { ok: false, interrupt: lateInterrupt };
      }
      const notReady: CrawlPauseInfo = {
        reason: 'page_not_ready',
        message: '页面关键内容暂未加载,请确认页面是否正常显示后继续',
      };
      return { ok: false, interrupt: notReady };
    }

    const data = await processFields(message.payload.fields, document.body);
    return { ok: true, data };
  }

  return { ok: false, error: '未知页面执行指令' };
}
/**
 * Determines whether the current page needs manual attention before crawling
 * can continue. Checks run in priority order (verification/captcha, then
 * login, then missing page) and the first match wins.
 *
 * @returns Pause details for the first matching condition, or `undefined`
 *   when none of the checks match.
 */
function detectPageInterrupt(): CrawlPauseInfo | undefined {
  const probes: ReadonlyArray<{ matches: () => boolean; info: CrawlPauseInfo }> = [
    {
      matches: isShieldPage,
      info: {
        reason: 'shield',
        message: '检测到验证码或风控验证,请在打开的商家后台窗口处理完成后继续',
      },
    },
    {
      matches: isLoginPage,
      info: {
        reason: 'reauth',
        message: '检测到需要重新登录,请在打开的商家后台窗口登录完成后继续',
      },
    },
    {
      matches: isNotFoundPage,
      info: {
        reason: 'not_found',
        message: '当前页面不存在或已失效,请确认平台配置里的页面地址是否正确',
      },
    },
  ];

  // `find` stops at the first hit, so later checks are not evaluated.
  const hit = probes.find((probe) => probe.matches());
  return hit ? hit.info : undefined;
}
/**
 * Reports whether the page is a captcha, traffic-shield or risk-verification page.
 *
 * Matches either a `/verify/captcha` or `/verify/traffic` path prefix, or any
 * visible element that looks like a captcha/verification widget.
 *
 * @returns `true` when a verification challenge is showing.
 */
function isShieldPage(): boolean {
  const path = location.pathname.toLowerCase();
  if (path.startsWith('/verify/captcha') || path.startsWith('/verify/traffic')) {
    return true;
  }

  // Inspect every candidate: a hidden captcha container earlier in the
  // document must not mask a visible challenge that appears later.
  const candidates = document.querySelectorAll(
    '[data-name="verification"], .ant-captcha, #captchaContainer, [class*="captcha" i], [id*="captcha" i]',
  );
  return Array.from(candidates).some(isVisibleElement);
}
/**
 * Reports whether the page is asking the user to log in or re-enter a password.
 *
 * Three signals are checked in order, stopping at the first hit:
 * a login-style path, a visible password input, and login prompt wording in
 * the first 3000 characters of the body text.
 *
 * @returns `true` when any of the signals matches.
 */
function isLoginPage(): boolean {
  const loginPathPatterns = [
    /^\/(?:buyer\/)?login\b/i,
    /^\/account\/(?:signin|login)\b/i,
    /^\/portal\/login\b/i,
  ];
  const currentPath = location.pathname.toLowerCase();
  if (loginPathPatterns.some((pattern) => pattern.test(currentPath))) {
    return true;
  }

  const passwordInputs = Array.from(document.querySelectorAll('input[type="password"]'));
  for (const input of passwordInputs) {
    if (isVisibleElement(input)) {
      return true;
    }
  }

  const promptPatterns = [
    /enter\s+(your\s+)?password\s+to\s+continue/i,
    /sign\s+in\s+(again\s+)?to\s+continue/i,
    /please\s+(re-?)?enter\s+(your\s+)?password/i,
    /请(再次|重新)?输入(您的)?密码/,
    /请登录|重新登录|登录后继续/,
  ];
  // Only the leading part of the page text is scanned.
  const leadingText = document.body.innerText.slice(0, 3000);
  for (const pattern of promptPatterns) {
    if (pattern.test(leadingText)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether the page looks like a "not found", removed or unavailable page.
 *
 * The document title and the first 8000 characters of the body text are
 * matched against a list of English and Chinese phrases.
 *
 * @returns `true` when any phrase is found in the title or the body text.
 */
function isNotFoundPage(): boolean {
  const missingPagePatterns = [
    /page\s+not\s+found/i,
    /the\s+page\s+you\s+are\s+looking\s+for/i,
    /this\s+page\s+(has\s+been\s+)?removed/i,
    /product\s+(is\s+)?unavailable/i,
    /页面不存在|找不到(此|该)?页面|抱歉.*不存在|(商品|产品)已下架/,
  ];
  const bodyExcerpt = document.body.innerText.slice(0, 8000);
  const haystacks = [document.title, bodyExcerpt];

  for (const pattern of missingPagePatterns) {
    if (haystacks.some((haystack) => pattern.test(haystack))) {
      return true;
    }
  }
  return false;
}
/**
 * Waits until an element matching `selector` is visible and still visible
 * after a short settle delay.
 *
 * If the element disappears during the settle delay (for example because the
 * page re-rendered), polling continues until the deadline instead of giving
 * up on the first flicker.
 *
 * @param selector CSS selector of the element to wait for.
 * @param timeoutMs Maximum time to keep polling, in milliseconds.
 * @returns The stable element, or `null` when none settled before the deadline.
 */
async function waitForStableSelector(selector: string, timeoutMs: number): Promise<Element | null> {
  const pollIntervalMs = 500;
  const settleDelayMs = 600;
  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    const element = document.querySelector(selector);
    if (element && isVisibleElement(element)) {
      // Re-query after the delay: the node may have been replaced meanwhile.
      await sleep(settleDelayMs);
      const stableElement = document.querySelector(selector);
      if (stableElement && isVisibleElement(stableElement)) {
        return stableElement;
      }
      // Not stable yet; fall through and keep polling until the deadline.
    }
    await sleep(pollIntervalMs);
  }
  return null;
}
/**
 * Reports whether an element is actually visible.
 *
 * The element must be attached to a document, must not be hidden through
 * `display`, `visibility` or a near-zero `opacity`, and must have a non-empty
 * bounding box.
 *
 * @param element Element to inspect.
 * @returns `true` when every visibility check passes.
 */
function isVisibleElement(element: Element): boolean {
  if (!element.isConnected) {
    return false;
  }

  const view = element.ownerDocument.defaultView;
  const computed = view?.getComputedStyle(element);
  if (!computed) {
    return false;
  }

  const hiddenByStyle =
    computed.display === 'none' || computed.visibility === 'hidden' || Number(computed.opacity) < 0.05;
  if (hiddenByStyle) {
    return false;
  }

  const { width, height } = element.getBoundingClientRect();
  return width > 0 && height > 0;
}
/**
 * Resolves after the given delay.
 *
 * @param ms Delay in milliseconds.
 * @returns A promise that settles once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    const wake = (): void => done();
    window.setTimeout(wake, ms);
  });
}