1
This commit is contained in:
@@ -12,7 +12,7 @@ const isPanelOpen = ref(false);
|
||||
let timer: number | undefined;
|
||||
|
||||
// 只有任务处于运行中时,才在网页右下角展示计时按钮。
|
||||
const isVisible = computed(() => crawlState.value?.status === 'running');
|
||||
const isVisible = computed(() => crawlState.value ? ['running', 'paused'].includes(crawlState.value.status) : false);
|
||||
|
||||
// 内容脚本挂载后立即同步一次状态,并开始每秒刷新计时和任务进度。
|
||||
onMounted(() => {
|
||||
@@ -85,6 +85,14 @@ function getStepText(status: string): string {
|
||||
return textMap[status] ?? status;
|
||||
}
|
||||
|
||||
/**
 * Asks the background script to resume a paused crawl by sending a
 * `RESUME_CRAWL` message, then calls `refreshCrawlState()` so the panel
 * reflects the result.
 */
async function handleResumeCrawl(): Promise<void> {
  const resumeRequest = { action: 'RESUME_CRAWL' };

  await sendBackgroundMessage(resumeRequest);
  await refreshCrawlState();
}
|
||||
|
||||
/**
|
||||
* 发送消息到 background;非扩展环境下返回空成功响应,方便本地页面不报错。
|
||||
*/
|
||||
@@ -114,15 +122,20 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
|
||||
<ol class="dianshan-crawl-timeline">
|
||||
<li v-for="(step, index) in crawlState.steps" :key="step.uniqueKey" :class="`is-${step.status}`">
|
||||
<span class="dianshan-crawl-dot"></span>
|
||||
<div class="dianshan-crawl-step">
|
||||
<strong>{{ index + 1 }}. {{ step.name }}</strong>
|
||||
<em>{{ getStepText(step.status) }}</em>
|
||||
<small v-if="step.message">{{ step.message }}</small>
|
||||
</div>
|
||||
</li>
|
||||
</ol>
|
||||
</section>
|
||||
</div>
|
||||
<div class="dianshan-crawl-step">
|
||||
<strong>{{ index + 1 }}. {{ step.name }}</strong>
|
||||
<em>{{ getStepText(step.status) }}</em>
|
||||
<small v-if="step.message">{{ step.message }}</small>
|
||||
</div>
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<div v-if="crawlState.status === 'paused' && crawlState.pause" class="dianshan-crawl-pause">
|
||||
<p>{{ crawlState.pause.message }}</p>
|
||||
<button type="button" @click="handleResumeCrawl">我已处理,继续</button>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style scoped>
|
||||
@@ -223,6 +236,35 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
|
||||
color: #b91c1c;
|
||||
}
|
||||
|
||||
.dianshan-crawl-pause {
|
||||
display: grid;
|
||||
gap: 8px;
|
||||
margin-top: 12px;
|
||||
padding: 10px;
|
||||
border: 1px solid #f59e0b;
|
||||
border-radius: 8px;
|
||||
background: #fffbeb;
|
||||
}
|
||||
|
||||
.dianshan-crawl-pause p {
|
||||
margin: 0;
|
||||
color: #92400e;
|
||||
font-size: 12px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.dianshan-crawl-pause button {
|
||||
width: 100%;
|
||||
border: 0;
|
||||
border-radius: 6px;
|
||||
padding: 8px 10px;
|
||||
color: #ffffff;
|
||||
background: #059669;
|
||||
cursor: pointer;
|
||||
font-size: 12px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.is-running .dianshan-crawl-dot,
|
||||
.is-success .dianshan-crawl-dot {
|
||||
background: #10b981;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { createApp } from 'vue';
|
||||
import { createApp } from 'vue';
|
||||
import App from './App.vue';
|
||||
import { setupPageRunner } from './pageRunner';
|
||||
|
||||
/**
|
||||
* 将内容脚本应用挂载到页面中。
|
||||
@@ -9,17 +10,15 @@ function mountApp() {
|
||||
return;
|
||||
}
|
||||
|
||||
// 内容脚本在宿主页面中的根容器。
|
||||
// 用于避免污染业务页面结构。
|
||||
const container = document.createElement('div');
|
||||
container.id = 'dianshan-crx-root';
|
||||
// Vue 应用实际挂载的节点。
|
||||
const appRoot = document.createElement('div');
|
||||
|
||||
container.appendChild(appRoot);
|
||||
document.body.appendChild(container);
|
||||
|
||||
createApp(App).mount(appRoot);
|
||||
setupPageRunner();
|
||||
}
|
||||
|
||||
if (document.readyState === 'loading') {
|
||||
|
||||
207
src/content/pageRunner.ts
Normal file
207
src/content/pageRunner.ts
Normal file
@@ -0,0 +1,207 @@
|
||||
import { processFields, type DomScrapeResult } from '@/background/domScraper';
|
||||
import type { CrawlPauseInfo, PlatformFieldConfig } from '@/types';
|
||||
|
||||
/**
 * Message asking the page runner to scrape one step from the current page.
 */
interface ScrapeStepMessage {
  /** Discriminant identifying a scrape request. */
  action: 'SCRAPE_STEP';
  payload: {
    /** Field configurations handed to `processFields`. */
    fields: PlatformFieldConfig[];
    /** CSS selector that must match a visible element before scraping starts. */
    checkSelector: string;
  };
}
|
||||
|
||||
/**
 * Message asking the page runner to only report whether the current page
 * needs manual handling; it carries no payload.
 */
interface CheckInterruptMessage {
  /** Discriminant identifying an interrupt-check request. */
  action: 'CHECK_INTERRUPT';
}
|
||||
|
||||
/** Every message shape the page runner handles, discriminated by `action`. */
type PageRunnerMessage =
  | ScrapeStepMessage
  | CheckInterruptMessage;
|
||||
|
||||
/**
 * Reply sent back by the page runner for any handled message.
 */
interface PageRunnerResponse {
  /** Whether the requested action completed. */
  ok: boolean;
  /** Result of `processFields` for a successful scrape step. */
  data?: DomScrapeResult | null;
  /** Set when the page needs manual handling or was not ready in time. */
  interrupt?: CrawlPauseInfo;
  /** Error description, e.g. for an unrecognised action. */
  error?: string;
}
|
||||
|
||||
/**
 * Registers the page runner so that the background script can trigger
 * interrupt detection and DOM scraping inside the target page.
 *
 * Every incoming message is answered exactly once: if the handler rejects
 * (for example because a selector is invalid or field processing throws),
 * the failure is converted into an `{ ok: false, error }` response so the
 * sender always receives a reply.
 */
export function setupPageRunner(): void {
  chrome.runtime.onMessage.addListener((message: PageRunnerMessage, _sender, sendResponse) => {
    void handlePageRunnerMessage(message)
      .catch(
        (error: unknown): PageRunnerResponse => ({
          ok: false,
          error: error instanceof Error ? error.message : String(error),
        }),
      )
      .then(sendResponse);

    // Returning true keeps the message channel open for the asynchronous reply.
    return true;
  });
}
|
||||
|
||||
/**
 * Dispatches a page-runner message sent by the background script.
 *
 * - `CHECK_INTERRUPT`: reports the current interrupt state of the page.
 * - `SCRAPE_STEP`: aborts if the page is interrupted, waits up to 18 seconds
 *   for the check selector to become visible, then scrapes the fields.
 * - Anything else yields an error response.
 */
async function handlePageRunnerMessage(message: PageRunnerMessage): Promise<PageRunnerResponse> {
  switch (message.action) {
    case 'CHECK_INTERRUPT':
      return { ok: true, interrupt: detectPageInterrupt() };

    case 'SCRAPE_STEP': {
      const pendingInterrupt = detectPageInterrupt();
      if (pendingInterrupt) {
        return { ok: false, interrupt: pendingInterrupt };
      }

      const anchor = await waitForStableSelector(message.payload.checkSelector, 18000);
      if (!anchor) {
        // The key element never became visible within the wait window.
        return {
          ok: false,
          interrupt: {
            reason: 'page_not_ready',
            message: '页面关键内容暂未加载,请确认页面是否正常显示后继续',
          },
        };
      }

      const scraped = await processFields(message.payload.fields, document.body);
      return { ok: true, data: scraped };
    }

    default:
      return { ok: false, error: '未知页面执行指令' };
  }
}
|
||||
|
||||
/**
 * Checks whether the current page requires manual handling by the user.
 *
 * Probes run in priority order (shield/captcha, login, not-found) and the
 * first match wins; later probes are not evaluated.
 *
 * @returns The pause info for the first matching condition, or `undefined`
 * when none applies.
 */
function detectPageInterrupt(): CrawlPauseInfo | undefined {
  const probes: Array<{ matches: () => boolean; info: CrawlPauseInfo }> = [
    {
      matches: isShieldPage,
      info: {
        reason: 'shield',
        message: '检测到验证码或风控验证,请在打开的商家后台窗口处理完成后继续',
      },
    },
    {
      matches: isLoginPage,
      info: {
        reason: 'reauth',
        message: '检测到需要重新登录,请在打开的商家后台窗口登录完成后继续',
      },
    },
    {
      matches: isNotFoundPage,
      info: {
        reason: 'not_found',
        message: '当前页面不存在或已失效,请确认平台配置里的页面地址是否正确',
      },
    },
  ];

  const hit = probes.find((probe) => probe.matches());
  return hit ? hit.info : undefined;
}
|
||||
|
||||
/**
 * Determines whether the page is a captcha, traffic-shield or risk-control
 * verification page.
 *
 * The page counts as shielded when the path starts with `/verify/captcha`
 * or `/verify/traffic`, or when any captcha-like element is visible.
 * All matching elements are inspected, so a hidden match earlier in the
 * document does not mask a visible one further down.
 *
 * @returns `true` when a verification step is blocking the page.
 */
function isShieldPage(): boolean {
  const path = location.pathname.toLowerCase();

  if (path.startsWith('/verify/captcha') || path.startsWith('/verify/traffic')) {
    return true;
  }

  const shieldCandidates = document.querySelectorAll(
    '[data-name="verification"], .ant-captcha, #captchaContainer, [class*="captcha" i], [id*="captcha" i]',
  );

  return Array.from(shieldCandidates).some((candidate) => isVisibleElement(candidate));
}
|
||||
|
||||
/**
 * Determines whether the page asks the user to log in or re-enter a password.
 *
 * Checks, in order: known login URL paths, any visible password input, and
 * login prompts within the first 3000 characters of the page text.
 */
function isLoginPage(): boolean {
  const currentPath = location.pathname.toLowerCase();
  const loginPathPatterns = [
    /^\/(?:buyer\/)?login\b/i,
    /^\/account\/(?:signin|login)\b/i,
    /^\/portal\/login\b/i,
  ];

  for (const pathPattern of loginPathPatterns) {
    if (pathPattern.test(currentPath)) {
      return true;
    }
  }

  const passwordInputs = Array.from(document.querySelectorAll('input[type="password"]'));
  if (passwordInputs.some((input) => isVisibleElement(input))) {
    return true;
  }

  // Only the leading part of the page text is scanned.
  const pageText = document.body.innerText.slice(0, 3000);
  const loginTextPatterns = [
    /enter\s+(your\s+)?password\s+to\s+continue/i,
    /sign\s+in\s+(again\s+)?to\s+continue/i,
    /please\s+(re-?)?enter\s+(your\s+)?password/i,
    /请(再次|重新)?输入(您的)?密码/,
    /请登录|重新登录|登录后继续/,
  ];

  for (const textPattern of loginTextPatterns) {
    if (textPattern.test(pageText)) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Determines whether the page is a "not found", removed or unavailable page.
 *
 * Each pattern is tried against the document title first and then against
 * the first 8000 characters of the page text.
 */
function isNotFoundPage(): boolean {
  const pageText = document.body.innerText.slice(0, 8000);
  const pageTitle = document.title;
  const notFoundPatterns = [
    /page\s+not\s+found/i,
    /the\s+page\s+you\s+are\s+looking\s+for/i,
    /this\s+page\s+(has\s+been\s+)?removed/i,
    /product\s+(is\s+)?unavailable/i,
    /页面不存在|找不到(此|该)?页面|抱歉.*不存在|(商品|产品)已下架/,
  ];

  for (const pattern of notFoundPatterns) {
    if (pattern.test(pageTitle) || pattern.test(pageText)) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Waits until an element matching `selector` is visible and stays visible
 * across a short stability window.
 *
 * The page is polled until `timeoutMs` has elapsed. When a visible match is
 * found, the selector is checked again after 600 ms. If the match is still
 * visible it is returned. If it vanished in the meantime (for example during
 * a re-render), polling continues until the deadline instead of giving up.
 *
 * @param selector CSS selector of the key element to wait for.
 * @param timeoutMs Maximum time to keep polling, in milliseconds. The final
 * stability check may finish slightly after this deadline.
 * @returns The stable visible element, or `null` if none appeared in time.
 */
async function waitForStableSelector(selector: string, timeoutMs: number): Promise<Element | null> {
  const pollIntervalMs = 500;
  const stableWindowMs = 600;
  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    const element = document.querySelector(selector);

    if (element && isVisibleElement(element)) {
      await sleep(stableWindowMs);
      const stableElement = document.querySelector(selector);

      if (stableElement && isVisibleElement(stableElement)) {
        return stableElement;
      }

      // The element flickered away; retry while time remains.
      continue;
    }

    await sleep(pollIntervalMs);
  }

  return null;
}
|
||||
|
||||
/**
 * Determines whether an element is actually rendered and visible.
 *
 * An element is considered invisible when it is detached from the document,
 * has no computed style available, uses `display: none`, uses
 * `visibility: hidden` or `visibility: collapse`, has an opacity below 0.05,
 * or has a bounding box with zero width or height.
 *
 * @param element Element to inspect.
 * @returns `true` only when none of the conditions above apply.
 */
function isVisibleElement(element: Element): boolean {
  if (!element.isConnected) {
    return false;
  }

  const style = element.ownerDocument.defaultView?.getComputedStyle(element);

  if (!style || style.display === 'none') {
    return false;
  }

  // `collapse` hides the element just like `hidden` but can still leave a non-zero box.
  if (style.visibility === 'hidden' || style.visibility === 'collapse') {
    return false;
  }

  if (Number(style.opacity) < 0.05) {
    return false;
  }

  const rect = element.getBoundingClientRect();
  return rect.width > 0 && rect.height > 0;
}
|
||||
|
||||
/**
 * Returns a promise that resolves after `ms` milliseconds, scheduled with
 * `window.setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    window.setTimeout(done, ms);
  });
}
|
||||
Reference in New Issue
Block a user