This commit is contained in:
zhu
2026-05-12 15:26:17 +08:00
parent cf7ea741a6
commit c7cb977243
14 changed files with 507 additions and 944 deletions

View File

@@ -1,40 +1,66 @@
import { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service'; import {broadcastCrawlStorageChange, handleExternalConnect, handleExternalMessage} from './service/externalBridge';
import { broadcastCrawlStorageChange, handleExternalConnect, handleExternalMessage } from './service/externalBridge'; import {MessageAction} from "@/shared/message";
import type { BackgroundCommand } from './types'; import {cancelCrawl, startCrawl} from "./task/crawlTask";
import { cancelStaleCrawlWhenWindowMissing } from './service/crawlTask'; import {getCrawlTaskState} from "./task/taskState";
import { getCrawlTaskState } from './service/taskState';
chrome.runtime.onInstalled.addListener(() => { chrome.runtime.onInstalled.addListener(() => {
void handleInstalled();
}); });
chrome.runtime.onStartup.addListener(() => { chrome.runtime.onStartup.addListener(() => {
void handleStartup();
}); });
chrome.runtime.onMessage.addListener((message: BackgroundCommand | { action?: string }, sender, sendResponse) => { /**
if (message && typeof message === 'object' && message.action === 'GET_CRAWL_STATE_FOR_TAB') { * 接受popup的指令
void (async () => { */
await cancelStaleCrawlWhenWindowMissing(); chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
const state = await getCrawlTaskState(); // 1. 统一提取 action 和 payload
const tabId = sender.tab?.id; const action = message.action as MessageAction;
if (state && typeof tabId === 'number' && state.tabId === tabId) { const payload = message.payload;
sendResponse({ ok: true, data: state });
return; // 2. 使用一个异步立即执行函数来处理逻辑
} (async () => {
sendResponse({ ok: true, data: null }); try {
})(); let resultData: any = null;
return true;
} // 3. 根据 action 分发任务
switch (action) {
case "START_CRAWL":
resultData = await startCrawl(payload.platformId);
break;
case "GET_CRAWL_STATE":
resultData = await getCrawlTaskState();
break;
case "CANCEL_CRAWL":
await cancelCrawl()
break;
default:
throw new Error(`未知的后台指令: ${action}`);
}
sendResponse({ok: true, data: resultData});
} catch (error: any) {
console.error(`[Background] Action ${action} failed:`, error);
sendResponse({ok: false, error: error.message || 'Unknown error'});
}
})();
void handleBackgroundMessage(message as BackgroundCommand, sendResponse);
return true; return true;
}); });
/**
* 监听窗口关闭:
* 用户手动关掉爬虫窗口时,自动触发任务清理逻辑(取消任务、停掉后台循环)。
*/
chrome.windows.onRemoved.addListener((windowId) => { chrome.windows.onRemoved.addListener((windowId) => {
void handleWindowRemoved(windowId);
}); });
/**
* 接收外部网页消息:
* 允许在 manifest.json 中授权的官网域名(如 your-app.com直接调起插件功能。
*/
chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) => { chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) => {
void handleExternalMessage(message).then(sendResponse).catch((error: unknown) => { void handleExternalMessage(message).then(sendResponse).catch((error: unknown) => {
sendResponse({ sendResponse({
@@ -43,27 +69,19 @@ chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) =>
}); });
}); });
return true; return true; // 保持异步响应通道开启
});
chrome.runtime.onConnectExternal.addListener(handleExternalConnect);
chrome.storage.onChanged.addListener((changes, areaName) => {
broadcastCrawlStorageChange(changes, areaName);
}); });
/** /**
* Wrap background command handling so async errors can still be returned to callers. * 处理外部长连接:
* 用于官网页面与插件后台建立持久通信,实现实时的数据流同步。
*/ */
async function handleBackgroundMessage( chrome.runtime.onConnectExternal.addListener(handleExternalConnect);
message: BackgroundCommand,
sendResponse: (response?: unknown) => void, /**
) { * 监听存储变化:
try { * 只要插件的本地数据storage发生改动就立即广播给所有 UIPopup/网页),实现进度条同步。
const result = await handleBackgroundCommand(message); */
sendResponse(result); chrome.storage.onChanged.addListener((changes, areaName) => {
} catch (error: unknown) { broadcastCrawlStorageChange(changes, areaName);
const messageText = error instanceof Error ? error.message : 'Unknown error'; });
sendResponse({ ok: false, data: null, error: messageText });
}
}

View File

@@ -1 +0,0 @@
export { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service/lifecycle';

View File

@@ -1,671 +0,0 @@
import { getPlatformById } from '@/config/platforms';
import type { CrawlPauseInfo, CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types';
import type { DomScrapeResult } from '../domScraper';
import type { CrawlStateResponse } from '../types';
import { clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState';
/** Reply object the content script sends back for a page-runner message. */
interface PageRunnerResponse {
// True when the step was scraped successfully.
ok: boolean;
// Scrape result; stored as the step's `result` on success.
data?: DomScrapeResult | null;
// Present when the page needs user action; the task is paused with this info.
interrupt?: CrawlPauseInfo;
// Failure description; also used to detect a content script that is not ready yet.
error?: string;
}
// Abort controllers of in-flight crawl runs, keyed by task id.
const activeCrawlControllers = new Map<string, AbortController>();
// Pending auto-close timer handles, keyed by task id.
const autoCloseTimers = new Map<string, number>();
// Delay (ms) before the crawl window is closed automatically once a task ends.
const DEFAULT_AUTOCLOSE_DELAY_MS = 10_000;
/**
 * Starts a crawl for the given platform.
 *
 * When a task is already running or paused it is returned unchanged. Otherwise
 * the initial timeline (first step running, the rest pending) is persisted, a
 * crawl window is opened on the first step's URL and the step runner is
 * launched in the background.
 *
 * @param platformId Id used to look up the platform configuration.
 * @returns `ok: true` with the active task state, or `ok: false` with an error
 *   message when the platform or its steps are missing, or the window cannot
 *   be opened (the failed state is persisted and returned in that case).
 */
export async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
  const platform = getPlatformById(platformId);
  const existing = await getCrawlTaskState();
  if (existing && (existing.status === 'running' || existing.status === 'paused')) {
    return { ok: true, data: existing };
  }
  if (!platform) {
    return { ok: false, error: '平台配置不存在' };
  }
  const [entryStep] = platform.steps;
  if (!entryStep) {
    return { ok: false, error: '平台未配置爬取步骤' };
  }

  const startedAt = Date.now();
  const pendingState: CrawlTaskState = {
    id: `${platform.id}-${startedAt}`,
    platformId: platform.id,
    platformName: platform.name,
    startedAt,
    status: 'running',
    currentStepIndex: 0,
    steps: platform.steps.map<CrawlProgressStep>(({ name, uniqueKey }, position) => ({
      name,
      uniqueKey,
      status: position === 0 ? 'running' : 'pending',
    })),
  };
  await setCrawlTaskState(pendingState);

  try {
    const crawlWindow = await createCrawlWindow(entryStep.url);
    // A failed tab lookup is tolerated and simply leaves tabId undefined.
    const tabId = crawlWindow.id
      ? await getWindowActiveTabId(crawlWindow.id).catch(() => undefined)
      : undefined;
    const launchedState = { ...pendingState, windowId: crawlWindow.id, tabId };
    const abortController = new AbortController();
    await setCrawlTaskState(launchedState);
    activeCrawlControllers.set(launchedState.id, abortController);

    // Fire and forget; the controller entry is dropped once the run settles.
    const run = runCrawlSteps(platform, launchedState, abortController.signal);
    void run.finally(() => {
      activeCrawlControllers.delete(launchedState.id);
    });
    return { ok: true, data: launchedState };
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : '打开平台窗口失败';
    const failedState: CrawlTaskState = {
      ...pendingState,
      status: 'failed',
      steps: pendingState.steps.map((step, position) =>
        position === 0 ? { ...step, status: 'failed', message: '打开平台窗口失败' } : step,
      ),
    };
    await setCrawlTaskState(failedState);
    return { ok: false, data: failedState, error: reason };
  }
}
/**
 * Cancels the stored crawl task.
 *
 * Aborts the running step loop, marks the task `canceled` (the current step
 * becomes `failed` if it was still running) and, when a crawl window is
 * recorded, schedules that window to close after the auto-close delay.
 *
 * @returns `ok: true` with the canceled state, or `data: null` when no task exists.
 */
export async function cancelCrawl(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  if (!current) {
    return { ok: true, data: null };
  }

  abortActiveCrawl(current.id);
  clearAutoCloseTimer(current.id);

  const canceled: CrawlTaskState = {
    ...current,
    status: 'canceled',
    autocloseAt: current.windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null,
    steps: current.steps.map((step, position) => {
      const wasInterrupted = position === current.currentStepIndex && step.status === 'running';
      return wasInterrupted ? { ...step, status: 'failed', message: '用户取消爬取任务' } : step;
    }),
  };
  await setCrawlTaskState(canceled);

  const { windowId } = canceled;
  if (windowId) {
    scheduleAutoCloseWindow(canceled.id, windowId, canceled.autocloseAt);
  }
  return { ok: true, data: canceled };
}
/**
 * Resumes a paused crawl once the user has dealt with the interruption.
 *
 * Only acts on a task whose status is `paused`: the status goes back to
 * `running`, the pause info is cleared and the current step's message is
 * removed. Any other state is returned untouched.
 */
export async function resumeCrawl(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  if (!current || current.status !== 'paused') {
    return { ok: true, data: current };
  }

  const resumed: CrawlTaskState = {
    ...current,
    status: 'running',
    pause: undefined,
    steps: current.steps.map((step, position) => {
      if (position !== current.currentStepIndex) {
        return step;
      }
      return { ...step, status: 'running', message: undefined };
    }),
  };
  await setCrawlTaskState(resumed);
  return { ok: true, data: resumed };
}
/**
 * Reacts to a closed browser window: if it was the crawl window of a running
 * or paused task, the task is aborted and stored as `canceled`, with the
 * current step marked `failed`.
 *
 * @param windowId Id of the window that was removed.
 */
export async function cancelCrawlWhenWindowRemoved(windowId: number): Promise<void> {
  const current = await getCrawlTaskState();
  if (!current || current.windowId !== windowId) {
    return;
  }
  if (current.status !== 'running' && current.status !== 'paused') {
    return;
  }

  abortActiveCrawl(current.id);
  clearAutoCloseTimer(current.id);

  const { currentStepIndex } = current;
  await setCrawlTaskState({
    ...current,
    status: 'canceled',
    autocloseAt: null,
    steps: current.steps.map((step, position) =>
      position === currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭' } : step,
    ),
  });
}
/**
 * Cancels a running or paused task whose crawl window no longer exists
 * (or was never recorded). The task is aborted and stored as `canceled`,
 * with the current step marked `failed`.
 */
export async function cancelStaleCrawlWhenWindowMissing(): Promise<void> {
  const current = await getCrawlTaskState();
  if (!current) {
    return;
  }
  if (current.status !== 'running' && current.status !== 'paused') {
    return;
  }
  // Nothing to do while the recorded window is still around.
  if (current.windowId && (await hasWindow(current.windowId))) {
    return;
  }

  abortActiveCrawl(current.id);
  clearAutoCloseTimer(current.id);

  const { currentStepIndex } = current;
  await setCrawlTaskState({
    ...current,
    status: 'canceled',
    autocloseAt: null,
    steps: current.steps.map((step, position) =>
      position === currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭,任务已取消' } : step,
    ),
  });
}
/** Aborts the in-flight run registered for `taskId`, if there is one. */
function abortActiveCrawl(taskId: string): void {
  const controller = activeCrawlControllers.get(taskId);
  if (controller) {
    controller.abort();
  }
}
/**
 * Cancels the pending auto-close of the crawl window (the overlay's
 * "keep open" action): clears the timer and stores `autocloseAt: null`.
 *
 * @returns `ok: true` with the updated state, or `data: null` when no task exists.
 */
export async function cancelAutoclose(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  if (!current) {
    return { ok: true, data: null };
  }

  clearAutoCloseTimer(current.id);
  const keptOpen: CrawlTaskState = { ...current, autocloseAt: null };
  await setCrawlTaskState(keptOpen);
  return { ok: true, data: keptOpen };
}
/**
 * Discards the stored task snapshot (the popup's Close/Dismiss action).
 * The crawl window itself is not closed here; only the pending auto-close
 * timer is cleared and the stored state removed.
 *
 * @returns Always `ok: true` with `data: null`.
 */
export async function dismissCrawl(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  if (current) {
    clearAutoCloseTimer(current.id);
    await clearCrawlTaskState();
  }
  return { ok: true, data: null };
}
/**
 * Arms a timer that removes `windowId` at the `autocloseAt` timestamp,
 * replacing any timer already registered for the task.
 * Does nothing when `autocloseAt` is missing.
 */
function scheduleAutoCloseWindow(taskId: string, windowId: number, autocloseAt?: number | null): void {
  if (!autocloseAt) {
    return;
  }
  clearAutoCloseTimer(taskId);

  const closeWindow = (): void => {
    autoCloseTimers.delete(taskId);
    chrome.windows.remove(windowId).catch(() => undefined);
  };
  // A deadline that already passed fires on the next tick.
  const waitMs = Math.max(0, autocloseAt - Date.now());
  const handle = setTimeout(closeWindow, waitMs) as unknown as number;
  autoCloseTimers.set(taskId, handle);
}
/** Cancels and forgets the auto-close timer registered for `taskId`, if any. */
function clearAutoCloseTimer(taskId: string): void {
  const handle = autoCloseTimers.get(taskId);
  if (handle !== undefined) {
    clearTimeout(handle);
    autoCloseTimers.delete(taskId);
  }
}
/**
 * Walks the platform's steps in order: navigates the crawl tab, waits for the
 * page to load, asks the content script to scrape and records progress.
 *
 * A step is repeated after an interrupt once the task has been resumed. The
 * run ends silently when the signal aborts or the stored task was replaced or
 * canceled. A scrape failure marks the task `failed` and stops. After the last
 * step, or on an unexpected error, the window is scheduled for auto-close.
 */
async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState, signal: AbortSignal): Promise<void> {
  const { id: taskId, windowId } = initialState;
  if (!windowId) {
    return;
  }

  try {
    for (const [stepIndex, step] of platform.steps.entries()) {
      // Repeat this step until it succeeds; an interrupt `continue`s back here.
      while (true) {
        const snapshot = await getCrawlTaskState();
        if (signal.aborted || snapshot?.id !== taskId || snapshot.status === 'canceled') {
          return;
        }
        if (snapshot.status === 'paused' && !(await waitUntilResumed(taskId, signal))) {
          return;
        }

        // Mark this step as the running one and drop any leftover pause info.
        await updateCrawlTaskState(taskId, (state) => ({
          ...state,
          currentStepIndex: stepIndex,
          status: 'running',
          pause: undefined,
          steps: state.steps.map((item, index) => {
            const isCurrent = index === stepIndex;
            return {
              ...item,
              status: isCurrent ? 'running' : item.status,
              message: isCurrent ? undefined : item.message,
            };
          }),
        }));

        const tabId = await getWindowActiveTabId(windowId);
        await chrome.tabs.update(tabId, { url: step.url, active: true });
        if (!(await waitForTabLoaded(tabId, signal)) || signal.aborted) {
          return;
        }

        const response = await scrapeStepInContent(tabId, step, signal);
        if (signal.aborted) {
          return;
        }

        if (response.interrupt) {
          await pauseForInterrupt(taskId, stepIndex, response.interrupt);
          if (!(await waitUntilResumed(taskId, signal))) {
            return;
          }
          continue;
        }

        if (!response.ok) {
          const message = response.error ?? '页面抓取失败';
          await updateCrawlTaskState(taskId, (state) => ({
            ...state,
            status: 'failed',
            currentStepIndex: stepIndex,
            steps: state.steps.map((item, index) =>
              index === stepIndex ? { ...item, status: 'failed', message } : item,
            ),
          }));
          return;
        }

        console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, response.data);
        await updateCrawlTaskState(taskId, (state) => ({
          ...state,
          steps: state.steps.map((item, index) =>
            index === stepIndex
              ? { ...item, status: 'success', message: undefined, result: response.data }
              : item,
          ),
        }));
        break;
      }
    }

    // windowId is known to be set here, so the window is always scheduled to close.
    const autocloseAt = Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS;
    await updateCrawlTaskState(taskId, (state) => ({
      ...state,
      status: 'completed',
      autocloseAt,
      steps: state.steps.map((item) => (item.status === 'running' ? { ...item, status: 'success' } : item)),
    }));
    scheduleAutoCloseWindow(taskId, windowId, autocloseAt);
  } catch (error: unknown) {
    console.error('[crawl] 执行失败', error);
    const failureMessage = error instanceof Error ? error.message : '爬取执行失败';
    const autocloseAt = Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS;
    await updateCrawlTaskState(taskId, (state) => ({
      ...state,
      status: 'failed',
      autocloseAt,
      steps: state.steps.map((item, index) =>
        index === state.currentStepIndex && item.status === 'running'
          ? { ...item, status: 'failed', message: failureMessage }
          : item,
      ),
    }));
    scheduleAutoCloseWindow(taskId, windowId, autocloseAt);
  }
}
/**
 * Looks up the id of the active tab in the given window.
 *
 * @throws Error when the window has no active tab with a usable id.
 */
async function getWindowActiveTabId(windowId: number): Promise<number> {
  const [activeTab] = await chrome.tabs.query({ windowId, active: true });
  const tabId = activeTab?.id;
  if (!tabId) {
    throw new Error('未找到爬取窗口中的标签页');
  }
  return tabId;
}
/**
 * Asks the tab's content script to check the page and scrape the step's fields.
 *
 * While the content script is not reachable yet, the request is repeated every
 * 500 ms for up to 20 seconds. Any other reply (success, interrupt or a real
 * error) is returned immediately; an abort yields `{ ok: false, error: 'canceled' }`.
 */
async function scrapeStepInContent(
  tabId: number,
  step: PlatformStepConfig,
  signal: AbortSignal,
): Promise<PageRunnerResponse> {
  const deadline = Date.now() + 20000;

  while (Date.now() < deadline) {
    if (signal.aborted) {
      return { ok: false, error: 'canceled' };
    }

    const request = {
      action: 'SCRAPE_STEP',
      payload: {
        fields: step.fields,
        checkSelector: step.checkSelector,
      },
    };
    const reply = await sendPageRunnerMessage(tabId, request, signal);

    // Only a "content script not ready" failure is worth retrying.
    const shouldRetry = !reply.ok && !reply.interrupt && isPageRunnerNotReadyError(reply.error);
    if (!shouldRetry) {
      return reply;
    }

    const waited = await sleep(500, signal);
    if (!waited) {
      return { ok: false, error: 'canceled' };
    }
  }

  return { ok: false, error: '页面脚本未响应,请刷新扩展后重试' };
}
/**
 * Sends one page-runner message to the tab's content script, giving up early
 * with `{ ok: false, error: 'canceled' }` when the signal is or becomes aborted.
 */
async function sendPageRunnerMessage(tabId: number, message: unknown, signal: AbortSignal): Promise<PageRunnerResponse> {
  return signal.aborted
    ? { ok: false, error: 'canceled' }
    : raceWithAbort(sendPageRunnerMessageOnce(tabId, message), signal);
}
/**
 * Performs a single `chrome.tabs.sendMessage` round trip. Never rejects:
 * a thrown error or a non-object reply is converted into an `ok: false` response.
 */
async function sendPageRunnerMessageOnce(tabId: number, message: unknown): Promise<PageRunnerResponse> {
  let reply: unknown;
  try {
    reply = await chrome.tabs.sendMessage(tabId, message);
  } catch (error: unknown) {
    return { ok: false, error: error instanceof Error ? error.message : String(error) };
  }

  if (reply !== null && reply !== undefined && typeof reply === 'object') {
    return reply as PageRunnerResponse;
  }
  return { ok: false, error: '页面脚本返回为空' };
}
/**
 * Tells whether an error message only means the content script has not been
 * injected yet (no receiver for the message), as opposed to a real failure.
 *
 * @param error Error text from a failed message send; may be absent.
 */
function isPageRunnerNotReadyError(error?: string): boolean {
  return error
    ? /receiving end does not exist|could not establish connection|no receiving end/i.test(error)
    : false;
}
/**
 * Stores the task as `paused` because the page reported an interrupt.
 * The interrupt details go into `pause`, and the affected step stays `running`
 * while showing the interrupt's message.
 */
async function pauseForInterrupt(taskId: string, stepIndex: number, interrupt: CrawlPauseInfo): Promise<void> {
  const { message } = interrupt;
  await updateCrawlTaskState(taskId, (state) => {
    return {
      ...state,
      status: 'paused',
      pause: interrupt,
      currentStepIndex: stepIndex,
      steps: state.steps.map((item, position) =>
        position === stepIndex ? { ...item, status: 'running', message } : item,
      ),
    };
  });
}
/**
 * Polls the stored task once per second until it leaves the paused state.
 *
 * @returns `true` once the task is `running` again; `false` when the signal
 *   aborts or the task is gone, replaced, canceled or failed.
 */
async function waitUntilResumed(taskId: string, signal: AbortSignal): Promise<boolean> {
  while (!signal.aborted) {
    const snapshot = await getCrawlTaskState();
    if (!snapshot || snapshot.id !== taskId) {
      return false;
    }
    switch (snapshot.status) {
      case 'canceled':
      case 'failed':
        return false;
      case 'running':
        return true;
      default:
        break;
    }
    if (!(await sleep(1000, signal))) {
      return false;
    }
  }
  return false;
}
/**
 * Opens an unfocused 1280x900 popup-type window on `url` for the crawl and
 * asks the browser to draw attention to it.
 *
 * @returns The created window; rejects when Chrome reports an error or the
 *   window comes back without an id.
 */
function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
  const options = {
    url,
    type: 'popup',
    focused: false,
    state: 'normal',
    width: 1280,
    height: 900,
  } as const;

  return new Promise((resolve, reject) => {
    chrome.windows.create(options, (created) => {
      const lastError = chrome.runtime.lastError;
      if (lastError) {
        reject(new Error(lastError.message));
        return;
      }
      const createdId = created?.id;
      if (!created || !createdId) {
        reject(new Error('窗口创建失败'));
        return;
      }
      // Best effort only; a failure to draw attention is ignored.
      void chrome.windows.update(createdId, { drawAttention: true }).catch(() => undefined);
      resolve(created);
    });
  });
}
/**
 * Waits until the tab reports `status: 'complete'`.
 *
 * @returns `true` when the tab finished loading or 15 seconds passed without
 *   that event (the caller then proceeds anyway); `false` when the signal is
 *   or becomes aborted.
 */
function waitForTabLoaded(tabId: number, signal: AbortSignal): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    if (signal.aborted) {
      resolve(false);
      return;
    }

    // Single exit point: tear down every listener/timer, then settle.
    const finish = (loaded: boolean): void => {
      globalThis.clearTimeout(timer);
      chrome.tabs.onUpdated.removeListener(onTabUpdated);
      signal.removeEventListener('abort', onAbort);
      resolve(loaded);
    };
    const onAbort = (): void => finish(false);
    const onTabUpdated = (updatedTabId: number, changeInfo: { status?: string }): void => {
      if (updatedTabId !== tabId || changeInfo.status !== 'complete') {
        return;
      }
      finish(true);
    };

    const timer = globalThis.setTimeout(() => finish(true), 15000);
    chrome.tabs.onUpdated.addListener(onTabUpdated);
    signal.addEventListener('abort', onAbort, { once: true });
  });
}
/**
 * Checks whether a browser window with the given id still exists.
 * Any error from `chrome.windows.get` is treated as "no such window".
 */
async function hasWindow(windowId: number): Promise<boolean> {
  let exists = true;
  try {
    await chrome.windows.get(windowId);
  } catch {
    exists = false;
  }
  return exists;
}
/**
 * Settles with whichever happens first: `promise` settling or `signal` aborting.
 *
 * On abort (including a signal that is already aborted) the result resolves
 * to `{ ok: false, error: 'canceled' }` cast to `T`; otherwise it mirrors the
 * promise's fulfilment or rejection. Later outcomes are ignored.
 */
function raceWithAbort<T>(promise: Promise<T>, signal: AbortSignal): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    if (signal.aborted) {
      resolve({ ok: false, error: 'canceled' } as T);
      return;
    }

    let done = false;
    // Runs `finish` only for the first outcome and detaches the abort listener.
    const settleOnce = (finish: () => void): void => {
      if (done) {
        return;
      }
      done = true;
      signal.removeEventListener('abort', onAbort);
      finish();
    };
    const onAbort = (): void => {
      settleOnce(() => resolve({ ok: false, error: 'canceled' } as T));
    };

    signal.addEventListener('abort', onAbort, { once: true });
    promise.then(
      (value) => settleOnce(() => resolve(value)),
      (reason) => settleOnce(() => reject(reason)),
    );
  });
}
/**
 * Waits `ms` milliseconds unless the optional signal aborts first.
 *
 * @returns `true` when the full delay elapsed, `false` when the signal was
 *   already aborted or aborted during the wait.
 */
function sleep(ms: number, signal?: AbortSignal): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    if (signal?.aborted) {
      resolve(false);
      return;
    }

    // Shared teardown: stop the timer, detach the listener, report the outcome.
    const finish = (elapsed: boolean): void => {
      globalThis.clearTimeout(timer);
      signal?.removeEventListener('abort', onAbort);
      resolve(elapsed);
    };
    const onAbort = (): void => finish(false);

    const timer = globalThis.setTimeout(() => finish(true), ms);
    signal?.addEventListener('abort', onAbort, { once: true });
  });
}

View File

@@ -1,7 +1,7 @@
import { platformConfigs } from '@/config/platforms'; import { platformConfigs } from '@/config/platforms';
import type { CrawlTaskState } from '@/types'; import type { CrawlTaskState } from '@/types';
import { cancelCrawl, startCrawl } from './crawlTask'; import {getCrawlTaskState} from "@/background/task/taskState";
import { getCrawlTaskState } from './taskState'; import {cancelCrawl, startCrawl} from "@/background/task/crawlTask";
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
const EXTERNAL_PORT_NAME = 'DIANSHAN_CRAWL'; const EXTERNAL_PORT_NAME = 'DIANSHAN_CRAWL';

View File

@@ -1,59 +0,0 @@
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types';
import {
cancelAutoclose,
cancelCrawl,
cancelCrawlWhenWindowRemoved,
cancelStaleCrawlWhenWindowMissing,
dismissCrawl,
resumeCrawl,
startCrawl,
} from './crawlTask';
import { getCrawlTaskState } from './taskState';
/**
 * Hook for the extension's `onInstalled` event.
 * Currently it only writes a log line to help trace the lifecycle.
 */
export async function handleInstalled(): Promise<void> {
  const logLine = '[background] installed';
  console.log(logLine);
}
/**
 * Hook for the browser-startup event: logs the event, then cancels any
 * stored crawl whose window no longer exists.
 */
export async function handleStartup(): Promise<void> {
  console.log('[background] startup');
  return cancelStaleCrawlWhenWindowMissing();
}
/**
 * Hook for closed browser windows: logs the window id, then cancels the
 * current task if the closed window was its crawl window.
 *
 * @param windowId Id of the window that was removed.
 */
export async function handleWindowRemoved(windowId: number): Promise<void> {
  console.log('[background] window removed', windowId);
  return cancelCrawlWhenWindowRemoved(windowId);
}
/**
 * Routes a command sent by the popup or a content script to its handler.
 *
 * `GET_CRAWL_STATE` first cancels a crawl whose window has vanished, then
 * returns the stored snapshot. An unrecognised action yields an `ok: false`
 * response rather than throwing.
 */
export async function handleBackgroundCommand(
  message: BackgroundCommand,
): Promise<BackgroundResponse | CrawlStateResponse> {
  if (message.action === 'START_CRAWL') {
    return startCrawl(message.payload.platformId);
  }
  if (message.action === 'GET_CRAWL_STATE') {
    await cancelStaleCrawlWhenWindowMissing();
    const data = await getCrawlTaskState();
    return { ok: true, data };
  }
  if (message.action === 'CANCEL_CRAWL') {
    return cancelCrawl();
  }
  if (message.action === 'RESUME_CRAWL') {
    return resumeCrawl();
  }
  if (message.action === 'CANCEL_AUTOCLOSE') {
    return cancelAutoclose();
  }
  if (message.action === 'DISMISS_CRAWL') {
    return dismissCrawl();
  }
  return { ok: false, error: '未知的后台指令' };
}

View File

@@ -1,47 +0,0 @@
import type { CrawlTaskState } from '@/types';
// Key in chrome.storage.local under which the crawl task snapshot is kept.
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
/**
 * Reads the crawl task snapshot from `chrome.storage.local`.
 *
 * @returns The stored state, or `null` when nothing is stored or the stored
 *   value does not look like a task state.
 */
export async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
  const stored = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
  const candidate = stored[CRAWL_TASK_STORAGE_KEY];
  if (!isCrawlTaskState(candidate)) {
    return null;
  }
  return candidate;
}
/** Persists the task state, then pushes the new state to the crawl tab. */
export async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
  const entry = { [CRAWL_TASK_STORAGE_KEY]: state };
  await chrome.storage.local.set(entry);
  broadcastToCrawlTab(state);
}
/** Removes the stored crawl task snapshot from `chrome.storage.local`. */
export async function clearCrawlTaskState(): Promise<void> {
  const storageKey = CRAWL_TASK_STORAGE_KEY;
  await chrome.storage.local.remove(storageKey);
}
/**
 * Applies `updater` to the stored task state and saves the result.
 * The update is skipped when no state is stored, when the stored task has a
 * different id, or when it has already been canceled.
 *
 * @param taskId Id of the task the caller believes is current.
 * @param updater Maps the stored state to the state to persist.
 */
export async function updateCrawlTaskState(
  taskId: string,
  updater: (state: CrawlTaskState) => CrawlTaskState,
): Promise<void> {
  const current = await getCrawlTaskState();
  if (!current) {
    return;
  }
  if (current.id !== taskId) {
    return;
  }
  if (current.status === 'canceled') {
    return;
  }
  const next = updater(current);
  await setCrawlTaskState(next);
}
/**
 * Sends a `crawl_state_update` message carrying the state to the task's tab.
 * Skipped when the state has no tab id; delivery failures are ignored.
 */
function broadcastToCrawlTab(state: CrawlTaskState): void {
  const { tabId } = state;
  if (tabId) {
    try {
      const update = { type: 'crawl_state_update', state };
      void chrome.tabs.sendMessage(tabId, update).catch(() => undefined);
    } catch {
      // A synchronous failure is ignored as well.
    }
  }
}
/**
 * Minimal shape check for a stored value: it must be a non-null object that
 * has both an `id` and a `steps` property.
 */
function isCrawlTaskState(value: unknown): value is CrawlTaskState {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const holder: object = value;
  return ['id', 'steps'].every((key) => key in holder);
}

View File

@@ -0,0 +1,147 @@
import {getPlatformById} from "@/config/platforms";
import {CrawlTaskState, PlatformStepConfig} from "@/types";
import {openSingleTabWindow, scrapeStepInContent, sleep, waitForTabLoaded} from "@/background/task/helper";
import {clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState} from "./taskState";
// Abort controllers of in-flight crawl runs, keyed by task id (used by cancelCrawl).
const activeCrawlControllers = new Map<string, AbortController>();
/**
 * Creates a new crawl task: opens the platform window on the first step's URL,
 * writes the initial timeline to storage and launches the step runner in the
 * background.
 *
 * @param platformId Id used to look up the platform configuration.
 * @returns The initial task state, or `{error}` when the platform is unknown
 *   or has no steps configured.
 * @throws Whatever `openSingleTabWindow` rejects with when the window cannot be opened.
 */
export async function startCrawl(platformId: string): Promise<any> {
  const platform = getPlatformById(platformId);
  if (!platform) {
    return {error: '平台配置不存在'};
  }
  // Guard against a platform without steps instead of crashing on steps[0].url.
  const firstStep = platform.steps[0];
  if (!firstStep) {
    return {error: '平台未配置爬取步骤'};
  }

  // Open the crawl window.
  const windowInfo = await openSingleTabWindow(firstStep.url);

  // Build and persist the initial state: first step running, the rest pending.
  const startedAt = Date.now();
  const nextState: CrawlTaskState = {
    id: `${platform.id}-${startedAt}`,
    windowId: windowInfo.windowId,
    tabId: windowInfo.tabId,
    platformId: platform.id,
    platformName: platform.name,
    startedAt,
    status: 'running',
    currentStepIndex: 0,
    steps: platform.steps.map((item, index) => {
      return {
        name: item.name,
        uniqueKey: item.uniqueKey,
        status: index === 0 ? 'running' : 'pending',
      }
    })
  };
  await setCrawlTaskState(nextState);

  // Register the controller so cancelCrawl can abort this run.
  const controller = new AbortController();
  activeCrawlControllers.set(nextState.id, controller);

  // Launch without awaiting; log a failure so it does not surface as an
  // unhandled rejection, and always drop the controller entry afterwards.
  void runCrawlSteps(nextState.id, windowInfo.tabId, platform.steps, controller.signal)
    .catch((error: unknown) => {
      console.error('[crawl] 执行失败', error);
    })
    .finally(() => {
      activeCrawlControllers.delete(nextState.id);
    });

  return nextState
}
/**
 * Step runner: for each step, navigates the tab, waits for the page to load,
 * asks the content script to scrape and records progress in the task state.
 *
 * An interrupt pauses the task and the step is retried once the stored status
 * is no longer `paused`; a failed scrape is retried after 2 seconds. An
 * unexpected error marks the task `failed` instead of leaving it `running`.
 *
 * @param taskId Id of the task whose state is updated.
 * @param tabId Tab that is navigated and scraped.
 * @param steps Platform step configuration, processed in order.
 * @param signal Abort signal; the run stops quietly once it fires.
 */
async function runCrawlSteps(taskId: string, tabId: number, steps: PlatformStepConfig[], signal: AbortSignal) {
  try {
    for (let i = 0; i < steps.length; i += 1) {
      const step = steps[i];
      let shouldRetryStep = true;

      // Entering a step: record its index and mark it as running.
      await updateCrawlTaskState(taskId, s => ({
        ...s,
        currentStepIndex: i,
        steps: s.steps.map((stepItem, idx) => ({
          ...stepItem,
          status: idx === i ? 'running' : stepItem.status
        }))
      }));

      while (shouldRetryStep) {
        if (signal.aborted) return;

        // 1. Navigate and wait for the page to load.
        await chrome.tabs.update(tabId, {url: step.url, active: true});
        const loaded = await waitForTabLoaded(tabId, signal);
        if (!loaded) return;

        // 2. Probe for blockers / scrape the page.
        const res: any = await scrapeStepInContent(tabId, step, signal);
        if (signal.aborted) return;

        // 3. Interrupt (captcha, login, ...): pause and wait for the status to change.
        if (res.interrupt) {
          await updateCrawlTaskState(taskId, s => ({...s, status: 'paused', pause: res.interrupt}));
          while ((await getCrawlTaskState())?.status === 'paused') {
            if (signal.aborted) return;
            if (!(await sleep(1000, signal))) return;
          }
          continue; // Re-enter the loop, which reloads the page.
        }

        // 4. Record the result or retry.
        if (res.ok) {
          await updateCrawlTaskState(taskId, s => ({
            ...s,
            steps: s.steps.map((item, idx) =>
              idx === i ? {...item, status: 'success', result: res.data} : item
            )
          }));
          shouldRetryStep = false; // Leave the retry loop and move to the next step.
        } else {
          // Scrape failed: wait, then retry the same step.
          if (!(await sleep(2000, signal))) return;
        }
      }
    }

    // All steps finished.
    await updateCrawlTaskState(taskId, s => ({...s, status: 'completed'}));
  } catch (error: unknown) {
    // Errors caused by a cancellation race (e.g. the tab is already gone) are not failures.
    if (signal.aborted) return;
    console.error('[crawl] 执行失败', error);
    const message = error instanceof Error ? error.message : '爬取执行失败';
    await updateCrawlTaskState(taskId, s => ({
      ...s,
      status: 'failed',
      steps: s.steps.map((item, idx) =>
        idx === s.currentStepIndex && item.status === 'running'
          ? {...item, status: 'failed', message}
          : item
      )
    }));
  }
}
/**
 * Cancels the current crawl: aborts the running step loop, removes the stored
 * task state and closes the crawl window (close errors are ignored).
 * Does nothing when no task is stored.
 */
export async function cancelCrawl() {
  const current = await getCrawlTaskState();
  if (!current) return

  const {id: taskId, windowId} = current;

  // Fire the abort signal so the runner stops by itself, then forget the controller.
  activeCrawlControllers.get(taskId)?.abort();
  activeCrawlControllers.delete(taskId);

  // Drop the persisted snapshot.
  await clearCrawlTaskState();

  // Close the crawl window without waiting for the result.
  if (!windowId) return
  chrome.windows.remove(windowId).catch(() => {
  });
}

View File

@@ -0,0 +1,171 @@
import {CrawlPauseInfo, PlatformStepConfig} from "@/types";
import {DomScrapeResult} from "@/background/domScraper";
/**
 * Opens a focused 1260x900 popup-type window on `url` and asks the browser to
 * draw attention to it.
 *
 * @param url Address to load in the new window.
 * @returns The ids of the new window and of its first tab; rejects when Chrome
 *   reports an error or either id is missing.
 */
export async function openSingleTabWindow(url: string) {
  const options = {
    url,
    type: 'popup',
    width: 1260,
    height: 900,
    focused: true,
  } as const;

  return new Promise<{ windowId: number; tabId: number }>((resolve, reject) => {
    chrome.windows.create(options, (win) => {
      // Creation errors are reported through chrome.runtime.lastError.
      const lastError = chrome.runtime.lastError;
      if (lastError) {
        reject(new Error(lastError.message));
        return;
      }

      const windowId = win?.id;
      const tabId = win?.tabs?.[0]?.id;
      if (!windowId || !tabId) {
        reject(new Error('窗口初始化失败'));
        return;
      }

      // Not awaited, and a failure here is ignored.
      void chrome.windows.update(windowId, { drawAttention: true }).catch(() => {});
      resolve({ windowId, tabId });
    });
  });
}
/**
 * Waits until the given tab reports `status: 'complete'`.
 *
 * @param tabId Tab to watch.
 * @param signal Abort signal.
 * @returns `true` when the tab finished loading or 15 seconds passed without
 *   that event; `false` when the signal is or becomes aborted.
 */
export function waitForTabLoaded(tabId: number, signal: AbortSignal): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    if (signal.aborted) {
      resolve(false);
      return;
    }

    // Single exit point: tear down the timer and listeners, then settle.
    const settle = (loaded: boolean): void => {
      globalThis.clearTimeout(timer);
      chrome.tabs.onUpdated.removeListener(onTabUpdated);
      signal.removeEventListener('abort', onAbort);
      resolve(loaded);
    };
    const onAbort = (): void => settle(false);
    const onTabUpdated = (updatedTabId: number, changeInfo: { status?: string }): void => {
      const isTargetDone = updatedTabId === tabId && changeInfo.status === 'complete';
      if (isTargetDone) {
        settle(true);
      }
    };

    const timer = globalThis.setTimeout(() => settle(true), 15000);
    chrome.tabs.onUpdated.addListener(onTabUpdated);
    signal.addEventListener('abort', onAbort, {once: true});
  });
}
/**
 * Reply returned by the content script when it is asked to scrape or probe
 * the target page (the result type of scrapeStepInContent).
 */
interface PageRunnerResponse {
// True when the step was scraped successfully.
ok: boolean;
// Scraped data, if any.
data?: DomScrapeResult | null;
// Present when the page needs user action (e.g. login or captcha).
interrupt?: CrawlPauseInfo;
// Failure description.
error?: string;
}
/**
 * Asks the tab's content script to scrape the step, polling every 500 ms for
 * up to 20 seconds.
 *
 * A reply carrying `interrupt` or `ok: true` is returned at once. Any other
 * outcome (no reply, `ok: false`, or a messaging error) is treated as "page
 * not ready yet" and retried. "Receiving end does not exist" errors are
 * expected while the content script loads and are not logged.
 *
 * @param tabId Tab whose content script receives the request.
 * @param step Step configuration providing `fields` and `checkSelector`.
 * @param signal Abort signal.
 * @returns The content script's reply, `{ok: false, error: 'canceled'}` when
 *   aborted, or a timeout error after 20 seconds.
 */
export async function scrapeStepInContent(tabId: number, step: PlatformStepConfig, signal: AbortSignal): Promise<PageRunnerResponse> {
  const TIMEOUT_MS = 20000;
  // Chrome capitalises this message, so it has to be matched case-insensitively.
  const NOT_READY_PATTERN = /receiving end does not exist|could not establish connection/i;
  const startTime = Date.now();

  while (Date.now() - startTime < TIMEOUT_MS) {
    if (signal.aborted) return {ok: false, error: 'canceled'};

    try {
      const res: PageRunnerResponse | undefined = await chrome.tabs.sendMessage(tabId, {
        action: 'SCRAPE_STEP',
        payload: {
          fields: step.fields,
          checkSelector: step.checkSelector
        }
      });
      // The reply may be undefined when nothing answered; only a real
      // interrupt or success ends the polling.
      if (res && (res.interrupt || res.ok)) {
        return res;
      }
    } catch (err: unknown) {
      // The rejection is not guaranteed to be an Error with a message.
      const message = err instanceof Error ? err.message : String(err);
      if (!NOT_READY_PATTERN.test(message)) {
        console.warn('通信异常:', message);
      }
    }

    // An abort during the wait is a cancellation, not a timeout.
    if (!(await sleep(500, signal))) {
      return {ok: false, error: 'canceled'};
    }
  }

  return {ok: false, error: '页面响应超时,可能需要刷新'};
}
/**
 * Waits for the given number of milliseconds, ending early on abort.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal.
 * @returns `true` when the full delay elapsed; `false` when the signal was
 *   already aborted or aborted during the wait.
 */
export function sleep(ms: number, signal?: AbortSignal): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    if (signal?.aborted) {
      resolve(false);
      return;
    }

    // One place that stops the timer, detaches the listener and reports the outcome.
    function finish(elapsed: boolean): void {
      clearTimeout(timer);
      signal?.removeEventListener('abort', handleAbort);
      resolve(elapsed);
    }

    function handleAbort(): void {
      finish(false);
    }

    const timer = setTimeout(() => finish(true), ms);
    signal?.addEventListener('abort', handleAbort, {once: true});
  });
}

View File

@@ -0,0 +1,54 @@
import type {CrawlTaskState} from '@/types';
import {sendTabMessage} from "@/shared/tab";
// Key in chrome.storage.local under which the crawl task snapshot is kept.
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
/**
 * Reads the current crawl task state from local storage.
 *
 * @returns The stored state, or `null` when nothing is stored or the stored
 *   value is not an object carrying both `id` and `steps`.
 */
export async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
  const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
  const state: unknown = result[CRAWL_TASK_STORAGE_KEY];
  // Validate the shape instead of trusting whatever is in storage, so callers
  // that read `state.steps` or `state.id` never receive malformed data.
  const looksValid =
    typeof state === 'object' && state !== null && 'id' in state && 'steps' in state;
  return looksValid ? (state as CrawlTaskState) : null;
}
/**
 * Saves the crawl task state and forwards it to the task's tab.
 *
 * @param state New state to persist; the tab message is skipped when it has no `tabId`.
 */
export async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
  // Persist first so storage always holds the latest state.
  const entry = {[CRAWL_TASK_STORAGE_KEY]: state};
  await chrome.storage.local.set(entry);

  // Then notify the tab that is running the task.
  const {tabId} = state;
  if (tabId) {
    sendTabMessage(tabId, 'CRAWL_STATE_UPDATE', state)
  }
}
/**
 * Removes the stored crawl task state from local storage.
 */
export async function clearCrawlTaskState(): Promise<void> {
  const storageKey = CRAWL_TASK_STORAGE_KEY;
  await chrome.storage.local.remove(storageKey);
}
/**
 * Applies `updater` to the stored task state and saves the result.
 * Nothing happens when no state is stored, the stored task has a different
 * id, or the task is already marked as canceled.
 *
 * @param taskId Id of the task the caller believes is current.
 * @param updater Receives the stored state and returns the state to persist.
 */
export async function updateCrawlTaskState(
  taskId: string,
  updater: (state: CrawlTaskState) => CrawlTaskState,
): Promise<void> {
  const current = await getCrawlTaskState();
  if (!current) {
    return;
  }
  if (current.id !== taskId) {
    return;
  }
  if (current.status === 'canceled') {
    return;
  }
  const next = updater(current);
  await setCrawlTaskState(next);
}

View File

@@ -1,54 +1,5 @@
import type { CrawlTaskState } from '@/types'; import type { CrawlTaskState } from '@/types';
// 启动爬取任务的后台消息。
export interface StartCrawlCommand {
// 消息动作类型:请求 background 创建爬取窗口并初始化任务状态。
action: 'START_CRAWL';
// 启动爬取所需参数。
payload: {
// 当前要爬取的平台 ID对应 config/platforms.ts 中的平台配置。
platformId: string;
};
}
// 获取当前爬取任务状态的后台消息。
export interface GetCrawlStateCommand {
// 消息动作类型:请求 background 返回当前任务快照。
action: 'GET_CRAWL_STATE';
}
// 取消当前爬取任务的后台消息。
export interface CancelCrawlCommand {
// 消息动作类型:请求 background 标记任务取消并关闭爬取窗口。
action: 'CANCEL_CRAWL';
}
// Background message that resumes the currently paused crawl task.
export interface ResumeCrawlCommand {
// Message action: the user has dealt with the login/captcha, so the background may retry the current step.
action: 'RESUME_CRAWL';
}
// Background message that cancels the automatic window close at a terminal state (keeps the window open).
export interface CancelAutocloseCommand {
// Message action: the user clicked "keep open" in the overlay, so the background must not auto-close the crawl window.
action: 'CANCEL_AUTOCLOSE';
}
// Background message that clears the current crawl task snapshot (used by the popup's Dismiss/Close).
export interface DismissCrawlCommand {
// Message action: clears crawlTaskState so the popup returns to idle.
action: 'DISMISS_CRAWL';
}
// Union of every message type the popup / content script can send to the background.
export type BackgroundCommand =
| StartCrawlCommand
| GetCrawlStateCommand
| CancelCrawlCommand
| ResumeCrawlCommand
| CancelAutocloseCommand
| DismissCrawlCommand;
// background 统一响应结构。 // background 统一响应结构。
export interface BackgroundResponse<T = unknown> { export interface BackgroundResponse<T = unknown> {

View File

@@ -1,5 +1,4 @@
<script setup lang="ts"> <script setup lang="ts">
import { onBeforeUnmount} from 'vue';
import {platformConfigs} from '@/config/platforms'; import {platformConfigs} from '@/config/platforms';
import {formatSeconds} from '@/shared/time_format'; import {formatSeconds} from '@/shared/time_format';
import {useLogin} from './hook/use-login'; import {useLogin} from './hook/use-login';
@@ -17,6 +16,8 @@ const {
handleCancelCrawl, handleCancelCrawl,
handleResumeCrawl, handleResumeCrawl,
} = useScan(); } = useScan();
console.log(crawlState.value)
/** 从扩展 manifest 读取版本号(兜底 `0.0.0`)。 */ /** 从扩展 manifest 读取版本号(兜底 `0.0.0`)。 */
const manifestVersion = (() => { const manifestVersion = (() => {
@@ -38,39 +39,16 @@ async function focusCrawlWindow(): Promise<void> {
} }
} }
let cancelConfirmTimer: number | null = null;
/** “Cancel” 二次确认:第一次点击变成 `Cancel?`,再次点击才真正取消。 */ /**
* 取消
*/
function requestCancel(): void { function requestCancel(): void {
const btn = document.getElementById('popup-cancel-btn') as HTMLButtonElement | null; crawlState.value = null
if (!btn) { handleCancelCrawl()
void handleCancelCrawl();
return;
}
if (btn.dataset.confirming === '1') {
btn.dataset.confirming = '0';
btn.textContent = 'Cancel';
if (cancelConfirmTimer) window.clearTimeout(cancelConfirmTimer);
cancelConfirmTimer = null;
void handleCancelCrawl();
return;
}
btn.dataset.confirming = '1';
btn.textContent = 'Cancel?';
cancelConfirmTimer = window.setTimeout(() => {
btn.dataset.confirming = '0';
btn.textContent = 'Cancel';
cancelConfirmTimer = null;
}, 3000);
} }
onBeforeUnmount(() => {
/** 组件销毁前清理定时器,避免异步回调触发在已卸载的视图上。 */
if (cancelConfirmTimer) window.clearTimeout(cancelConfirmTimer);
cancelConfirmTimer = null;
});
</script> </script>
<template> <template>
@@ -93,6 +71,7 @@ onBeforeUnmount(() => {
<template v-else> <template v-else>
<!-- 未开始--> <!-- 未开始-->
<template v-if="crawlState == null"> <template v-if="crawlState == null">
<label class="platform-select"> <label class="platform-select">
<span class="account">平台选择</span> <span class="account">平台选择</span>
<select v-model="selectedPlatformId" <select v-model="selectedPlatformId"

View File

@@ -5,8 +5,6 @@ import {sendBackgroundMessage} from '@/shared/message';
/** 用于同步爬取任务状态的 `chrome.storage.local` key。 */ /** 用于同步爬取任务状态的 `chrome.storage.local` key。 */
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
/** 会持续刷新计时器的任务状态集合。 */
const ACTIVE_STATUSES = new Set(['running', 'paused']);
/** /**
* Popup 内的爬取状态与操作集合。 * Popup 内的爬取状态与操作集合。
@@ -29,7 +27,9 @@ export const useScan = () => {
let timer: number | undefined; let timer: number | undefined;
/** 启动新的爬取任务(使用当前选择的平台)。 */ /**
* 启动新的爬取任务
*/
const handleScan = async () => { const handleScan = async () => {
if (isScanning.value) { if (isScanning.value) {
return; return;
@@ -38,8 +38,6 @@ export const useScan = () => {
isScanning.value = true; isScanning.value = true;
try { try {
ensureElapsedTimer();
const response = await sendBackgroundMessage<CrawlTaskState>({ const response = await sendBackgroundMessage<CrawlTaskState>({
action: 'START_CRAWL', action: 'START_CRAWL',
payload: {platformId: selectedPlatformId.value}, payload: {platformId: selectedPlatformId.value},
@@ -94,49 +92,26 @@ export const useScan = () => {
await refreshCrawlState(); await refreshCrawlState();
}; };
/** 应用任务状态:刷新 elapsed并根据状态管理计时器的开启/关闭。 */ /**
* 设置状态值,并设置时间
*/
function syncCrawlState(state: CrawlTaskState | null) { function syncCrawlState(state: CrawlTaskState | null) {
crawlState.value = state; crawlState.value = state;
updateSeconds(); startElapsedTimer()
if (state && ACTIVE_STATUSES.has(state.status)) {
ensureElapsedTimer();
return;
}
clearElapsedTimer();
} }
/** 确保 1 秒一次的计时器正在运行。 */ /**
function ensureElapsedTimer() { * 启动定时器
if (timer !== undefined) { */
function startElapsedTimer() {
if (crawlState.value === null || timer) {
return; return;
} }
timer = window.setInterval(() => { timer = window.setInterval(() => {
updateSeconds(); elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value!.startedAt) / 1000));
}, 1000); }, 1000);
} }
/** 停止计时器(如果存在)。 */
function clearElapsedTimer() {
if (timer === undefined) {
return;
}
window.clearInterval(timer);
timer = undefined;
}
/** 根据任务 `startedAt` 更新时间(秒)。 */
function updateSeconds() {
if (!crawlState.value) {
elapsedSeconds.value = 0;
return;
}
elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000));
}
/** 从 background 拉取最新任务状态。 */ /** 从 background 拉取最新任务状态。 */
async function refreshCrawlState() { async function refreshCrawlState() {
@@ -173,7 +148,7 @@ export const useScan = () => {
onUnmounted(() => { onUnmounted(() => {
/** 清理计时器 + 取消订阅 storage 事件。 */ /** 清理计时器 + 取消订阅 storage 事件。 */
clearElapsedTimer(); clearInterval(timer);
if (typeof chrome !== 'undefined' && chrome.storage?.onChanged) { if (typeof chrome !== 'undefined' && chrome.storage?.onChanged) {
chrome.storage.onChanged.removeListener(handleStorageChanged); chrome.storage.onChanged.removeListener(handleStorageChanged);

View File

@@ -1,9 +1,18 @@
export type MessageAction = export type MessageAction =
/** 获取当前爬取任务的状态*/
| 'GET_CRAWL_STATE' | 'GET_CRAWL_STATE'
/** 启动一个新的爬取任务 */
| 'START_CRAWL' | 'START_CRAWL'
/** 彻底取消并停止当前的爬取任务 */
| 'CANCEL_CRAWL' | 'CANCEL_CRAWL'
/** 恢复之前被暂停或因中断而停止的爬取任务 */
| 'RESUME_CRAWL' | 'RESUME_CRAWL'
| 'CANCEL_AUTOCLOSE'
/** 忽略/关闭当前爬取任务的 UI 提示或通知(通常指任务结束后清理界面) */
| 'DISMISS_CRAWL'; | 'DISMISS_CRAWL';
interface BackgroundMessage<T = unknown> { interface BackgroundMessage<T = unknown> {
@@ -18,12 +27,15 @@ interface BackgroundResponse<T = unknown> {
} }
/** /**
* Send a command to the background service worker. * 发送消息给服务
*/ */
export function sendBackgroundMessage<T>(data: BackgroundMessage): Promise<BackgroundResponse<T>> { export function sendBackgroundMessage<T>(data: BackgroundMessage): Promise<BackgroundResponse<T>> {
if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) { if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) {
return Promise.resolve({ ok: true, data: null }); return Promise.resolve({ok: true, data: null});
} }
return chrome.runtime.sendMessage(data); return chrome.runtime.sendMessage(data);
} }
//接受

34
src/shared/tab.ts Normal file
View File

@@ -0,0 +1,34 @@
/**
 * Actions the background can send to a web page (tab).
 */
export type TabAction =
/** Task state update (progress, status change, etc.). */
| 'CRAWL_STATE_UPDATE'
/** The task hit an error. */
| 'CRAWL_ERROR'
/** The task completed. */
| 'CRAWL_COMPLETED';
/**
 * Shape of a message sent from the background to a web page (tab):
 * an action tag plus an optional payload of type `T`.
 */
interface TabMessage<T = unknown> {
action: TabAction;
payload?: T;
}
/**
 * Sends a message to a specific tab (called from the background).
 *
 * Does nothing when the `chrome` global or `chrome.tabs.sendMessage` is
 * unavailable. A rejected send is logged with `console.warn` and not
 * propagated to the caller.
 *
 * @param tabId - Id of the tab to send to.
 * @param action - Action tag of the message.
 * @param payload - Optional data to attach to the message.
 */
export function sendTabMessage<T>(tabId: number, action: TabAction, payload?: T): void {
    const canSend = typeof chrome !== 'undefined' && Boolean(chrome.tabs?.sendMessage);
    if (!canSend) {
        return;
    }

    const outgoing: TabMessage<T> = {action, payload};

    // A rejection here usually means the tab was closed or reloaded, which is expected.
    const logFailure = (err: unknown): void => {
        console.warn(`[Message] Failed to send ${action} to tab ${tabId}:`, err);
    };

    chrome.tabs.sendMessage(tabId, outgoing).catch(logFailure);
}