diff --git a/src/background/index.ts b/src/background/index.ts index abbb7c0..8134da6 100644 --- a/src/background/index.ts +++ b/src/background/index.ts @@ -1,40 +1,66 @@ -import { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service'; -import { broadcastCrawlStorageChange, handleExternalConnect, handleExternalMessage } from './service/externalBridge'; -import type { BackgroundCommand } from './types'; -import { cancelStaleCrawlWhenWindowMissing } from './service/crawlTask'; -import { getCrawlTaskState } from './service/taskState'; +import {broadcastCrawlStorageChange, handleExternalConnect, handleExternalMessage} from './service/externalBridge'; +import {MessageAction} from "@/shared/message"; +import {cancelCrawl, startCrawl} from "./task/crawlTask"; +import {getCrawlTaskState} from "./task/taskState"; chrome.runtime.onInstalled.addListener(() => { - void handleInstalled(); }); chrome.runtime.onStartup.addListener(() => { - void handleStartup(); }); -chrome.runtime.onMessage.addListener((message: BackgroundCommand | { action?: string }, sender, sendResponse) => { - if (message && typeof message === 'object' && message.action === 'GET_CRAWL_STATE_FOR_TAB') { - void (async () => { - await cancelStaleCrawlWhenWindowMissing(); - const state = await getCrawlTaskState(); - const tabId = sender.tab?.id; - if (state && typeof tabId === 'number' && state.tabId === tabId) { - sendResponse({ ok: true, data: state }); - return; - } - sendResponse({ ok: true, data: null }); - })(); - return true; - } +/** + * 接受popup的指令 + */ +chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => { + // 1. 统一提取 action 和 payload + const action = message.action as MessageAction; + const payload = message.payload; + + // 2. 使用一个异步立即执行函数来处理逻辑 + (async () => { + try { + let resultData: any = null; + + // 3. 根据 action 分发任务 + switch (action) { + case "START_CRAWL": + resultData = await startCrawl(payload.platformId); + break; + + case "GET_CRAWL_STATE": + resultData = await getCrawlTaskState(); + break; + + case "CANCEL_CRAWL": + await cancelCrawl() + break; + default: + throw new Error(`未知的后台指令: ${action}`); + } + + sendResponse({ok: true, data: resultData}); + + } catch (error: any) { + console.error(`[Background] Action ${action} failed:`, error); + sendResponse({ok: false, error: error.message || 'Unknown error'}); + } + })(); - void handleBackgroundMessage(message as BackgroundCommand, sendResponse); return true; }); +/** + * 监听窗口关闭: + * 用户手动关掉爬虫窗口时,自动触发任务清理逻辑(取消任务、停掉后台循环)。 + */ chrome.windows.onRemoved.addListener((windowId) => { - void handleWindowRemoved(windowId); }); +/** + * 接收外部网页消息: + * 允许在 manifest.json 中授权的官网域名(如 your-app.com)直接调起插件功能。 + */ chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) => { void handleExternalMessage(message).then(sendResponse).catch((error: unknown) => { sendResponse({ @@ -43,27 +69,19 @@ chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) => }); }); - return true; -}); - -chrome.runtime.onConnectExternal.addListener(handleExternalConnect); - -chrome.storage.onChanged.addListener((changes, areaName) => { - broadcastCrawlStorageChange(changes, areaName); + return true; // 保持异步响应通道开启 }); /** - * Wrap background command handling so async errors can still be returned to callers. + * 处理外部长连接: + * 用于官网页面与插件后台建立持久通信,实现实时的数据流同步。 */ -async function handleBackgroundMessage( - message: BackgroundCommand, - sendResponse: (response?: unknown) => void, -) { - try { - const result = await handleBackgroundCommand(message); - sendResponse(result); - } catch (error: unknown) { - const messageText = error instanceof Error ? error.message : 'Unknown error'; - sendResponse({ ok: false, data: null, error: messageText }); - } -} +chrome.runtime.onConnectExternal.addListener(handleExternalConnect); + +/** + * 监听存储变化: + * 只要插件的本地数据(storage)发生改动,就立即广播给所有 UI(Popup/网页),实现进度条同步。 + */ +chrome.storage.onChanged.addListener((changes, areaName) => { + broadcastCrawlStorageChange(changes, areaName); +}); \ No newline at end of file diff --git a/src/background/service.ts b/src/background/service.ts deleted file mode 100644 index 17330cd..0000000 --- a/src/background/service.ts +++ /dev/null @@ -1 +0,0 @@ -export { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service/lifecycle'; diff --git a/src/background/service/crawlTask.ts b/src/background/service/crawlTask.ts deleted file mode 100644 index c53330b..0000000 --- a/src/background/service/crawlTask.ts +++ /dev/null @@ -1,671 +0,0 @@ -import { getPlatformById } from '@/config/platforms'; -import type { CrawlPauseInfo, CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types'; -import type { DomScrapeResult } from '../domScraper'; -import type { CrawlStateResponse } from '../types'; -import { clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState'; - -interface PageRunnerResponse { - ok: boolean; - data?: DomScrapeResult | null; - interrupt?: CrawlPauseInfo; - error?: string; -} - -const activeCrawlControllers = new Map(); -const autoCloseTimers = new Map(); -const DEFAULT_AUTOCLOSE_DELAY_MS = 10_000; - -/** - * 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 - */ -export async function startCrawl(platformId: string): Promise { - const platform = getPlatformById(platformId); - const currentState = await getCrawlTaskState(); - - if (currentState && ['running', 'paused'].includes(currentState.status)) { - return { ok: true, data: currentState }; - } - - if (!platform) { - return { ok: false, error: '平台配置不存在' }; - } - - const firstStep = platform.steps[0]; - - if (!firstStep) { - return { ok: false, error: '平台未配置爬取步骤' }; - } - - const startedAt = Date.now(); - const nextState: CrawlTaskState = { - id: `${platform.id}-${startedAt}`, - platformId: platform.id, - platformName: platform.name, - startedAt, - status: 'running', - currentStepIndex: 0, - steps: platform.steps.map((step, index) => ({ - name: step.name, - uniqueKey: step.uniqueKey, - status: index === 0 ? 'running' : 'pending', - })), - }; - - await setCrawlTaskState(nextState); - - try { - const windowInfo = await createCrawlWindow(firstStep.url); - let tabId: number | undefined; - try { - if (windowInfo.id) { - tabId = await getWindowActiveTabId(windowInfo.id); - } - } catch { - tabId = undefined; - } - - const stateWithWindow = { ...nextState, windowId: windowInfo.id, tabId }; - const controller = new AbortController(); - - await setCrawlTaskState(stateWithWindow); - activeCrawlControllers.set(stateWithWindow.id, controller); - void runCrawlSteps(platform, stateWithWindow, controller.signal).finally(() => { - activeCrawlControllers.delete(stateWithWindow.id); - }); - - return { ok: true, data: stateWithWindow }; - } catch (error: unknown) { - const failedState: CrawlTaskState = { - ...nextState, - status: 'failed', - steps: nextState.steps.map((step, index) => - index === 0 ? { ...step, status: 'failed', message: '打开平台窗口失败' } : step, - ), - }; - - await setCrawlTaskState(failedState); - return { ok: false, data: failedState, error: error instanceof Error ? error.message : '打开平台窗口失败' }; - } -} - -/** - * 取消当前爬取任务,并尝试关闭正在爬取的平台窗口。 - */ -export async function cancelCrawl(): Promise { - const state = await getCrawlTaskState(); - - if (!state) { - return { ok: true, data: null }; - } - - abortActiveCrawl(state.id); - clearAutoCloseTimer(state.id); - - const canceledState: CrawlTaskState = { - ...state, - status: 'canceled', - autocloseAt: state.windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null, - steps: state.steps.map((step, index) => - index === state.currentStepIndex && step.status === 'running' - ? { ...step, status: 'failed', message: '用户取消爬取任务' } - : step, - ), - }; - - await setCrawlTaskState(canceledState); - - if (canceledState.windowId) { - scheduleAutoCloseWindow(canceledState.id, canceledState.windowId, canceledState.autocloseAt); - } - - return { ok: true, data: canceledState }; -} - -/** - * 用户处理完登录、验证码或风控后,恢复当前暂停中的爬取任务。 - */ -export async function resumeCrawl(): Promise { - const state = await getCrawlTaskState(); - - if (!state || state.status !== 'paused') { - return { ok: true, data: state }; - } - - const resumedState: CrawlTaskState = { - ...state, - status: 'running', - pause: undefined, - steps: state.steps.map((step, index) => - index === state.currentStepIndex ? { ...step, status: 'running', message: undefined } : step, - ), - }; - - await setCrawlTaskState(resumedState); - return { ok: true, data: resumedState }; -} - -/** - * 窗口关闭后,如果关闭的是爬取窗口,就把当前任务标记为取消。 - */ -export async function cancelCrawlWhenWindowRemoved(windowId: number): Promise { - const state = await getCrawlTaskState(); - - if (state?.windowId !== windowId || !['running', 'paused'].includes(state.status)) { - return; - } - - abortActiveCrawl(state.id); - clearAutoCloseTimer(state.id); - - await setCrawlTaskState({ - ...state, - status: 'canceled', - autocloseAt: null, - steps: state.steps.map((step, index) => - index === state.currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭' } : step, - ), - }); -} - -export async function cancelStaleCrawlWhenWindowMissing(): Promise { - const state = await getCrawlTaskState(); - - if (!state || !['running', 'paused'].includes(state.status)) { - return; - } - - const isWindowAlive = state.windowId ? await hasWindow(state.windowId) : false; - - if (isWindowAlive) { - return; - } - - abortActiveCrawl(state.id); - clearAutoCloseTimer(state.id); - - await setCrawlTaskState({ - ...state, - status: 'canceled', - autocloseAt: null, - steps: state.steps.map((step, index) => - index === state.currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭,任务已取消' } : step, - ), - }); -} - -function abortActiveCrawl(taskId: string): void { - activeCrawlControllers.get(taskId)?.abort(); -} - -/** - * 取消终态自动关窗(overlay“保持打开”)。 - */ -export async function cancelAutoclose(): Promise { - const state = await getCrawlTaskState(); - - if (!state) { - return { ok: true, data: null }; - } - - clearAutoCloseTimer(state.id); - - const nextState: CrawlTaskState = { - ...state, - autocloseAt: null, - }; - - await setCrawlTaskState(nextState); - return { ok: true, data: nextState }; -} - -/** - * 清理当前任务快照(popup 的 Close/Dismiss)。不强制关窗,只影响 UI。 - */ -export async function dismissCrawl(): Promise { - const state = await getCrawlTaskState(); - - if (!state) { - return { ok: true, data: null }; - } - - clearAutoCloseTimer(state.id); - await clearCrawlTaskState(); - return { ok: true, data: null }; -} - -function scheduleAutoCloseWindow(taskId: string, windowId: number, autocloseAt?: number | null): void { - if (!autocloseAt) { - return; - } - - clearAutoCloseTimer(taskId); - - const delayMs = Math.max(0, autocloseAt - Date.now()); - const timer = setTimeout(() => { - autoCloseTimers.delete(taskId); - chrome.windows.remove(windowId).catch(() => undefined); - }, delayMs) as unknown as number; - - autoCloseTimers.set(taskId, timer); -} - -function clearAutoCloseTimer(taskId: string): void { - const timer = autoCloseTimers.get(taskId); - if (timer === undefined) { - return; - } - - clearTimeout(timer); - autoCloseTimers.delete(taskId); -} - -/** - * 按平台 steps 顺序执行页面跳转、DOM 等待、字段抓取和进度更新。 - */ -async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState, signal: AbortSignal): Promise { - if (!initialState.windowId) { - return; - } - - try { - for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) { - const step = platform.steps[stepIndex]; - let shouldRetryStep = true; - - while (shouldRetryStep) { - const currentState = await getCrawlTaskState(); - - if (signal.aborted || currentState?.id !== initialState.id || currentState.status === 'canceled') { - return; - } - - if (currentState.status === 'paused') { - const resumed = await waitUntilResumed(initialState.id, signal); - - if (!resumed) { - return; - } - } - - await updateCrawlTaskState(initialState.id, (state) => ({ - ...state, - currentStepIndex: stepIndex, - status: 'running', - pause: undefined, - steps: state.steps.map((item, index) => ({ - ...item, - status: index === stepIndex ? 'running' : item.status, - message: index === stepIndex ? undefined : item.message, - })), - })); - - const tabId = await getWindowActiveTabId(initialState.windowId); - await chrome.tabs.update(tabId, { url: step.url, active: true }); - const tabLoaded = await waitForTabLoaded(tabId, signal); - - if (!tabLoaded || signal.aborted) { - return; - } - - const response = await scrapeStepInContent(tabId, step, signal); - - if (signal.aborted) { - return; - } - - if (response.interrupt) { - await pauseForInterrupt(initialState.id, stepIndex, response.interrupt); - const resumed = await waitUntilResumed(initialState.id, signal); - - if (!resumed) { - return; - } - - continue; - } - - if (!response.ok) { - const message = response.error ?? '页面抓取失败'; - - await updateCrawlTaskState(initialState.id, (state) => ({ - ...state, - status: 'failed', - currentStepIndex: stepIndex, - steps: state.steps.map((item, index) => - index === stepIndex ? { ...item, status: 'failed', message } : item, - ), - })); - return; - } - - console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, response.data); - - await updateCrawlTaskState(initialState.id, (state) => ({ - ...state, - steps: state.steps.map((item, index) => - index === stepIndex - ? { ...item, status: 'success', message: undefined, result: response.data } - : item, - ), - })); - - shouldRetryStep = false; - } - } - - const autocloseAt = initialState.windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null; - - await updateCrawlTaskState(initialState.id, (state) => ({ - ...state, - status: 'completed', - autocloseAt, - steps: state.steps.map((step) => (step.status === 'running' ? { ...step, status: 'success' } : step)), - })); - - if (initialState.windowId) { - scheduleAutoCloseWindow(initialState.id, initialState.windowId, autocloseAt); - } - } catch (error: unknown) { - console.error('[crawl] 执行失败', error); - - const autocloseAt = initialState.windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null; - - await updateCrawlTaskState(initialState.id, (state) => ({ - ...state, - status: 'failed', - autocloseAt, - steps: state.steps.map((step, index) => - index === state.currentStepIndex && step.status === 'running' - ? { ...step, status: 'failed', message: error instanceof Error ? error.message : '爬取执行失败' } - : step, - ), - })); - - if (initialState.windowId) { - scheduleAutoCloseWindow(initialState.id, initialState.windowId, autocloseAt); - } - } -} - -/** - * 获取指定窗口中的活动 tab ID。 - */ -async function getWindowActiveTabId(windowId: number): Promise { - const tabs = await chrome.tabs.query({ windowId, active: true }); - const tab = tabs[0]; - - if (!tab?.id) { - throw new Error('未找到爬取窗口中的标签页'); - } - - return tab.id; -} - -/** - * 让 content script 直接在目标页面执行检查和抓取。 - */ -async function scrapeStepInContent( - tabId: number, - step: PlatformStepConfig, - signal: AbortSignal, -): Promise { - const startedAt = Date.now(); - - while (Date.now() - startedAt < 20000) { - if (signal.aborted) { - return { ok: false, error: 'canceled' }; - } - - const response = await sendPageRunnerMessage(tabId, { - action: 'SCRAPE_STEP', - payload: { - fields: step.fields, - checkSelector: step.checkSelector, - }, - }, signal); - - if (response.ok || response.interrupt || !isPageRunnerNotReadyError(response.error)) { - return response; - } - - if (!(await sleep(500, signal))) { - return { ok: false, error: 'canceled' }; - } - } - - return { ok: false, error: '页面脚本未响应,请刷新扩展后重试' }; -} - -/** - * 给目标页的 content script 发送页面执行消息。 - */ -async function sendPageRunnerMessage(tabId: number, message: unknown, signal: AbortSignal): Promise { - if (signal.aborted) { - return { ok: false, error: 'canceled' }; - } - - return raceWithAbort(sendPageRunnerMessageOnce(tabId, message), signal); -} - -async function sendPageRunnerMessageOnce(tabId: number, message: unknown): Promise { - try { - const response = await chrome.tabs.sendMessage(tabId, message); - - if (response && typeof response === 'object') { - return response as PageRunnerResponse; - } - - return { ok: false, error: '页面脚本返回为空' }; - } catch (error: unknown) { - return { ok: false, error: error instanceof Error ? error.message : String(error) }; - } -} - -/** - * 判断错误是否只是 content script 尚未注入完成。 - */ -function isPageRunnerNotReadyError(error?: string): boolean { - if (!error) { - return false; - } - - return /receiving end does not exist|could not establish connection|no receiving end/i.test(error); -} - -/** - * 因登录、验证码或页面异常暂停当前任务。 - */ -async function pauseForInterrupt(taskId: string, stepIndex: number, interrupt: CrawlPauseInfo): Promise { - await updateCrawlTaskState(taskId, (state) => ({ - ...state, - status: 'paused', - pause: interrupt, - currentStepIndex: stepIndex, - steps: state.steps.map((step, index) => - index === stepIndex ? { ...step, status: 'running', message: interrupt.message } : step, - ), - })); -} - -/** - * 暂停后等待用户点继续或取消。 - */ -async function waitUntilResumed(taskId: string, signal: AbortSignal): Promise { - while (true) { - if (signal.aborted) { - return false; - } - - const state = await getCrawlTaskState(); - - if (!state || state.id !== taskId || state.status === 'canceled' || state.status === 'failed') { - return false; - } - - if (state.status === 'running') { - return true; - } - - if (!(await sleep(1000, signal))) { - return false; - } - } -} - -/** - * 打开一个普通浏览器窗口承载目标平台页面。 - */ -function createCrawlWindow(url: string): Promise { - return new Promise((resolve, reject) => { - chrome.windows.create( - { - url, - type: 'popup', - focused: false, - state: 'normal', - width: 1280, - height: 900, - }, - (windowInfo) => { - const runtimeError = chrome.runtime.lastError; - - if (runtimeError) { - reject(new Error(runtimeError.message)); - return; - } - - if (!windowInfo?.id) { - reject(new Error('窗口创建失败')); - return; - } - - void chrome.windows.update(windowInfo.id, { drawAttention: true }).catch(() => undefined); - resolve(windowInfo); - }, - ); - }); -} - -/** - * 等待 tab 完成页面加载。 - */ -function waitForTabLoaded(tabId: number, signal: AbortSignal): Promise { - return new Promise((resolve) => { - if (signal.aborted) { - resolve(false); - return; - } - - const timeout = globalThis.setTimeout(() => { - cleanup(); - resolve(true); - }, 15000); - - function cleanup() { - globalThis.clearTimeout(timeout); - chrome.tabs.onUpdated.removeListener(handleUpdated); - signal.removeEventListener('abort', handleAbort); - } - - function handleAbort() { - cleanup(); - resolve(false); - } - - function handleUpdated(updatedTabId: number, changeInfo: { status?: string }) { - if (updatedTabId === tabId && changeInfo.status === 'complete') { - cleanup(); - resolve(true); - } - } - - chrome.tabs.onUpdated.addListener(handleUpdated); - signal.addEventListener('abort', handleAbort, { once: true }); - }); -} - -/** - * 简单等待工具。 - */ -async function hasWindow(windowId: number): Promise { - try { - await chrome.windows.get(windowId); - return true; - } catch { - return false; - } -} - -function raceWithAbort(promise: Promise, signal: AbortSignal): Promise { - return new Promise((resolve, reject) => { - if (signal.aborted) { - resolve({ ok: false, error: 'canceled' } as T); - return; - } - - let isSettled = false; - - function cleanup() { - signal.removeEventListener('abort', handleAbort); - } - - function handleAbort() { - if (isSettled) { - return; - } - - isSettled = true; - cleanup(); - resolve({ ok: false, error: 'canceled' } as T); - } - - signal.addEventListener('abort', handleAbort, { once: true }); - - promise.then( - (value) => { - if (isSettled) { - return; - } - - isSettled = true; - cleanup(); - resolve(value); - }, - (error) => { - if (isSettled) { - return; - } - - isSettled = true; - cleanup(); - reject(error); - }, - ); - }); -} - -function sleep(ms: number, signal?: AbortSignal): Promise { - return new Promise((resolve) => { - if (signal?.aborted) { - resolve(false); - return; - } - - const timeout = globalThis.setTimeout(() => { - cleanup(); - resolve(true); - }, ms); - - function cleanup() { - globalThis.clearTimeout(timeout); - signal?.removeEventListener('abort', handleAbort); - } - - function handleAbort() { - cleanup(); - resolve(false); - } - - signal?.addEventListener('abort', handleAbort, { once: true }); - }); -} diff --git a/src/background/service/externalBridge.ts b/src/background/service/externalBridge.ts index 57bb06d..f336ed0 100644 --- a/src/background/service/externalBridge.ts +++ b/src/background/service/externalBridge.ts @@ -1,7 +1,7 @@ import { platformConfigs } from '@/config/platforms'; import type { CrawlTaskState } from '@/types'; -import { cancelCrawl, startCrawl } from './crawlTask'; -import { getCrawlTaskState } from './taskState'; +import {getCrawlTaskState} from "@/background/task/taskState"; +import {cancelCrawl, startCrawl} from "@/background/task/crawlTask"; const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; const EXTERNAL_PORT_NAME = 'DIANSHAN_CRAWL'; diff --git a/src/background/service/lifecycle.ts b/src/background/service/lifecycle.ts deleted file mode 100644 index 32910dc..0000000 --- a/src/background/service/lifecycle.ts +++ /dev/null @@ -1,59 +0,0 @@ -import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types'; -import { - cancelAutoclose, - cancelCrawl, - cancelCrawlWhenWindowRemoved, - cancelStaleCrawlWhenWindowMissing, - dismissCrawl, - resumeCrawl, - startCrawl, -} from './crawlTask'; -import { getCrawlTaskState } from './taskState'; - -/** - * 扩展安装完成时的初始化入口,当前仅保留日志方便调试生命周期。 - */ -export async function handleInstalled(): Promise { - console.log('[background] installed'); -} - -/** - * 浏览器启动并加载扩展时的初始化入口,当前仅保留日志方便调试生命周期。 - */ -export async function handleStartup(): Promise { - console.log('[background] startup'); - await cancelStaleCrawlWhenWindowMissing(); -} - -/** - * 监听窗口关闭事件;如果关闭的是爬取窗口,就把当前任务标记为取消。 - */ -export async function handleWindowRemoved(windowId: number): Promise { - console.log('[background] window removed', windowId); - await cancelCrawlWhenWindowRemoved(windowId); -} - -/** - * 根据 popup/content 发来的 action 分发到对应的后台处理函数。 - */ -export async function handleBackgroundCommand( - message: BackgroundCommand, -): Promise { - switch (message.action) { - case 'START_CRAWL': - return startCrawl(message.payload.platformId); - case 'GET_CRAWL_STATE': - await cancelStaleCrawlWhenWindowMissing(); - return { ok: true, data: await getCrawlTaskState() }; - case 'CANCEL_CRAWL': - return cancelCrawl(); - case 'RESUME_CRAWL': - return resumeCrawl(); - case 'CANCEL_AUTOCLOSE': - return cancelAutoclose(); - case 'DISMISS_CRAWL': - return dismissCrawl(); - default: - return { ok: false, error: '未知的后台指令' }; - } -} diff --git a/src/background/service/taskState.ts b/src/background/service/taskState.ts deleted file mode 100644 index 2bc8fb9..0000000 --- a/src/background/service/taskState.ts +++ /dev/null @@ -1,47 +0,0 @@ -import type { CrawlTaskState } from '@/types'; - -const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; - -export async function getCrawlTaskState(): Promise { - const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY); - const state = result[CRAWL_TASK_STORAGE_KEY]; - return isCrawlTaskState(state) ? state : null; -} - -export async function setCrawlTaskState(state: CrawlTaskState): Promise { - await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state }); - broadcastToCrawlTab(state); -} - -export async function clearCrawlTaskState(): Promise { - await chrome.storage.local.remove(CRAWL_TASK_STORAGE_KEY); -} - -export async function updateCrawlTaskState( - taskId: string, - updater: (state: CrawlTaskState) => CrawlTaskState, -): Promise { - const state = await getCrawlTaskState(); - - if (!state || state.id !== taskId || state.status === 'canceled') { - return; - } - - await setCrawlTaskState(updater(state)); -} - -function broadcastToCrawlTab(state: CrawlTaskState): void { - if (!state.tabId) { - return; - } - - try { - void chrome.tabs.sendMessage(state.tabId, { type: 'crawl_state_update', state }).catch(() => undefined); - } catch { - // ignore - } -} - -function isCrawlTaskState(value: unknown): value is CrawlTaskState { - return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value; -} diff --git a/src/background/task/crawlTask.ts b/src/background/task/crawlTask.ts new file mode 100644 index 0000000..2e3c53e --- /dev/null +++ b/src/background/task/crawlTask.ts @@ -0,0 +1,147 @@ +import {getPlatformById} from "@/config/platforms"; +import {CrawlTaskState, PlatformStepConfig} from "@/types"; +import {openSingleTabWindow, scrapeStepInContent, sleep, waitForTabLoaded} from "@/background/task/helper"; +import {clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState} from "./taskState"; + + +const activeCrawlControllers = new Map(); + +/** + * 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 + * @param platformId 平台id + */ +export async function startCrawl(platformId: string): Promise { + const platform = getPlatformById(platformId); + if (!platform) { + return {error: '平台配置不存在'}; + } + + //打开窗口 + let windowInfo = await openSingleTabWindow(platform.steps[0].url) + //初始化数据 + const startedAt = Date.now(); + const nextState: CrawlTaskState = { + id: `${platform.id}-${startedAt}`, + windowId: windowInfo.windowId, + tabId: windowInfo.tabId, + platformId: platform.id, + platformName: platform.name, + startedAt, + status: 'running', + currentStepIndex: 0, + steps: platform.steps.map((item, index) => { + return { + name: item.name, + uniqueKey: item.uniqueKey, + status: index === 0 ? 'running' : 'pending', + } + }) + }; + + await setCrawlTaskState(nextState); + + //写入任务,用于取消 + const controller = new AbortController(); + activeCrawlControllers.set(nextState.id, controller); + //启动 + void runCrawlSteps(nextState.id, nextState.tabId!, platform.steps, controller.signal).finally(() => { + activeCrawlControllers.delete(nextState.id); + }); + //自动开始爬取 + return nextState +} + +/** + * 按平台 steps 顺序执行页面跳转、DOM 等待、字段抓取和进度更新。 + * @param steps 平台步骤配置 + * @param signal 中断信号 + */ +/** + * 执行器 + */ +async function runCrawlSteps(taskId: string, tabId: number, steps: PlatformStepConfig[], signal: AbortSignal) { + for (let i = 0; i < steps.length; i += 1) { + const step = steps[i]; + let shouldRetryStep = true; + + // 【修改 2】进入新步骤,立刻更新状态机里的索引和步骤状态 + await updateCrawlTaskState(taskId, s => ({ + ...s, + currentStepIndex: i, + steps: s.steps.map((stepItem, idx) => ({ + ...stepItem, + status: idx === i ? 'running' : stepItem.status + })) + })); + + while (shouldRetryStep) { + if (signal.aborted) return; + + // 1. 等待网页加载 + await chrome.tabs.update(tabId, {url: step.url, active: true}); + const loaded = await waitForTabLoaded(tabId, signal); + if (!loaded) return; + + // 2. 检测撞盾/抓取 + const res: any = await scrapeStepInContent(tabId, step, signal); + if (signal.aborted) return; + + // 3. 处理中断(验证码等) + if (res.interrupt) { + await updateCrawlTaskState(taskId, s => ({...s, status: 'paused', pause: res.interrupt})); + + // 死等恢复 + while ((await getCrawlTaskState())?.status === 'paused') { + if (signal.aborted) return; + if (!(await sleep(1000, signal))) return; + } + continue; // 恢复后重新触发 while 循环(重刷页面) + } + + // 4. 处理结果 + if (res.ok) { + await updateCrawlTaskState(taskId, s => ({ + ...s, + steps: s.steps.map((item, idx) => + idx === i ? {...item, status: 'success', result: res.data} : item + ) + })); + shouldRetryStep = false; // 退出 while,准备进下一个 for 循环步骤 + } else { + // 抓取失败重试 + if (!(await sleep(2000, signal))) return; + } + } + } + + // 【修改 3】全部步骤完成,标记任务结束 + await updateCrawlTaskState(taskId, s => ({...s, status: 'completed'})); +} + + +/** + * 取消当前爬取任务,并尝试关闭正在爬取的平台窗口。 + */ +export async function cancelCrawl() { + const state = await getCrawlTaskState(); + + if (!state) return + + // 立即触发 Abort 信号,让脚本自动停止 + const controller = activeCrawlControllers.get(state.id); + if (controller) { + controller.abort(); + activeCrawlControllers.delete(state.id); + } + + //清楚缓存 + await clearCrawlTaskState(); + + //关闭窗口 + if (state.windowId) { + chrome.windows.remove(state.windowId).catch(() => { + }); + } + +} + diff --git a/src/background/task/helper.ts b/src/background/task/helper.ts new file mode 100644 index 0000000..a5d5b70 --- /dev/null +++ b/src/background/task/helper.ts @@ -0,0 +1,171 @@ +import {CrawlPauseInfo, PlatformStepConfig} from "@/types"; +import {DomScrapeResult} from "@/background/domScraper"; + +/** + * 打开一个纯净的单标签窗口,并提醒用户注意 + * @param url 目标网址 + */ +export async function openSingleTabWindow(url: string) { + return new Promise<{ windowId: number; tabId: number }>((resolve, reject) => { + chrome.windows.create({ + url, + type: 'popup', + width: 1260, + height: 900, + focused: true // 初始设为聚焦,方便窗口弹出 + }, (win) => { + // 1. 检查创建是否报错 + if (chrome.runtime.lastError) { + return reject(new Error(chrome.runtime.lastError.message)); + } + + if (win?.id && win.tabs?.[0]?.id) { + // 2. 让窗口在任务栏“闪烁”,提醒用户(比如处理登录或验证码) + // 使用 void 表示不等待结果,catch 防止窗口意外关闭导致崩溃 + void chrome.windows.update(win.id, { drawAttention: true }).catch(() => {}); + + // 3. 返回双 ID 供后续爬取逻辑使用 + resolve({ + windowId: win.id, + tabId: win.tabs[0].id + }); + } else { + reject(new Error('窗口初始化失败')); + } + }); + }); +} + + +/** + * 等待指定的标签页加载完成 + * @param tabId 标签页ID + * @param signal 中断信号 + */ +export function waitForTabLoaded(tabId: number, signal: AbortSignal): Promise { + return new Promise((resolve) => { + if (signal.aborted) { + resolve(false); + return; + } + + const timeout = globalThis.setTimeout(() => { + cleanup(); + resolve(true); + }, 15000); + + function cleanup() { + globalThis.clearTimeout(timeout); + chrome.tabs.onUpdated.removeListener(handleUpdated); + signal.removeEventListener('abort', handleAbort); + } + + function handleAbort() { + cleanup(); + resolve(false); + } + + function handleUpdated(updatedTabId: number, changeInfo: { status?: string }) { + if (updatedTabId === tabId && changeInfo.status === 'complete') { + cleanup(); + resolve(true); + } + } + + chrome.tabs.onUpdated.addListener(handleUpdated); + signal.addEventListener('abort', handleAbort, {once: true}); + }); +} + + +/** + * 让 content script 在目标页面执行抓取或探测 + */ +interface PageRunnerResponse { + ok: boolean; + data?: DomScrapeResult | null; + interrupt?: CrawlPauseInfo; + error?: string; +} +export async function scrapeStepInContent(tabId: number, step: PlatformStepConfig, signal: AbortSignal): Promise { + const startTime = Date.now(); + const TIMEOUT = 20000; // 最多等 20 秒 + + while (Date.now() - startTime < TIMEOUT) { + if (signal.aborted) return {ok: false, error: 'canceled'}; + + try { + // 给 Content Script 发消息 + const res: any = await chrome.tabs.sendMessage(tabId, { + action: 'SCRAPE_STEP', + payload: { + fields: step.fields, // 要抓哪些字段 + checkSelector: step.checkSelector // 用来检测是否“撞盾”的特征选择器 + } + }); + + // 情况 1:撞盾了(比如检测到了登录框、验证码) + // Content Script 发现特征后会返回 interrupt 对象 + if (res.interrupt) { + return res; + } + + // 情况 2:抓取成功 + if (res.ok) { + return res; + } + + // 情况 3:如果 res.ok 是 false 且没有 interrupt,说明页面还没渲染出来 + // 继续循环重试 + + } catch (err: any) { + // 特殊处理:如果报错是“接收端不存在”,说明 Content Script 还没加载完 + // 这属于正常情况,忽略它,等下一轮循环重试 + if (!err.message.includes('receiving end does not exist')) { + console.warn('通信异常:', err.message); + } + } + + // 等 500ms 再问下一次 + const canContinue = await sleep(500, signal); + if (!canContinue) break; + } + + return {ok: false, error: '页面响应超时,可能需要刷新'}; +} + +/** + * 延迟指定毫秒数,并支持随时中断 + * @param ms 延迟毫秒数 + * @param signal 中断信号 + * @returns {Promise} 返回 true 表示等完了,返回 false 表示被中断了 + */ +export function sleep(ms: number, signal?: AbortSignal): Promise { + return new Promise((resolve) => { + // 1. 如果信号已经中断了,直接返回 false + if (signal?.aborted) { + return resolve(false); + } + + // 2. 正常设置定时器 + const timer = setTimeout(() => { + cleanup(); + resolve(true); + }, ms); + + // 3. 定义清理逻辑 + const cleanup = () => { + clearTimeout(timer); + signal?.removeEventListener('abort', onAbort); + }; + + // 4. 监听中断事件 + const onAbort = () => { + cleanup(); + resolve(false); // 一旦中断,立刻返回 false + }; + + // 5. 注册监听(只监听一次) + signal?.addEventListener('abort', onAbort, {once: true}); + }); +} \ No newline at end of file diff --git a/src/background/task/taskState.ts b/src/background/task/taskState.ts new file mode 100644 index 0000000..e964949 --- /dev/null +++ b/src/background/task/taskState.ts @@ -0,0 +1,54 @@ +import type {CrawlTaskState} from '@/types'; +import {sendTabMessage} from "@/shared/tab"; + +const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; + +/** + * 从本地存储中获取当前的爬取任务状态 + * @returns {Promise} 返回任务状态对象,如果不存在或数据非法则返回 null + */ +export async function getCrawlTaskState(): Promise { + const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY); + const state = result[CRAWL_TASK_STORAGE_KEY]; + return (state as CrawlTaskState) || null; +} + +/** + * 设置并保存爬取任务状态,并同步广播给对应的标签页 + * @param {CrawlTaskState} state - 需要保存的新状态对象 + */ +export async function setCrawlTaskState(state: CrawlTaskState): Promise { + // 持久化到本地存储 + await chrome.storage.local.set({[CRAWL_TASK_STORAGE_KEY]: state}); + // 将更新后的状态发送给正在执行任务的标签页内容脚本 + if (!state.tabId) return + sendTabMessage(state.tabId, 'CRAWL_STATE_UPDATE', state) +} + +/** + * 从本地存储中清除当前的爬取任务状态(通常用于任务结束或彻底重置) + */ +export async function clearCrawlTaskState(): Promise { + await chrome.storage.local.remove(CRAWL_TASK_STORAGE_KEY); +} + +/** + * 局部更新爬取任务状态 + * 只有当任务 ID 匹配且任务未被取消时,才会执行更新逻辑 + * @param {string} taskId - 任务的唯一标识符 + * @param {(state: CrawlTaskState) => CrawlTaskState} updater - 接收旧状态并返回新状态的回调函数 + */ +export async function updateCrawlTaskState( + taskId: string, + updater: (state: CrawlTaskState) => CrawlTaskState, +): Promise { + const state = await getCrawlTaskState(); + + // 检查任务是否存在、ID 是否一致、以及任务是否已被标记为取消 + if (!state || state.id !== taskId || state.status === 'canceled') { + return; + } + + // 执行更新并保存 + await setCrawlTaskState(updater(state)); +} diff --git a/src/background/types.ts b/src/background/types.ts index 30c7d99..691df93 100644 --- a/src/background/types.ts +++ b/src/background/types.ts @@ -1,54 +1,5 @@ import type { CrawlTaskState } from '@/types'; -// 启动爬取任务的后台消息。 -export interface StartCrawlCommand { - // 消息动作类型:请求 background 创建爬取窗口并初始化任务状态。 - action: 'START_CRAWL'; - // 启动爬取所需参数。 - payload: { - // 当前要爬取的平台 ID,对应 config/platforms.ts 中的平台配置。 - platformId: string; - }; -} - -// 获取当前爬取任务状态的后台消息。 -export interface GetCrawlStateCommand { - // 消息动作类型:请求 background 返回当前任务快照。 - action: 'GET_CRAWL_STATE'; -} - -// 取消当前爬取任务的后台消息。 -export interface CancelCrawlCommand { - // 消息动作类型:请求 background 标记任务取消并关闭爬取窗口。 - action: 'CANCEL_CRAWL'; -} - -// 继续当前暂停中的爬取任务。 -export interface ResumeCrawlCommand { - // 消息动作类型:用户已处理登录/验证码,允许 background 继续重试当前步骤。 - action: 'RESUME_CRAWL'; -} - -// 取消终态自动关窗(保持窗口打开)的后台消息。 -export interface CancelAutocloseCommand { - // 消息动作类型:用户在 overlay 中点“保持打开”,阻止 background 自动关闭爬取窗口。 - action: 'CANCEL_AUTOCLOSE'; -} - -// 清理当前爬取任务快照(用于 popup 的 Dismiss/Close)。 -export interface DismissCrawlCommand { - // 消息动作类型:清空 crawlTaskState,让 popup 回到 idle。 - action: 'DISMISS_CRAWL'; -} - -// popup/content script 能发送给 background 的全部消息类型。 -export type BackgroundCommand = - | StartCrawlCommand - | GetCrawlStateCommand - | CancelCrawlCommand - | ResumeCrawlCommand - | CancelAutocloseCommand - | DismissCrawlCommand; // background 统一响应结构。 export interface BackgroundResponse { diff --git a/src/popup/App.vue b/src/popup/App.vue index 4f9b28f..649929a 100644 --- a/src/popup/App.vue +++ b/src/popup/App.vue @@ -1,5 +1,4 @@