diff --git a/src/background/domScraper.ts b/src/background/domScraper.ts index 7714c8a..96d56dd 100644 --- a/src/background/domScraper.ts +++ b/src/background/domScraper.ts @@ -1,5 +1,7 @@ import type {PlatformFieldConfig} from '@/types'; +export type DomScrapeResult = Record; + /** * 等待重试机制 */ @@ -225,4 +227,4 @@ async function processTable(config: PlatformFieldConfig, rootDom: ParentNode) { } return allTableData; -} \ No newline at end of file +} diff --git a/src/background/index.ts b/src/background/index.ts index deefa26..430b92e 100644 --- a/src/background/index.ts +++ b/src/background/index.ts @@ -1,7 +1,13 @@ -import {handleBackgroundCommand, handleWindowRemoved} from './service'; -import type {BackgroundCommand} from './types'; +import { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service'; +import type { BackgroundCommand } from './types'; +chrome.runtime.onInstalled.addListener(() => { + void handleInstalled(); +}); +chrome.runtime.onStartup.addListener(() => { + void handleStartup(); +}); chrome.runtime.onMessage.addListener((message: BackgroundCommand, _sender, sendResponse) => { void handleBackgroundMessage(message, sendResponse); @@ -12,20 +18,19 @@ chrome.windows.onRemoved.addListener((windowId) => { void handleWindowRemoved(windowId); }); -chrome.runtime.onMessageExternal.addListener((message, sender, sendResponse) => { - if (message.type === "STORE_AI_PING") { - // 返回版本号等信息 +chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) => { + if (message.type === 'STORE_AI_PING') { sendResponse({ success: true, - version: chrome.runtime.getManifest().version + version: chrome.runtime.getManifest().version, }); } - // 注意:外部消息处理必须返回 true 才能支持异步 sendResponse + return true; }); /** - * 统一包装后台消息处理,确保异步错误能回给调用方。 + * Wrap background command handling so async errors can still be returned to callers. */ async function handleBackgroundMessage( message: BackgroundCommand, @@ -36,6 +41,6 @@ async function handleBackgroundMessage( sendResponse(result); } catch (error: unknown) { const messageText = error instanceof Error ? error.message : 'Unknown error'; - sendResponse({ok: false, error: messageText}); + sendResponse({ ok: false, data: null, error: messageText }); } } diff --git a/src/background/service/crawlTask.ts b/src/background/service/crawlTask.ts index cf6ed73..a598d5a 100644 --- a/src/background/service/crawlTask.ts +++ b/src/background/service/crawlTask.ts @@ -2,7 +2,7 @@ import { getPlatformById } from '@/config/platforms'; import type { CrawlPauseInfo, CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types'; import type { DomScrapeResult } from '../domScraper'; import type { CrawlStateResponse } from '../types'; -import { getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState'; +import { clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState'; interface PageRunnerResponse { ok: boolean; @@ -11,6 +11,8 @@ interface PageRunnerResponse { error?: string; } +const activeCrawlControllers = new Map(); + /** * 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 */ @@ -47,9 +49,13 @@ export async function startCrawl(platformId: string): Promise { + activeCrawlControllers.delete(stateWithWindow.id); + }); return { ok: true, data: stateWithWindow }; } catch (error: unknown) { @@ -76,21 +82,15 @@ export async function cancelCrawl(): Promise { return { ok: true, data: null }; } - const canceledState: CrawlTaskState = { - ...state, - status: 'canceled', - steps: state.steps.map((step, index) => - index === state.currentStepIndex ? { ...step, status: 'failed', message: '用户已取消' } : step, - ), - }; + abortActiveCrawl(state.id); - await setCrawlTaskState(canceledState); + await clearCrawlTaskState(); if (state.windowId) { await chrome.windows.remove(state.windowId).catch(() => undefined); } - return { ok: true, data: canceledState }; + return { ok: true, data: null }; } /** @@ -122,10 +122,12 @@ export async function resumeCrawl(): Promise { export async function cancelCrawlWhenWindowRemoved(windowId: number): Promise { const state = await getCrawlTaskState(); - if (state?.windowId !== windowId || state.status !== 'running') { + if (state?.windowId !== windowId || !['running', 'paused'].includes(state.status)) { return; } + abortActiveCrawl(state.id); + await setCrawlTaskState({ ...state, status: 'canceled', @@ -135,10 +137,38 @@ export async function cancelCrawlWhenWindowRemoved(windowId: number): Promise { + const state = await getCrawlTaskState(); + + if (!state || !['running', 'paused'].includes(state.status)) { + return; + } + + const isWindowAlive = state.windowId ? await hasWindow(state.windowId) : false; + + if (isWindowAlive) { + return; + } + + abortActiveCrawl(state.id); + + await setCrawlTaskState({ + ...state, + status: 'canceled', + steps: state.steps.map((step, index) => + index === state.currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭,任务已取消' } : step, + ), + }); +} + +function abortActiveCrawl(taskId: string): void { + activeCrawlControllers.get(taskId)?.abort(); +} + /** * 按平台 steps 顺序执行页面跳转、DOM 等待、字段抓取和进度更新。 */ -async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState): Promise { +async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState, signal: AbortSignal): Promise { if (!initialState.windowId) { return; } @@ -151,12 +181,12 @@ async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskSt while (shouldRetryStep) { const currentState = await getCrawlTaskState(); - if (currentState?.id !== initialState.id || currentState.status === 'canceled') { + if (signal.aborted || currentState?.id !== initialState.id || currentState.status === 'canceled') { return; } if (currentState.status === 'paused') { - const resumed = await waitUntilResumed(initialState.id); + const resumed = await waitUntilResumed(initialState.id, signal); if (!resumed) { return; @@ -177,13 +207,21 @@ async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskSt const tabId = await getWindowActiveTabId(initialState.windowId); await chrome.tabs.update(tabId, { url: step.url, active: true }); - await waitForTabLoaded(tabId); + const tabLoaded = await waitForTabLoaded(tabId, signal); - const response = await scrapeStepInContent(tabId, step); + if (!tabLoaded || signal.aborted) { + return; + } + + const response = await scrapeStepInContent(tabId, step, signal); + + if (signal.aborted) { + return; + } if (response.interrupt) { await pauseForInterrupt(initialState.id, stepIndex, response.interrupt); - const resumed = await waitUntilResumed(initialState.id); + const resumed = await waitUntilResumed(initialState.id, signal); if (!resumed) { return; @@ -258,23 +296,33 @@ async function getWindowActiveTabId(windowId: number): Promise { /** * 让 content script 直接在目标页面执行检查和抓取。 */ -async function scrapeStepInContent(tabId: number, step: PlatformStepConfig): Promise { +async function scrapeStepInContent( + tabId: number, + step: PlatformStepConfig, + signal: AbortSignal, +): Promise { const startedAt = Date.now(); while (Date.now() - startedAt < 20000) { + if (signal.aborted) { + return { ok: false, error: 'canceled' }; + } + const response = await sendPageRunnerMessage(tabId, { action: 'SCRAPE_STEP', payload: { fields: step.fields, checkSelector: step.checkSelector, }, - }); + }, signal); if (response.ok || response.interrupt || !isPageRunnerNotReadyError(response.error)) { return response; } - await sleep(500); + if (!(await sleep(500, signal))) { + return { ok: false, error: 'canceled' }; + } } return { ok: false, error: '页面脚本未响应,请刷新扩展后重试' }; @@ -283,7 +331,15 @@ async function scrapeStepInContent(tabId: number, step: PlatformStepConfig): Pro /** * 给目标页的 content script 发送页面执行消息。 */ -async function sendPageRunnerMessage(tabId: number, message: unknown): Promise { +async function sendPageRunnerMessage(tabId: number, message: unknown, signal: AbortSignal): Promise { + if (signal.aborted) { + return { ok: false, error: 'canceled' }; + } + + return raceWithAbort(sendPageRunnerMessageOnce(tabId, message), signal); +} + +async function sendPageRunnerMessageOnce(tabId: number, message: unknown): Promise { try { const response = await chrome.tabs.sendMessage(tabId, message); @@ -326,8 +382,12 @@ async function pauseForInterrupt(taskId: string, stepIndex: number, interrupt: C /** * 暂停后等待用户点继续或取消。 */ -async function waitUntilResumed(taskId: string): Promise { +async function waitUntilResumed(taskId: string, signal: AbortSignal): Promise { while (true) { + if (signal.aborted) { + return false; + } + const state = await getCrawlTaskState(); if (!state || state.id !== taskId || state.status === 'canceled' || state.status === 'failed') { @@ -338,7 +398,9 @@ async function waitUntilResumed(taskId: string): Promise { return true; } - await sleep(1000); + if (!(await sleep(1000, signal))) { + return false; + } } } @@ -377,30 +439,123 @@ function createCrawlWindow(url: string): Promise { /** * 等待 tab 完成页面加载。 */ -function waitForTabLoaded(tabId: number): Promise { +function waitForTabLoaded(tabId: number, signal: AbortSignal): Promise { return new Promise((resolve) => { + if (signal.aborted) { + resolve(false); + return; + } + const timeout = globalThis.setTimeout(() => { - chrome.tabs.onUpdated.removeListener(handleUpdated); - resolve(); + cleanup(); + resolve(true); }, 15000); + function cleanup() { + globalThis.clearTimeout(timeout); + chrome.tabs.onUpdated.removeListener(handleUpdated); + signal.removeEventListener('abort', handleAbort); + } + + function handleAbort() { + cleanup(); + resolve(false); + } + function handleUpdated(updatedTabId: number, changeInfo: { status?: string }) { if (updatedTabId === tabId && changeInfo.status === 'complete') { - globalThis.clearTimeout(timeout); - chrome.tabs.onUpdated.removeListener(handleUpdated); - resolve(); + cleanup(); + resolve(true); } } chrome.tabs.onUpdated.addListener(handleUpdated); + signal.addEventListener('abort', handleAbort, { once: true }); }); } /** * 简单等待工具。 */ -function sleep(ms: number): Promise { - return new Promise((resolve) => { - globalThis.setTimeout(resolve, ms); +async function hasWindow(windowId: number): Promise { + try { + await chrome.windows.get(windowId); + return true; + } catch { + return false; + } +} + +function raceWithAbort(promise: Promise, signal: AbortSignal): Promise { + return new Promise((resolve, reject) => { + if (signal.aborted) { + resolve({ ok: false, error: 'canceled' } as T); + return; + } + + let isSettled = false; + + function cleanup() { + signal.removeEventListener('abort', handleAbort); + } + + function handleAbort() { + if (isSettled) { + return; + } + + isSettled = true; + cleanup(); + resolve({ ok: false, error: 'canceled' } as T); + } + + signal.addEventListener('abort', handleAbort, { once: true }); + + promise.then( + (value) => { + if (isSettled) { + return; + } + + isSettled = true; + cleanup(); + resolve(value); + }, + (error) => { + if (isSettled) { + return; + } + + isSettled = true; + cleanup(); + reject(error); + }, + ); + }); +} + +function sleep(ms: number, signal?: AbortSignal): Promise { + return new Promise((resolve) => { + if (signal?.aborted) { + resolve(false); + return; + } + + const timeout = globalThis.setTimeout(() => { + cleanup(); + resolve(true); + }, ms); + + function cleanup() { + globalThis.clearTimeout(timeout); + signal?.removeEventListener('abort', handleAbort); + } + + function handleAbort() { + cleanup(); + resolve(false); + } + + signal?.addEventListener('abort', handleAbort, { once: true }); }); } diff --git a/src/background/service/lifecycle.ts b/src/background/service/lifecycle.ts index e62b373..c65f013 100644 --- a/src/background/service/lifecycle.ts +++ b/src/background/service/lifecycle.ts @@ -1,5 +1,5 @@ import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types'; -import { cancelCrawl, cancelCrawlWhenWindowRemoved, resumeCrawl, startCrawl } from './crawlTask'; +import { cancelCrawl, cancelCrawlWhenWindowRemoved, cancelStaleCrawlWhenWindowMissing, resumeCrawl, startCrawl } from './crawlTask'; import { getCrawlTaskState } from './taskState'; /** @@ -14,6 +14,7 @@ export async function handleInstalled(): Promise { */ export async function handleStartup(): Promise { console.log('[background] startup'); + await cancelStaleCrawlWhenWindowMissing(); } /** @@ -34,6 +35,7 @@ export async function handleBackgroundCommand( case 'START_CRAWL': return startCrawl(message.payload.platformId); case 'GET_CRAWL_STATE': + await cancelStaleCrawlWhenWindowMissing(); return { ok: true, data: await getCrawlTaskState() }; case 'CANCEL_CRAWL': return cancelCrawl(); diff --git a/src/background/service/taskState.ts b/src/background/service/taskState.ts index 8f18259..808d145 100644 --- a/src/background/service/taskState.ts +++ b/src/background/service/taskState.ts @@ -1,27 +1,21 @@ import type { CrawlTaskState } from '@/types'; -// chrome.storage.local 中保存当前爬取任务状态的键名。 const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; -/** - * 从 chrome.storage.local 读取当前爬取任务状态。 - */ export async function getCrawlTaskState(): Promise { const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY); const state = result[CRAWL_TASK_STORAGE_KEY]; return isCrawlTaskState(state) ? state : null; } -/** - * 将最新爬取任务状态写入 chrome.storage.local,供 popup 和 content script 同步读取。 - */ export async function setCrawlTaskState(state: CrawlTaskState): Promise { await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state }); } -/** - * 读取任务状态后执行不可变更新,避免覆盖已取消或已替换的任务。 - */ +export async function clearCrawlTaskState(): Promise { + await chrome.storage.local.remove(CRAWL_TASK_STORAGE_KEY); +} + export async function updateCrawlTaskState( taskId: string, updater: (state: CrawlTaskState) => CrawlTaskState, @@ -35,9 +29,6 @@ export async function updateCrawlTaskState( await setCrawlTaskState(updater(state)); } -/** - * 粗略判断 storage 中读取到的值是否像一个爬取任务状态对象。 - */ function isCrawlTaskState(value: unknown): value is CrawlTaskState { return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value; } diff --git a/src/popup/App.vue b/src/popup/App.vue index 08c318e..0e2e2ce 100644 --- a/src/popup/App.vue +++ b/src/popup/App.vue @@ -18,6 +18,7 @@ const { isScanning, crawlState, handleScan, + handleCancelCrawl, elapsedSeconds } = useScan() @@ -29,12 +30,6 @@ const shouldShowCrawlProgress = computed(() => crawlState.value != null ); -/** - * 取消爬取 - */ -const handleCancelCrawl = () => { - -} /** * 获取进度样式 diff --git a/src/popup/hook/use-scan.ts b/src/popup/hook/use-scan.ts index 8d8df72..76b9525 100644 --- a/src/popup/hook/use-scan.ts +++ b/src/popup/hook/use-scan.ts @@ -1,49 +1,87 @@ -import {onMounted, onUnmounted, ref} from "vue"; -import {platformConfigs} from "@/config/platforms"; -import {CrawlTaskState} from "@/types"; -import {sendBackgroundMessage} from "@/shared/message"; +import { onMounted, onUnmounted, ref } from 'vue'; +import { platformConfigs } from '@/config/platforms'; +import type { CrawlTaskState } from '@/types'; +import { sendBackgroundMessage } from '@/shared/message'; + +const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; +const ACTIVE_STATUSES = new Set(['running', 'paused']); export const useScan = () => { - //选中id const selectedPlatformId = ref(platformConfigs[0]?.id ?? ''); - //防抖 - const isScanning = ref(false) - //步骤数据 + const isScanning = ref(false); const crawlState = ref(null); - //爬取时间 - const elapsedSeconds = ref(0) - + const elapsedSeconds = ref(0); let timer: number | undefined; - /** - * 开始爬取 - */ + const handleScan = async () => { - if (isScanning.value) return - isScanning.value = true + if (isScanning.value) { + return; + } + + isScanning.value = true; + try { - updateSeconds() - //定时器 - timer = window.setInterval(() => { - updateSeconds(); - }, 1000); - //发送 + ensureElapsedTimer(); + const response = await sendBackgroundMessage({ action: 'START_CRAWL', - payload: {platformId: selectedPlatformId.value}, + payload: { platformId: selectedPlatformId.value }, }); - if (response.data) { - crawlState.value = response.data; + + if (response.ok) { + syncCrawlState(response.data ?? null); + } else { + console.error('[crawl] start failed', response.error); } } finally { isScanning.value = false; } + }; + + const handleCancelCrawl = async () => { + const response = await sendBackgroundMessage({ action: 'CANCEL_CRAWL' }); + + if (response.ok) { + syncCrawlState(response.data ?? null); + return; + } + + console.error('[crawl] cancel failed', response.error); + await refreshCrawlState(); + }; + + function syncCrawlState(state: CrawlTaskState | null) { + crawlState.value = state; + updateSeconds(); + + if (state && ACTIVE_STATUSES.has(state.status)) { + ensureElapsedTimer(); + return; + } + + clearElapsedTimer(); } + function ensureElapsedTimer() { + if (timer !== undefined) { + return; + } + + timer = window.setInterval(() => { + updateSeconds(); + }, 1000); + } + + function clearElapsedTimer() { + if (timer === undefined) { + return; + } + + window.clearInterval(timer); + timer = undefined; + } - /** - * 更新时间 - */ function updateSeconds() { if (!crawlState.value) { elapsedSeconds.value = 0; @@ -53,33 +91,54 @@ export const useScan = () => { elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000)); } - /** - * 同步爬取状态 - */ async function refreshCrawlState() { - const response = await sendBackgroundMessage({action: 'GET_CRAWL_STATE'}); + const response = await sendBackgroundMessage({ action: 'GET_CRAWL_STATE' }); - if (response.data) { - crawlState.value = response.data ?? null; - updateSeconds(); + if (response.ok) { + syncCrawlState(response.data ?? null); } } + function handleStorageChanged(changes: Record, areaName: string) { + if (areaName !== 'local') { + return; + } + + const change = changes[CRAWL_TASK_STORAGE_KEY]; + + if (!change) { + return; + } + + syncCrawlState(isCrawlTaskState(change.newValue) ? change.newValue : null); + } + onMounted(async () => { - await refreshCrawlState() - }) + await refreshCrawlState(); + + if (typeof chrome !== 'undefined' && chrome.storage?.onChanged) { + chrome.storage.onChanged.addListener(handleStorageChanged); + } + }); onUnmounted(() => { - if (timer) { - window.clearInterval(timer); + clearElapsedTimer(); + + if (typeof chrome !== 'undefined' && chrome.storage?.onChanged) { + chrome.storage.onChanged.removeListener(handleStorageChanged); } - }) + }); return { selectedPlatformId, isScanning, crawlState, handleScan, + handleCancelCrawl, elapsedSeconds, - } -} \ No newline at end of file + }; +}; + +function isCrawlTaskState(value: unknown): value is CrawlTaskState { + return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value; +} diff --git a/src/shared/message.ts b/src/shared/message.ts index 12faf4a..b4cf279 100644 --- a/src/shared/message.ts +++ b/src/shared/message.ts @@ -1,24 +1,27 @@ export type MessageAction = - | "GET_CRAWL_STATE" // 获取爬虫的当前状态 - | "START_CRAWL" // 开始爬取 + | 'GET_CRAWL_STATE' + | 'START_CRAWL' + | 'CANCEL_CRAWL' + | 'RESUME_CRAWL'; -interface BackgroundMessage { - action: MessageAction; // 标识要执行的操作 - payload?: T; // 附带的数据 +interface BackgroundMessage { + action: MessageAction; + payload?: T; } -interface BackgroundResponse { - data: T | null +interface BackgroundResponse { + ok: boolean; + data: T | null; + error?: string; } /** - * 定义发送给 Background Script 的消息类型 + * Send a command to the background service worker. */ export function sendBackgroundMessage(data: BackgroundMessage): Promise> { - // 检查是否在 Chrome 扩展环境中运行 if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) { - return Promise.resolve({data: null}); + return Promise.resolve({ ok: true, data: null }); } return chrome.runtime.sendMessage(data); -} \ No newline at end of file +} diff --git a/tsconfig.tsbuildinfo b/tsconfig.tsbuildinfo index 9921a74..eb11c05 100644 --- a/tsconfig.tsbuildinfo +++ b/tsconfig.tsbuildinfo @@ -1 +1 @@ -{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/background/service/crawltask.ts","./src/background/service/lifecycle.ts","./src/background/service/taskstate.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/content/pagerunner.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts","./storeai-extension-v0.1.0/service-worker-loader.js","./storeai-extension-v0.1.0/assets/config-cf-xklo9.js","./storeai-extension-v0.1.0/assets/fetch-hook.ts-bvrghr__.js","./storeai-extension-v0.1.0/assets/index-dxg1qimp.js","./storeai-extension-v0.1.0/assets/index.ts-dirvxn_b.js","./storeai-extension-v0.1.0/assets/orchestrator.ts-bleul1fk.js","./storeai-extension-v0.1.0/assets/orchestrator.ts-loader-drev6v6h.js","./storeai-extension-v0.1.0/assets/popup-dbgvbs2c.js","./storeai-extension-v0.1.0/assets/selectors-xrdds_u0.js"],"version":"5.9.3"} \ No newline at end of file +{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/background/service/crawltask.ts","./src/background/service/lifecycle.ts","./src/background/service/taskstate.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/content/pagerunner.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/popup/hook/use-login.ts","./src/popup/hook/use-scan.ts","./src/shared/auth.ts","./src/shared/message.ts","./src/shared/time_format.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts"],"version":"5.9.3"} \ No newline at end of file diff --git a/vite.config.ts b/vite.config.ts index 8cdeeb4..880c7d4 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -3,7 +3,7 @@ import {crx} from '@crxjs/vite-plugin' import tailwindcss from '@tailwindcss/vite' import vue from '@vitejs/plugin-vue' import {defineConfig} from 'vite' -import manifest from './manifest.config.ts' +import manifest from './manifest.config' export default defineConfig({ resolve: {