From d78d70bde0ef694dc353ccc42e06e582becc322d Mon Sep 17 00:00:00 2001 From: zhu <1812073942@qq.com> Date: Wed, 6 May 2026 10:52:45 +0800 Subject: [PATCH] 1 --- src/background/service.ts | 479 +--------------------------- src/background/service/crawlTask.ts | 304 ++++++++++++++++++ src/background/service/lifecycle.ts | 43 +++ src/background/service/taskState.ts | 43 +++ tsconfig.tsbuildinfo | 2 +- 5 files changed, 392 insertions(+), 479 deletions(-) create mode 100644 src/background/service/crawlTask.ts create mode 100644 src/background/service/lifecycle.ts create mode 100644 src/background/service/taskState.ts diff --git a/src/background/service.ts b/src/background/service.ts index 3f4ab51..17330cd 100644 --- a/src/background/service.ts +++ b/src/background/service.ts @@ -1,478 +1 @@ -import { getPlatformById } from '@/config/platforms'; -import type { CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types'; -import { scrapeDomFields, type DomScrapeResult } from './domScraper'; -import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from './types'; - -// chrome.storage.local 中保存当前爬取任务状态的键名。 -const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; - -/** - * 扩展安装完成时的初始化入口,当前仅保留日志方便调试生命周期。 - */ -export async function handleInstalled(): Promise { - console.log('[background] installed'); -} - -/** - * 浏览器启动并加载扩展时的初始化入口,当前仅保留日志方便调试生命周期。 - */ -export async function handleStartup(): Promise { - console.log('[background] startup'); -} - -/** - * 监听窗口关闭事件;如果关闭的是爬取窗口,就把当前任务标记为取消。 - */ -export async function handleWindowRemoved(windowId: number): Promise { - console.log('[background] window removed', windowId); - - // 当前保存的爬取任务状态。 - const state = await getCrawlTaskState(); - - if (state?.windowId === windowId && state.status === 'running') { - await setCrawlTaskState({ - ...state, - status: 'canceled', - steps: state.steps.map((step, index) => - index === state.currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭' } : step, - ), - }); - } -} - -/** - * 根据 popup/content 发来的 action 分发到对应的后台处理函数。 - */ -export async function handleBackgroundCommand( - message: BackgroundCommand, -): Promise { - switch (message.action) { - case 'START_CRAWL': - return startCrawl(message.payload.platformId); - case 'GET_CRAWL_STATE': - return { ok: true, data: await getCrawlTaskState() }; - case 'CANCEL_CRAWL': - return cancelCrawl(); - default: - return { ok: false, error: '未知的后台指令' }; - } -} - -/** - * 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 - */ -async function startCrawl(platformId: string): Promise { - // 根据平台 ID 找到对应的平台爬取配置。 - const platform = getPlatformById(platformId); - - if (!platform) { - return { ok: false, error: '平台配置不存在' }; - } - - const firstStep = platform.steps[0]; - - if (!firstStep) { - return { ok: false, error: '平台未配置爬取步骤' }; - } - - // 当前任务的开始时间戳,用于计算正计时。 - const startedAt = Date.now(); - // 窗口创建前的初始任务状态。 - // 先写入 storage 让所有页面能立即感知爬取开始。 - const nextState: CrawlTaskState = { - id: `${platform.id}-${startedAt}`, - platformId: platform.id, - platformName: platform.name, - startedAt, - status: 'running', - currentStepIndex: 0, - steps: platform.steps.map((step, index) => ({ - name: step.name, - uniqueKey: step.uniqueKey, - status: index === 0 ? 'running' : 'pending', - })), - }; - - await setCrawlTaskState(nextState); - - try { - // background 创建出来的目标平台窗口信息。 - const windowInfo = await createCrawlWindow(firstStep.url); - // 补充 windowId 后的任务状态。 - // 后续可用于取消或监听窗口关闭。 - const stateWithWindow = { ...nextState, windowId: windowInfo.id }; - await setCrawlTaskState(stateWithWindow); - void runCrawlSteps(platform, stateWithWindow); - return { ok: true, data: stateWithWindow }; - } catch (error: unknown) { - // 窗口创建失败时写入的失败状态。 - // 供 popup/content 显示错误进度。 - const failedState: CrawlTaskState = { - ...nextState, - status: 'failed', - steps: nextState.steps.map((step, index) => - index === 0 ? { ...step, status: 'failed', message: '打开平台窗口失败' } : step, - ), - }; - await setCrawlTaskState(failedState); - return { ok: false, data: failedState, error: error instanceof Error ? error.message : '打开平台窗口失败' }; - } -} - -/** - * 按平台 steps 顺序执行页面跳转、DOM 等待、字段抓取和进度更新。 - */ -async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState): Promise { - if (!initialState.windowId) { - return; - } - - try { - // 新窗口中的目标标签页 ID。 - // 后续所有跳转和脚本注入都依赖它。 - const tabId = await getWindowActiveTabId(initialState.windowId); - - for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) { - // 当前正在执行的平台页面步骤配置。 - const step = platform.steps[stepIndex]; - - if (!(await isTaskRunning(initialState.id))) { - return; - } - - await markStepRunning(initialState.id, stepIndex); - await openStepPage(tabId, step.url); - - // 当前页面核心 DOM 是否已经出现。 - const isReady = await waitForStepReady(tabId, step); - - if (!isReady) { - await markStepFailed(initialState.id, stepIndex, '页面关键 DOM 未加载完成'); - await markTaskFailed(initialState.id); - return; - } - - // 注入页面执行后的字段抓取结果。 - const data = await scrapeStepFields(tabId, step); - console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, data); - await markStepSuccess(initialState.id, stepIndex); - } - - await markTaskCompleted(initialState.id); - } catch (error: unknown) { - console.error('[crawl] 执行失败', error); - await markTaskFailed(initialState.id, error instanceof Error ? error.message : '爬取执行失败'); - } -} - -/** - * 获取指定窗口中的活动 tab ID。 - */ -async function getWindowActiveTabId(windowId: number): Promise { - // 指定窗口中查询到的标签页列表。 - const tabs = await chrome.tabs.query({ windowId, active: true }); - // 当前窗口里用于承载爬取页面的活动标签页。 - const tab = tabs[0]; - - if (!tab?.id) { - throw new Error('未找到爬取窗口中的标签页'); - } - - return tab.id; -} - -/** - * 打开某个 steps 页面,并等待浏览器报告 tab 加载完成。 - */ -async function openStepPage(tabId: number, url: string): Promise { - await chrome.tabs.update(tabId, { url, active: true }); - await waitForTabLoaded(tabId); -} - -/** - * 等待 tab 完成页面加载。 - */ -function waitForTabLoaded(tabId: number): Promise { - return new Promise((resolve) => { - // 页面加载兜底定时器。 - // 避免某些站点不触发 complete 时流程永久挂起。 - const timeout = globalThis.setTimeout(() => { - chrome.tabs.onUpdated.removeListener(handleUpdated); - resolve(); - }, 15000); - - // chrome.tabs.onUpdated 的监听器。 - // 用于捕获指定 tab 的 complete 状态。 - function handleUpdated(updatedTabId: number, changeInfo: { status?: string }) { - if (updatedTabId === tabId && changeInfo.status === 'complete') { - globalThis.clearTimeout(timeout); - chrome.tabs.onUpdated.removeListener(handleUpdated); - resolve(); - } - } - - chrome.tabs.onUpdated.addListener(handleUpdated); - }); -} - -/** - * 等待步骤配置中的 checkSelector 出现;第一次超时后刷新页面再重试一次。 - */ -async function waitForStepReady(tabId: number, step: PlatformStepConfig): Promise { - if (await waitForSelector(tabId, step.checkSelector, 5000)) { - return true; - } - - await chrome.tabs.reload(tabId); - await waitForTabLoaded(tabId); - return waitForSelector(tabId, step.checkSelector, 5000); -} - -/** - * 在目标页面轮询检查指定 selector 是否存在。 - */ -async function waitForSelector(tabId: number, selector: string, timeoutMs: number): Promise { - // 轮询开始时间,用于控制最大等待时长。 - const startedAt = Date.now(); - - while (Date.now() - startedAt < timeoutMs) { - // 当前页面是否已经能查询到目标元素。 - const exists = await checkSelectorExists(tabId, selector); - - if (exists) { - return true; - } - - await sleep(500); - } - - return false; -} - -/** - * 注入轻量脚本检查页面里是否存在指定 selector。 - */ -async function checkSelectorExists(tabId: number, selector: string): Promise { - // chrome.scripting.executeScript 返回的注入执行结果。 - const results = await chrome.scripting.executeScript({ - target: { tabId }, - func: (targetSelector: string) => Boolean(document.querySelector(targetSelector)), - args: [selector], - }); - - return Boolean(results[0]?.result); -} - -/** - * 注入 domScraper 到目标页面,并根据当前 step.fields 提取页面数据。 - */ -async function scrapeStepFields(tabId: number, step: PlatformStepConfig): Promise { - // 目标页面执行 DOM 抓取后返回的结果数组。 - const results = await chrome.scripting.executeScript({ - target: { tabId }, - func: scrapeDomFields, - args: [step.fields], - }); - - return results[0]?.result ?? null; -} - -/** - * 判断指定任务是否仍处于 running 状态。 - */ -async function isTaskRunning(taskId: string): Promise { - // 当前 storage 中的任务状态。 - const state = await getCrawlTaskState(); - return state?.id === taskId && state.status === 'running'; -} - -/** - * 将指定步骤标记为运行中,同时把其它未完成步骤保持为等待。 - */ -async function markStepRunning(taskId: string, stepIndex: number): Promise { - await updateCrawlTaskState(taskId, (state) => ({ - ...state, - currentStepIndex: stepIndex, - status: 'running', - steps: state.steps.map((step, index) => ({ - ...step, - status: index === stepIndex ? 'running' : step.status, - message: index === stepIndex ? undefined : step.message, - })), - })); -} - -/** - * 将指定步骤标记为成功。 - */ -async function markStepSuccess(taskId: string, stepIndex: number): Promise { - await updateCrawlTaskState(taskId, (state) => ({ - ...state, - steps: state.steps.map((step, index) => - index === stepIndex ? { ...step, status: 'success', message: undefined } : step, - ), - })); -} - -/** - * 将指定步骤标记为失败,并记录失败原因。 - */ -async function markStepFailed(taskId: string, stepIndex: number, message: string): Promise { - await updateCrawlTaskState(taskId, (state) => ({ - ...state, - currentStepIndex: stepIndex, - steps: state.steps.map((step, index) => - index === stepIndex ? { ...step, status: 'failed', message } : step, - ), - })); -} - -/** - * 将整个任务标记为完成。 - */ -async function markTaskCompleted(taskId: string): Promise { - await updateCrawlTaskState(taskId, (state) => ({ - ...state, - status: 'completed', - steps: state.steps.map((step) => (step.status === 'running' ? { ...step, status: 'success' } : step)), - })); -} - -/** - * 将整个任务标记为失败。 - */ -async function markTaskFailed(taskId: string, message = '爬取失败'): Promise { - await updateCrawlTaskState(taskId, (state) => ({ - ...state, - status: 'failed', - steps: state.steps.map((step, index) => - index === state.currentStepIndex && step.status === 'running' ? { ...step, status: 'failed', message } : step, - ), - })); -} - -/** - * 读取任务状态后执行不可变更新,避免覆盖已取消或已替换的任务。 - */ -async function updateCrawlTaskState( - taskId: string, - updater: (state: CrawlTaskState) => CrawlTaskState, -): Promise { - // 当前 storage 中最新的任务状态。 - const state = await getCrawlTaskState(); - - if (!state || state.id !== taskId || state.status === 'canceled') { - return; - } - - await setCrawlTaskState(updater(state)); -} - -/** - * 睡眠工具,用于轮询 DOM 等待。 - */ -function sleep(ms: number): Promise { - return new Promise((resolve) => { - globalThis.setTimeout(resolve, ms); - }); -} - -/** - * 取消当前爬取任务,并尝试关闭正在爬取的平台窗口。 - */ -async function cancelCrawl(): Promise { - // 当前保存的爬取任务状态。 - const state = await getCrawlTaskState(); - - if (!state) { - return { ok: true, data: null }; - } - - // 用户取消后的任务状态。 - // 当前执行步骤会显示为失败并附带取消原因。 - const canceledState: CrawlTaskState = { - ...state, - status: 'canceled', - steps: state.steps.map((step, index) => - index === state.currentStepIndex ? { ...step, status: 'failed', message: '用户已取消' } : step, - ), - }; - - await setCrawlTaskState(canceledState); - - if (state.windowId) { - await removeWindow(state.windowId); - } - - return { ok: true, data: canceledState }; -} - -/** - * 从 chrome.storage.local 读取当前爬取任务状态。 - */ -async function getCrawlTaskState(): Promise { - // chrome.storage.local 返回的原始键值对象。 - const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY); - // 取出的任务状态候选值。 - // 需要经过结构校验后才能使用。 - const state = result[CRAWL_TASK_STORAGE_KEY]; - return isCrawlTaskState(state) ? state : null; -} - -/** - * 将最新爬取任务状态写入 chrome.storage.local,供 popup 和 content script 同步读取。 - */ -async function setCrawlTaskState(state: CrawlTaskState): Promise { - await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state }); -} - -/** - * 打开一个普通浏览器窗口承载目标平台页面。 - */ -function createCrawlWindow(url: string): Promise { - return new Promise((resolve, reject) => { - chrome.windows.create( - { - url, - type: 'popup', - focused: true, - width: 1280, - height: 900, - }, - (windowInfo) => { - // Chrome 扩展 API 回调中的运行时错误。 - const runtimeError = chrome.runtime.lastError; - - if (runtimeError) { - reject(new Error(runtimeError.message)); - return; - } - - if (!windowInfo?.id) { - reject(new Error('窗口创建失败')); - return; - } - - resolve(windowInfo); - }, - ); - }); -} - -/** - * 根据窗口 ID 关闭爬取窗口;关闭失败时不阻塞取消状态写入。 - */ -function removeWindow(windowId: number): Promise { - return new Promise((resolve) => { - chrome.windows.remove(windowId, () => { - resolve(); - }); - }); -} - -/** - * 粗略判断 storage 中读取到的值是否像一个爬取任务状态对象。 - */ -function isCrawlTaskState(value: unknown): value is CrawlTaskState { - return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value; -} +export { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service/lifecycle'; diff --git a/src/background/service/crawlTask.ts b/src/background/service/crawlTask.ts new file mode 100644 index 0000000..5b0f1f0 --- /dev/null +++ b/src/background/service/crawlTask.ts @@ -0,0 +1,304 @@ +import { getPlatformById } from '@/config/platforms'; +import type { CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types'; +import { scrapeDomFields, type DomScrapeResult } from '../domScraper'; +import type { CrawlStateResponse } from '../types'; +import { getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState'; + +/** + * 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 + */ +export async function startCrawl(platformId: string): Promise { + const platform = getPlatformById(platformId); + + if (!platform) { + return { ok: false, error: '平台配置不存在' }; + } + + const firstStep = platform.steps[0]; + + if (!firstStep) { + return { ok: false, error: '平台未配置爬取步骤' }; + } + + const startedAt = Date.now(); + const nextState: CrawlTaskState = { + id: `${platform.id}-${startedAt}`, + platformId: platform.id, + platformName: platform.name, + startedAt, + status: 'running', + currentStepIndex: 0, + steps: platform.steps.map((step, index) => ({ + name: step.name, + uniqueKey: step.uniqueKey, + status: index === 0 ? 'running' : 'pending', + })), + }; + + await setCrawlTaskState(nextState); + + try { + const windowInfo = await createCrawlWindow(firstStep.url); + const stateWithWindow = { ...nextState, windowId: windowInfo.id }; + + await setCrawlTaskState(stateWithWindow); + void runCrawlSteps(platform, stateWithWindow); + + return { ok: true, data: stateWithWindow }; + } catch (error: unknown) { + const failedState: CrawlTaskState = { + ...nextState, + status: 'failed', + steps: nextState.steps.map((step, index) => + index === 0 ? { ...step, status: 'failed', message: '打开平台窗口失败' } : step, + ), + }; + + await setCrawlTaskState(failedState); + return { ok: false, data: failedState, error: error instanceof Error ? error.message : '打开平台窗口失败' }; + } +} + +/** + * 取消当前爬取任务,并尝试关闭正在爬取的平台窗口。 + */ +export async function cancelCrawl(): Promise { + const state = await getCrawlTaskState(); + + if (!state) { + return { ok: true, data: null }; + } + + const canceledState: CrawlTaskState = { + ...state, + status: 'canceled', + steps: state.steps.map((step, index) => + index === state.currentStepIndex ? { ...step, status: 'failed', message: '用户已取消' } : step, + ), + }; + + await setCrawlTaskState(canceledState); + + if (state.windowId) { + await chrome.windows.remove(state.windowId).catch(() => undefined); + } + + return { ok: true, data: canceledState }; +} + +/** + * 窗口关闭后,如果关闭的是爬取窗口,就把当前任务标记为取消。 + */ +export async function cancelCrawlWhenWindowRemoved(windowId: number): Promise { + const state = await getCrawlTaskState(); + + if (state?.windowId !== windowId || state.status !== 'running') { + return; + } + + await setCrawlTaskState({ + ...state, + status: 'canceled', + steps: state.steps.map((step, index) => + index === state.currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭' } : step, + ), + }); +} + +/** + * 按平台 steps 顺序执行页面跳转、DOM 等待、字段抓取和进度更新。 + */ +async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState): Promise { + if (!initialState.windowId) { + return; + } + + try { + const tabId = await getWindowActiveTabId(initialState.windowId); + + for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) { + const step = platform.steps[stepIndex]; + const currentState = await getCrawlTaskState(); + + if (currentState?.id !== initialState.id || currentState.status !== 'running') { + return; + } + + await updateCrawlTaskState(initialState.id, (state) => ({ + ...state, + currentStepIndex: stepIndex, + status: 'running', + steps: state.steps.map((item, index) => ({ + ...item, + status: index === stepIndex ? 'running' : item.status, + message: index === stepIndex ? undefined : item.message, + })), + })); + + await chrome.tabs.update(tabId, { url: step.url, active: true }); + await waitForTabLoaded(tabId); + + const isReady = await waitForStepReady(tabId, step); + + if (!isReady) { + await updateCrawlTaskState(initialState.id, (state) => ({ + ...state, + status: 'failed', + currentStepIndex: stepIndex, + steps: state.steps.map((item, index) => + index === stepIndex ? { ...item, status: 'failed', message: '页面关键 DOM 未加载完成' } : item, + ), + })); + return; + } + + const data = await scrapeStepFields(tabId, step); + console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, data); + + await updateCrawlTaskState(initialState.id, (state) => ({ + ...state, + steps: state.steps.map((item, index) => + index === stepIndex ? { ...item, status: 'success', message: undefined } : item, + ), + })); + } + + await updateCrawlTaskState(initialState.id, (state) => ({ + ...state, + status: 'completed', + steps: state.steps.map((step) => (step.status === 'running' ? { ...step, status: 'success' } : step)), + })); + } catch (error: unknown) { + console.error('[crawl] 执行失败', error); + + await updateCrawlTaskState(initialState.id, (state) => ({ + ...state, + status: 'failed', + steps: state.steps.map((step, index) => + index === state.currentStepIndex && step.status === 'running' + ? { ...step, status: 'failed', message: error instanceof Error ? error.message : '爬取执行失败' } + : step, + ), + })); + } +} + +/** + * 获取指定窗口中的活动 tab ID。 + */ +async function getWindowActiveTabId(windowId: number): Promise { + const tabs = await chrome.tabs.query({ windowId, active: true }); + const tab = tabs[0]; + + if (!tab?.id) { + throw new Error('未找到爬取窗口中的标签页'); + } + + return tab.id; +} + +/** + * 等待步骤配置中的 checkSelector 出现;第一次超时后刷新页面再重试一次。 + */ +async function waitForStepReady(tabId: number, step: PlatformStepConfig): Promise { + if (await waitForSelector(tabId, step.checkSelector, 5000)) { + return true; + } + + await chrome.tabs.reload(tabId); + await waitForTabLoaded(tabId); + + return waitForSelector(tabId, step.checkSelector, 5000); +} + +/** + * 在目标页面轮询检查指定 selector 是否存在。 + */ +async function waitForSelector(tabId: number, selector: string, timeoutMs: number): Promise { + const startedAt = Date.now(); + + while (Date.now() - startedAt < timeoutMs) { + const results = await chrome.scripting.executeScript({ + target: { tabId }, + func: (targetSelector: string) => Boolean(document.querySelector(targetSelector)), + args: [selector], + }); + + if (Boolean(results[0]?.result)) { + return true; + } + + await new Promise((resolve) => { + globalThis.setTimeout(resolve, 500); + }); + } + + return false; +} + +/** + * 注入 domScraper 到目标页面,并根据当前 step.fields 提取页面数据。 + */ +async function scrapeStepFields(tabId: number, step: PlatformStepConfig): Promise { + const results = await chrome.scripting.executeScript({ + target: { tabId }, + func: scrapeDomFields, + args: [step.fields], + }); + + return results[0]?.result ?? null; +} + +/** + * 打开一个普通浏览器窗口承载目标平台页面。 + */ +function createCrawlWindow(url: string): Promise { + return new Promise((resolve, reject) => { + chrome.windows.create( + { + url, + type: 'popup', + focused: true, + width: 1280, + height: 900, + }, + (windowInfo) => { + const runtimeError = chrome.runtime.lastError; + + if (runtimeError) { + reject(new Error(runtimeError.message)); + return; + } + + if (!windowInfo?.id) { + reject(new Error('窗口创建失败')); + return; + } + + resolve(windowInfo); + }, + ); + }); +} + +/** + * 等待 tab 完成页面加载。 + */ +function waitForTabLoaded(tabId: number): Promise { + return new Promise((resolve) => { + const timeout = globalThis.setTimeout(() => { + chrome.tabs.onUpdated.removeListener(handleUpdated); + resolve(); + }, 15000); + + function handleUpdated(updatedTabId: number, changeInfo: { status?: string }) { + if (updatedTabId === tabId && changeInfo.status === 'complete') { + globalThis.clearTimeout(timeout); + chrome.tabs.onUpdated.removeListener(handleUpdated); + resolve(); + } + } + + chrome.tabs.onUpdated.addListener(handleUpdated); + }); +} diff --git a/src/background/service/lifecycle.ts b/src/background/service/lifecycle.ts new file mode 100644 index 0000000..55bda5f --- /dev/null +++ b/src/background/service/lifecycle.ts @@ -0,0 +1,43 @@ +import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types'; +import { cancelCrawl, cancelCrawlWhenWindowRemoved, startCrawl } from './crawlTask'; +import { getCrawlTaskState } from './taskState'; + +/** + * 扩展安装完成时的初始化入口,当前仅保留日志方便调试生命周期。 + */ +export async function handleInstalled(): Promise { + console.log('[background] installed'); +} + +/** + * 浏览器启动并加载扩展时的初始化入口,当前仅保留日志方便调试生命周期。 + */ +export async function handleStartup(): Promise { + console.log('[background] startup'); +} + +/** + * 监听窗口关闭事件;如果关闭的是爬取窗口,就把当前任务标记为取消。 + */ +export async function handleWindowRemoved(windowId: number): Promise { + console.log('[background] window removed', windowId); + await cancelCrawlWhenWindowRemoved(windowId); +} + +/** + * 根据 popup/content 发来的 action 分发到对应的后台处理函数。 + */ +export async function handleBackgroundCommand( + message: BackgroundCommand, +): Promise { + switch (message.action) { + case 'START_CRAWL': + return startCrawl(message.payload.platformId); + case 'GET_CRAWL_STATE': + return { ok: true, data: await getCrawlTaskState() }; + case 'CANCEL_CRAWL': + return cancelCrawl(); + default: + return { ok: false, error: '未知的后台指令' }; + } +} diff --git a/src/background/service/taskState.ts b/src/background/service/taskState.ts new file mode 100644 index 0000000..8f18259 --- /dev/null +++ b/src/background/service/taskState.ts @@ -0,0 +1,43 @@ +import type { CrawlTaskState } from '@/types'; + +// chrome.storage.local 中保存当前爬取任务状态的键名。 +const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; + +/** + * 从 chrome.storage.local 读取当前爬取任务状态。 + */ +export async function getCrawlTaskState(): Promise { + const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY); + const state = result[CRAWL_TASK_STORAGE_KEY]; + return isCrawlTaskState(state) ? state : null; +} + +/** + * 将最新爬取任务状态写入 chrome.storage.local,供 popup 和 content script 同步读取。 + */ +export async function setCrawlTaskState(state: CrawlTaskState): Promise { + await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state }); +} + +/** + * 读取任务状态后执行不可变更新,避免覆盖已取消或已替换的任务。 + */ +export async function updateCrawlTaskState( + taskId: string, + updater: (state: CrawlTaskState) => CrawlTaskState, +): Promise { + const state = await getCrawlTaskState(); + + if (!state || state.id !== taskId || state.status === 'canceled') { + return; + } + + await setCrawlTaskState(updater(state)); +} + +/** + * 粗略判断 storage 中读取到的值是否像一个爬取任务状态对象。 + */ +function isCrawlTaskState(value: unknown): value is CrawlTaskState { + return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value; +} diff --git a/tsconfig.tsbuildinfo b/tsconfig.tsbuildinfo index d8fbc61..47401c4 100644 --- a/tsconfig.tsbuildinfo +++ b/tsconfig.tsbuildinfo @@ -1 +1 @@ -{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts"],"version":"5.9.3"} \ No newline at end of file +{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/background/service/crawltask.ts","./src/background/service/lifecycle.ts","./src/background/service/taskstate.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts"],"version":"5.9.3"} \ No newline at end of file