11
This commit is contained in:
@@ -1,40 +1,66 @@
|
||||
import { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service';
|
||||
import { broadcastCrawlStorageChange, handleExternalConnect, handleExternalMessage } from './service/externalBridge';
|
||||
import type { BackgroundCommand } from './types';
|
||||
import { cancelStaleCrawlWhenWindowMissing } from './service/crawlTask';
|
||||
import { getCrawlTaskState } from './service/taskState';
|
||||
import {broadcastCrawlStorageChange, handleExternalConnect, handleExternalMessage} from './service/externalBridge';
|
||||
import {MessageAction} from "@/shared/message";
|
||||
import {cancelCrawl, startCrawl} from "./task/crawlTask";
|
||||
import {getCrawlTaskState} from "./task/taskState";
|
||||
|
||||
chrome.runtime.onInstalled.addListener(() => {
|
||||
void handleInstalled();
|
||||
});
|
||||
|
||||
chrome.runtime.onStartup.addListener(() => {
|
||||
void handleStartup();
|
||||
});
|
||||
|
||||
chrome.runtime.onMessage.addListener((message: BackgroundCommand | { action?: string }, sender, sendResponse) => {
|
||||
if (message && typeof message === 'object' && message.action === 'GET_CRAWL_STATE_FOR_TAB') {
|
||||
void (async () => {
|
||||
await cancelStaleCrawlWhenWindowMissing();
|
||||
const state = await getCrawlTaskState();
|
||||
const tabId = sender.tab?.id;
|
||||
if (state && typeof tabId === 'number' && state.tabId === tabId) {
|
||||
sendResponse({ ok: true, data: state });
|
||||
return;
|
||||
/**
|
||||
* 接受popup的指令
|
||||
*/
|
||||
chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
|
||||
// 1. 统一提取 action 和 payload
|
||||
const action = message.action as MessageAction;
|
||||
const payload = message.payload;
|
||||
|
||||
// 2. 使用一个异步立即执行函数来处理逻辑
|
||||
(async () => {
|
||||
try {
|
||||
let resultData: any = null;
|
||||
|
||||
// 3. 根据 action 分发任务
|
||||
switch (action) {
|
||||
case "START_CRAWL":
|
||||
resultData = await startCrawl(payload.platformId);
|
||||
break;
|
||||
|
||||
case "GET_CRAWL_STATE":
|
||||
resultData = await getCrawlTaskState();
|
||||
break;
|
||||
|
||||
case "CANCEL_CRAWL":
|
||||
await cancelCrawl()
|
||||
break;
|
||||
default:
|
||||
throw new Error(`未知的后台指令: ${action}`);
|
||||
}
|
||||
|
||||
sendResponse({ok: true, data: resultData});
|
||||
|
||||
} catch (error: any) {
|
||||
console.error(`[Background] Action ${action} failed:`, error);
|
||||
sendResponse({ok: false, error: error.message || 'Unknown error'});
|
||||
}
|
||||
sendResponse({ ok: true, data: null });
|
||||
})();
|
||||
return true;
|
||||
}
|
||||
|
||||
void handleBackgroundMessage(message as BackgroundCommand, sendResponse);
|
||||
return true;
|
||||
});
|
||||
|
||||
/**
|
||||
* 监听窗口关闭:
|
||||
* 用户手动关掉爬虫窗口时,自动触发任务清理逻辑(取消任务、停掉后台循环)。
|
||||
*/
|
||||
chrome.windows.onRemoved.addListener((windowId) => {
|
||||
void handleWindowRemoved(windowId);
|
||||
});
|
||||
|
||||
/**
|
||||
* 接收外部网页消息:
|
||||
* 允许在 manifest.json 中授权的官网域名(如 your-app.com)直接调起插件功能。
|
||||
*/
|
||||
chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) => {
|
||||
void handleExternalMessage(message).then(sendResponse).catch((error: unknown) => {
|
||||
sendResponse({
|
||||
@@ -43,27 +69,19 @@ chrome.runtime.onMessageExternal.addListener((message, _sender, sendResponse) =>
|
||||
});
|
||||
});
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
chrome.runtime.onConnectExternal.addListener(handleExternalConnect);
|
||||
|
||||
chrome.storage.onChanged.addListener((changes, areaName) => {
|
||||
broadcastCrawlStorageChange(changes, areaName);
|
||||
return true; // 保持异步响应通道开启
|
||||
});
|
||||
|
||||
/**
|
||||
* Wrap background command handling so async errors can still be returned to callers.
|
||||
* 处理外部长连接:
|
||||
* 用于官网页面与插件后台建立持久通信,实现实时的数据流同步。
|
||||
*/
|
||||
async function handleBackgroundMessage(
|
||||
message: BackgroundCommand,
|
||||
sendResponse: (response?: unknown) => void,
|
||||
) {
|
||||
try {
|
||||
const result = await handleBackgroundCommand(message);
|
||||
sendResponse(result);
|
||||
} catch (error: unknown) {
|
||||
const messageText = error instanceof Error ? error.message : 'Unknown error';
|
||||
sendResponse({ ok: false, data: null, error: messageText });
|
||||
}
|
||||
}
|
||||
chrome.runtime.onConnectExternal.addListener(handleExternalConnect);
|
||||
|
||||
/**
|
||||
* 监听存储变化:
|
||||
* 只要插件的本地数据(storage)发生改动,就立即广播给所有 UI(Popup/网页),实现进度条同步。
|
||||
*/
|
||||
chrome.storage.onChanged.addListener((changes, areaName) => {
|
||||
broadcastCrawlStorageChange(changes, areaName);
|
||||
});
|
||||
@@ -1 +0,0 @@
|
||||
export { handleBackgroundCommand, handleInstalled, handleStartup, handleWindowRemoved } from './service/lifecycle';
|
||||
@@ -1,671 +0,0 @@
|
||||
import { getPlatformById } from '@/config/platforms';
|
||||
import type { CrawlPauseInfo, CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types';
|
||||
import type { DomScrapeResult } from '../domScraper';
|
||||
import type { CrawlStateResponse } from '../types';
|
||||
import { clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState';
|
||||
|
||||
/**
 * Reply shape expected from the content-script page runner for a scrape request.
 */
interface PageRunnerResponse {
  /** Whether the scrape request succeeded. */
  ok: boolean;
  /** Failure description; also inspected to detect a content script that is not ready yet. */
  error?: string;
  /** Present when the page needs user attention; the task is paused with this info. */
  interrupt?: CrawlPauseInfo;
  /** Scraped payload, stored as the step result on success. */
  data?: DomScrapeResult | null;
}
|
||||
|
||||
/** Abort controllers of crawl runs that are currently executing, keyed by task id. */
const activeCrawlControllers: Map<string, AbortController> = new Map();
/** Pending auto-close timer handles, keyed by task id. */
const autoCloseTimers: Map<string, number> = new Map();
/** Delay before the crawl window of a finished or canceled task is closed. */
const DEFAULT_AUTOCLOSE_DELAY_MS = 10 * 1000;
|
||||
|
||||
/**
 * Starts a crawl for the given platform.
 *
 * Persists the initial timeline state, opens the platform window on the first
 * step's URL, stores the window/tab ids, and launches the step runner in the
 * background without awaiting it.
 *
 * @param platformId Identifier passed to `getPlatformById`.
 * @returns The stored task when one is already running or paused; otherwise the
 *   newly launched task state, or `ok: false` with an error message when the
 *   platform is unknown, has no steps, or launching fails.
 */
export async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
  const platform = getPlatformById(platformId);
  const existing = await getCrawlTaskState();

  // An active task wins: report it instead of starting a second one.
  if (existing && (existing.status === 'running' || existing.status === 'paused')) {
    return { ok: true, data: existing };
  }

  if (!platform) {
    return { ok: false, error: '平台配置不存在' };
  }

  const firstStep = platform.steps[0];
  if (!firstStep) {
    return { ok: false, error: '平台未配置爬取步骤' };
  }

  const startedAt = Date.now();
  const initialState: CrawlTaskState = {
    id: `${platform.id}-${startedAt}`,
    platformId: platform.id,
    platformName: platform.name,
    startedAt,
    status: 'running',
    currentStepIndex: 0,
    // Only the first step starts out as running; the rest wait their turn.
    steps: platform.steps.map((step, index): CrawlProgressStep => ({
      name: step.name,
      uniqueKey: step.uniqueKey,
      status: index === 0 ? 'running' : 'pending',
    })),
  };

  await setCrawlTaskState(initialState);

  try {
    const crawlWindow = await createCrawlWindow(firstStep.url);

    // The tab id is best-effort: a failed lookup must not abort the launch.
    let tabId: number | undefined;
    if (crawlWindow.id) {
      tabId = await getWindowActiveTabId(crawlWindow.id).catch(() => undefined);
    }

    const launchedState = { ...initialState, windowId: crawlWindow.id, tabId };
    const controller = new AbortController();

    await setCrawlTaskState(launchedState);
    activeCrawlControllers.set(launchedState.id, controller);

    // Fire and forget: the runner reports its progress through storage.
    void runCrawlSteps(platform, launchedState, controller.signal).finally(() => {
      activeCrawlControllers.delete(launchedState.id);
    });

    return { ok: true, data: launchedState };
  } catch (error: unknown) {
    const errorText = error instanceof Error ? error.message : '打开平台窗口失败';
    const failedState: CrawlTaskState = {
      ...initialState,
      status: 'failed',
      steps: initialState.steps.map((step, index) =>
        index === 0 ? { ...step, status: 'failed', message: '打开平台窗口失败' } : step,
      ),
    };

    await setCrawlTaskState(failedState);
    return { ok: false, data: failedState, error: errorText };
  }
}
|
||||
|
||||
/**
 * Cancels the stored crawl task.
 *
 * Aborts the running step loop, marks the task as canceled, and, when the task
 * owns a window, schedules that window to close after the default delay.
 *
 * @returns `data: null` when no task is stored, otherwise the canceled state.
 */
export async function cancelCrawl(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  if (!current) {
    return { ok: true, data: null };
  }

  abortActiveCrawl(current.id);
  clearAutoCloseTimer(current.id);

  const canceledState: CrawlTaskState = {
    ...current,
    status: 'canceled',
    autocloseAt: current.windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null,
    steps: current.steps.map((step, index) => {
      // Only a step that was actually in flight is flagged as failed.
      const wasInFlight = index === current.currentStepIndex && step.status === 'running';
      return wasInFlight ? { ...step, status: 'failed', message: '用户取消爬取任务' } : step;
    }),
  };

  await setCrawlTaskState(canceledState);

  if (canceledState.windowId) {
    scheduleAutoCloseWindow(canceledState.id, canceledState.windowId, canceledState.autocloseAt);
  }

  return { ok: true, data: canceledState };
}
|
||||
|
||||
/**
 * Resumes a paused crawl task, e.g. after the user has handled a login,
 * captcha, or risk-control prompt.
 *
 * @returns The stored state unchanged when there is no task or it is not
 *   paused; otherwise the state switched back to running with the pause cleared.
 */
export async function resumeCrawl(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  const isPaused = current !== null && current.status === 'paused';

  if (!current || !isPaused) {
    return { ok: true, data: current };
  }

  const resumedState: CrawlTaskState = {
    ...current,
    status: 'running',
    pause: undefined,
    // Put the current step back to running and drop its pause message.
    steps: current.steps.map((step, index) => {
      if (index !== current.currentStepIndex) {
        return step;
      }
      return { ...step, status: 'running', message: undefined };
    }),
  };

  await setCrawlTaskState(resumedState);
  return { ok: true, data: resumedState };
}
|
||||
|
||||
/**
 * Marks the stored task as canceled when the closed window is its crawl window.
 *
 * Does nothing unless the task belongs to `windowId` and is running or paused.
 *
 * @param windowId Id of the window that was just removed.
 */
export async function cancelCrawlWhenWindowRemoved(windowId: number): Promise<void> {
  const current = await getCrawlTaskState();

  if (!current || current.windowId !== windowId) {
    return;
  }
  if (current.status !== 'running' && current.status !== 'paused') {
    return;
  }

  abortActiveCrawl(current.id);
  clearAutoCloseTimer(current.id);

  // The window is already gone, so no auto-close is scheduled.
  const canceledState = {
    ...current,
    status: 'canceled' as const,
    autocloseAt: null,
    steps: current.steps.map((step, index) =>
      index === current.currentStepIndex ? { ...step, status: 'failed' as const, message: '爬取窗口已关闭' } : step,
    ),
  };

  await setCrawlTaskState(canceledState);
}
|
||||
|
||||
/**
 * Cancels a running or paused task whose crawl window no longer exists.
 *
 * A task without a recorded window id is treated the same as one whose window
 * is missing.
 */
export async function cancelStaleCrawlWhenWindowMissing(): Promise<void> {
  const current = await getCrawlTaskState();
  if (!current) {
    return;
  }

  const isActive = current.status === 'running' || current.status === 'paused';
  if (!isActive) {
    return;
  }

  // Keep the task when its window can still be found.
  if (current.windowId && (await hasWindow(current.windowId))) {
    return;
  }

  abortActiveCrawl(current.id);
  clearAutoCloseTimer(current.id);

  const canceledState = {
    ...current,
    status: 'canceled' as const,
    autocloseAt: null,
    steps: current.steps.map((step, index) =>
      index === current.currentStepIndex
        ? { ...step, status: 'failed' as const, message: '爬取窗口已关闭,任务已取消' }
        : step,
    ),
  };

  await setCrawlTaskState(canceledState);
}
|
||||
|
||||
/**
 * Aborts the step loop registered for `taskId`, if there is one.
 *
 * @param taskId Task whose abort controller should be triggered.
 */
function abortActiveCrawl(taskId: string): void {
  const controller = activeCrawlControllers.get(taskId);
  if (controller) {
    controller.abort();
  }
}
|
||||
|
||||
/**
 * Cancels the pending automatic window close of the stored task (the overlay's
 * "keep open" action).
 *
 * @returns `data: null` when no task is stored, otherwise the state with
 *   `autocloseAt` reset to `null`.
 */
export async function cancelAutoclose(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();
  if (current === null) {
    return { ok: true, data: null };
  }

  clearAutoCloseTimer(current.id);

  const keptOpenState: CrawlTaskState = { ...current, autocloseAt: null };
  await setCrawlTaskState(keptOpenState);

  return { ok: true, data: keptOpenState };
}
|
||||
|
||||
/**
 * Removes the stored task snapshot (the popup's Close/Dismiss action).
 *
 * Cancels a pending auto-close timer but does not close the window itself.
 *
 * @returns Always `ok: true` with `data: null`.
 */
export async function dismissCrawl(): Promise<CrawlStateResponse> {
  const current = await getCrawlTaskState();

  if (current) {
    clearAutoCloseTimer(current.id);
    await clearCrawlTaskState();
  }

  return { ok: true, data: null };
}
|
||||
|
||||
/**
 * Schedules `windowId` to be closed at the `autocloseAt` timestamp.
 *
 * Replaces any timer already registered for the task. Nothing is scheduled
 * when `autocloseAt` is missing, `null`, or `0`.
 *
 * @param taskId Task that owns the timer.
 * @param windowId Window to remove when the timer fires.
 * @param autocloseAt Epoch milliseconds at which to close the window.
 */
function scheduleAutoCloseWindow(taskId: string, windowId: number, autocloseAt?: number | null): void {
  if (!autocloseAt) {
    return;
  }

  clearAutoCloseTimer(taskId);

  const closeWindow = (): void => {
    autoCloseTimers.delete(taskId);
    // Best effort: the window may already have been closed by the user.
    chrome.windows.remove(windowId).catch(() => undefined);
  };

  // A timestamp in the past fires immediately rather than with a negative delay.
  const remainingMs = Math.max(0, autocloseAt - Date.now());
  const handle = setTimeout(closeWindow, remainingMs) as unknown as number;

  autoCloseTimers.set(taskId, handle);
}
|
||||
|
||||
/**
 * Cancels and forgets the auto-close timer registered for `taskId`, if any.
 *
 * @param taskId Task whose timer should be cleared.
 */
function clearAutoCloseTimer(taskId: string): void {
  const handle = autoCloseTimers.get(taskId);

  if (handle !== undefined) {
    clearTimeout(handle);
    autoCloseTimers.delete(taskId);
  }
}
|
||||
|
||||
/**
 * Runs every configured step of a platform in order: navigate the crawl tab,
 * wait for the page, ask the content script to scrape, and record progress.
 *
 * The run ends silently when the signal aborts, when the stored task has been
 * replaced or canceled, or when a pause ends without being resumed. A failed
 * scrape marks the task failed. Normal completion and an unexpected exception
 * both schedule the crawl window to close automatically.
 *
 * @param platform Platform whose `steps` are executed.
 * @param initialState Task state captured at launch; supplies the task and window ids.
 * @param signal Aborted to cancel the run.
 */
async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState, signal: AbortSignal): Promise<void> {
  const taskId = initialState.id;
  const windowId = initialState.windowId;

  // Without a window there is nothing to navigate.
  if (!windowId) {
    return;
  }

  try {
    for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) {
      const step = platform.steps[stepIndex];

      // Repeat the step until it succeeds; an interrupt restarts it after resume.
      while (true) {
        const latest = await getCrawlTaskState();

        if (signal.aborted || latest?.id !== taskId || latest.status === 'canceled') {
          return;
        }

        if (latest.status === 'paused' && !(await waitUntilResumed(taskId, signal))) {
          return;
        }

        // Publish which step is now in flight.
        await updateCrawlTaskState(taskId, (state) => ({
          ...state,
          currentStepIndex: stepIndex,
          status: 'running',
          pause: undefined,
          steps: state.steps.map((item, index) => ({
            ...item,
            status: index === stepIndex ? 'running' : item.status,
            message: index === stepIndex ? undefined : item.message,
          })),
        }));

        const tabId = await getWindowActiveTabId(windowId);
        await chrome.tabs.update(tabId, { url: step.url, active: true });

        const tabLoaded = await waitForTabLoaded(tabId, signal);
        if (!tabLoaded || signal.aborted) {
          return;
        }

        const response = await scrapeStepInContent(tabId, step, signal);
        if (signal.aborted) {
          return;
        }

        if (response.interrupt) {
          // The page needs the user; pause, then redo this step once resumed.
          await pauseForInterrupt(taskId, stepIndex, response.interrupt);

          if (!(await waitUntilResumed(taskId, signal))) {
            return;
          }

          continue;
        }

        if (!response.ok) {
          const message = response.error ?? '页面抓取失败';

          await updateCrawlTaskState(taskId, (state) => ({
            ...state,
            status: 'failed',
            currentStepIndex: stepIndex,
            steps: state.steps.map((item, index) =>
              index === stepIndex ? { ...item, status: 'failed', message } : item,
            ),
          }));
          return;
        }

        console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, response.data);

        await updateCrawlTaskState(taskId, (state) => ({
          ...state,
          steps: state.steps.map((item, index) =>
            index === stepIndex
              ? { ...item, status: 'success', message: undefined, result: response.data }
              : item,
          ),
        }));

        break;
      }
    }

    const autocloseAt = windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null;

    await updateCrawlTaskState(taskId, (state) => ({
      ...state,
      status: 'completed',
      autocloseAt,
      steps: state.steps.map((item) => (item.status === 'running' ? { ...item, status: 'success' } : item)),
    }));

    if (windowId) {
      scheduleAutoCloseWindow(taskId, windowId, autocloseAt);
    }
  } catch (error: unknown) {
    console.error('[crawl] 执行失败', error);

    const autocloseAt = windowId ? Date.now() + DEFAULT_AUTOCLOSE_DELAY_MS : null;

    await updateCrawlTaskState(taskId, (state) => ({
      ...state,
      status: 'failed',
      autocloseAt,
      steps: state.steps.map((item, index) =>
        index === state.currentStepIndex && item.status === 'running'
          ? { ...item, status: 'failed', message: error instanceof Error ? error.message : '爬取执行失败' }
          : item,
      ),
    }));

    if (windowId) {
      scheduleAutoCloseWindow(taskId, windowId, autocloseAt);
    }
  }
}
|
||||
|
||||
/**
 * Looks up the id of the active tab in the given window.
 *
 * @param windowId Window to query.
 * @returns The active tab's id.
 * @throws Error when the window has no active tab with an id.
 */
async function getWindowActiveTabId(windowId: number): Promise<number> {
  const matches = await chrome.tabs.query({ windowId, active: true });
  const activeTabId = matches[0]?.id;

  if (activeTabId) {
    return activeTabId;
  }

  throw new Error('未找到爬取窗口中的标签页');
}
|
||||
|
||||
/**
 * Asks the content script in `tabId` to check and scrape the page for one step.
 *
 * While the content script looks like it is not ready yet, the request is
 * repeated every 500 ms for up to 20 seconds.
 *
 * @param tabId Tab hosting the target page.
 * @param step Step whose `fields` and `checkSelector` are sent to the page.
 * @param signal Aborted to cancel; yields `{ ok: false, error: 'canceled' }`.
 * @returns The page runner's reply, a canceled result, or a not-responding
 *   error once the 20 second budget is used up.
 */
async function scrapeStepInContent(
  tabId: number,
  step: PlatformStepConfig,
  signal: AbortSignal,
): Promise<PageRunnerResponse> {
  const deadline = Date.now() + 20000;
  const request = {
    action: 'SCRAPE_STEP',
    payload: {
      fields: step.fields,
      checkSelector: step.checkSelector,
    },
  };

  while (Date.now() < deadline) {
    if (signal.aborted) {
      return { ok: false, error: 'canceled' };
    }

    const reply = await sendPageRunnerMessage(tabId, request, signal);

    // Only a "content script not ready" failure is worth another attempt.
    const shouldRetry = !reply.ok && !reply.interrupt && isPageRunnerNotReadyError(reply.error);
    if (!shouldRetry) {
      return reply;
    }

    const waited = await sleep(500, signal);
    if (!waited) {
      return { ok: false, error: 'canceled' };
    }
  }

  return { ok: false, error: '页面脚本未响应,请刷新扩展后重试' };
}
|
||||
|
||||
/**
 * Sends one message to the content script of `tabId`, giving up early when the
 * signal aborts.
 *
 * @param tabId Tab to message.
 * @param message Payload forwarded to the content script.
 * @param signal Aborted to cancel; yields `{ ok: false, error: 'canceled' }`.
 */
async function sendPageRunnerMessage(tabId: number, message: unknown, signal: AbortSignal): Promise<PageRunnerResponse> {
  if (!signal.aborted) {
    return raceWithAbort(sendPageRunnerMessageOnce(tabId, message), signal);
  }

  return { ok: false, error: 'canceled' };
}
|
||||
|
||||
/**
 * Performs a single `chrome.tabs.sendMessage` call and never rejects.
 *
 * @param tabId Tab to message.
 * @param message Payload forwarded to the content script.
 * @returns The reply when it is an object; otherwise an `ok: false` result
 *   carrying either the thrown error's text or an empty-reply message.
 */
async function sendPageRunnerMessageOnce(tabId: number, message: unknown): Promise<PageRunnerResponse> {
  try {
    const reply: unknown = await chrome.tabs.sendMessage(tabId, message);
    const isObjectReply = typeof reply === 'object' && reply !== null;

    if (!isObjectReply) {
      return { ok: false, error: '页面脚本返回为空' };
    }

    return reply as PageRunnerResponse;
  } catch (error: unknown) {
    const errorText = error instanceof Error ? error.message : String(error);
    return { ok: false, error: errorText };
  }
}
|
||||
|
||||
/**
 * Tells whether an error message only means the content script has not
 * finished injecting yet.
 *
 * @param error Error text from a failed message attempt.
 * @returns `true` when the text matches one of the known "no receiver"
 *   messages (case-insensitive); `false` for empty or missing text.
 */
function isPageRunnerNotReadyError(error?: string): boolean {
  const notReadyPattern = /receiving end does not exist|could not establish connection|no receiving end/i;

  return error ? notReadyPattern.test(error) : false;
}
|
||||
|
||||
/**
 * Pauses the task because the page reported an interrupt (login, captcha, or
 * a page anomaly).
 *
 * @param taskId Task to pause.
 * @param stepIndex Step during which the interrupt occurred.
 * @param interrupt Pause details; its `message` is shown on the step.
 */
async function pauseForInterrupt(taskId: string, stepIndex: number, interrupt: CrawlPauseInfo): Promise<void> {
  await updateCrawlTaskState(taskId, (state) => {
    // The step stays "running" while the task as a whole is paused.
    const steps = state.steps.map((item, index) =>
      index === stepIndex ? { ...item, status: 'running' as const, message: interrupt.message } : item,
    );

    return {
      ...state,
      status: 'paused',
      pause: interrupt,
      currentStepIndex: stepIndex,
      steps,
    };
  });
}
|
||||
|
||||
/**
 * Polls the stored task once per second until the user resumes or cancels it.
 *
 * @param taskId Task being waited on.
 * @param signal Aborted to stop waiting.
 * @returns `true` once the task is running again; `false` when the signal
 *   aborts or the task is gone, replaced, canceled, or failed.
 */
async function waitUntilResumed(taskId: string, signal: AbortSignal): Promise<boolean> {
  while (!signal.aborted) {
    const current = await getCrawlTaskState();

    const isSameTask = current !== null && current.id === taskId;
    if (!current || !isSameTask) {
      return false;
    }

    switch (current.status) {
      case 'canceled':
      case 'failed':
        return false;
      case 'running':
        return true;
      default:
        break;
    }

    const keepWaiting = await sleep(1000, signal);
    if (!keepWaiting) {
      return false;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Opens an unfocused 1280x900 popup window that hosts the target platform page.
 *
 * @param url Page to load in the new window.
 * @returns The created window, which always carries an id.
 * @throws Error with the runtime's message when creation fails, or a generic
 *   creation error when no window id is returned.
 */
function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
  return new Promise((resolve, reject) => {
    const onCreated = (created?: chrome.windows.Window): void => {
      const runtimeError = chrome.runtime.lastError;

      if (runtimeError) {
        reject(new Error(runtimeError.message));
      } else if (!created?.id) {
        reject(new Error('窗口创建失败'));
      } else {
        // Ask for the user's attention; a failure here is not fatal.
        void chrome.windows.update(created.id, { drawAttention: true }).catch(() => undefined);
        resolve(created);
      }
    };

    chrome.windows.create(
      {
        url,
        type: 'popup',
        focused: false,
        state: 'normal',
        width: 1280,
        height: 900,
      },
      onCreated,
    );
  });
}
|
||||
|
||||
/**
 * Waits for `tabId` to report a `complete` load status.
 *
 * @param tabId Tab to watch.
 * @param signal Aborted to stop waiting.
 * @returns `false` when the signal is or becomes aborted; `true` when the tab
 *   finishes loading, and also when 15 seconds pass without that event.
 */
function waitForTabLoaded(tabId: number, signal: AbortSignal): Promise<boolean> {
  return new Promise((resolve) => {
    if (signal.aborted) {
      resolve(false);
      return;
    }

    // Single exit point: detach every listener and the timer, then settle.
    const settle = (loaded: boolean): void => {
      globalThis.clearTimeout(timeoutHandle);
      chrome.tabs.onUpdated.removeListener(onTabUpdated);
      signal.removeEventListener('abort', onAbort);
      resolve(loaded);
    };

    const onAbort = (): void => {
      settle(false);
    };

    const onTabUpdated = (updatedTabId: number, changeInfo: { status?: string }): void => {
      if (updatedTabId === tabId && changeInfo.status === 'complete') {
        settle(true);
      }
    };

    // The timeout also settles with `true`, so the caller proceeds regardless.
    const timeoutHandle = globalThis.setTimeout(() => {
      settle(true);
    }, 15000);

    chrome.tabs.onUpdated.addListener(onTabUpdated);
    signal.addEventListener('abort', onAbort, { once: true });
  });
}
|
||||
|
||||
/**
 * Reports whether a browser window with the given id can still be fetched.
 *
 * @param windowId Window to look up.
 * @returns `true` when `chrome.windows.get` succeeds, `false` when it throws.
 */
async function hasWindow(windowId: number): Promise<boolean> {
  let exists = true;

  try {
    await chrome.windows.get(windowId);
  } catch {
    exists = false;
  }

  return exists;
}
|
||||
|
||||
/**
 * Settles with whichever happens first: the promise's outcome or the abort.
 *
 * @param promise Work to wait for.
 * @param signal Aborted to stop waiting.
 * @returns The promise's value or rejection; on abort it resolves with
 *   `{ ok: false, error: 'canceled' }` cast to `T`.
 */
function raceWithAbort<T>(promise: Promise<T>, signal: AbortSignal): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    if (signal.aborted) {
      resolve({ ok: false, error: 'canceled' } as T);
      return;
    }

    let pending = true;

    // Runs `action` only for the first outcome and detaches the abort listener.
    const settleOnce = (action: () => void): void => {
      if (!pending) {
        return;
      }

      pending = false;
      signal.removeEventListener('abort', onAbort);
      action();
    };

    const onAbort = (): void => {
      settleOnce(() => resolve({ ok: false, error: 'canceled' } as T));
    };

    signal.addEventListener('abort', onAbort, { once: true });

    promise.then(
      (value) => settleOnce(() => resolve(value)),
      (error) => settleOnce(() => reject(error)),
    );
  });
}
|
||||
|
||||
/**
 * Waits for `ms` milliseconds, ending early when the signal aborts.
 *
 * @param ms Time to wait.
 * @param signal Optional signal that cuts the wait short.
 * @returns `true` when the full delay elapsed, `false` when the signal was
 *   already aborted or aborted during the wait.
 */
function sleep(ms: number, signal?: AbortSignal): Promise<boolean> {
  return new Promise((resolve) => {
    if (signal?.aborted) {
      resolve(false);
      return;
    }

    // Stops the timer and detaches the abort listener before settling.
    const finish = (elapsed: boolean): void => {
      globalThis.clearTimeout(timer);
      signal?.removeEventListener('abort', onAbort);
      resolve(elapsed);
    };

    const onAbort = (): void => {
      finish(false);
    };

    const timer = globalThis.setTimeout(() => {
      finish(true);
    }, ms);

    signal?.addEventListener('abort', onAbort, { once: true });
  });
}
|
||||
@@ -1,7 +1,7 @@
|
||||
import { platformConfigs } from '@/config/platforms';
|
||||
import type { CrawlTaskState } from '@/types';
|
||||
import { cancelCrawl, startCrawl } from './crawlTask';
|
||||
import { getCrawlTaskState } from './taskState';
|
||||
import {getCrawlTaskState} from "@/background/task/taskState";
|
||||
import {cancelCrawl, startCrawl} from "@/background/task/crawlTask";
|
||||
|
||||
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
||||
const EXTERNAL_PORT_NAME = 'DIANSHAN_CRAWL';
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types';
|
||||
import {
|
||||
cancelAutoclose,
|
||||
cancelCrawl,
|
||||
cancelCrawlWhenWindowRemoved,
|
||||
cancelStaleCrawlWhenWindowMissing,
|
||||
dismissCrawl,
|
||||
resumeCrawl,
|
||||
startCrawl,
|
||||
} from './crawlTask';
|
||||
import { getCrawlTaskState } from './taskState';
|
||||
|
||||
/**
 * Entry point for the extension's "installed" event.
 *
 * Currently only writes a log line to help trace the lifecycle.
 */
export async function handleInstalled(): Promise<void> {
  const lifecycleMessage = '[background] installed';
  console.log(lifecycleMessage);
}
|
||||
|
||||
/**
 * Entry point for the browser "startup" event.
 *
 * Logs the event, then cancels a stored task whose crawl window is missing.
 */
export async function handleStartup(): Promise<void> {
  const lifecycleMessage = '[background] startup';
  console.log(lifecycleMessage);

  await cancelStaleCrawlWhenWindowMissing();
}
|
||||
|
||||
/**
 * Entry point for window-closed events.
 *
 * Logs the window id and cancels the current task when that window was the
 * crawl window.
 *
 * @param windowId Id of the window that was removed.
 */
export async function handleWindowRemoved(windowId: number): Promise<void> {
  const lifecycleMessage = '[background] window removed';
  console.log(lifecycleMessage, windowId);

  await cancelCrawlWhenWindowRemoved(windowId);
}
|
||||
|
||||
/**
 * Routes a command sent by the popup or a content script to its handler.
 *
 * @param message Command whose `action` selects the handler.
 * @returns The handler's response, or `ok: false` for an unrecognized action.
 */
export async function handleBackgroundCommand(
  message: BackgroundCommand,
): Promise<BackgroundResponse | CrawlStateResponse> {
  if (message.action === 'START_CRAWL') {
    return startCrawl(message.payload.platformId);
  }

  if (message.action === 'GET_CRAWL_STATE') {
    // Drop a task whose window is gone before reporting the state.
    await cancelStaleCrawlWhenWindowMissing();
    const state = await getCrawlTaskState();
    return { ok: true, data: state };
  }

  if (message.action === 'CANCEL_CRAWL') {
    return cancelCrawl();
  }

  if (message.action === 'RESUME_CRAWL') {
    return resumeCrawl();
  }

  if (message.action === 'CANCEL_AUTOCLOSE') {
    return cancelAutoclose();
  }

  if (message.action === 'DISMISS_CRAWL') {
    return dismissCrawl();
  }

  return { ok: false, error: '未知的后台指令' };
}
|
||||
@@ -1,47 +0,0 @@
|
||||
import type { CrawlTaskState } from '@/types';
|
||||
|
||||
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
||||
|
||||
/**
 * Reads the crawl task snapshot from `chrome.storage.local`.
 *
 * @returns The stored task, or `null` when nothing valid is stored.
 */
export async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
  const stored = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
  const candidate: unknown = stored[CRAWL_TASK_STORAGE_KEY];

  if (!isCrawlTaskState(candidate)) {
    return null;
  }

  return candidate;
}
|
||||
|
||||
/**
 * Writes the crawl task snapshot to `chrome.storage.local`, then pushes it to
 * the crawl tab.
 *
 * @param state Snapshot to persist.
 */
export async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
  const record = { [CRAWL_TASK_STORAGE_KEY]: state };
  await chrome.storage.local.set(record);

  broadcastToCrawlTab(state);
}
|
||||
|
||||
/**
 * Deletes the crawl task snapshot from `chrome.storage.local`.
 */
export async function clearCrawlTaskState(): Promise<void> {
  const storage = chrome.storage.local;
  await storage.remove(CRAWL_TASK_STORAGE_KEY);
}
|
||||
|
||||
/**
 * Applies `updater` to the stored task and persists the result.
 *
 * The update is skipped when nothing is stored, when the stored task has a
 * different id, or when the stored task is already canceled.
 *
 * @param taskId Id the stored task must have.
 * @param updater Maps the current snapshot to the next one.
 */
export async function updateCrawlTaskState(
  taskId: string,
  updater: (state: CrawlTaskState) => CrawlTaskState,
): Promise<void> {
  const current = await getCrawlTaskState();

  const isUpdatable = current !== null && current.id === taskId && current.status !== 'canceled';
  if (!current || !isUpdatable) {
    return;
  }

  const next = updater(current);
  await setCrawlTaskState(next);
}
|
||||
|
||||
/**
 * Sends the snapshot to the crawl tab as a `crawl_state_update` message.
 *
 * Best effort: nothing is sent without a tab id, and delivery errors are ignored.
 *
 * @param state Snapshot to broadcast; `state.tabId` selects the recipient.
 */
function broadcastToCrawlTab(state: CrawlTaskState): void {
  const { tabId } = state;

  if (tabId) {
    try {
      const update = { type: 'crawl_state_update', state };
      void chrome.tabs.sendMessage(tabId, update).catch(() => undefined);
    } catch {
      // A synchronous failure is deliberately ignored as well.
    }
  }
}
|
||||
|
||||
/**
 * Shallow type guard for a stored crawl task.
 *
 * Only checks that the value is a non-null object with `id` and `steps` keys.
 *
 * @param value Value read from storage.
 */
function isCrawlTaskState(value: unknown): value is CrawlTaskState {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  return 'id' in value && 'steps' in value;
}
|
||||
147
src/background/task/crawlTask.ts
Normal file
147
src/background/task/crawlTask.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
import {getPlatformById} from "@/config/platforms";
|
||||
import {CrawlTaskState, PlatformStepConfig} from "@/types";
|
||||
import {openSingleTabWindow, scrapeStepInContent, sleep, waitForTabLoaded} from "@/background/task/helper";
|
||||
import {clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState} from "./taskState";
|
||||
|
||||
|
||||
const activeCrawlControllers = new Map<string, AbortController>();
|
||||
|
||||
/**
 * Creates a new crawl task: opens the platform window, writes the initial
 * timeline state to storage, and launches the step runner in the background.
 *
 * When the stored task still has a live runner in this worker, that task is
 * returned instead of starting a second one, so the first runner is not
 * orphaned.
 *
 * @param platformId Identifier passed to `getPlatformById`.
 * @returns The task state that is now running. For a configuration problem
 *   (unknown platform, or a platform without steps) an `{error}` object is
 *   returned instead.
 * @throws Whatever `openSingleTabWindow` or the state writes throw.
 */
export async function startCrawl(platformId: string): Promise<any> {
  const platform = getPlatformById(platformId);
  if (!platform) {
    return {error: '平台配置不存在'};
  }

  // A platform without steps has no URL to open.
  const firstStep = platform.steps[0];
  if (!firstStep) {
    return {error: '平台未配置爬取步骤'};
  }

  // A controller in the map means a runner for that task is still active.
  const existing = await getCrawlTaskState();
  if (existing && activeCrawlControllers.has(existing.id)) {
    return existing;
  }

  // Open the crawl window.
  const windowInfo = await openSingleTabWindow(firstStep.url);

  // Build the initial state.
  const startedAt = Date.now();
  const nextState: CrawlTaskState = {
    id: `${platform.id}-${startedAt}`,
    windowId: windowInfo.windowId,
    tabId: windowInfo.tabId,
    platformId: platform.id,
    platformName: platform.name,
    startedAt,
    status: 'running',
    currentStepIndex: 0,
    steps: platform.steps.map((item, index) => {
      return {
        name: item.name,
        uniqueKey: item.uniqueKey,
        status: index === 0 ? 'running' : 'pending',
      }
    })
  };

  await setCrawlTaskState(nextState);

  // Register the controller so the task can be canceled.
  const controller = new AbortController();
  activeCrawlControllers.set(nextState.id, controller);

  // Start the runner without awaiting it. The rejection is logged so it never
  // becomes an unhandled promise rejection.
  void runCrawlSteps(nextState.id, windowInfo.tabId, platform.steps, controller.signal)
    .catch((error: unknown) => {
      console.error(`[Background] crawl runner ${nextState.id} failed:`, error);
    })
    .finally(() => {
      activeCrawlControllers.delete(nextState.id);
    });

  return nextState
}
|
||||
|
||||
/**
 * Step executor: for each platform step, navigates the crawl tab, waits for
 * the page, asks the content script to scrape, and records progress.
 *
 * The run ends silently when the signal aborts, when the page never finishes
 * loading, or when the stored task is cleared, replaced, or ended while
 * paused. A step whose scrape fails is retried a bounded number of times and
 * then marked failed. An unexpected exception also marks the task failed.
 *
 * @param taskId Id of the task whose stored state is updated.
 * @param tabId Tab that is navigated and scraped.
 * @param steps Platform step configuration, executed in order.
 * @param signal Abort signal used to cancel the run.
 */
async function runCrawlSteps(taskId: string, tabId: number, steps: PlatformStepConfig[], signal: AbortSignal) {
  // Consecutive failed scrapes of one step before the task is given up.
  const maxScrapeAttempts = 3;
  const retryDelayMs = 2000;
  const pausePollMs = 1000;

  // Marks one step and the task as a whole as failed.
  const markStepFailed = async (index: number, message: string): Promise<void> => {
    await updateCrawlTaskState(taskId, s => ({
      ...s,
      status: 'failed',
      currentStepIndex: index,
      steps: s.steps.map((item, idx) =>
        idx === index ? {...item, status: 'failed', message} : item
      )
    }));
  };

  // Tracked so the catch block knows which step was in flight.
  let activeIndex = 0;

  try {
    for (let i = 0; i < steps.length; i += 1) {
      const step = steps[i];
      activeIndex = i;

      // Entering a new step: publish its index and mark it running.
      await updateCrawlTaskState(taskId, s => ({
        ...s,
        currentStepIndex: i,
        steps: s.steps.map((stepItem, idx) => ({
          ...stepItem,
          status: idx === i ? 'running' : stepItem.status
        }))
      }));

      let failedAttempts = 0;

      while (true) {
        if (signal.aborted) return;

        // 1. Navigate and wait for the page to load.
        await chrome.tabs.update(tabId, {url: step.url, active: true});
        const loaded = await waitForTabLoaded(tabId, signal);
        if (!loaded) return;

        // 2. Detect interrupts / scrape.
        // NOTE(review): typed `any` because the helper's reply type is not visible here.
        const res: any = await scrapeStepInContent(tabId, step, signal);
        if (signal.aborted) return;

        // 3. Interrupt (captcha etc.): pause until the task is resumed.
        if (res.interrupt) {
          await updateCrawlTaskState(taskId, s => ({...s, status: 'paused', pause: res.interrupt}));

          while (true) {
            if (signal.aborted) return;

            const current = await getCrawlTaskState();
            // Stop when the task was cleared or replaced by another one.
            if (!current || current.id !== taskId) return;
            if (current.status === 'running') break;
            // Any other non-paused status means the task has ended.
            if (current.status !== 'paused') return;

            if (!(await sleep(pausePollMs, signal))) return;
          }

          failedAttempts = 0;
          continue; // Resumed: reload the page and redo this step.
        }

        // 4. Success: store the result and move on to the next step.
        if (res.ok) {
          await updateCrawlTaskState(taskId, s => ({
            ...s,
            steps: s.steps.map((item, idx) =>
              idx === i ? {...item, status: 'success', result: res.data} : item
            )
          }));
          break;
        }

        // 5. Failure: retry a limited number of times, then give up.
        failedAttempts += 1;
        if (failedAttempts >= maxScrapeAttempts) {
          await markStepFailed(i, typeof res.error === 'string' ? res.error : '页面抓取失败');
          return;
        }

        if (!(await sleep(retryDelayMs, signal))) return;
      }
    }

    // All steps finished: mark the task completed.
    await updateCrawlTaskState(taskId, s => ({...s, status: 'completed'}));
  } catch (error: unknown) {
    // A cancellation may surface as an exception; that is not a failure.
    if (signal.aborted) return;

    console.error('[crawl] 执行失败', error);

    try {
      await markStepFailed(activeIndex, error instanceof Error ? error.message : '爬取执行失败');
    } catch (updateError: unknown) {
      console.error('[crawl] 执行失败', updateError);
    }
  }
}
|
||||
|
||||
|
||||
/**
 * Cancels the current crawl task and tries to close the window it runs in.
 *
 * Does nothing when no task state is stored. Otherwise the task's abort
 * controller (if one is registered) is triggered, the stored state is cleared,
 * and the crawl window is closed on a best-effort basis.
 */
export async function cancelCrawl(): Promise<void> {
    const state = await getCrawlTaskState();
    if (!state) return;

    // Fire the abort signal right away so the running executor stops itself.
    const controller = activeCrawlControllers.get(state.id);
    if (controller) {
        controller.abort();
        activeCrawlControllers.delete(state.id);
    }

    // Clear the persisted task state.
    await clearCrawlTaskState();

    // Close the crawl window; a failure (e.g. window already gone) is ignored.
    if (state.windowId) {
        void chrome.windows.remove(state.windowId).catch(() => {
        });
    }
}
|
||||
|
||||
171
src/background/task/helper.ts
Normal file
171
src/background/task/helper.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
import {CrawlPauseInfo, PlatformStepConfig} from "@/types";
|
||||
import {DomScrapeResult} from "@/background/domScraper";
|
||||
|
||||
/**
 * Opens a dedicated popup-type window containing a single tab and asks the
 * browser to draw the user's attention to it.
 *
 * @param url address to load in the new window
 * @returns the IDs of the created window and of its first tab
 * @throws Error when the window cannot be created, or when the created window
 *         has no usable window/tab ID
 */
export async function openSingleTabWindow(url: string): Promise<{ windowId: number; tabId: number }> {
    const win = await chrome.windows.create({
        url,
        type: 'popup',
        width: 1260,
        height: 900,
        focused: true // focused on creation so the window pops up in front
    });

    const windowId = win?.id;
    const tabId = win?.tabs?.[0]?.id;
    if (!windowId || !tabId) {
        throw new Error('窗口初始化失败');
    }

    // Flash the window (e.g. so the user notices a login or captcha prompt).
    // Not awaited; the catch keeps an already-closed window from surfacing an error.
    void chrome.windows.update(windowId, {drawAttention: true}).catch(() => {});

    // Both IDs are returned for the subsequent crawl logic.
    return {windowId, tabId};
}
|
||||
|
||||
|
||||
/**
 * Waits until the given tab reports that it finished loading.
 *
 * @param tabId     ID of the tab to watch
 * @param signal    abort signal; aborting resolves the promise with `false`
 * @param timeoutMs upper bound for the wait in milliseconds (default 15000)
 * @returns `true` once the tab emits an update with status `complete`, or when
 *          the timeout elapses first (the caller proceeds either way);
 *          `false` when the signal is, or becomes, aborted
 */
export function waitForTabLoaded(tabId: number, signal: AbortSignal, timeoutMs = 15000): Promise<boolean> {
    return new Promise((resolve) => {
        if (signal.aborted) {
            resolve(false);
            return;
        }

        // Settles the promise exactly once and releases timer and listeners.
        const finish = (result: boolean): void => {
            globalThis.clearTimeout(timeout);
            chrome.tabs.onUpdated.removeListener(handleUpdated);
            signal.removeEventListener('abort', handleAbort);
            resolve(result);
        };

        const handleAbort = (): void => finish(false);

        const handleUpdated = (updatedTabId: number, changeInfo: { status?: string }): void => {
            if (updatedTabId === tabId && changeInfo.status === 'complete') {
                finish(true);
            }
        };

        // On timeout the wait is given up but still reported as loaded.
        const timeout = globalThis.setTimeout(() => finish(true), timeoutMs);

        chrome.tabs.onUpdated.addListener(handleUpdated);
        signal.addEventListener('abort', handleAbort, {once: true});
    });
}
|
||||
|
||||
|
||||
/**
 * Reply shape of the content script for a `SCRAPE_STEP` request.
 */
interface PageRunnerResponse {
    /** Whether the scrape succeeded. */
    ok: boolean;
    /** Scraped data, present on success. */
    data?: DomScrapeResult | null;
    /** Set when the page hit a blocker (e.g. login box, captcha). */
    interrupt?: CrawlPauseInfo;
    /** Failure description. */
    error?: string;
}

/**
 * Asks the content script in the target tab to scrape (or probe) the page,
 * polling every 500 ms for up to 20 seconds until it reports success or an
 * interruption.
 *
 * @param tabId  tab whose content script is queried
 * @param step   step configuration; its `fields` and `checkSelector` are sent along
 * @param signal abort signal
 * @returns the content script's reply when it reports `interrupt` or `ok`;
 *          `{ok: false, error: 'canceled'}` when aborted;
 *          `{ok: false, error: <timeout message>}` when the time budget runs out
 */
export async function scrapeStepInContent(tabId: number, step: PlatformStepConfig, signal: AbortSignal): Promise<PageRunnerResponse> {
    const TIMEOUT = 20000; // wait at most 20 seconds overall
    const startTime = Date.now();

    while (Date.now() - startTime < TIMEOUT) {
        if (signal.aborted) return {ok: false, error: 'canceled'};

        try {
            // The reply is undefined when no listener answers.
            const res: PageRunnerResponse | undefined = await chrome.tabs.sendMessage(tabId, {
                action: 'SCRAPE_STEP',
                payload: {
                    fields: step.fields, // which fields to scrape
                    checkSelector: step.checkSelector // selector used to detect a blocker
                }
            });

            // Case 1: blocker detected (login box, captcha) -> `interrupt` is set.
            // Case 2: scrape succeeded -> `ok` is true.
            if (res && (res.interrupt || res.ok)) {
                return res;
            }

            // Case 3: not ok and no interrupt -> page not rendered yet, keep polling.
        } catch (err: unknown) {
            // "Receiving end does not exist" means the content script has not been
            // loaded yet. That is expected, so stay quiet and retry next round.
            // Chrome capitalises the phrase, hence the case-insensitive match.
            const message = err instanceof Error ? err.message : String(err);
            if (!message.toLowerCase().includes('receiving end does not exist')) {
                console.warn('通信异常:', message);
            }
        }

        // Wait 500 ms before asking again; an abort during the wait is a cancel.
        const canContinue = await sleep(500, signal);
        if (!canContinue) return {ok: false, error: 'canceled'};
    }

    return {ok: false, error: '页面响应超时,可能需要刷新'};
}
|
||||
|
||||
/**
 * Waits for the given number of milliseconds, ending early if aborted.
 *
 * @param ms     delay in milliseconds
 * @param signal optional abort signal
 * @returns `true` when the full delay elapsed, `false` when the signal was
 *          already aborted or was aborted during the wait
 */
export function sleep(ms: number, signal?: AbortSignal): Promise<boolean> {
    return new Promise((resolve) => {
        // Already aborted: report the interruption without starting a timer.
        if (signal?.aborted) {
            resolve(false);
            return;
        }

        // Abort during the wait: cancel the timer and report the interruption.
        const onAbort = (): void => {
            clearTimeout(timer);
            resolve(false);
        };

        // Normal completion: drop the abort listener and report success.
        const timer = setTimeout(() => {
            signal?.removeEventListener('abort', onAbort);
            resolve(true);
        }, ms);

        // `once` removes the listener automatically after it fires.
        signal?.addEventListener('abort', onAbort, {once: true});
    });
}
|
||||
54
src/background/task/taskState.ts
Normal file
54
src/background/task/taskState.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
import type {CrawlTaskState} from '@/types';
|
||||
import {sendTabMessage} from "@/shared/tab";
|
||||
|
||||
/** Key under which the crawl task state is kept in `chrome.storage.local`. */
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
||||
|
||||
/**
 * Reads the current crawl task state from local extension storage.
 *
 * @returns the stored state object, or `null` when nothing is stored or the
 *          stored value is not a plain object
 */
export async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
    const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
    const state: unknown = result[CRAWL_TASK_STORAGE_KEY];

    // Only a non-null, non-array object can be a task state. Anything else
    // (missing key, primitives, arrays) is treated as "no state".
    if (typeof state !== 'object' || state === null || Array.isArray(state)) {
        return null;
    }
    return state as CrawlTaskState;
}
|
||||
|
||||
/**
 * Persists the crawl task state and pushes it to the task's tab.
 *
 * @param state new state to store; when it carries a `tabId`, the state is
 *              also sent to that tab as a `CRAWL_STATE_UPDATE` message
 */
export async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
    // Persist to local extension storage.
    await chrome.storage.local.set({[CRAWL_TASK_STORAGE_KEY]: state});

    // Without a tab there is nobody to notify.
    if (!state.tabId) return;

    // Forward the updated state to the tab running the task.
    sendTabMessage(state.tabId, 'CRAWL_STATE_UPDATE', state);
}
|
||||
|
||||
/**
 * Removes the stored crawl task state from local extension storage.
 */
export async function clearCrawlTaskState(): Promise<void> {
    await chrome.storage.local.remove(CRAWL_TASK_STORAGE_KEY);
}
|
||||
|
||||
/**
 * Applies a partial update to the stored crawl task state.
 *
 * The update is skipped when no state is stored, when the stored state belongs
 * to a different task, or when the task's status is `canceled`.
 *
 * NOTE(review): the read and the write are separate awaited storage calls, so
 * overlapping invocations are not serialized by this function.
 *
 * @param taskId  ID the stored state must match
 * @param updater callback that receives the current state and returns the next one
 */
export async function updateCrawlTaskState(
    taskId: string,
    updater: (state: CrawlTaskState) => CrawlTaskState,
): Promise<void> {
    const state = await getCrawlTaskState();

    // Guard: state must exist, belong to this task, and not be canceled.
    if (!state || state.id !== taskId || state.status === 'canceled') {
        return;
    }

    // Compute the next state and persist it.
    await setCrawlTaskState(updater(state));
}
|
||||
@@ -1,54 +1,5 @@
|
||||
import type { CrawlTaskState } from '@/types';
|
||||
|
||||
/** Background message that starts a crawl task. */
export interface StartCrawlCommand {
    /** Asks the background to create the crawl window and initialise the task state. */
    action: 'START_CRAWL';
    /** Parameters needed to start the crawl. */
    payload: {
        /** ID of the platform to crawl; matches a platform config in config/platforms.ts. */
        platformId: string;
    };
}

/** Background message that fetches the current crawl task state. */
export interface GetCrawlStateCommand {
    /** Asks the background to return the current task snapshot. */
    action: 'GET_CRAWL_STATE';
}

/** Background message that cancels the current crawl task. */
export interface CancelCrawlCommand {
    /** Asks the background to mark the task as canceled and close the crawl window. */
    action: 'CANCEL_CRAWL';
}

/** Background message that resumes the currently paused crawl task. */
export interface ResumeCrawlCommand {
    /** The user has dealt with the login/captcha; the background may retry the current step. */
    action: 'RESUME_CRAWL';
}

/** Background message that cancels the automatic window close in a terminal state (keeps the window open). */
export interface CancelAutocloseCommand {
    /** The user chose "keep open" in the overlay; stops the background from closing the crawl window. */
    action: 'CANCEL_AUTOCLOSE';
}

/** Background message that clears the current crawl task snapshot (used by the popup's Dismiss/Close). */
export interface DismissCrawlCommand {
    /** Clears crawlTaskState so the popup returns to idle. */
    action: 'DISMISS_CRAWL';
}

/** Every message type the popup or a content script can send to the background. */
export type BackgroundCommand =
    | StartCrawlCommand
    | GetCrawlStateCommand
    | CancelCrawlCommand
    | ResumeCrawlCommand
    | CancelAutocloseCommand
    | DismissCrawlCommand;
|
||||
|
||||
// background 统一响应结构。
|
||||
export interface BackgroundResponse<T = unknown> {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
<script setup lang="ts">
|
||||
import { onBeforeUnmount} from 'vue';
|
||||
import {platformConfigs} from '@/config/platforms';
|
||||
import {formatSeconds} from '@/shared/time_format';
|
||||
import {useLogin} from './hook/use-login';
|
||||
@@ -17,6 +16,8 @@ const {
|
||||
handleCancelCrawl,
|
||||
handleResumeCrawl,
|
||||
} = useScan();
|
||||
console.log(crawlState.value)
|
||||
|
||||
|
||||
/** 从扩展 manifest 读取版本号(兜底 `0.0.0`)。 */
|
||||
const manifestVersion = (() => {
|
||||
@@ -38,39 +39,16 @@ async function focusCrawlWindow(): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
let cancelConfirmTimer: number | null = null;
|
||||
|
||||
/** “Cancel” 二次确认:第一次点击变成 `Cancel?`,再次点击才真正取消。 */
|
||||
/**
|
||||
* 取消
|
||||
*/
|
||||
function requestCancel(): void {
|
||||
const btn = document.getElementById('popup-cancel-btn') as HTMLButtonElement | null;
|
||||
if (!btn) {
|
||||
void handleCancelCrawl();
|
||||
return;
|
||||
}
|
||||
|
||||
if (btn.dataset.confirming === '1') {
|
||||
btn.dataset.confirming = '0';
|
||||
btn.textContent = 'Cancel';
|
||||
if (cancelConfirmTimer) window.clearTimeout(cancelConfirmTimer);
|
||||
cancelConfirmTimer = null;
|
||||
void handleCancelCrawl();
|
||||
return;
|
||||
}
|
||||
|
||||
btn.dataset.confirming = '1';
|
||||
btn.textContent = 'Cancel?';
|
||||
cancelConfirmTimer = window.setTimeout(() => {
|
||||
btn.dataset.confirming = '0';
|
||||
btn.textContent = 'Cancel';
|
||||
cancelConfirmTimer = null;
|
||||
}, 3000);
|
||||
crawlState.value = null
|
||||
handleCancelCrawl()
|
||||
}
|
||||
|
||||
onBeforeUnmount(() => {
|
||||
/** 组件销毁前清理定时器,避免异步回调触发在已卸载的视图上。 */
|
||||
if (cancelConfirmTimer) window.clearTimeout(cancelConfirmTimer);
|
||||
cancelConfirmTimer = null;
|
||||
});
|
||||
|
||||
</script>
|
||||
|
||||
<template>
|
||||
@@ -93,6 +71,7 @@ onBeforeUnmount(() => {
|
||||
<template v-else>
|
||||
<!-- 未开始-->
|
||||
<template v-if="crawlState == null">
|
||||
|
||||
<label class="platform-select">
|
||||
<span class="account">平台选择</span>
|
||||
<select v-model="selectedPlatformId"
|
||||
|
||||
@@ -5,8 +5,6 @@ import {sendBackgroundMessage} from '@/shared/message';
|
||||
|
||||
/** 用于同步爬取任务状态的 `chrome.storage.local` key。 */
|
||||
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
||||
/** 会持续刷新计时器的任务状态集合。 */
|
||||
const ACTIVE_STATUSES = new Set(['running', 'paused']);
|
||||
|
||||
/**
|
||||
* Popup 内的爬取状态与操作集合。
|
||||
@@ -29,7 +27,9 @@ export const useScan = () => {
|
||||
|
||||
let timer: number | undefined;
|
||||
|
||||
/** 启动新的爬取任务(使用当前选择的平台)。 */
|
||||
/**
|
||||
* 动新的爬取任务
|
||||
*/
|
||||
const handleScan = async () => {
|
||||
if (isScanning.value) {
|
||||
return;
|
||||
@@ -38,8 +38,6 @@ export const useScan = () => {
|
||||
isScanning.value = true;
|
||||
|
||||
try {
|
||||
ensureElapsedTimer();
|
||||
|
||||
const response = await sendBackgroundMessage<CrawlTaskState>({
|
||||
action: 'START_CRAWL',
|
||||
payload: {platformId: selectedPlatformId.value},
|
||||
@@ -94,49 +92,26 @@ export const useScan = () => {
|
||||
await refreshCrawlState();
|
||||
};
|
||||
|
||||
/** 应用任务状态:刷新 elapsed,并根据状态管理计时器的开启/关闭。 */
|
||||
/**
|
||||
* 设置状态值,并设置时间
|
||||
*/
|
||||
function syncCrawlState(state: CrawlTaskState | null) {
|
||||
crawlState.value = state;
|
||||
updateSeconds();
|
||||
startElapsedTimer()
|
||||
}
|
||||
|
||||
if (state && ACTIVE_STATUSES.has(state.status)) {
|
||||
ensureElapsedTimer();
|
||||
/**
|
||||
* 启动定时器
|
||||
*/
|
||||
function startElapsedTimer() {
|
||||
if (crawlState.value === null || timer) {
|
||||
return;
|
||||
}
|
||||
|
||||
clearElapsedTimer();
|
||||
}
|
||||
|
||||
/** 确保 1 秒一次的计时器正在运行。 */
|
||||
function ensureElapsedTimer() {
|
||||
if (timer !== undefined) {
|
||||
return;
|
||||
}
|
||||
|
||||
timer = window.setInterval(() => {
|
||||
updateSeconds();
|
||||
elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value!.startedAt) / 1000));
|
||||
}, 1000);
|
||||
}
|
||||
|
||||
/** 停止计时器(如果存在)。 */
|
||||
function clearElapsedTimer() {
|
||||
if (timer === undefined) {
|
||||
return;
|
||||
}
|
||||
|
||||
window.clearInterval(timer);
|
||||
timer = undefined;
|
||||
}
|
||||
|
||||
/** 根据任务 `startedAt` 更新时间(秒)。 */
|
||||
function updateSeconds() {
|
||||
if (!crawlState.value) {
|
||||
elapsedSeconds.value = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000));
|
||||
}
|
||||
|
||||
/** 从 background 拉取最新任务状态。 */
|
||||
async function refreshCrawlState() {
|
||||
@@ -173,7 +148,7 @@ export const useScan = () => {
|
||||
|
||||
onUnmounted(() => {
|
||||
/** 清理计时器 + 取消订阅 storage 事件。 */
|
||||
clearElapsedTimer();
|
||||
clearInterval(timer);
|
||||
|
||||
if (typeof chrome !== 'undefined' && chrome.storage?.onChanged) {
|
||||
chrome.storage.onChanged.removeListener(handleStorageChanged);
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
export type MessageAction =
    /** Fetch the state of the current crawl task. */
    | 'GET_CRAWL_STATE'

    /** Start a new crawl task. */
    | 'START_CRAWL'

    /** Cancel and stop the current crawl task entirely. */
    | 'CANCEL_CRAWL'

    /** Resume a crawl task that was paused or stopped by an interruption. */
    | 'RESUME_CRAWL'

    /** NOTE(review): undocumented in this file; confirm this action is still handled. */
    | 'CANCEL_AUTOCLOSE'

    /** Dismiss the current task's UI prompt or notice (typically cleaning up the UI after the task ended). */
    | 'DISMISS_CRAWL';
|
||||
|
||||
interface BackgroundMessage<T = unknown> {
|
||||
@@ -18,12 +27,15 @@ interface BackgroundResponse<T = unknown> {
|
||||
}
|
||||
|
||||
/**
 * Sends a command to the background service worker.
 *
 * When the `chrome.runtime.sendMessage` API is not available (for example when
 * the code runs outside the extension), no message is sent and a placeholder
 * success response without data is returned instead.
 *
 * @param data message to send
 * @returns the background's response, or `{ok: true, data: null}` when the
 *          messaging API is unavailable
 */
export function sendBackgroundMessage<T>(data: BackgroundMessage): Promise<BackgroundResponse<T>> {
    if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) {
        return Promise.resolve({ok: true, data: null});
    }

    return chrome.runtime.sendMessage(data);
}
|
||||
|
||||
|
||||
//接受
|
||||
34
src/shared/tab.ts
Normal file
34
src/shared/tab.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
 * Actions the background can send to a web page (tab).
 */
export type TabAction =
    /** The task state changed (progress, status change, ...). */
    | 'CRAWL_STATE_UPDATE'
    /** The task ran into an error. */
    | 'CRAWL_ERROR'
    /** The task finished. */
    | 'CRAWL_COMPLETED';
|
||||
|
||||
/**
 * Shape of a message sent from the background to a web page (tab).
 */
interface TabMessage<T = unknown> {
    /** What the message is about. */
    action: TabAction;
    /** Optional data accompanying the action. */
    payload?: T;
}
|
||||
|
||||
/**
 * Sends a message to a specific tab (called from the background).
 *
 * Fire-and-forget: nothing is returned, and a delivery failure is only logged
 * as a warning. Does nothing when `chrome.tabs.sendMessage` is unavailable.
 *
 * @param tabId   ID of the receiving tab
 * @param action  action carried by the message
 * @param payload optional data sent along with the action
 */
export function sendTabMessage<T>(tabId: number, action: TabAction, payload?: T): void {
    if (typeof chrome === 'undefined' || !chrome.tabs?.sendMessage) {
        return;
    }

    const message: TabMessage<T> = {action, payload};

    void chrome.tabs.sendMessage(tabId, message).catch((err: unknown) => {
        // Usually the tab was closed or the page reloaded, which is expected.
        console.warn(`[Message] Failed to send ${action} to tab ${tabId}:`, err);
    });
}
|
||||
Reference in New Issue
Block a user