1
This commit is contained in:
@@ -2,6 +2,7 @@ import {getPlatformById} from "@/config/platforms";
|
||||
import {CrawlTaskState, PlatformStepConfig} from "@/types";
|
||||
import {openSingleTabWindow, scrapeStepInContent, sleep, waitForTabLoaded} from "@/background/task/helper";
|
||||
import {clearCrawlTaskState, getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState} from "./taskState";
|
||||
import {sendTabMessage} from "@/shared/tab";
|
||||
|
||||
|
||||
// Abort controllers for crawl executors that are currently registered, keyed by task id.
// Entries are added when an executor is started and removed when it is aborted or finishes.
const activeCrawlControllers = new Map<string, AbortController>();
|
||||
@@ -59,8 +60,9 @@ export async function startCrawl(platformId: string): Promise<any> {
|
||||
/**
|
||||
* 执行器
|
||||
*/
|
||||
async function runCrawlSteps(taskId: string, tabId: number, steps: PlatformStepConfig[], signal: AbortSignal) {
|
||||
for (let i = 0; i < steps.length; i += 1) {
|
||||
async function runCrawlSteps(taskId: string, tabId: number, steps: PlatformStepConfig[], signal: AbortSignal, startIndex = 0) {
|
||||
// 中文备注:startIndex 用于“继续/恢复”场景,从上次没爬完的步骤开始跑。
|
||||
for (let i = startIndex; i < steps.length; i += 1) {
|
||||
const step = steps[i];
|
||||
let shouldRetryStep = true;
|
||||
|
||||
@@ -116,6 +118,10 @@ async function runCrawlSteps(taskId: string, tabId: number, steps: PlatformStepC
|
||||
|
||||
// 【修改 3】全部步骤完成,标记任务结束
|
||||
await updateCrawlTaskState(taskId, s => ({...s, status: 'completed'}));
|
||||
|
||||
// 中文备注:全部爬取完成后,需要把数据发送给网页,然后清空本次任务记录数据、关掉爬取窗口。
|
||||
// 这里由 background 统一做“完成后收尾”,避免 UI 侧各自处理导致状态不同步。
|
||||
await finalizeCompletedTask(taskId, signal);
|
||||
}
|
||||
|
||||
|
||||
@@ -145,3 +151,209 @@ export async function cancelCrawl() {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Handle the crawl window being closed: mark the task as paused and abort the
 * executor that is currently registered for it.
 *
 * "Paused" is not "cancelled": the rest of the stored state is spread through
 * unchanged, so the task can be picked up again by a later resume.
 *
 * Does nothing when there is no stored task, when the task is not `running`,
 * or when the removed window is not the task's window.
 *
 * @param windowId Id of the window that was removed.
 */
export async function pauseCrawlOnWindowRemoved(windowId: number): Promise<void> {
    const state = await getCrawlTaskState();
    if (!state || state.status !== 'running' || state.windowId !== windowId) {
        return;
    }

    // Keeping the executor alive after its window is gone would only produce
    // "tab does not exist" failures, so abort it now; resume starts a new one.
    const controller = activeCrawlControllers.get(state.id);
    if (controller) {
        controller.abort();
        activeCrawlControllers.delete(state.id);
    }

    await updateCrawlTaskState(state.id, (s) => ({
        ...s,
        status: 'paused',
        pause: {
            reason: 'window_closed',
            message: '检测到爬取窗口被关闭。点击“继续”后将重新打开窗口,并从上次进度继续爬取。',
        },
        // The window/tab no longer exist; clear the ids so the UI does not try
        // to focus a stale window.
        windowId: undefined,
        tabId: undefined,
    }));
}
|
||||
|
||||
/**
 * Handle the crawl tab being closed: treated the same way as the crawl window
 * being closed (task becomes `paused` with reason `window_closed`).
 *
 * Exists as a fallback because, per the original author's note, in some cases
 * only the tab-removed event fires.
 *
 * Does nothing when there is no stored task, when the task is not `running`,
 * or when the removed tab is not the task's tab.
 *
 * @param tabId Id of the tab that was removed.
 */
export async function pauseCrawlOnTabRemoved(tabId: number): Promise<void> {
    const state = await getCrawlTaskState();
    if (!state || state.status !== 'running' || state.tabId !== tabId) {
        return;
    }

    // Same pause sequence as the window-closed handler; `windowId` may be
    // unset here, which does not affect pausing.
    const controller = activeCrawlControllers.get(state.id);
    if (controller) {
        controller.abort();
        activeCrawlControllers.delete(state.id);
    }

    await updateCrawlTaskState(state.id, (s) => ({
        ...s,
        status: 'paused',
        pause: {
            reason: 'window_closed',
            message: '检测到爬取页面被关闭。点击“继续”后将重新打开窗口,并从上次进度继续爬取。',
        },
        // Clear the stale ids so nothing tries to address the closed tab/window.
        windowId: undefined,
        tabId: undefined,
    }));
}
|
||||
|
||||
/**
 * Resume the currently paused crawl task.
 *
 * Two resume paths exist:
 * - The pause reason is anything other than `window_closed` and both the
 *   window id and tab id are still recorded (e.g. a login/captcha
 *   interruption): only the stored status is switched back to `running` and
 *   `pause` is cleared. No new executor is started here.
 * - Otherwise (the window was closed): a new window is opened and a new
 *   executor is started from the first step, at or after `currentStepIndex`,
 *   whose status is not `success`.
 *
 * @returns `null` when no task is stored; the unchanged state when the task is
 * not paused, the platform config cannot be found, or the platform has no
 * steps; the re-read state after an in-place resume; otherwise the new
 * `running` state written before the executor is restarted.
 */
export async function resumeCrawl(): Promise<CrawlTaskState | null> {
    const state = await getCrawlTaskState();
    if (!state) return null;

    if (state.status !== 'paused') {
        return state;
    }

    // 1) Interruptions such as login/captcha: the window still exists, so just
    //    flip the status back.
    if (state.pause?.reason !== 'window_closed' && state.windowId && state.tabId) {
        await updateCrawlTaskState(state.id, (s) => ({...s, status: 'running', pause: undefined}));
        return await getCrawlTaskState();
    }

    // 2) Window was closed: reopen a window and continue from the last progress.
    const platform = getPlatformById(state.platformId);
    if (!platform || platform.steps.length === 0) {
        // Without a platform config (or with a config that has no steps) there
        // is nothing to open or run, so the task stays paused.
        return state;
    }

    // Clamp the stored index into the valid range of the platform's steps.
    const resumeIndex = Math.max(0, Math.min(state.currentStepIndex ?? 0, platform.steps.length - 1));

    // If the step at `resumeIndex` already succeeded, the pause happened
    // between two steps: move forward to the first step that is not `success`.
    // NOTE(review): when every remaining step is already `success`, this keeps
    // `resumeIndex`, so that step is executed again — confirm this is intended.
    let startIndex = resumeIndex;
    for (let i = resumeIndex; i < state.steps.length; i += 1) {
        if (state.steps[i]?.status !== 'success') {
            startIndex = i;
            break;
        }
    }

    const openUrl = platform.steps[startIndex]?.url ?? platform.steps[resumeIndex]?.url ?? platform.steps[0].url;
    const windowInfo = await openSingleTabWindow(openUrl);

    const nextState: CrawlTaskState = {
        ...state,
        windowId: windowInfo.windowId,
        tabId: windowInfo.tabId,
        status: 'running',
        pause: undefined,
        currentStepIndex: startIndex,
        steps: state.steps.map((step, idx) => ({
            ...step,
            // Mark only the step about to run as `running`; never overwrite a
            // step that already finished with `success`.
            status: idx === startIndex && step.status !== 'success' ? 'running' : step.status,
        })),
    };

    await setCrawlTaskState(nextState);

    // Restart the executor from `startIndex`.
    const controller = new AbortController();
    activeCrawlControllers.set(nextState.id, controller);
    void runCrawlSteps(nextState.id, nextState.tabId!, platform.steps, controller.signal, startIndex)
        .catch((error: unknown) => {
            // A failure after an abort is presumably the abort itself; only
            // report failures of an executor that was still meant to be running.
            if (!controller.signal.aborted) {
                console.error('[crawl] resumed executor failed', error);
            }
        })
        .finally(() => {
            // The task id is reused across pause/resume cycles, so only remove
            // the entry if it is still this executor's controller; otherwise a
            // late-finishing stale executor would evict a newer controller.
            if (activeCrawlControllers.get(nextState.id) === controller) {
                activeCrawlControllers.delete(nextState.id);
            }
        });

    return nextState;
}
|
||||
|
||||
/**
 * Dismiss the current task: clear the stored task state so the UI can hide its
 * card. If an executor is still registered for the task it is aborted first,
 * and the crawl window is closed if its id is still recorded.
 *
 * When no task is stored, the state is simply cleared.
 */
export async function dismissCrawl(): Promise<void> {
    const state = await getCrawlTaskState();
    if (!state) {
        await clearCrawlTaskState();
        return;
    }

    // With an executor still registered, dismissing acts like cancelling so
    // nothing keeps running in the background.
    const controller = activeCrawlControllers.get(state.id);
    if (controller) {
        controller.abort();
        activeCrawlControllers.delete(state.id);
    }

    await clearCrawlTaskState();

    if (state.windowId) {
        chrome.windows.remove(state.windowId).catch(() => {
            // Best effort: the window may already be gone.
        });
    }
}
|
||||
|
||||
/**
 * Wrap-up after a task completes: send the result to the crawl tab, wait
 * briefly, clear the stored task state, then close the crawl window.
 *
 * Per the original author's notes, external web pages receive the completed
 * state and results through a storage-change broadcast (not visible in this
 * function); the `CRAWL_COMPLETED` tab message is an additional copy for any
 * listener inside the crawled page.
 *
 * Does nothing unless the stored task has id `taskId` and status `completed`.
 *
 * @param taskId Id of the task that just finished.
 * @param signal Abort signal forwarded to the short delay before cleanup.
 */
async function finalizeCompletedTask(taskId: string, signal: AbortSignal): Promise<void> {
    const state = await getCrawlTaskState();
    if (!state || state.id !== taskId) return;
    if (state.status !== 'completed') return;

    // 1) Notify the crawl tab. This is best effort: the tab may be gone or the
    //    page may have no listener, so a rejected send must not surface as an
    //    unhandled rejection or block the cleanup below.
    if (state.tabId) {
        void Promise.resolve(
            sendTabMessage(state.tabId, 'CRAWL_COMPLETED', {
                taskId: state.id,
                platformId: state.platformId,
                platformName: state.platformName,
                startedAt: state.startedAt,
                result: collectStepResults(state),
            }),
        ).catch(() => {
            // Intentionally ignored; see the note above.
        });
    }

    // 2) Give the storage-change broadcast of the completed state a moment to
    //    go out. Kept short so the window does not linger after completion.
    await sleep(300, signal);

    // 3) Clear the task record (the popup is expected to reset itself from the
    //    storage change).
    await clearCrawlTaskState();

    // 4) Close the crawl window.
    if (state.windowId) {
        chrome.windows.remove(state.windowId).catch(() => {
            // Best effort: the window may already be gone.
        });
    }
}
|
||||
|
||||
/**
 * Collect every step's outcome into a single object keyed by the step's
 * `uniqueKey`: `{ [uniqueKey]: { name, status, result, message } }`.
 *
 * Missing `result` / `message` values are normalised to `null`. If two steps
 * share a `uniqueKey`, the later step wins.
 *
 * Per the original author's note, this shape matches what `externalBridge.ts`
 * exposes to web pages — verify there before changing it.
 *
 * @param state Task state whose `steps` are summarised.
 * @returns A plain object mapping each step's `uniqueKey` to its summary.
 */
function collectStepResults(state: CrawlTaskState): Record<string, unknown> {
    const entries = state.steps.map((step) => {
        const summary = {
            name: step.name,
            status: step.status,
            result: step.result ?? null,
            message: step.message ?? null,
        };
        return [step.uniqueKey, summary] as const;
    });
    return Object.fromEntries(entries);
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user