1
This commit is contained in:
@@ -8,403 +8,409 @@ const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
|||||||
|
|
||||||
/**
 * Initialization hook for the extension's "installed" lifecycle event.
 *
 * It currently only writes a log line so the lifecycle can be traced while debugging.
 */
export async function handleInstalled(): Promise<void> {
  console.log('[background] installed');
}
|
||||||
|
|
||||||
/**
 * Initialization hook for the browser-startup lifecycle event.
 *
 * It currently only writes a log line so the lifecycle can be traced while debugging.
 */
export async function handleStartup(): Promise<void> {
  console.log('[background] startup');
}
|
||||||
|
|
||||||
/**
 * Handles a browser window being closed.
 *
 * When the closed window is the one recorded on the stored crawl task and that task is
 * still running, the task is persisted as canceled and its current step is marked failed.
 *
 * @param windowId - ID of the window that was removed.
 */
export async function handleWindowRemoved(windowId: number): Promise<void> {
  console.log('[background] window removed', windowId);

  // Crawl task state currently persisted in storage (null when none is stored).
  const state = await getCrawlTaskState();

  if (state?.windowId === windowId && state.status === 'running') {
    await setCrawlTaskState({
      ...state,
      status: 'canceled',
      // Only the step that was in progress gets the failure message.
      steps: state.steps.map((step, index) =>
        index === state.currentStepIndex ? { ...step, status: 'failed', message: '爬取窗口已关闭' } : step,
      ),
    });
  }
}
|
||||||
|
|
||||||
/**
 * Dispatches a command message to the matching background handler based on its `action`.
 *
 * @param message - Command received by the background script.
 * @returns The handler's response; an `{ ok: false }` response for an unrecognized action.
 */
export async function handleBackgroundCommand(
  message: BackgroundCommand,
): Promise<BackgroundResponse | CrawlStateResponse> {
  switch (message.action) {
    case 'START_CRAWL':
      return startCrawl(message.payload.platformId);
    case 'GET_CRAWL_STATE':
      return { ok: true, data: await getCrawlTaskState() };
    case 'CANCEL_CRAWL':
      return cancelCrawl();
    default:
      return { ok: false, error: '未知的后台指令' };
  }
}
|
||||||
|
|
||||||
/**
 * Creates a new crawl task, opens a window on the platform's first step URL, and persists
 * the initial progress state.
 *
 * @param platformId - ID used to look up the platform crawl configuration.
 * @returns `{ ok: true }` with the state (including `windowId`) once the window is open;
 *          `{ ok: false }` when the platform is unknown, has no steps, or the window
 *          could not be created (in that case the failed state is also returned).
 */
async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
  const platform = getPlatformById(platformId);

  if (!platform) {
    return { ok: false, error: '平台配置不存在' };
  }

  const firstStep = platform.steps[0];

  if (!firstStep) {
    return { ok: false, error: '平台未配置爬取步骤' };
  }

  // Start timestamp of the task; also used to build the task ID.
  const startedAt = Date.now();

  // Initial state, written before the window exists so readers of storage see the task at once.
  const nextState: CrawlTaskState = {
    id: `${platform.id}-${startedAt}`,
    platformId: platform.id,
    platformName: platform.name,
    startedAt,
    status: 'running',
    currentStepIndex: 0,
    steps: platform.steps.map<CrawlProgressStep>((step, index) => ({
      name: step.name,
      uniqueKey: step.uniqueKey,
      status: index === 0 ? 'running' : 'pending',
    })),
  };

  await setCrawlTaskState(nextState);

  try {
    const windowInfo = await createCrawlWindow(firstStep.url);

    // Same state plus the window ID, so later code can match window-close events or cancel.
    const stateWithWindow = { ...nextState, windowId: windowInfo.id };

    await setCrawlTaskState(stateWithWindow);

    // Fire-and-forget: the step runner is not awaited, so the response returns immediately.
    void runCrawlSteps(platform, stateWithWindow);

    return { ok: true, data: stateWithWindow };
  } catch (error: unknown) {
    // Persist a failed state so readers of storage can display the error.
    const failedState: CrawlTaskState = {
      ...nextState,
      status: 'failed',
      steps: nextState.steps.map((step, index) =>
        index === 0 ? { ...step, status: 'failed', message: '打开平台窗口失败' } : step,
      ),
    };

    await setCrawlTaskState(failedState);

    return { ok: false, data: failedState, error: error instanceof Error ? error.message : '打开平台窗口失败' };
  }
}
|
||||||
|
|
||||||
/**
 * Runs the platform's steps in order: navigate, wait for the check selector, scrape the
 * fields, and update the persisted progress after each phase.
 *
 * Stops silently when the task is no longer the running task in storage. Any thrown error
 * is logged and recorded as a task failure instead of being rethrown.
 *
 * @param platform - Platform configuration whose `steps` are executed.
 * @param initialState - Task state produced when the crawl started; must carry `windowId`.
 */
async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskState): Promise<void> {
  if (!initialState.windowId) {
    return;
  }

  try {
    // Tab in the crawl window; every navigation and script injection below targets it.
    const tabId = await getWindowActiveTabId(initialState.windowId);

    for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) {
      const step = platform.steps[stepIndex];

      // Re-check before each step so a canceled or replaced task stops here.
      if (!(await isTaskRunning(initialState.id))) {
        return;
      }

      await markStepRunning(initialState.id, stepIndex);
      await openStepPage(tabId, step.url);

      // Whether the step's check selector showed up in the page.
      const isReady = await waitForStepReady(tabId, step);

      if (!isReady) {
        await markStepFailed(initialState.id, stepIndex, '页面关键 DOM 未加载完成');
        await markTaskFailed(initialState.id);
        return;
      }

      // Result of the field scrape executed inside the page.
      const data = await scrapeStepFields(tabId, step);
      console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, data);
      await markStepSuccess(initialState.id, stepIndex);
    }

    await markTaskCompleted(initialState.id);
  } catch (error: unknown) {
    console.error('[crawl] 执行失败', error);
    await markTaskFailed(initialState.id, error instanceof Error ? error.message : '爬取执行失败');
  }
}
|
||||||
|
|
||||||
/**
 * Returns the ID of the active tab in the given window.
 *
 * @param windowId - Window to query.
 * @returns ID of the first active tab reported for that window.
 * @throws Error when no active tab with an ID is found.
 */
async function getWindowActiveTabId(windowId: number): Promise<number> {
  const tabs = await chrome.tabs.query({ windowId, active: true });
  const tab = tabs[0];

  if (!tab?.id) {
    throw new Error('未找到爬取窗口中的标签页');
  }

  return tab.id;
}
|
||||||
|
|
||||||
/**
 * Navigates the tab to a step's URL and waits for the tab load to finish (or time out).
 *
 * @param tabId - Tab to navigate.
 * @param url - URL of the step page.
 */
async function openStepPage(tabId: number, url: string): Promise<void> {
  await chrome.tabs.update(tabId, { url, active: true });
  await waitForTabLoaded(tabId);
}
|
||||||
|
|
||||||
/**
 * Waits until `chrome.tabs.onUpdated` reports `status: 'complete'` for the tab.
 *
 * The promise always resolves (never rejects): if no `complete` update arrives within
 * `timeoutMs`, it resolves anyway so the crawl flow cannot hang forever.
 *
 * @param tabId - Tab to watch.
 * @param timeoutMs - Maximum time to wait, in milliseconds. Defaults to 15000.
 */
function waitForTabLoaded(tabId: number, timeoutMs = 15000): Promise<void> {
  return new Promise((resolve) => {
    // Single cleanup path shared by the timeout and the `complete` event.
    const finish = (): void => {
      globalThis.clearTimeout(timeout);
      chrome.tabs.onUpdated.removeListener(handleUpdated);
      resolve();
    };

    // Listener that reacts only to the watched tab reaching the `complete` status.
    const handleUpdated = (updatedTabId: number, changeInfo: { status?: string }): void => {
      if (updatedTabId === tabId && changeInfo.status === 'complete') {
        finish();
      }
    };

    // Fallback timer for pages that never report `complete`.
    const timeout = globalThis.setTimeout(finish, timeoutMs);

    chrome.tabs.onUpdated.addListener(handleUpdated);
  });
}
|
||||||
|
|
||||||
/**
 * Waits for the step's `checkSelector` to appear, reloading the tab once if the first
 * wait times out.
 *
 * @param tabId - Tab hosting the step page.
 * @param step - Step configuration providing `checkSelector`.
 * @returns `true` if the selector was found on either attempt, otherwise `false`.
 */
async function waitForStepReady(tabId: number, step: PlatformStepConfig): Promise<boolean> {
  if (await waitForSelector(tabId, step.checkSelector, 5000)) {
    return true;
  }

  // First attempt timed out: reload the page and try one more time.
  await chrome.tabs.reload(tabId);
  await waitForTabLoaded(tabId);

  return waitForSelector(tabId, step.checkSelector, 5000);
}
|
||||||
|
|
||||||
/**
 * Polls the tab until an element matching `selector` exists or the time budget runs out.
 *
 * The selector is checked roughly every 500 ms.
 *
 * @param tabId - Tab to inspect.
 * @param selector - CSS selector to look for.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns `true` as soon as the selector matches; `false` after the timeout.
 */
async function waitForSelector(tabId: number, selector: string, timeoutMs: number): Promise<boolean> {
  // Polling start time, used to bound the total wait.
  const startedAt = Date.now();

  while (Date.now() - startedAt < timeoutMs) {
    const exists = await checkSelectorExists(tabId, selector);

    if (exists) {
      return true;
    }

    await sleep(500);
  }

  return false;
}
|
||||||
|
|
||||||
/**
 * Injects a small script into the tab that reports whether `document.querySelector`
 * finds an element for the selector.
 *
 * @param tabId - Tab to run the check in.
 * @param selector - CSS selector to test.
 * @returns `true` when the first injection result is truthy, otherwise `false`.
 */
async function checkSelectorExists(tabId: number, selector: string): Promise<boolean> {
  const results = await chrome.scripting.executeScript({
    target: { tabId },
    func: (targetSelector: string) => Boolean(document.querySelector(targetSelector)),
    args: [selector],
  });

  return Boolean(results[0]?.result);
}
|
||||||
|
|
||||||
/**
 * Injects `scrapeDomFields` into the tab with the step's `fields` and returns what it
 * produced.
 *
 * @param tabId - Tab to scrape.
 * @param step - Step configuration providing `fields`.
 * @returns The first injection result, or `null` when there is none.
 */
async function scrapeStepFields(tabId: number, step: PlatformStepConfig): Promise<DomScrapeResult | null> {
  const results = await chrome.scripting.executeScript({
    target: { tabId },
    func: scrapeDomFields,
    args: [step.fields],
  });

  return results[0]?.result ?? null;
}
|
||||||
|
|
||||||
/**
 * Tells whether the task stored in storage is the given task and is still running.
 *
 * @param taskId - ID of the task to check.
 * @returns `true` only when the stored state has this ID and status `'running'`.
 */
async function isTaskRunning(taskId: string): Promise<boolean> {
  const state = await getCrawlTaskState();
  return state?.id === taskId && state.status === 'running';
}
|
||||||
|
|
||||||
/**
 * Marks one step as running and makes it the task's current step.
 *
 * The task status is set to `'running'`, the target step's message is cleared, and every
 * other step keeps its existing status and message.
 *
 * @param taskId - ID of the task to update.
 * @param stepIndex - Index of the step that is starting.
 */
async function markStepRunning(taskId: string, stepIndex: number): Promise<void> {
  await updateCrawlTaskState(taskId, (state) => ({
    ...state,
    currentStepIndex: stepIndex,
    status: 'running',
    steps: state.steps.map((step, index) => ({
      ...step,
      status: index === stepIndex ? 'running' : step.status,
      message: index === stepIndex ? undefined : step.message,
    })),
  }));
}
|
||||||
|
|
||||||
/**
 * Marks one step as successful and clears its message.
 *
 * @param taskId - ID of the task to update.
 * @param stepIndex - Index of the step that succeeded.
 */
async function markStepSuccess(taskId: string, stepIndex: number): Promise<void> {
  await updateCrawlTaskState(taskId, (state) => ({
    ...state,
    steps: state.steps.map((step, index) =>
      index === stepIndex ? { ...step, status: 'success', message: undefined } : step,
    ),
  }));
}
|
||||||
|
|
||||||
/**
 * Marks one step as failed, records the reason, and makes it the task's current step.
 *
 * @param taskId - ID of the task to update.
 * @param stepIndex - Index of the step that failed.
 * @param message - Failure reason stored on the step.
 */
async function markStepFailed(taskId: string, stepIndex: number, message: string): Promise<void> {
  await updateCrawlTaskState(taskId, (state) => ({
    ...state,
    currentStepIndex: stepIndex,
    steps: state.steps.map((step, index) =>
      index === stepIndex ? { ...step, status: 'failed', message } : step,
    ),
  }));
}
|
||||||
|
|
||||||
/**
 * Marks the whole task as completed.
 *
 * Any step still in the `'running'` status is switched to `'success'`.
 *
 * @param taskId - ID of the task to update.
 */
async function markTaskCompleted(taskId: string): Promise<void> {
  await updateCrawlTaskState(taskId, (state) => ({
    ...state,
    status: 'completed',
    steps: state.steps.map((step) => (step.status === 'running' ? { ...step, status: 'success' } : step)),
  }));
}
|
||||||
|
|
||||||
/**
 * Marks the whole task as failed.
 *
 * The current step is also marked failed with `message`, but only if it is still
 * `'running'`; a step that already has a final status keeps it.
 *
 * @param taskId - ID of the task to update.
 * @param message - Failure reason for the current step. Defaults to '爬取失败'.
 */
async function markTaskFailed(taskId: string, message = '爬取失败'): Promise<void> {
  await updateCrawlTaskState(taskId, (state) => ({
    ...state,
    status: 'failed',
    steps: state.steps.map((step, index) =>
      index === state.currentStepIndex && step.status === 'running' ? { ...step, status: 'failed', message } : step,
    ),
  }));
}
|
||||||
|
|
||||||
/**
 * Reads the stored task state, applies `updater` to it, and writes the result back.
 *
 * Nothing is written when there is no stored state, when the stored task has a different
 * ID, or when it is already canceled, so a canceled or replaced task is not overwritten.
 *
 * @param taskId - ID the stored task must have for the update to apply.
 * @param updater - Function producing the next state from the current one.
 */
async function updateCrawlTaskState(
  taskId: string,
  updater: (state: CrawlTaskState) => CrawlTaskState,
): Promise<void> {
  // Latest state from storage at the time of this update.
  const state = await getCrawlTaskState();

  if (!state || state.id !== taskId || state.status === 'canceled') {
    return;
  }

  await setCrawlTaskState(updater(state));
}
|
||||||
|
|
||||||
/**
 * Returns a promise that resolves after the given delay.
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => {
    globalThis.setTimeout(resolve, ms);
  });
}
|
||||||
|
|
||||||
/**
 * Cancels the stored crawl task and closes its window when one is recorded.
 *
 * @returns `{ ok: true, data: null }` when no task is stored; otherwise `{ ok: true }`
 *          with the canceled state.
 */
async function cancelCrawl(): Promise<CrawlStateResponse> {
  const state = await getCrawlTaskState();

  if (!state) {
    return { ok: true, data: null };
  }

  // Canceled state: the current step is shown as failed with the cancellation reason.
  const canceledState: CrawlTaskState = {
    ...state,
    status: 'canceled',
    steps: state.steps.map((step, index) =>
      index === state.currentStepIndex ? { ...step, status: 'failed', message: '用户已取消' } : step,
    ),
  };

  // Persist the cancellation before closing the window.
  await setCrawlTaskState(canceledState);

  if (state.windowId) {
    await removeWindow(state.windowId);
  }

  return { ok: true, data: canceledState };
}
|
||||||
|
|
||||||
/**
 * Reads the crawl task state from `chrome.storage.local`.
 *
 * @returns The stored state when it passes `isCrawlTaskState`, otherwise `null`.
 */
async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
  const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);

  // Untrusted candidate value; it must pass the structural check before use.
  const state = result[CRAWL_TASK_STORAGE_KEY];

  return isCrawlTaskState(state) ? state : null;
}
|
||||||
|
|
||||||
/**
 * Writes the crawl task state to `chrome.storage.local` under `CRAWL_TASK_STORAGE_KEY`.
 *
 * @param state - State to persist.
 */
async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
  await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state });
}
|
||||||
|
|
||||||
/**
 * Opens a focused 1280x900 popup-type browser window on the given URL.
 *
 * Wraps the callback form of `chrome.windows.create` in a promise.
 *
 * @param url - URL to load in the new window.
 * @returns The created window's info.
 * @throws Error (as a rejection) when Chrome reports `runtime.lastError`, or when the
 *         created window has no ID.
 */
function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
  return new Promise((resolve, reject) => {
    chrome.windows.create(
      {
        url,
        type: 'popup',
        focused: true,
        width: 1280,
        height: 900,
      },
      (windowInfo) => {
        // Callback-style extension APIs report failures through runtime.lastError.
        const runtimeError = chrome.runtime.lastError;

        if (runtimeError) {
          reject(new Error(runtimeError.message));
          return;
        }

        if (!windowInfo?.id) {
          reject(new Error('窗口创建失败'));
          return;
        }

        resolve(windowInfo);
      },
    );
  });
}
|
||||||
|
|
||||||
/**
 * Closes the window with the given ID on a best-effort basis.
 *
 * The promise always resolves, so a failed close never blocks the caller.
 *
 * @param windowId - ID of the window to close.
 */
function removeWindow(windowId: number): Promise<void> {
  return new Promise((resolve) => {
    chrome.windows.remove(windowId, () => {
      // Read lastError so Chrome treats a failure (e.g. the window is already closed) as
      // handled instead of logging "Unchecked runtime.lastError"; the value is ignored.
      void chrome.runtime.lastError;
      resolve();
    });
  });
}
|
||||||
|
|
||||||
/**
 * Structural check that a value read from storage looks like a crawl task state.
 *
 * Requires a non-null object with a string `id` and an array `steps`, so code that
 * compares task IDs or maps over `steps` does not crash on corrupted storage data.
 * Other fields are not validated.
 *
 * @param value - Untrusted value to test.
 * @returns `true` when the value has the minimal expected shape.
 */
function isCrawlTaskState(value: unknown): value is CrawlTaskState {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  const candidate = value as { id?: unknown; steps?: unknown };

  return typeof candidate.id === 'string' && Array.isArray(candidate.steps);
}
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/background/service/crawl.ts","./src/background/service/lifecycle.ts","./src/background/service/state.ts","./src/background/service/tab.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts"],"version":"5.9.3"}
|
{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts"],"version":"5.9.3"}
|
||||||
Reference in New Issue
Block a user