This commit is contained in:
zhu
2026-04-30 11:17:16 +08:00
parent 08a6a69bd6
commit 7ca9dabaf9
6 changed files with 192 additions and 132 deletions

View File

View File

@@ -2,19 +2,24 @@ import { getPlatformById } from '@/config/platforms';
import type { CrawlProgressStep, CrawlTaskState } from '@/types'; import type { CrawlProgressStep, CrawlTaskState } from '@/types';
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from './types'; import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from './types';
/** Key under which the current crawl task state is stored in chrome.storage.local. */
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState'; const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
/** Initialization hook for when the extension has been installed; it currently only logs, to help debug the lifecycle. */
export async function handleInstalled(): Promise<void> { export async function handleInstalled(): Promise<void> {
console.log('[background] installed'); console.log('[background] installed');
} }
/** Initialization hook for when the browser starts and loads the extension; it currently only logs, to help debug the lifecycle. */
export async function handleStartup(): Promise<void> { export async function handleStartup(): Promise<void> {
console.log('[background] startup'); console.log('[background] startup');
} }
/** 监听窗口关闭事件;如果关闭的是爬取窗口,就把当前任务标记为取消。 */
export async function handleWindowRemoved(windowId: number): Promise<void> { export async function handleWindowRemoved(windowId: number): Promise<void> {
console.log('[background] window removed', windowId); console.log('[background] window removed', windowId);
/** 当前保存的爬取任务状态。 */
const state = await getCrawlTaskState(); const state = await getCrawlTaskState();
if (state?.windowId === windowId && state.status === 'running') { if (state?.windowId === windowId && state.status === 'running') {
@@ -28,6 +33,7 @@ export async function handleWindowRemoved(windowId: number): Promise<void> {
} }
} }
/** 根据 popup/content 发来的 action 分发到对应的后台处理函数。 */
export async function handleBackgroundCommand( export async function handleBackgroundCommand(
message: BackgroundCommand, message: BackgroundCommand,
): Promise<BackgroundResponse | CrawlStateResponse> { ): Promise<BackgroundResponse | CrawlStateResponse> {
@@ -43,14 +49,18 @@ export async function handleBackgroundCommand(
} }
} }
/** 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 */
async function startCrawl(platformId: string): Promise<CrawlStateResponse> { async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
/** 根据平台 ID 找到对应的平台爬取配置。 */
const platform = getPlatformById(platformId); const platform = getPlatformById(platformId);
if (!platform) { if (!platform) {
return { ok: false, error: '平台配置不存在' }; return { ok: false, error: '平台配置不存在' };
} }
/** 当前任务的开始时间戳,用于计算正计时。 */
const startedAt = Date.now(); const startedAt = Date.now();
/** 窗口创建前的初始任务状态,先写入 storage 让所有页面能立即感知爬取开始。 */
const nextState: CrawlTaskState = { const nextState: CrawlTaskState = {
id: `${platform.id}-${startedAt}`, id: `${platform.id}-${startedAt}`,
platformId: platform.id, platformId: platform.id,
@@ -68,11 +78,14 @@ async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
await setCrawlTaskState(nextState); await setCrawlTaskState(nextState);
try { try {
/** background 创建出来的目标平台窗口信息。 */
const windowInfo = await createCrawlWindow(platform.baseUrl); const windowInfo = await createCrawlWindow(platform.baseUrl);
/** 补充 windowId 后的任务状态,后续可用于取消或监听窗口关闭。 */
const stateWithWindow = { ...nextState, windowId: windowInfo.id }; const stateWithWindow = { ...nextState, windowId: windowInfo.id };
await setCrawlTaskState(stateWithWindow); await setCrawlTaskState(stateWithWindow);
return { ok: true, data: stateWithWindow }; return { ok: true, data: stateWithWindow };
} catch (error: unknown) { } catch (error: unknown) {
/** 窗口创建失败时写入的失败状态,供 popup/content 显示错误进度。 */
const failedState: CrawlTaskState = { const failedState: CrawlTaskState = {
...nextState, ...nextState,
status: 'failed', status: 'failed',
@@ -85,13 +98,16 @@ async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
} }
} }
/** 取消当前爬取任务,并尝试关闭正在爬取的平台窗口。 */
async function cancelCrawl(): Promise<CrawlStateResponse> { async function cancelCrawl(): Promise<CrawlStateResponse> {
/** 当前保存的爬取任务状态。 */
const state = await getCrawlTaskState(); const state = await getCrawlTaskState();
if (!state) { if (!state) {
return { ok: true, data: null }; return { ok: true, data: null };
} }
/** 用户取消后的任务状态,当前执行步骤会显示为失败并附带取消原因。 */
const canceledState: CrawlTaskState = { const canceledState: CrawlTaskState = {
...state, ...state,
status: 'canceled', status: 'canceled',
@@ -109,16 +125,21 @@ async function cancelCrawl(): Promise<CrawlStateResponse> {
return { ok: true, data: canceledState }; return { ok: true, data: canceledState };
} }
/** Reads the current crawl task state from chrome.storage.local; resolves to null when the stored value does not pass isCrawlTaskState. */
async function getCrawlTaskState(): Promise<CrawlTaskState | null> { async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
/** Raw key/value object returned by chrome.storage.local. */
const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY); const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
/** Candidate task state; it must pass the structural check before it can be used. */
const state = result[CRAWL_TASK_STORAGE_KEY]; const state = result[CRAWL_TASK_STORAGE_KEY];
return isCrawlTaskState(state) ? state : null; return isCrawlTaskState(state) ? state : null;
} }
/** Writes the latest crawl task state to chrome.storage.local so the popup and the content script can read it. */
async function setCrawlTaskState(state: CrawlTaskState): Promise<void> { async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state }); await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state });
} }
/** 打开一个普通浏览器窗口承载目标平台页面。 */
function createCrawlWindow(url: string): Promise<chrome.windows.Window> { function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
chrome.windows.create( chrome.windows.create(
@@ -130,6 +151,7 @@ function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
height: 900, height: 900,
}, },
(windowInfo) => { (windowInfo) => {
/** Chrome 扩展 API 回调中的运行时错误。 */
const runtimeError = chrome.runtime.lastError; const runtimeError = chrome.runtime.lastError;
if (runtimeError) { if (runtimeError) {
@@ -148,6 +170,7 @@ function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
}); });
} }
/** 根据窗口 ID 关闭爬取窗口;关闭失败时不阻塞取消状态写入。 */
function removeWindow(windowId: number): Promise<void> { function removeWindow(windowId: number): Promise<void> {
return new Promise((resolve) => { return new Promise((resolve) => {
chrome.windows.remove(windowId, () => { chrome.windows.remove(windowId, () => {
@@ -156,6 +179,7 @@ function removeWindow(windowId: number): Promise<void> {
}); });
} }
/**
 * Structural type guard for a value read back from storage.
 *
 * Checking only that the `id` and `steps` keys exist lets corrupted or stale
 * payloads through (for example `{ id: 1, steps: 'x' }`), so the guard also
 * verifies the value kinds of both fields.
 *
 * NOTE(review): assumes `CrawlTaskState.steps` is persisted as an array and
 * `id` as a string — confirm against the declaration in `@/types`.
 *
 * @param value - Untrusted value, typically taken from chrome.storage.local.
 * @returns `true` when `value` is a non-null object with a string `id` and an array `steps`.
 */
function isCrawlTaskState(value: unknown): value is CrawlTaskState {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  // Narrowed by the runtime check above; the fields themselves stay `unknown`.
  const candidate = value as Record<string, unknown>;
  return typeof candidate.id === 'string' && Array.isArray(candidate.steps);
}

View File

@@ -1,26 +1,40 @@
import type { CrawlTaskState } from '@/types'; import type { CrawlTaskState } from '@/types';
/** Background message that starts a crawl task. */
export interface StartCrawlCommand { export interface StartCrawlCommand {
/** Message action type: asks the background script to create the crawl window and initialize the task state. */
action: 'START_CRAWL'; action: 'START_CRAWL';
/** Parameters required to start the crawl. */
payload: { payload: {
/** ID of the platform to crawl; corresponds to a platform entry in config/platforms.ts. */
platformId: string; platformId: string;
}; };
} }
/** Background message that requests the current crawl task state. */
export interface GetCrawlStateCommand { export interface GetCrawlStateCommand {
/** Message action type: asks the background script to return a snapshot of the current task. */
action: 'GET_CRAWL_STATE'; action: 'GET_CRAWL_STATE';
} }
/** Background message that cancels the current crawl task. */
export interface CancelCrawlCommand { export interface CancelCrawlCommand {
/** Message action type: asks the background script to mark the task as canceled and close the crawl window. */
action: 'CANCEL_CRAWL'; action: 'CANCEL_CRAWL';
} }
/** Every message type the popup or content script can send to the background script. */
export type BackgroundCommand = StartCrawlCommand | GetCrawlStateCommand | CancelCrawlCommand; export type BackgroundCommand = StartCrawlCommand | GetCrawlStateCommand | CancelCrawlCommand;
/** Uniform response envelope returned by the background script. */
export interface BackgroundResponse<T = unknown> { export interface BackgroundResponse<T = unknown> {
/** Whether the request was handled successfully. */
ok: boolean; ok: boolean;
/** Business data returned on success or partial failure. */
data?: T; data?: T;
/** Error text returned when the request failed. */
error?: string; error?: string;
} }
/** Response shape returned after reading or changing the crawl task. */
export type CrawlStateResponse = BackgroundResponse<CrawlTaskState | null>; export type CrawlStateResponse = BackgroundResponse<CrawlTaskState | null>;

View File

@@ -2,218 +2,234 @@
import { computed, onMounted, onUnmounted, ref } from 'vue'; import { computed, onMounted, onUnmounted, ref } from 'vue';
import type { CrawlTaskState } from '@/types'; import type { CrawlTaskState } from '@/types';
/** Snapshot of the crawl task currently held by the background script; decides whether the bottom-right floating widget is shown. */
const crawlState = ref<CrawlTaskState | null>(null); const crawlState = ref<CrawlTaskState | null>(null);
/** Number of seconds the current crawl task has been running; rendered on the page as mm:ss. */
const elapsedSeconds = ref(0); const elapsedSeconds = ref(0);
/** Whether the bottom-right timeline panel is expanded. */
const isPanelOpen = ref(false); const isPanelOpen = ref(false);
/** ID of the interval timer that polls the background crawl state and refreshes the elapsed time. */
let timer: number | undefined; let timer: number | undefined;
/** The timer button is shown in the bottom-right corner of the page only while the task status is 'running'. */
const isVisible = computed(() => crawlState.value?.status === 'running'); const isVisible = computed(() => crawlState.value?.status === 'running');
/** Once the content script is mounted, sync the state immediately, then refresh the elapsed time and task progress every second. */
onMounted(() => { onMounted(() => {
void refreshCrawlState();
timer = window.setInterval(() => {
updateElapsedSeconds();
void refreshCrawlState(); void refreshCrawlState();
}, 1000); timer = window.setInterval(() => {
updateElapsedSeconds();
void refreshCrawlState();
}, 1000);
}); });
/** Clear the interval timer when the content script is unmounted so no polling is left running on the page. */
onUnmounted(() => { onUnmounted(() => {
if (timer) { if (timer) {
window.clearInterval(timer); window.clearInterval(timer);
} }
}); });
/** Fetches the latest crawl task state from the background script and collapses the panel automatically once the task is no longer running. */
async function refreshCrawlState() { async function refreshCrawlState() {
const response = await sendBackgroundMessage<CrawlTaskState | null>({ action: 'GET_CRAWL_STATE' }); /** Response from the background script carrying the current crawl task state. */
const response = await sendBackgroundMessage<CrawlTaskState | null>({ action: 'GET_CRAWL_STATE' });
if (response.ok) { if (response.ok) {
crawlState.value = response.data ?? null; crawlState.value = response.data ?? null;
updateElapsedSeconds(); updateElapsedSeconds();
if (!isVisible.value) { if (!isVisible.value) {
isPanelOpen.value = false; isPanelOpen.value = false;
}
} }
}
} }
/** Recomputes the elapsed seconds from the task start time; resets to 0 when there is no task state, and never goes below 0. */
function updateElapsedSeconds() { function updateElapsedSeconds() {
if (!crawlState.value) { if (!crawlState.value) {
elapsedSeconds.value = 0; elapsedSeconds.value = 0;
return; return;
} }
elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000)); elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000));
} }
/**
 * Formats a duration in seconds as zero-padded `mm:ss`, shown on the round
 * timer button and in the panel header.
 *
 * Minutes are not wrapped into hours, so 3600 seconds renders as `60:00`.
 * Negative, fractional and non-finite inputs are clamped to a non-negative
 * whole number of seconds so the label can never read `NaN:NaN` or `01:1.5`.
 *
 * @param totalSeconds - Elapsed time in seconds.
 * @returns The duration as `mm:ss`.
 */
function formatElapsed(totalSeconds: number): string {
  const wholeSeconds = Number.isFinite(totalSeconds) ? Math.max(0, Math.floor(totalSeconds)) : 0;
  /** Minute part of the elapsed time. */
  const minutes = Math.floor(wholeSeconds / 60).toString().padStart(2, '0');
  /** Second part of the elapsed time. */
  const seconds = (wholeSeconds % 60).toString().padStart(2, '0');
  return `${minutes}:${seconds}`;
}
/**
 * Display text for each known step status.
 *
 * A Map is used instead of an object literal so that lookups never fall
 * through to Object.prototype (e.g. a status named 'constructor' or 'toString').
 */
const STEP_STATUS_TEXT: ReadonlyMap<string, string> = new Map([
  ['pending', '等待中'],
  ['running', '爬取中'],
  ['success', '已完成'],
  ['failed', '爬取失败'],
]);

/**
 * Converts a step status value into the text shown in the timeline.
 *
 * @param status - Step status value.
 * @returns The mapped display text, or `status` itself when no mapping exists.
 */
function getStepText(status: string): string {
  return STEP_STATUS_TEXT.get(status) ?? status;
}
/**
 * Sends a message to the background script and always resolves with a
 * response envelope.
 *
 * - Outside an extension environment (no `chrome.runtime.sendMessage`) it
 *   resolves with an empty success response so a plain local page does not throw.
 * - If `sendMessage` rejects, the rejection is converted into
 *   `{ ok: false, error }`. The caller polls once per second with `void`, so a
 *   rejection would otherwise surface as an unhandled rejection on every tick.
 * - If the background script replies with something that is not an envelope
 *   (for example `undefined`), `{ ok: false, error }` is returned so callers
 *   can safely read `response.ok`.
 *
 * @param message - Command object forwarded to the background script.
 * @returns The background response, or a synthesized failure response.
 */
async function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data?: T; error?: string }> {
  if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) {
    return { ok: true, data: null as T };
  }

  try {
    const response: unknown = await chrome.runtime.sendMessage(message);
    if (typeof response !== 'object' || response === null || !('ok' in response)) {
      return { ok: false, error: '后台未返回有效响应' };
    }
    // Shape verified above; the payload type is the caller's contract with the background script.
    return response as { ok: boolean; data?: T; error?: string };
  } catch (error: unknown) {
    return { ok: false, error: error instanceof Error ? error.message : String(error) };
  }
}
</script> </script>
<template> <template>
<div v-if="isVisible && crawlState" class="dianshan-crawl-widget"> <div v-if="isVisible && crawlState" class="dianshan-crawl-widget">
<button class="dianshan-crawl-button" type="button" @click="isPanelOpen = !isPanelOpen"> <button class="dianshan-crawl-button" type="button" @click="isPanelOpen = !isPanelOpen">
{{ formatElapsed(elapsedSeconds) }} {{ formatElapsed(elapsedSeconds) }}
</button> </button>
<section v-if="isPanelOpen" class="dianshan-crawl-panel"> <section v-if="isPanelOpen" class="dianshan-crawl-panel">
<header class="dianshan-crawl-header"> <header class="dianshan-crawl-header">
<div> <div>
<p>{{ crawlState.platformName }}</p> <p>{{ crawlState.platformName }}</p>
<span>已运行 {{ formatElapsed(elapsedSeconds) }}</span> <span>已运行 {{ formatElapsed(elapsedSeconds) }}</span>
</div> </div>
</header> </header>
<ol class="dianshan-crawl-timeline"> <ol class="dianshan-crawl-timeline">
<li v-for="(step, index) in crawlState.steps" :key="step.uniqueKey" :class="`is-${step.status}`"> <li v-for="(step, index) in crawlState.steps" :key="step.uniqueKey" :class="`is-${step.status}`">
<span class="dianshan-crawl-dot"></span> <span class="dianshan-crawl-dot"></span>
<div class="dianshan-crawl-step"> <div class="dianshan-crawl-step">
<strong>{{ index + 1 }}. {{ step.name }}</strong> <strong>{{ index + 1 }}. {{ step.name }}</strong>
<em>{{ getStepText(step.status) }}</em> <em>{{ getStepText(step.status) }}</em>
<small v-if="step.message">{{ step.message }}</small> <small v-if="step.message">{{ step.message }}</small>
</div> </div>
</li> </li>
</ol> </ol>
</section> </section>
</div> </div>
</template> </template>
<style scoped> <style scoped>
.dianshan-crawl-widget { .dianshan-crawl-widget {
position: fixed; position: fixed;
right: 24px; right: 24px;
bottom: 24px; bottom: 24px;
z-index: 2147483647; z-index: 2147483647;
font-family: "Microsoft YaHei", "PingFang SC", Arial, sans-serif; font-family: "Microsoft YaHei", "PingFang SC", Arial, sans-serif;
} }
.dianshan-crawl-button { .dianshan-crawl-button {
width: 64px; width: 64px;
height: 64px; height: 64px;
border: 0; border: 0;
border-radius: 50%; border-radius: 50%;
color: #ffffff; color: #ffffff;
background: #059669; background: #059669;
box-shadow: 0 12px 30px rgba(15, 23, 42, 0.28); box-shadow: 0 12px 30px rgba(15, 23, 42, 0.28);
cursor: pointer; cursor: pointer;
font-size: 14px; font-size: 14px;
font-weight: 700; font-weight: 700;
} }
.dianshan-crawl-panel { .dianshan-crawl-panel {
position: absolute; position: absolute;
right: 0; right: 0;
bottom: 76px; bottom: 76px;
width: 300px; width: 300px;
padding: 16px; padding: 16px;
border: 1px solid #dbe3ea; border: 1px solid #dbe3ea;
border-radius: 8px; border-radius: 8px;
color: #172033; color: #172033;
background: #ffffff; background: #ffffff;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.2); box-shadow: 0 16px 40px rgba(15, 23, 42, 0.2);
} }
.dianshan-crawl-header { .dianshan-crawl-header {
margin-bottom: 14px; margin-bottom: 14px;
} }
.dianshan-crawl-header p { .dianshan-crawl-header p {
margin: 0 0 4px; margin: 0 0 4px;
font-size: 15px; font-size: 15px;
font-weight: 700; font-weight: 700;
} }
.dianshan-crawl-header span { .dianshan-crawl-header span {
color: #64748b; color: #64748b;
font-size: 12px; font-size: 12px;
} }
.dianshan-crawl-timeline { .dianshan-crawl-timeline {
display: grid; display: grid;
gap: 12px; gap: 12px;
margin: 0; margin: 0;
padding: 0; padding: 0;
list-style: none; list-style: none;
} }
.dianshan-crawl-timeline li { .dianshan-crawl-timeline li {
position: relative; position: relative;
padding-left: 18px; padding-left: 18px;
border-left: 2px solid #dbe3ea; border-left: 2px solid #dbe3ea;
} }
.dianshan-crawl-dot { .dianshan-crawl-dot {
position: absolute; position: absolute;
top: 8px; top: 8px;
left: -7px; left: -7px;
width: 12px; width: 12px;
height: 12px; height: 12px;
border: 2px solid #ffffff; border: 2px solid #ffffff;
border-radius: 50%; border-radius: 50%;
background: #cbd5e1; background: #cbd5e1;
} }
.dianshan-crawl-step { .dianshan-crawl-step {
display: grid; display: grid;
gap: 4px; gap: 4px;
padding: 10px; padding: 10px;
border: 1px solid #dbe3ea; border: 1px solid #dbe3ea;
border-radius: 8px; border-radius: 8px;
background: #ffffff; background: #ffffff;
} }
.dianshan-crawl-step strong { .dianshan-crawl-step strong {
font-size: 13px; font-size: 13px;
} }
.dianshan-crawl-step em { .dianshan-crawl-step em {
color: #64748b; color: #64748b;
font-size: 12px; font-size: 12px;
font-style: normal; font-style: normal;
} }
.dianshan-crawl-step small { .dianshan-crawl-step small {
color: #b91c1c; color: #b91c1c;
} }
.is-running .dianshan-crawl-dot, .is-running .dianshan-crawl-dot,
.is-success .dianshan-crawl-dot { .is-success .dianshan-crawl-dot {
background: #10b981; background: #10b981;
} }
.is-running .dianshan-crawl-step, .is-running .dianshan-crawl-step,
.is-success .dianshan-crawl-step { .is-success .dianshan-crawl-step {
border-color: #10b981; border-color: #10b981;
background: #ecfdf5; background: #ecfdf5;
} }
.is-failed .dianshan-crawl-dot { .is-failed .dianshan-crawl-dot {
background: #ef4444; background: #ef4444;
} }
.is-failed .dianshan-crawl-step { .is-failed .dianshan-crawl-step {
border-color: #ef4444; border-color: #ef4444;
background: #fef2f2; background: #fef2f2;
} }
</style> </style>

View File

@@ -3,22 +3,24 @@ import App from './App.vue';
/** Mounts the content-script app into the page; does nothing if the root container already exists. */ /** Mounts the content-script app into the page; does nothing if the root container already exists. */
function mountApp() { function mountApp() {
if (document.getElementById('dianshan-crx-root')) { if (document.getElementById('dianshan-crx-root')) {
return; return;
} }
const container = document.createElement('div'); /** Root container of the content script inside the host page, used to avoid polluting the host page's own structure. */
container.id = 'dianshan-crx-root'; const container = document.createElement('div');
const appRoot = document.createElement('div'); container.id = 'dianshan-crx-root';
/** Node the Vue app is actually mounted on. */
const appRoot = document.createElement('div');
container.appendChild(appRoot); container.appendChild(appRoot);
document.body.appendChild(container); document.body.appendChild(container);
createApp(App).mount(appRoot); createApp(App).mount(appRoot);
} }
/** Mount right away if the document has finished loading; otherwise wait for DOMContentLoaded (listener fires once). */
if (document.readyState === 'loading') { if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', mountApp, { once: true }); document.addEventListener('DOMContentLoaded', mountApp, { once: true });
} else { } else {
mountApp(); mountApp();
} }

View File

@@ -46,3 +46,7 @@ src:.
- 点击圆形正计时时出现一个popup内容如下 - 点击圆形正计时时出现一个popup内容如下
- 以时间轴的形式表示当前爬取进度根据platforms.ts中的steps - 以时间轴的形式表示当前爬取进度根据platforms.ts中的steps
- 同时点击扩展的popup里的内容也变得和上面的时间轴内容一致显示爬取进度隐藏立即爬取等按钮 - 同时点击扩展的popup里的内容也变得和上面的时间轴内容一致显示爬取进度隐藏立即爬取等按钮
3. 前提：1 和 2 都已完成，UI 和交互操作上没有问题。
- 开始爬取网页中的数据：查看 message.js 的内容，把里面的爬取方法都提取出来，放到 background/domScraper.ts 中。
- 基于 2：每次根据 steps 打开一个新网页后，根据它的 fields 数组字段调用 domScraper 中的方法来提取数据，并打印到控制台即可。