1
This commit is contained in:
0
src/background/domScraper.ts
Normal file
0
src/background/domScraper.ts
Normal file
@@ -2,19 +2,24 @@ import { getPlatformById } from '@/config/platforms';
|
|||||||
import type { CrawlProgressStep, CrawlTaskState } from '@/types';
|
import type { CrawlProgressStep, CrawlTaskState } from '@/types';
|
||||||
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from './types';
|
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from './types';
|
||||||
|
|
||||||
|
/** chrome.storage.local 中保存当前爬取任务状态的键名。 */
|
||||||
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
const CRAWL_TASK_STORAGE_KEY = 'crawlTaskState';
|
||||||
|
|
||||||
|
/** 扩展安装完成时的初始化入口,当前仅保留日志方便调试生命周期。 */
|
||||||
export async function handleInstalled(): Promise<void> {
|
export async function handleInstalled(): Promise<void> {
|
||||||
console.log('[background] installed');
|
console.log('[background] installed');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 浏览器启动并加载扩展时的初始化入口,当前仅保留日志方便调试生命周期。 */
|
||||||
export async function handleStartup(): Promise<void> {
|
export async function handleStartup(): Promise<void> {
|
||||||
console.log('[background] startup');
|
console.log('[background] startup');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 监听窗口关闭事件;如果关闭的是爬取窗口,就把当前任务标记为取消。 */
|
||||||
export async function handleWindowRemoved(windowId: number): Promise<void> {
|
export async function handleWindowRemoved(windowId: number): Promise<void> {
|
||||||
console.log('[background] window removed', windowId);
|
console.log('[background] window removed', windowId);
|
||||||
|
|
||||||
|
/** 当前保存的爬取任务状态。 */
|
||||||
const state = await getCrawlTaskState();
|
const state = await getCrawlTaskState();
|
||||||
|
|
||||||
if (state?.windowId === windowId && state.status === 'running') {
|
if (state?.windowId === windowId && state.status === 'running') {
|
||||||
@@ -28,6 +33,7 @@ export async function handleWindowRemoved(windowId: number): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 根据 popup/content 发来的 action 分发到对应的后台处理函数。 */
|
||||||
export async function handleBackgroundCommand(
|
export async function handleBackgroundCommand(
|
||||||
message: BackgroundCommand,
|
message: BackgroundCommand,
|
||||||
): Promise<BackgroundResponse | CrawlStateResponse> {
|
): Promise<BackgroundResponse | CrawlStateResponse> {
|
||||||
@@ -43,14 +49,18 @@ export async function handleBackgroundCommand(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。 */
|
||||||
async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
|
async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
|
||||||
|
/** 根据平台 ID 找到对应的平台爬取配置。 */
|
||||||
const platform = getPlatformById(platformId);
|
const platform = getPlatformById(platformId);
|
||||||
|
|
||||||
if (!platform) {
|
if (!platform) {
|
||||||
return { ok: false, error: '平台配置不存在' };
|
return { ok: false, error: '平台配置不存在' };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 当前任务的开始时间戳,用于计算正计时。 */
|
||||||
const startedAt = Date.now();
|
const startedAt = Date.now();
|
||||||
|
/** 窗口创建前的初始任务状态,先写入 storage 让所有页面能立即感知爬取开始。 */
|
||||||
const nextState: CrawlTaskState = {
|
const nextState: CrawlTaskState = {
|
||||||
id: `${platform.id}-${startedAt}`,
|
id: `${platform.id}-${startedAt}`,
|
||||||
platformId: platform.id,
|
platformId: platform.id,
|
||||||
@@ -68,11 +78,14 @@ async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
|
|||||||
await setCrawlTaskState(nextState);
|
await setCrawlTaskState(nextState);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
/** background 创建出来的目标平台窗口信息。 */
|
||||||
const windowInfo = await createCrawlWindow(platform.baseUrl);
|
const windowInfo = await createCrawlWindow(platform.baseUrl);
|
||||||
|
/** 补充 windowId 后的任务状态,后续可用于取消或监听窗口关闭。 */
|
||||||
const stateWithWindow = { ...nextState, windowId: windowInfo.id };
|
const stateWithWindow = { ...nextState, windowId: windowInfo.id };
|
||||||
await setCrawlTaskState(stateWithWindow);
|
await setCrawlTaskState(stateWithWindow);
|
||||||
return { ok: true, data: stateWithWindow };
|
return { ok: true, data: stateWithWindow };
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
|
/** 窗口创建失败时写入的失败状态,供 popup/content 显示错误进度。 */
|
||||||
const failedState: CrawlTaskState = {
|
const failedState: CrawlTaskState = {
|
||||||
...nextState,
|
...nextState,
|
||||||
status: 'failed',
|
status: 'failed',
|
||||||
@@ -85,13 +98,16 @@ async function startCrawl(platformId: string): Promise<CrawlStateResponse> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 取消当前爬取任务,并尝试关闭正在爬取的平台窗口。 */
|
||||||
async function cancelCrawl(): Promise<CrawlStateResponse> {
|
async function cancelCrawl(): Promise<CrawlStateResponse> {
|
||||||
|
/** 当前保存的爬取任务状态。 */
|
||||||
const state = await getCrawlTaskState();
|
const state = await getCrawlTaskState();
|
||||||
|
|
||||||
if (!state) {
|
if (!state) {
|
||||||
return { ok: true, data: null };
|
return { ok: true, data: null };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 用户取消后的任务状态,当前执行步骤会显示为失败并附带取消原因。 */
|
||||||
const canceledState: CrawlTaskState = {
|
const canceledState: CrawlTaskState = {
|
||||||
...state,
|
...state,
|
||||||
status: 'canceled',
|
status: 'canceled',
|
||||||
@@ -109,16 +125,21 @@ async function cancelCrawl(): Promise<CrawlStateResponse> {
|
|||||||
return { ok: true, data: canceledState };
|
return { ok: true, data: canceledState };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 从 chrome.storage.local 读取当前爬取任务状态。 */
|
||||||
async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
|
async function getCrawlTaskState(): Promise<CrawlTaskState | null> {
|
||||||
|
/** chrome.storage.local 返回的原始键值对象。 */
|
||||||
const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
|
const result = await chrome.storage.local.get(CRAWL_TASK_STORAGE_KEY);
|
||||||
|
/** 取出的任务状态候选值,需要经过结构校验后才能使用。 */
|
||||||
const state = result[CRAWL_TASK_STORAGE_KEY];
|
const state = result[CRAWL_TASK_STORAGE_KEY];
|
||||||
return isCrawlTaskState(state) ? state : null;
|
return isCrawlTaskState(state) ? state : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 将最新爬取任务状态写入 chrome.storage.local,供 popup 和 content script 同步读取。 */
|
||||||
async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
|
async function setCrawlTaskState(state: CrawlTaskState): Promise<void> {
|
||||||
await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state });
|
await chrome.storage.local.set({ [CRAWL_TASK_STORAGE_KEY]: state });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 打开一个普通浏览器窗口承载目标平台页面。 */
|
||||||
function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
|
function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
chrome.windows.create(
|
chrome.windows.create(
|
||||||
@@ -130,6 +151,7 @@ function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
|
|||||||
height: 900,
|
height: 900,
|
||||||
},
|
},
|
||||||
(windowInfo) => {
|
(windowInfo) => {
|
||||||
|
/** Chrome 扩展 API 回调中的运行时错误。 */
|
||||||
const runtimeError = chrome.runtime.lastError;
|
const runtimeError = chrome.runtime.lastError;
|
||||||
|
|
||||||
if (runtimeError) {
|
if (runtimeError) {
|
||||||
@@ -148,6 +170,7 @@ function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 根据窗口 ID 关闭爬取窗口;关闭失败时不阻塞取消状态写入。 */
|
||||||
function removeWindow(windowId: number): Promise<void> {
|
function removeWindow(windowId: number): Promise<void> {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
chrome.windows.remove(windowId, () => {
|
chrome.windows.remove(windowId, () => {
|
||||||
@@ -156,6 +179,7 @@ function removeWindow(windowId: number): Promise<void> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 粗略判断 storage 中读取到的值是否像一个爬取任务状态对象。 */
|
||||||
function isCrawlTaskState(value: unknown): value is CrawlTaskState {
|
function isCrawlTaskState(value: unknown): value is CrawlTaskState {
|
||||||
return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value;
|
return typeof value === 'object' && value !== null && 'id' in value && 'steps' in value;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,26 +1,40 @@
|
|||||||
import type { CrawlTaskState } from '@/types';
|
import type { CrawlTaskState } from '@/types';
|
||||||
|
|
||||||
|
/** 启动爬取任务的后台消息。 */
|
||||||
export interface StartCrawlCommand {
|
export interface StartCrawlCommand {
|
||||||
|
/** 消息动作类型:请求 background 创建爬取窗口并初始化任务状态。 */
|
||||||
action: 'START_CRAWL';
|
action: 'START_CRAWL';
|
||||||
|
/** 启动爬取所需参数。 */
|
||||||
payload: {
|
payload: {
|
||||||
|
/** 当前要爬取的平台 ID,对应 config/platforms.ts 中的平台配置。 */
|
||||||
platformId: string;
|
platformId: string;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 获取当前爬取任务状态的后台消息。 */
|
||||||
export interface GetCrawlStateCommand {
|
export interface GetCrawlStateCommand {
|
||||||
|
/** 消息动作类型:请求 background 返回当前任务快照。 */
|
||||||
action: 'GET_CRAWL_STATE';
|
action: 'GET_CRAWL_STATE';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 取消当前爬取任务的后台消息。 */
|
||||||
export interface CancelCrawlCommand {
|
export interface CancelCrawlCommand {
|
||||||
|
/** 消息动作类型:请求 background 标记任务取消并关闭爬取窗口。 */
|
||||||
action: 'CANCEL_CRAWL';
|
action: 'CANCEL_CRAWL';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** popup/content script 能发送给 background 的全部消息类型。 */
|
||||||
export type BackgroundCommand = StartCrawlCommand | GetCrawlStateCommand | CancelCrawlCommand;
|
export type BackgroundCommand = StartCrawlCommand | GetCrawlStateCommand | CancelCrawlCommand;
|
||||||
|
|
||||||
|
/** background 统一响应结构。 */
|
||||||
export interface BackgroundResponse<T = unknown> {
|
export interface BackgroundResponse<T = unknown> {
|
||||||
|
/** 当前请求是否处理成功。 */
|
||||||
ok: boolean;
|
ok: boolean;
|
||||||
|
/** 成功或部分失败时返回的业务数据。 */
|
||||||
data?: T;
|
data?: T;
|
||||||
|
/** 请求失败时返回的错误文案。 */
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 获取或变更爬取任务后返回的响应结构。 */
|
||||||
export type CrawlStateResponse = BackgroundResponse<CrawlTaskState | null>;
|
export type CrawlStateResponse = BackgroundResponse<CrawlTaskState | null>;
|
||||||
|
|||||||
@@ -2,13 +2,19 @@
|
|||||||
import { computed, onMounted, onUnmounted, ref } from 'vue';
|
import { computed, onMounted, onUnmounted, ref } from 'vue';
|
||||||
import type { CrawlTaskState } from '@/types';
|
import type { CrawlTaskState } from '@/types';
|
||||||
|
|
||||||
|
/** 当前后台保存的爬取任务快照,用于决定是否展示右下角浮窗。 */
|
||||||
const crawlState = ref<CrawlTaskState | null>(null);
|
const crawlState = ref<CrawlTaskState | null>(null);
|
||||||
|
/** 当前爬取任务已经运行的秒数,页面上会格式化为 mm:ss。 */
|
||||||
const elapsedSeconds = ref(0);
|
const elapsedSeconds = ref(0);
|
||||||
|
/** 控制右下角时间轴面板是否展开。 */
|
||||||
const isPanelOpen = ref(false);
|
const isPanelOpen = ref(false);
|
||||||
|
/** 轮询后台爬取状态和刷新计时器的定时器 ID。 */
|
||||||
let timer: number | undefined;
|
let timer: number | undefined;
|
||||||
|
|
||||||
|
/** 只有任务处于运行中时,才在网页右下角展示计时按钮。 */
|
||||||
const isVisible = computed(() => crawlState.value?.status === 'running');
|
const isVisible = computed(() => crawlState.value?.status === 'running');
|
||||||
|
|
||||||
|
/** 内容脚本挂载后立即同步一次状态,并开始每秒刷新计时和任务进度。 */
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
void refreshCrawlState();
|
void refreshCrawlState();
|
||||||
timer = window.setInterval(() => {
|
timer = window.setInterval(() => {
|
||||||
@@ -17,13 +23,16 @@ onMounted(() => {
|
|||||||
}, 1000);
|
}, 1000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** 内容脚本卸载时清理定时器,避免页面残留轮询。 */
|
||||||
onUnmounted(() => {
|
onUnmounted(() => {
|
||||||
if (timer) {
|
if (timer) {
|
||||||
window.clearInterval(timer);
|
window.clearInterval(timer);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** 从 background 获取最新爬取任务状态,并在任务结束时自动收起面板。 */
|
||||||
async function refreshCrawlState() {
|
async function refreshCrawlState() {
|
||||||
|
/** background 返回的当前爬取任务状态响应。 */
|
||||||
const response = await sendBackgroundMessage<CrawlTaskState | null>({ action: 'GET_CRAWL_STATE' });
|
const response = await sendBackgroundMessage<CrawlTaskState | null>({ action: 'GET_CRAWL_STATE' });
|
||||||
|
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
@@ -36,6 +45,7 @@ async function refreshCrawlState() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 根据任务开始时间实时计算已经运行的秒数。 */
|
||||||
function updateElapsedSeconds() {
|
function updateElapsedSeconds() {
|
||||||
if (!crawlState.value) {
|
if (!crawlState.value) {
|
||||||
elapsedSeconds.value = 0;
|
elapsedSeconds.value = 0;
|
||||||
@@ -45,13 +55,18 @@ function updateElapsedSeconds() {
|
|||||||
elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000));
|
elapsedSeconds.value = Math.max(0, Math.floor((Date.now() - crawlState.value.startedAt) / 1000));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 将秒数格式化为 mm:ss,展示在圆形计时按钮和面板标题里。 */
|
||||||
function formatElapsed(totalSeconds: number): string {
|
function formatElapsed(totalSeconds: number): string {
|
||||||
|
/** 运行时长中的分钟部分。 */
|
||||||
const minutes = Math.floor(totalSeconds / 60).toString().padStart(2, '0');
|
const minutes = Math.floor(totalSeconds / 60).toString().padStart(2, '0');
|
||||||
|
/** 运行时长中的秒数部分。 */
|
||||||
const seconds = (totalSeconds % 60).toString().padStart(2, '0');
|
const seconds = (totalSeconds % 60).toString().padStart(2, '0');
|
||||||
return `${minutes}:${seconds}`;
|
return `${minutes}:${seconds}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 将步骤状态枚举转换成中文展示文案。 */
|
||||||
function getStepText(status: string): string {
|
function getStepText(status: string): string {
|
||||||
|
/** 步骤状态到展示文案的映射表。 */
|
||||||
const textMap: Record<string, string> = {
|
const textMap: Record<string, string> = {
|
||||||
pending: '等待中',
|
pending: '等待中',
|
||||||
running: '爬取中',
|
running: '爬取中',
|
||||||
@@ -62,6 +77,7 @@ function getStepText(status: string): string {
|
|||||||
return textMap[status] ?? status;
|
return textMap[status] ?? status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 发送消息到 background;非扩展环境下返回空成功响应,方便本地页面不报错。 */
|
||||||
function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data?: T; error?: string }> {
|
function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data?: T; error?: string }> {
|
||||||
if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) {
|
if (typeof chrome === 'undefined' || !chrome.runtime?.sendMessage) {
|
||||||
return Promise.resolve({ ok: true, data: null as T });
|
return Promise.resolve({ ok: true, data: null as T });
|
||||||
|
|||||||
@@ -7,8 +7,10 @@ function mountApp() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 内容脚本在宿主页面中的根容器,用于避免污染业务页面结构。 */
|
||||||
const container = document.createElement('div');
|
const container = document.createElement('div');
|
||||||
container.id = 'dianshan-crx-root';
|
container.id = 'dianshan-crx-root';
|
||||||
|
/** Vue 应用实际挂载的节点。 */
|
||||||
const appRoot = document.createElement('div');
|
const appRoot = document.createElement('div');
|
||||||
|
|
||||||
container.appendChild(appRoot);
|
container.appendChild(appRoot);
|
||||||
|
|||||||
4
step.md
4
step.md
@@ -46,3 +46,7 @@ src:.
|
|||||||
- 点击圆形正计时时,出现一个popup,内容如下
|
- 点击圆形正计时时,出现一个popup,内容如下
|
||||||
- 以时间轴的形式,表示当前爬取进度,即:根据platforms.ts中的steps
|
- 以时间轴的形式,表示当前爬取进度,即:根据platforms.ts中的steps
|
||||||
- 同时点击扩展的popup里的内容,也变得和上面的时间轴内容一致,显示爬取进度,隐藏立即爬取等按钮,
|
- 同时点击扩展的popup里的内容,也变得和上面的时间轴内容一致,显示爬取进度,隐藏立即爬取等按钮,
|
||||||
|
|
||||||
|
3.前提:1和2都已完成,ui和交互操作上ok
|
||||||
|
- 开始爬取网页中的数据,查看message.js内容,把里面的爬取方法都提取出来放到background/domScraper.ts中去,
|
||||||
|
- 基于2,每次根据steps打开一个新网页后,根据它的fields数组字段,调用domScraper中的方法,来提取数据,并打印到控制台即可
|
||||||
Reference in New Issue
Block a user