1
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -9,6 +9,7 @@ lerna-debug.log*
|
|||||||
|
|
||||||
node_modules
|
node_modules
|
||||||
dist
|
dist
|
||||||
|
storeai-extension-v0.1.0
|
||||||
dist-ssr
|
dist-ssr
|
||||||
*.local
|
*.local
|
||||||
|
|
||||||
|
|||||||
@@ -19,5 +19,3 @@
|
|||||||
7.在窗口中记得显示一个取消按钮,点击后关闭窗口,取消爬取
|
7.在窗口中记得显示一个取消按钮,点击后关闭窗口,取消爬取
|
||||||
|
|
||||||
|
|
||||||
# 具体代码实现流程
|
|
||||||
请阅读./step.md文档,并严格按照步骤进行执行
|
|
||||||
@@ -69,7 +69,7 @@ async function autoClick(config: PlatformFieldConfig, rootDom: ParentNode): Prom
|
|||||||
/**
|
/**
|
||||||
* 递归处理字段配置,支持普通字段、嵌套 row、列表和表格。
|
* 递归处理字段配置,支持普通字段、嵌套 row、列表和表格。
|
||||||
*/
|
*/
|
||||||
async function processFields(columns: PlatformFieldConfig[], rootDom: ParentNode): Promise<DomScrapeResult> {
|
export async function processFields(columns: PlatformFieldConfig[], rootDom: ParentNode): Promise<DomScrapeResult> {
|
||||||
const result: DomScrapeResult = {};
|
const result: DomScrapeResult = {};
|
||||||
|
|
||||||
for (const item of columns) {
|
for (const item of columns) {
|
||||||
|
|||||||
@@ -1,9 +1,16 @@
|
|||||||
import { getPlatformById } from '@/config/platforms';
|
import { getPlatformById } from '@/config/platforms';
|
||||||
import type { CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types';
|
import type { CrawlPauseInfo, CrawlProgressStep, CrawlTaskState, PlatformConfig, PlatformStepConfig } from '@/types';
|
||||||
import { scrapeDomFields, type DomScrapeResult } from '../domScraper';
|
import type { DomScrapeResult } from '../domScraper';
|
||||||
import type { CrawlStateResponse } from '../types';
|
import type { CrawlStateResponse } from '../types';
|
||||||
import { getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState';
|
import { getCrawlTaskState, setCrawlTaskState, updateCrawlTaskState } from './taskState';
|
||||||
|
|
||||||
|
interface PageRunnerResponse {
|
||||||
|
ok: boolean;
|
||||||
|
data?: DomScrapeResult | null;
|
||||||
|
interrupt?: CrawlPauseInfo;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。
|
* 创建新的爬取任务,打开目标平台窗口,并把初始时间轴状态写入 storage。
|
||||||
*/
|
*/
|
||||||
@@ -86,6 +93,29 @@ export async function cancelCrawl(): Promise<CrawlStateResponse> {
|
|||||||
return { ok: true, data: canceledState };
|
return { ok: true, data: canceledState };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 用户处理完登录、验证码或风控后,恢复当前暂停中的爬取任务。
|
||||||
|
*/
|
||||||
|
export async function resumeCrawl(): Promise<CrawlStateResponse> {
|
||||||
|
const state = await getCrawlTaskState();
|
||||||
|
|
||||||
|
if (!state || state.status !== 'paused') {
|
||||||
|
return { ok: true, data: state };
|
||||||
|
}
|
||||||
|
|
||||||
|
const resumedState: CrawlTaskState = {
|
||||||
|
...state,
|
||||||
|
status: 'running',
|
||||||
|
pause: undefined,
|
||||||
|
steps: state.steps.map((step, index) =>
|
||||||
|
index === state.currentStepIndex ? { ...step, status: 'running', message: undefined } : step,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
await setCrawlTaskState(resumedState);
|
||||||
|
return { ok: true, data: resumedState };
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 窗口关闭后,如果关闭的是爬取窗口,就把当前任务标记为取消。
|
* 窗口关闭后,如果关闭的是爬取窗口,就把当前任务标记为取消。
|
||||||
*/
|
*/
|
||||||
@@ -114,53 +144,81 @@ async function runCrawlSteps(platform: PlatformConfig, initialState: CrawlTaskSt
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const tabId = await getWindowActiveTabId(initialState.windowId);
|
|
||||||
|
|
||||||
for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) {
|
for (let stepIndex = 0; stepIndex < platform.steps.length; stepIndex += 1) {
|
||||||
const step = platform.steps[stepIndex];
|
const step = platform.steps[stepIndex];
|
||||||
const currentState = await getCrawlTaskState();
|
let shouldRetryStep = true;
|
||||||
|
|
||||||
if (currentState?.id !== initialState.id || currentState.status !== 'running') {
|
while (shouldRetryStep) {
|
||||||
return;
|
const currentState = await getCrawlTaskState();
|
||||||
}
|
|
||||||
|
|
||||||
await updateCrawlTaskState(initialState.id, (state) => ({
|
if (currentState?.id !== initialState.id || currentState.status === 'canceled') {
|
||||||
...state,
|
return;
|
||||||
currentStepIndex: stepIndex,
|
}
|
||||||
status: 'running',
|
|
||||||
steps: state.steps.map((item, index) => ({
|
|
||||||
...item,
|
|
||||||
status: index === stepIndex ? 'running' : item.status,
|
|
||||||
message: index === stepIndex ? undefined : item.message,
|
|
||||||
})),
|
|
||||||
}));
|
|
||||||
|
|
||||||
await chrome.tabs.update(tabId, { url: step.url, active: true });
|
if (currentState.status === 'paused') {
|
||||||
await waitForTabLoaded(tabId);
|
const resumed = await waitUntilResumed(initialState.id);
|
||||||
|
|
||||||
const isReady = await waitForStepReady(tabId, step);
|
if (!resumed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!isReady) {
|
|
||||||
await updateCrawlTaskState(initialState.id, (state) => ({
|
await updateCrawlTaskState(initialState.id, (state) => ({
|
||||||
...state,
|
...state,
|
||||||
status: 'failed',
|
|
||||||
currentStepIndex: stepIndex,
|
currentStepIndex: stepIndex,
|
||||||
|
status: 'running',
|
||||||
|
pause: undefined,
|
||||||
|
steps: state.steps.map((item, index) => ({
|
||||||
|
...item,
|
||||||
|
status: index === stepIndex ? 'running' : item.status,
|
||||||
|
message: index === stepIndex ? undefined : item.message,
|
||||||
|
})),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const tabId = await getWindowActiveTabId(initialState.windowId);
|
||||||
|
await chrome.tabs.update(tabId, { url: step.url, active: true });
|
||||||
|
await waitForTabLoaded(tabId);
|
||||||
|
|
||||||
|
const response = await scrapeStepInContent(tabId, step);
|
||||||
|
|
||||||
|
if (response.interrupt) {
|
||||||
|
await pauseForInterrupt(initialState.id, stepIndex, response.interrupt);
|
||||||
|
const resumed = await waitUntilResumed(initialState.id);
|
||||||
|
|
||||||
|
if (!resumed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const message = response.error ?? '页面抓取失败';
|
||||||
|
|
||||||
|
await updateCrawlTaskState(initialState.id, (state) => ({
|
||||||
|
...state,
|
||||||
|
status: 'failed',
|
||||||
|
currentStepIndex: stepIndex,
|
||||||
|
steps: state.steps.map((item, index) =>
|
||||||
|
index === stepIndex ? { ...item, status: 'failed', message } : item,
|
||||||
|
),
|
||||||
|
}));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, response.data);
|
||||||
|
|
||||||
|
await updateCrawlTaskState(initialState.id, (state) => ({
|
||||||
|
...state,
|
||||||
steps: state.steps.map((item, index) =>
|
steps: state.steps.map((item, index) =>
|
||||||
index === stepIndex ? { ...item, status: 'failed', message: '页面关键 DOM 未加载完成' } : item,
|
index === stepIndex
|
||||||
|
? { ...item, status: 'success', message: undefined, result: response.data }
|
||||||
|
: item,
|
||||||
),
|
),
|
||||||
}));
|
}));
|
||||||
return;
|
|
||||||
|
shouldRetryStep = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = await scrapeStepFields(tabId, step);
|
|
||||||
console.log(`[crawl] ${platform.name} - ${step.name} 提取成功`, data);
|
|
||||||
|
|
||||||
await updateCrawlTaskState(initialState.id, (state) => ({
|
|
||||||
...state,
|
|
||||||
steps: state.steps.map((item, index) =>
|
|
||||||
index === stepIndex ? { ...item, status: 'success', message: undefined } : item,
|
|
||||||
),
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await updateCrawlTaskState(initialState.id, (state) => ({
|
await updateCrawlTaskState(initialState.id, (state) => ({
|
||||||
@@ -198,55 +256,90 @@ async function getWindowActiveTabId(windowId: number): Promise<number> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 等待步骤配置中的 checkSelector 出现;第一次超时后刷新页面再重试一次。
|
* 让 content script 直接在目标页面执行检查和抓取。
|
||||||
*/
|
*/
|
||||||
async function waitForStepReady(tabId: number, step: PlatformStepConfig): Promise<boolean> {
|
async function scrapeStepInContent(tabId: number, step: PlatformStepConfig): Promise<PageRunnerResponse> {
|
||||||
if (await waitForSelector(tabId, step.checkSelector, 5000)) {
|
const startedAt = Date.now();
|
||||||
return true;
|
|
||||||
|
while (Date.now() - startedAt < 20000) {
|
||||||
|
const response = await sendPageRunnerMessage(tabId, {
|
||||||
|
action: 'SCRAPE_STEP',
|
||||||
|
payload: {
|
||||||
|
fields: step.fields,
|
||||||
|
checkSelector: step.checkSelector,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (response.ok || response.interrupt || !isPageRunnerNotReadyError(response.error)) {
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
await sleep(500);
|
||||||
}
|
}
|
||||||
|
|
||||||
await chrome.tabs.reload(tabId);
|
return { ok: false, error: '页面脚本未响应,请刷新扩展后重试' };
|
||||||
await waitForTabLoaded(tabId);
|
|
||||||
|
|
||||||
return waitForSelector(tabId, step.checkSelector, 5000);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 在目标页面轮询检查指定 selector 是否存在。
|
* 给目标页的 content script 发送页面执行消息。
|
||||||
*/
|
*/
|
||||||
async function waitForSelector(tabId: number, selector: string, timeoutMs: number): Promise<boolean> {
|
async function sendPageRunnerMessage(tabId: number, message: unknown): Promise<PageRunnerResponse> {
|
||||||
const startedAt = Date.now();
|
try {
|
||||||
|
const response = await chrome.tabs.sendMessage(tabId, message);
|
||||||
|
|
||||||
while (Date.now() - startedAt < timeoutMs) {
|
if (response && typeof response === 'object') {
|
||||||
const results = await chrome.scripting.executeScript({
|
return response as PageRunnerResponse;
|
||||||
target: { tabId },
|
}
|
||||||
func: (targetSelector: string) => Boolean(document.querySelector(targetSelector)),
|
|
||||||
args: [selector],
|
|
||||||
});
|
|
||||||
|
|
||||||
if (Boolean(results[0]?.result)) {
|
return { ok: false, error: '页面脚本返回为空' };
|
||||||
|
} catch (error: unknown) {
|
||||||
|
return { ok: false, error: error instanceof Error ? error.message : String(error) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断错误是否只是 content script 尚未注入完成。
|
||||||
|
*/
|
||||||
|
function isPageRunnerNotReadyError(error?: string): boolean {
|
||||||
|
if (!error) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return /receiving end does not exist|could not establish connection|no receiving end/i.test(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 因登录、验证码或页面异常暂停当前任务。
|
||||||
|
*/
|
||||||
|
async function pauseForInterrupt(taskId: string, stepIndex: number, interrupt: CrawlPauseInfo): Promise<void> {
|
||||||
|
await updateCrawlTaskState(taskId, (state) => ({
|
||||||
|
...state,
|
||||||
|
status: 'paused',
|
||||||
|
pause: interrupt,
|
||||||
|
currentStepIndex: stepIndex,
|
||||||
|
steps: state.steps.map((step, index) =>
|
||||||
|
index === stepIndex ? { ...step, status: 'running', message: interrupt.message } : step,
|
||||||
|
),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 暂停后等待用户点继续或取消。
|
||||||
|
*/
|
||||||
|
async function waitUntilResumed(taskId: string): Promise<boolean> {
|
||||||
|
while (true) {
|
||||||
|
const state = await getCrawlTaskState();
|
||||||
|
|
||||||
|
if (!state || state.id !== taskId || state.status === 'canceled' || state.status === 'failed') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state.status === 'running') {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
await new Promise((resolve) => {
|
await sleep(1000);
|
||||||
globalThis.setTimeout(resolve, 500);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 注入 domScraper 到目标页面,并根据当前 step.fields 提取页面数据。
|
|
||||||
*/
|
|
||||||
async function scrapeStepFields(tabId: number, step: PlatformStepConfig): Promise<DomScrapeResult | null> {
|
|
||||||
const results = await chrome.scripting.executeScript({
|
|
||||||
target: { tabId },
|
|
||||||
func: scrapeDomFields,
|
|
||||||
args: [step.fields],
|
|
||||||
});
|
|
||||||
|
|
||||||
return results[0]?.result ?? null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -257,7 +350,7 @@ function createCrawlWindow(url: string): Promise<chrome.windows.Window> {
|
|||||||
chrome.windows.create(
|
chrome.windows.create(
|
||||||
{
|
{
|
||||||
url,
|
url,
|
||||||
type: 'popup',
|
type: 'normal',
|
||||||
focused: true,
|
focused: true,
|
||||||
width: 1280,
|
width: 1280,
|
||||||
height: 900,
|
height: 900,
|
||||||
@@ -302,3 +395,12 @@ function waitForTabLoaded(tabId: number): Promise<void> {
|
|||||||
chrome.tabs.onUpdated.addListener(handleUpdated);
|
chrome.tabs.onUpdated.addListener(handleUpdated);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 简单等待工具。
|
||||||
|
*/
|
||||||
|
function sleep(ms: number): Promise<void> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
globalThis.setTimeout(resolve, ms);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types';
|
import type { BackgroundCommand, BackgroundResponse, CrawlStateResponse } from '../types';
|
||||||
import { cancelCrawl, cancelCrawlWhenWindowRemoved, startCrawl } from './crawlTask';
|
import { cancelCrawl, cancelCrawlWhenWindowRemoved, resumeCrawl, startCrawl } from './crawlTask';
|
||||||
import { getCrawlTaskState } from './taskState';
|
import { getCrawlTaskState } from './taskState';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -37,6 +37,8 @@ export async function handleBackgroundCommand(
|
|||||||
return { ok: true, data: await getCrawlTaskState() };
|
return { ok: true, data: await getCrawlTaskState() };
|
||||||
case 'CANCEL_CRAWL':
|
case 'CANCEL_CRAWL':
|
||||||
return cancelCrawl();
|
return cancelCrawl();
|
||||||
|
case 'RESUME_CRAWL':
|
||||||
|
return resumeCrawl();
|
||||||
default:
|
default:
|
||||||
return { ok: false, error: '未知的后台指令' };
|
return { ok: false, error: '未知的后台指令' };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,8 +23,14 @@ export interface CancelCrawlCommand {
|
|||||||
action: 'CANCEL_CRAWL';
|
action: 'CANCEL_CRAWL';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 继续当前暂停中的爬取任务。
|
||||||
|
export interface ResumeCrawlCommand {
|
||||||
|
// 消息动作类型:用户已处理登录/验证码,允许 background 继续重试当前步骤。
|
||||||
|
action: 'RESUME_CRAWL';
|
||||||
|
}
|
||||||
|
|
||||||
// popup/content script 能发送给 background 的全部消息类型。
|
// popup/content script 能发送给 background 的全部消息类型。
|
||||||
export type BackgroundCommand = StartCrawlCommand | GetCrawlStateCommand | CancelCrawlCommand;
|
export type BackgroundCommand = StartCrawlCommand | GetCrawlStateCommand | CancelCrawlCommand | ResumeCrawlCommand;
|
||||||
|
|
||||||
// background 统一响应结构。
|
// background 统一响应结构。
|
||||||
export interface BackgroundResponse<T = unknown> {
|
export interface BackgroundResponse<T = unknown> {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ export const PLATFORM_CONFIGS: PlatformConfig[] = [
|
|||||||
name: '数据看板',
|
name: '数据看板',
|
||||||
uniqueKey: 'databoard',
|
uniqueKey: 'databoard',
|
||||||
url: 'https://seller.shopee.com.my/',
|
url: 'https://seller.shopee.com.my/',
|
||||||
checkSelector: '.rate-manager-content',
|
checkSelector: '.page-container',
|
||||||
fields: [
|
fields: [
|
||||||
{
|
{
|
||||||
label: "出货统计",
|
label: "出货统计",
|
||||||
@@ -119,6 +119,191 @@ export const PLATFORM_CONFIGS: PlatformConfig[] = [
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "广告中心",
|
||||||
|
uniqueKey: "adscenter",
|
||||||
|
url: "https://seller.shopee.com.my/portal/marketing/pas/index",
|
||||||
|
checkSelector: '.page-container',
|
||||||
|
fields: [
|
||||||
|
{
|
||||||
|
label: "我的账户",
|
||||||
|
className: ".my-account-wrap",
|
||||||
|
keys: [
|
||||||
|
{
|
||||||
|
label: "广告余额",
|
||||||
|
className: ".credit-expense-label-wrapper:nth-child(1) .ellipsis-content"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "今日广告花费",
|
||||||
|
className: ".credit-expense-label-wrapper:nth-child(2) .ellipsis-content"
|
||||||
|
},
|
||||||
|
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "进行中广告列表",
|
||||||
|
className: ".eds-table__body-container",
|
||||||
|
type: 2,
|
||||||
|
tableParts: [
|
||||||
|
{ name: "fixed", select: ".eds-table__fix-body" },
|
||||||
|
{ name: "main", select: ".eds-table__main-body" }
|
||||||
|
],
|
||||||
|
keys: [
|
||||||
|
{
|
||||||
|
label: "广告信息",
|
||||||
|
className: ".info-containter",
|
||||||
|
part: "fixed",
|
||||||
|
keys: [
|
||||||
|
{
|
||||||
|
label: "广告名称",
|
||||||
|
className: ".campaign-name-container"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "广告类型",
|
||||||
|
className: ".gmv-max-noti"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "结束时间",
|
||||||
|
className: ".time-edit-wrapper"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "每日预算",
|
||||||
|
part: "main",
|
||||||
|
className: "td:nth-child(1)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "目标ROAS",
|
||||||
|
part: "main",
|
||||||
|
className: "td:nth-child(2)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "花费",
|
||||||
|
part: "main",
|
||||||
|
className: "td:nth-child(4)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "销售额",
|
||||||
|
part: "main",
|
||||||
|
className: "td:nth-child(5)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "广告支出回报率",
|
||||||
|
part: "main",
|
||||||
|
className: "td:nth-child(6)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
pagination: {
|
||||||
|
nextBtn: ".eds-pager__button-next", // 下一页按钮
|
||||||
|
disabledClass: ".eds-button--disabled", // 按钮禁用时的class(用来判断结束)
|
||||||
|
maxPage: 1, // 最大爬取页数
|
||||||
|
delay: 2000 // 翻页后的等待加载时间
|
||||||
|
},
|
||||||
|
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "评论管理",
|
||||||
|
uniqueKey: "message",
|
||||||
|
url: "https://seller.shopee.com.my/portal/settings/shop/rating",
|
||||||
|
checkSelector: '.page-container',
|
||||||
|
fields: [
|
||||||
|
{
|
||||||
|
label: "低星评论",
|
||||||
|
className: ".border-solid.rounded",
|
||||||
|
condition: {
|
||||||
|
list: [
|
||||||
|
".flex.items-center.mt-6 div:nth-child(3)",
|
||||||
|
".eds-react-checkbox-group label:nth-child(2)",
|
||||||
|
".eds-react-checkbox-group label:nth-child(3)",
|
||||||
|
".eds-react-checkbox-group label:nth-child(4)"
|
||||||
|
],
|
||||||
|
time: 200,
|
||||||
|
},
|
||||||
|
type: 1,
|
||||||
|
keys: [
|
||||||
|
{
|
||||||
|
label: "用户",
|
||||||
|
className: ".flex.items-center.justify-start .ml-2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "订单编号",
|
||||||
|
className: ".underline.px-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "商品名称",
|
||||||
|
className: ".min-w-0.font-medium.break-all"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "规格",
|
||||||
|
className: ".min-w-0.font-medium.break-all + div"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "评价内容",
|
||||||
|
className: ".min-w-0.overflow-hidden",
|
||||||
|
condition: {
|
||||||
|
list: [
|
||||||
|
"span.cursor-pointer"
|
||||||
|
],
|
||||||
|
time: 200,
|
||||||
|
},
|
||||||
|
|
||||||
|
},
|
||||||
|
],
|
||||||
|
pagination: {
|
||||||
|
nextBtn: ".eds-react-pagination-pager__button-next",
|
||||||
|
maxPage: 2, // 最大爬取页数
|
||||||
|
delay: 2000 // 翻页后的等待加载时间
|
||||||
|
},
|
||||||
|
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "账户健康状态",
|
||||||
|
uniqueKey: "accounthealth",
|
||||||
|
url: "https://seller.shopee.com.my/portal/accounthealth/home",
|
||||||
|
checkSelector: '.page-container',
|
||||||
|
fields: [
|
||||||
|
{
|
||||||
|
label: "健康状态",
|
||||||
|
className: ".metric-content",
|
||||||
|
type: 1,
|
||||||
|
keys: [
|
||||||
|
{
|
||||||
|
label: "模块名",
|
||||||
|
className: ".metric-type"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "值",
|
||||||
|
className: ".metric-item",
|
||||||
|
type: 1,
|
||||||
|
keys: [
|
||||||
|
{
|
||||||
|
label: "指标",
|
||||||
|
className: "p.metric-text"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "值",
|
||||||
|
className: ".metric-my"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "目标",
|
||||||
|
className: ".metric-target"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "使用类型",
|
||||||
|
className: ".metric-applied-to"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
],
|
||||||
|
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ const isPanelOpen = ref(false);
|
|||||||
let timer: number | undefined;
|
let timer: number | undefined;
|
||||||
|
|
||||||
// 只有任务处于运行中时,才在网页右下角展示计时按钮。
|
// 只有任务处于运行中时,才在网页右下角展示计时按钮。
|
||||||
const isVisible = computed(() => crawlState.value?.status === 'running');
|
const isVisible = computed(() => crawlState.value ? ['running', 'paused'].includes(crawlState.value.status) : false);
|
||||||
|
|
||||||
// 内容脚本挂载后立即同步一次状态,并开始每秒刷新计时和任务进度。
|
// 内容脚本挂载后立即同步一次状态,并开始每秒刷新计时和任务进度。
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
@@ -85,6 +85,14 @@ function getStepText(status: string): string {
|
|||||||
return textMap[status] ?? status;
|
return textMap[status] ?? status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 请求 background 继续暂停中的爬取任务。
|
||||||
|
*/
|
||||||
|
async function handleResumeCrawl() {
|
||||||
|
await sendBackgroundMessage({ action: 'RESUME_CRAWL' });
|
||||||
|
await refreshCrawlState();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 发送消息到 background;非扩展环境下返回空成功响应,方便本地页面不报错。
|
* 发送消息到 background;非扩展环境下返回空成功响应,方便本地页面不报错。
|
||||||
*/
|
*/
|
||||||
@@ -114,15 +122,20 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
|
|||||||
<ol class="dianshan-crawl-timeline">
|
<ol class="dianshan-crawl-timeline">
|
||||||
<li v-for="(step, index) in crawlState.steps" :key="step.uniqueKey" :class="`is-${step.status}`">
|
<li v-for="(step, index) in crawlState.steps" :key="step.uniqueKey" :class="`is-${step.status}`">
|
||||||
<span class="dianshan-crawl-dot"></span>
|
<span class="dianshan-crawl-dot"></span>
|
||||||
<div class="dianshan-crawl-step">
|
<div class="dianshan-crawl-step">
|
||||||
<strong>{{ index + 1 }}. {{ step.name }}</strong>
|
<strong>{{ index + 1 }}. {{ step.name }}</strong>
|
||||||
<em>{{ getStepText(step.status) }}</em>
|
<em>{{ getStepText(step.status) }}</em>
|
||||||
<small v-if="step.message">{{ step.message }}</small>
|
<small v-if="step.message">{{ step.message }}</small>
|
||||||
</div>
|
</div>
|
||||||
</li>
|
</li>
|
||||||
</ol>
|
</ol>
|
||||||
</section>
|
|
||||||
</div>
|
<div v-if="crawlState.status === 'paused' && crawlState.pause" class="dianshan-crawl-pause">
|
||||||
|
<p>{{ crawlState.pause.message }}</p>
|
||||||
|
<button type="button" @click="handleResumeCrawl">我已处理,继续</button>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<style scoped>
|
<style scoped>
|
||||||
@@ -223,6 +236,35 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
|
|||||||
color: #b91c1c;
|
color: #b91c1c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.dianshan-crawl-pause {
|
||||||
|
display: grid;
|
||||||
|
gap: 8px;
|
||||||
|
margin-top: 12px;
|
||||||
|
padding: 10px;
|
||||||
|
border: 1px solid #f59e0b;
|
||||||
|
border-radius: 8px;
|
||||||
|
background: #fffbeb;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dianshan-crawl-pause p {
|
||||||
|
margin: 0;
|
||||||
|
color: #92400e;
|
||||||
|
font-size: 12px;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dianshan-crawl-pause button {
|
||||||
|
width: 100%;
|
||||||
|
border: 0;
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 8px 10px;
|
||||||
|
color: #ffffff;
|
||||||
|
background: #059669;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 700;
|
||||||
|
}
|
||||||
|
|
||||||
.is-running .dianshan-crawl-dot,
|
.is-running .dianshan-crawl-dot,
|
||||||
.is-success .dianshan-crawl-dot {
|
.is-success .dianshan-crawl-dot {
|
||||||
background: #10b981;
|
background: #10b981;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { createApp } from 'vue';
|
import { createApp } from 'vue';
|
||||||
import App from './App.vue';
|
import App from './App.vue';
|
||||||
|
import { setupPageRunner } from './pageRunner';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 将内容脚本应用挂载到页面中。
|
* 将内容脚本应用挂载到页面中。
|
||||||
@@ -9,17 +10,15 @@ function mountApp() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 内容脚本在宿主页面中的根容器。
|
|
||||||
// 用于避免污染业务页面结构。
|
|
||||||
const container = document.createElement('div');
|
const container = document.createElement('div');
|
||||||
container.id = 'dianshan-crx-root';
|
container.id = 'dianshan-crx-root';
|
||||||
// Vue 应用实际挂载的节点。
|
|
||||||
const appRoot = document.createElement('div');
|
const appRoot = document.createElement('div');
|
||||||
|
|
||||||
container.appendChild(appRoot);
|
container.appendChild(appRoot);
|
||||||
document.body.appendChild(container);
|
document.body.appendChild(container);
|
||||||
|
|
||||||
createApp(App).mount(appRoot);
|
createApp(App).mount(appRoot);
|
||||||
|
setupPageRunner();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (document.readyState === 'loading') {
|
if (document.readyState === 'loading') {
|
||||||
|
|||||||
207
src/content/pageRunner.ts
Normal file
207
src/content/pageRunner.ts
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
import { processFields, type DomScrapeResult } from '@/background/domScraper';
|
||||||
|
import type { CrawlPauseInfo, PlatformFieldConfig } from '@/types';
|
||||||
|
|
||||||
|
interface ScrapeStepMessage {
|
||||||
|
action: 'SCRAPE_STEP';
|
||||||
|
payload: {
|
||||||
|
fields: PlatformFieldConfig[];
|
||||||
|
checkSelector: string;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
interface CheckInterruptMessage {
|
||||||
|
action: 'CHECK_INTERRUPT';
|
||||||
|
}
|
||||||
|
|
||||||
|
type PageRunnerMessage = ScrapeStepMessage | CheckInterruptMessage;
|
||||||
|
|
||||||
|
interface PageRunnerResponse {
|
||||||
|
ok: boolean;
|
||||||
|
data?: DomScrapeResult | null;
|
||||||
|
interrupt?: CrawlPauseInfo;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注册页面执行器,供 background 在目标网页中触发中断检测和 DOM 抓取。
|
||||||
|
*/
|
||||||
|
export function setupPageRunner(): void {
|
||||||
|
chrome.runtime.onMessage.addListener((message: PageRunnerMessage, _sender, sendResponse) => {
|
||||||
|
void handlePageRunnerMessage(message).then(sendResponse);
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 处理 background 发来的页面执行消息。
|
||||||
|
*/
|
||||||
|
async function handlePageRunnerMessage(message: PageRunnerMessage): Promise<PageRunnerResponse> {
|
||||||
|
if (message.action === 'CHECK_INTERRUPT') {
|
||||||
|
return { ok: true, interrupt: detectPageInterrupt() };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (message.action === 'SCRAPE_STEP') {
|
||||||
|
const interrupt = detectPageInterrupt();
|
||||||
|
|
||||||
|
if (interrupt) {
|
||||||
|
return { ok: false, interrupt };
|
||||||
|
}
|
||||||
|
|
||||||
|
const readyElement = await waitForStableSelector(message.payload.checkSelector, 18000);
|
||||||
|
|
||||||
|
if (!readyElement) {
|
||||||
|
return {
|
||||||
|
ok: false,
|
||||||
|
interrupt: {
|
||||||
|
reason: 'page_not_ready',
|
||||||
|
message: '页面关键内容暂未加载,请确认页面是否正常显示后继续',
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await processFields(message.payload.fields, document.body);
|
||||||
|
return { ok: true, data };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { ok: false, error: '未知页面执行指令' };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 检测当前页面是否需要用户手动处理登录、验证码或页面不存在。
|
||||||
|
*/
|
||||||
|
function detectPageInterrupt(): CrawlPauseInfo | undefined {
|
||||||
|
if (isShieldPage()) {
|
||||||
|
return {
|
||||||
|
reason: 'shield',
|
||||||
|
message: '检测到验证码或风控验证,请在打开的商家后台窗口处理完成后继续',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isLoginPage()) {
|
||||||
|
return {
|
||||||
|
reason: 'reauth',
|
||||||
|
message: '检测到需要重新登录,请在打开的商家后台窗口登录完成后继续',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isNotFoundPage()) {
|
||||||
|
return {
|
||||||
|
reason: 'not_found',
|
||||||
|
message: '当前页面不存在或已失效,请确认平台配置里的页面地址是否正确',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断是否进入验证码、流量盾或风控验证页。
|
||||||
|
*/
|
||||||
|
function isShieldPage(): boolean {
|
||||||
|
const path = location.pathname.toLowerCase();
|
||||||
|
|
||||||
|
if (path.startsWith('/verify/captcha') || path.startsWith('/verify/traffic')) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const shieldElement = document.querySelector(
|
||||||
|
'[data-name="verification"], .ant-captcha, #captchaContainer, [class*="captcha" i], [id*="captcha" i]',
|
||||||
|
);
|
||||||
|
|
||||||
|
return shieldElement ? isVisibleElement(shieldElement) : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断当前页面是否需要登录或二次验证密码。
|
||||||
|
*/
|
||||||
|
function isLoginPage(): boolean {
|
||||||
|
const path = location.pathname.toLowerCase();
|
||||||
|
|
||||||
|
if (
|
||||||
|
/^\/(?:buyer\/)?login\b/i.test(path) ||
|
||||||
|
/^\/account\/(?:signin|login)\b/i.test(path) ||
|
||||||
|
/^\/portal\/login\b/i.test(path)
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const visiblePasswordInput = Array.from(document.querySelectorAll('input[type="password"]')).some(isVisibleElement);
|
||||||
|
|
||||||
|
if (visiblePasswordInput) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bodyText = document.body.innerText.slice(0, 3000);
|
||||||
|
const loginTextPatterns = [
|
||||||
|
/enter\s+(your\s+)?password\s+to\s+continue/i,
|
||||||
|
/sign\s+in\s+(again\s+)?to\s+continue/i,
|
||||||
|
/please\s+(re-?)?enter\s+(your\s+)?password/i,
|
||||||
|
/请(再次|重新)?输入(您的)?密码/,
|
||||||
|
/请登录|重新登录|登录后继续/,
|
||||||
|
];
|
||||||
|
|
||||||
|
return loginTextPatterns.some((pattern) => pattern.test(bodyText));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断当前页面是否是不存在、下架或错误页面。
|
||||||
|
*/
|
||||||
|
function isNotFoundPage(): boolean {
|
||||||
|
const text = document.body.innerText.slice(0, 8000);
|
||||||
|
const title = document.title;
|
||||||
|
const notFoundPatterns = [
|
||||||
|
/page\s+not\s+found/i,
|
||||||
|
/the\s+page\s+you\s+are\s+looking\s+for/i,
|
||||||
|
/this\s+page\s+(has\s+been\s+)?removed/i,
|
||||||
|
/product\s+(is\s+)?unavailable/i,
|
||||||
|
/页面不存在|找不到(此|该)?页面|抱歉.*不存在|(商品|产品)已下架/,
|
||||||
|
];
|
||||||
|
|
||||||
|
return notFoundPatterns.some((pattern) => pattern.test(title) || pattern.test(text));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 等待页面中出现稳定的关键元素。
|
||||||
|
*/
|
||||||
|
async function waitForStableSelector(selector: string, timeoutMs: number): Promise<Element | null> {
|
||||||
|
const deadline = Date.now() + timeoutMs;
|
||||||
|
|
||||||
|
while (Date.now() < deadline) {
|
||||||
|
const element = document.querySelector(selector);
|
||||||
|
|
||||||
|
if (element && isVisibleElement(element)) {
|
||||||
|
await sleep(600);
|
||||||
|
const stableElement = document.querySelector(selector);
|
||||||
|
return stableElement && isVisibleElement(stableElement) ? stableElement : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
await sleep(500);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether an element is actually rendered on the page.
 *
 * An element counts as visible when it is attached to a document, its computed
 * style is available and is not `display: none`, `visibility: hidden` or
 * below 0.05 opacity, and its bounding box has a non-zero width and height.
 *
 * @param element Element to inspect.
 * @returns `true` only when every check above passes.
 */
function isVisibleElement(element: Element): boolean {
  if (!element.isConnected) {
    return false;
  }

  const computed = element.ownerDocument.defaultView?.getComputedStyle(element);
  if (!computed) {
    // No window behind the owning document, so nothing can be rendered.
    return false;
  }

  const hiddenByCss =
    computed.display === 'none' || computed.visibility === 'hidden' || Number(computed.opacity) < 0.05;
  if (hiddenByCss) {
    return false;
  }

  const { width, height } = element.getBoundingClientRect();
  return width > 0 && height > 0;
}
|
||||||
|
|
||||||
|
/**
 * Minimal delay helper.
 *
 * @param ms Delay handed to `window.setTimeout`, in milliseconds.
 * @returns A promise that resolves once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    window.setTimeout(wake, ms);
  });
}
|
||||||
@@ -18,7 +18,9 @@ const selectedPlatform = computed(() =>
|
|||||||
);
|
);
|
||||||
|
|
||||||
const isLoggedIn = computed(() => token.value !== null);
|
const isLoggedIn = computed(() => token.value !== null);
|
||||||
const isCrawling = computed(() => crawlState.value?.status === 'running');
|
const shouldShowCrawlProgress = computed(() =>
|
||||||
|
crawlState.value ? ['running', 'paused', 'completed', 'failed'].includes(crawlState.value.status) : false,
|
||||||
|
);
|
||||||
|
|
||||||
onMounted(async () => {
|
onMounted(async () => {
|
||||||
token.value = await getToken();
|
token.value = await getToken();
|
||||||
@@ -82,6 +84,11 @@ async function handleCancelCrawl() {
|
|||||||
crawlState.value = response.data ?? null;
|
crawlState.value = response.data ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sends the RESUME_CRAWL action through sendBackgroundMessage and mirrors the
// task state from the reply into crawlState (null when the reply has no data).
async function handleResumeCrawl() {
  const { data } = await sendBackgroundMessage<CrawlTaskState>({ action: 'RESUME_CRAWL' });
  crawlState.value = data ?? null;
}
|
||||||
|
|
||||||
async function refreshCrawlState() {
|
async function refreshCrawlState() {
|
||||||
const response = await sendBackgroundMessage<CrawlTaskState | null>({ action: 'GET_CRAWL_STATE' });
|
const response = await sendBackgroundMessage<CrawlTaskState | null>({ action: 'GET_CRAWL_STATE' });
|
||||||
|
|
||||||
@@ -162,17 +169,31 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
|
|||||||
</button>
|
</button>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<template v-else-if="isCrawling && crawlState">
|
<template v-else-if="shouldShowCrawlProgress && crawlState">
|
||||||
<section class="space-y-4">
|
<section class="space-y-4">
|
||||||
<div class="flex items-center justify-between rounded-md bg-white px-3 py-2 shadow-sm">
|
<div class="flex items-center justify-between rounded-md bg-white px-3 py-2 shadow-sm">
|
||||||
<div>
|
<div>
|
||||||
<p class="text-sm font-medium text-slate-800">{{ crawlState.platformName }}</p>
|
<p class="text-sm font-medium text-slate-800">{{ crawlState.platformName }}</p>
|
||||||
<p class="text-xs text-slate-500">已运行 {{ formatElapsed(elapsedSeconds) }}</p>
|
<p class="text-xs text-slate-500">
|
||||||
|
{{ crawlState.status === 'paused' ? '已暂停' : '已运行 ' + formatElapsed(elapsedSeconds) }}
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<button type="button" class="text-xs text-red-600 transition hover:text-red-700"
|
<div class="flex items-center gap-2">
|
||||||
@click="handleCancelCrawl">
|
<button v-if="crawlState.status === 'paused'" type="button"
|
||||||
取消
|
class="text-xs text-emerald-600 transition hover:text-emerald-700"
|
||||||
</button>
|
@click="handleResumeCrawl">
|
||||||
|
继续
|
||||||
|
</button>
|
||||||
|
<button type="button" class="text-xs text-red-600 transition hover:text-red-700"
|
||||||
|
@click="handleCancelCrawl">
|
||||||
|
取消
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-if="crawlState.status === 'paused' && crawlState.pause"
|
||||||
|
class="rounded-md border border-amber-200 bg-amber-50 px-3 py-2 text-sm text-amber-800">
|
||||||
|
{{ crawlState.pause.message }}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<ol class="space-y-3">
|
<ol class="space-y-3">
|
||||||
@@ -187,6 +208,8 @@ function sendBackgroundMessage<T>(message: unknown): Promise<{ ok: boolean; data
|
|||||||
<span class="text-xs">{{ getStepText(step.status) }}</span>
|
<span class="text-xs">{{ getStepText(step.status) }}</span>
|
||||||
</div>
|
</div>
|
||||||
<p v-if="step.message" class="mt-1 text-xs">{{ step.message }}</p>
|
<p v-if="step.message" class="mt-1 text-xs">{{ step.message }}</p>
|
||||||
|
<pre v-if="step.result !== undefined"
|
||||||
|
class="mt-2 max-h-32 overflow-auto rounded bg-slate-950 p-2 text-[11px] leading-4 text-slate-100">{{ JSON.stringify(step.result, null, 2) }}</pre>
|
||||||
</div>
|
</div>
|
||||||
</li>
|
</li>
|
||||||
</ol>
|
</ol>
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
export type CrawlStepStatus = 'pending' | 'running' | 'success' | 'failed';
|
export type CrawlStepStatus = 'pending' | 'running' | 'success' | 'failed';
|
||||||
|
|
||||||
// 整体爬取任务状态。
|
// 整体爬取任务状态。
|
||||||
export type CrawlTaskStatus = 'running' | 'completed' | 'failed' | 'canceled';
|
export type CrawlTaskStatus = 'running' | 'paused' | 'completed' | 'failed' | 'canceled';
|
||||||
|
|
||||||
// 时间轴中的单个爬取步骤进度。
|
// 时间轴中的单个爬取步骤进度。
|
||||||
export interface CrawlProgressStep {
|
export interface CrawlProgressStep {
|
||||||
@@ -14,6 +14,16 @@ export interface CrawlProgressStep {
|
|||||||
status: CrawlStepStatus;
|
status: CrawlStepStatus;
|
||||||
// 状态补充说明,如失败原因。
|
// 状态补充说明,如失败原因。
|
||||||
message?: string;
|
message?: string;
|
||||||
|
// 当前步骤抓取到的数据结果。
|
||||||
|
result?: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 爬取暂停原因,通常由登录、验证码或页面不存在触发。
|
||||||
|
export interface CrawlPauseInfo {
|
||||||
|
// 暂停原因编码。
|
||||||
|
reason: 'reauth' | 'shield' | 'not_found' | 'page_not_ready';
|
||||||
|
// 展示给用户看的处理提示。
|
||||||
|
message: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 当前正在执行的爬取任务快照,供 popup 和 content script 同步展示。
|
// 当前正在执行的爬取任务快照,供 popup 和 content script 同步展示。
|
||||||
@@ -30,6 +40,8 @@ export interface CrawlTaskState {
|
|||||||
startedAt: number;
|
startedAt: number;
|
||||||
// 当前任务状态。
|
// 当前任务状态。
|
||||||
status: CrawlTaskStatus;
|
status: CrawlTaskStatus;
|
||||||
|
// 暂停信息;仅 status 为 paused 时存在。
|
||||||
|
pause?: CrawlPauseInfo;
|
||||||
// 当前执行到的步骤下标。
|
// 当前执行到的步骤下标。
|
||||||
currentStepIndex: number;
|
currentStepIndex: number;
|
||||||
// 平台 steps 映射出的时间轴进度。
|
// 平台 steps 映射出的时间轴进度。
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
export type {
|
export type {
|
||||||
|
CrawlPauseInfo,
|
||||||
CrawlProgressStep,
|
CrawlProgressStep,
|
||||||
CrawlStepStatus,
|
CrawlStepStatus,
|
||||||
CrawlTaskState,
|
CrawlTaskState,
|
||||||
|
|||||||
@@ -23,18 +23,8 @@ export interface PlatformPaginationConfig {
|
|||||||
|
|
||||||
// 表格分段配置,用于兼容一个数据块由多个 table 或多个 table 片段组成的情况。
|
// 表格分段配置,用于兼容一个数据块由多个 table 或多个 table 片段组成的情况。
|
||||||
export interface PlatformTablePartConfig {
|
export interface PlatformTablePartConfig {
|
||||||
// 当前 table 或表格片段的名称。
|
|
||||||
label: string;
|
|
||||||
// 当前 table 或表格片段的兼容名称,兼容 message.js 中的 name 写法。
|
|
||||||
name?: string;
|
name?: string;
|
||||||
// 当前 table 或表格片段的 CSS 选择器。
|
|
||||||
className: string;
|
|
||||||
// 当前 table 或表格片段的兼容选择器,兼容 message.js 中的 select 写法。
|
|
||||||
select?: string;
|
select?: string;
|
||||||
// 行元素选择器,不填时由采集逻辑使用默认行选择器。
|
|
||||||
rowSelector?: string;
|
|
||||||
// 当前 table 或表格片段下需要采集的字段。
|
|
||||||
keys?: PlatformFieldConfig[];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 页面字段配置,描述一个普通元素、列表元素或表格元素如何从 DOM 中提取数据。
|
// 页面字段配置,描述一个普通元素、列表元素或表格元素如何从 DOM 中提取数据。
|
||||||
|
|||||||
52
step.md
52
step.md
@@ -1,52 +0,0 @@
|
|||||||
# 项目结构
|
|
||||||
```angular2html
|
|
||||||
src:.
|
|
||||||
├─assets # 静态资源目录
|
|
||||||
│ vite.svg # 这里的资源通常用于图标、Logo 或扩展程序内部引用的图片
|
|
||||||
│
|
|
||||||
├─background # 后台脚本 (Background Script / Service Worker)
|
|
||||||
│ index.ts # 扩展的“大脑”,常驻后台运行,处理事件监听、报文转发、存储管理等
|
|
||||||
│
|
|
||||||
├─config # 配置目录
|
|
||||||
│ platforms.ts # 自定义配置,各种平台(如不同网站、不同浏览器)的适配配置
|
|
||||||
│
|
|
||||||
├─content # 内容脚本 (Content Script)
|
|
||||||
│ │ App.vue # 注入到网页中的 UI 组件(通常用于在目标页面侧边栏或浮窗显示界面)
|
|
||||||
│ │ main.ts # 内容脚本的入口文件,负责将 Vue 组件挂载到宿主页面的 DOM 中
|
|
||||||
│ │
|
|
||||||
│ └─views # 内容脚本相关的子视图或组件
|
|
||||||
│
|
|
||||||
├─options # 选项页 (Options Page)
|
|
||||||
│ App.vue # 扩展设置页面的 UI(右键扩展图标点击“选项”打开的页面)
|
|
||||||
│ index.html # 选项页的 HTML 宿主文件
|
|
||||||
│ main.ts # 选项页的 Vue 入口文件
|
|
||||||
│
|
|
||||||
├─popup # 弹窗页 (Popup Page)
|
|
||||||
│ App.vue # 点击扩展图标时显示的弹出框 UI
|
|
||||||
│ index.html # 弹窗页的 HTML 宿主文件
|
|
||||||
│ main.ts # 弹窗页的 Vue 入口文件
|
|
||||||
│
|
|
||||||
├─shared # 共享代码库 (Shared)
|
|
||||||
│ # 存放被 background、content、popup 等多个模块共同引用的工具函数、常量、API封装等
|
|
||||||
│
|
|
||||||
└─types # 类型定义目录
|
|
||||||
index.ts # 存放全局的 TypeScript 接口(Interface)和类型(Type)定义
|
|
||||||
```
|
|
||||||
|
|
||||||
# 开发步骤
|
|
||||||
1.在popup模块中的App.vue中用tailwindcss编写,点击扩展图标时出现的弹窗,逻辑如下
|
|
||||||
- 在未登录情况下,即storage中token字段是否存在,如果不存在,弹窗内容只用显示扩展名字、描述、请登录按钮,底部扩展版本
|
|
||||||
- 当点击登录按钮后,先模拟登录,写死token,之后ui如下
|
|
||||||
- 显示扩展名字、描述、一个平台选择框(通过读取config/platforms.ts)的内容for循环显示平台、扫描按钮、最底部Row(退出按钮,扩展版本号)
|
|
||||||
- 注意:token的存储和获取逻辑放到/shared/auth.ts中去,如果涉及到接口和枚举的定义,请判断是否是全局类型
|
|
||||||
- 如果是,该类型写到一个新文件中,并放到types/下,如果不是,放到当前模块的types/目录下(如果没用,新建)
|
|
||||||
|
|
||||||
2.前提:当1完成后,点击popup的立即爬取已经可以打开一个新的窗口了
|
|
||||||
- 在所有网页(包括新打开的窗口和所有网页)的右下角都放一个圆形正计时(表示正在爬取中)
|
|
||||||
- 点击圆形正计时时,出现一个popup,内容如下
|
|
||||||
- 以时间轴的形式,表示当前爬取进度,即:根据platforms.ts中的steps
|
|
||||||
- 同时点击扩展的popup里的内容,也变得和上面的时间轴内容一致,显示爬取进度,隐藏立即爬取等按钮,
|
|
||||||
|
|
||||||
3.前提:1和2都已完成,ui和交互操作上ok
|
|
||||||
- 开始爬取网页中的数据,查看message.js内容,吧里面的爬取方法都提取出来放到background/domScraper.ts中去,
|
|
||||||
- 基于2,每次根据steps打开一个新网页后,根据它的fields数组字段,调用domScraper中的方法,来提取数据,并打印到控制台即可
|
|
||||||
@@ -1 +1 @@
|
|||||||
{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/background/service/crawltask.ts","./src/background/service/lifecycle.ts","./src/background/service/taskstate.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts"],"version":"5.9.3"}
|
{"root":["./manifest.config.ts","./message.js","./vite.config.ts","./src/background/domscraper.ts","./src/background/index.ts","./src/background/service.ts","./src/background/types.ts","./src/background/service/crawltask.ts","./src/background/service/lifecycle.ts","./src/background/service/taskstate.ts","./src/config/platforms.ts","./src/content/app.vue","./src/content/main.ts","./src/content/pagerunner.ts","./src/options/app.vue","./src/options/main.ts","./src/popup/app.vue","./src/popup/main.ts","./src/shared/auth.ts","./src/types/crawl.ts","./src/types/index.ts","./src/types/platform.ts","./storeai-extension-v0.1.0/service-worker-loader.js","./storeai-extension-v0.1.0/assets/config-cf-xklo9.js","./storeai-extension-v0.1.0/assets/fetch-hook.ts-bvrghr__.js","./storeai-extension-v0.1.0/assets/index-dxg1qimp.js","./storeai-extension-v0.1.0/assets/index.ts-dirvxn_b.js","./storeai-extension-v0.1.0/assets/orchestrator.ts-bleul1fk.js","./storeai-extension-v0.1.0/assets/orchestrator.ts-loader-drev6v6h.js","./storeai-extension-v0.1.0/assets/popup-dbgvbs2c.js","./storeai-extension-v0.1.0/assets/selectors-xrdds_u0.js"],"version":"5.9.3"}
|
||||||
Reference in New Issue
Block a user