From 7e2a83efd6b7410a95601bbd9c525d5db4fc36e8 Mon Sep 17 00:00:00 2001 From: zhu <1812073942@qq.com> Date: Wed, 6 May 2026 15:34:26 +0800 Subject: [PATCH] 1 --- src/background/domScraper.ts | 102 ++++++++++++++--------------------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/src/background/domScraper.ts b/src/background/domScraper.ts index 182423e..985fc9e 100644 --- a/src/background/domScraper.ts +++ b/src/background/domScraper.ts @@ -1,23 +1,4 @@ -import type { PlatformFieldConfig } from '@/types'; - -// DOM 抓取后的通用结果结构。 -export type DomScrapeResult = Record; - - -/** - * 在目标网页上下文中执行 DOM 抓取。 - * - * 注意:该方法会通过 chrome.scripting.executeScript 注入到页面中执行, - * 所以依赖的辅助方法都写在函数内部,避免注入后丢失模块作用域。 - */ -export async function scrapeDomFields(fields: PlatformFieldConfig[]): Promise { - if (!document.body) { - return null; - } - - return processFields(fields, document.body); -} - +import type {PlatformFieldConfig} from '@/types'; // 睡眠工具,给点击、翻页、异步渲染留出等待时间。 const sleep = (ms?: number) => new Promise((resolve) => window.setTimeout(resolve, ms ?? 1500)); @@ -26,12 +7,11 @@ const sleep = (ms?: number) => new Promise((resolve) => window.setTimeout(resolv * 从元素中提取实际值,默认取文本,也支持 attr、图片 src、链接 href。 */ function extractValue(el: Element | null, config: PlatformFieldConfig): string | null { - if (!el) { - return null; + if (el == null) { + return "未找到" } - if (config.attr) { - return (el.getAttribute(config.attr) || '').trim(); + return (el.getAttribute(config.attr) || "").trim(); } const tagName = el.tagName.toUpperCase(); @@ -49,19 +29,18 @@ function extractValue(el: Element | null, config: PlatformFieldConfig): string | } /** + * 自动点击 * 根据字段 condition 配置在指定 DOM 范围内自动点击目标元素。 */ -async function autoClick(config: PlatformFieldConfig, rootDom: ParentNode): Promise { +async function autoClick(config: PlatformFieldConfig, rootDom: Element): Promise { if (!config.condition) { return; } - - for (const selector of config.condition.list) { - const targets = Array.from(rootDom.querySelectorAll(selector)); - + for (const condition of config.condition.list) { + let targets: HTMLElement[] = Array.from(rootDom.querySelectorAll(condition)) for (const target of targets) { target.click(); - await sleep(config.condition.time); + await sleep(config?.condition.time); } } } @@ -69,8 +48,8 @@ async function autoClick(config: PlatformFieldConfig, rootDom: ParentNode): Prom /** * 递归处理字段配置,支持普通字段、嵌套 row、列表和表格。 */ -export async function processFields(columns: PlatformFieldConfig[], rootDom: ParentNode): Promise { - const result: DomScrapeResult = {}; +export async function processFields(columns: PlatformFieldConfig[], rootDom: Element) { + const result = {} as any; for (const item of columns) { await autoClick(item, rootDom); @@ -107,10 +86,12 @@ export async function processFields(columns: PlatformFieldConfig[], rootDom: Par } /** - * 按列表配置抓取所有列表项,并按分页配置继续翻页。 + * 提取列表的数据 + * @param config 配置 + * @param rootDom 父节点 */ -async function processList(config: PlatformFieldConfig, rootDom: ParentNode): Promise { - const allList: DomScrapeResult[] = []; +async function processList(config: PlatformFieldConfig, rootDom: ParentNode) { + const allList = []; let pageCount = 0; while (true) { @@ -159,54 +140,49 @@ async function processList(config: PlatformFieldConfig, rootDom: ParentNode): Pr /** * 按表格配置抓取表格行数据,并按分页配置继续翻页。 */ -async function processTable(config: PlatformFieldConfig, rootDom: ParentNode): Promise { - const allTableData: DomScrapeResult[] = []; +async function processTable(config: PlatformFieldConfig, rootDom: ParentNode) { + const allTableData: any[] = []; let pageCount = 0; while (true) { pageCount += 1; - const partsNodes: Record = {}; + const partsNodes: any = {}; - for (const part of config.tableParts ?? []) { - const partKey = part.name ?? part.label; - const partSelector = part.select ?? part.className; - const rowSelector = part.rowSelector ?? `${partSelector} tr`; - partsNodes[partKey] = Array.from(rootDom.querySelectorAll(rowSelector)); - } + config.tableParts!.forEach(part => { + partsNodes[part.name as any] = rootDom.querySelectorAll(`${part.select} tr`); + }); - const firstPart = config.tableParts?.[0]; - const firstPartKey = firstPart ? firstPart.name ?? firstPart.label : ''; - const rowCount = partsNodes[firstPartKey]?.length || 0; + // //以第一个part的行数为准,进行横向扫描 + const rowCount = partsNodes[config.tableParts![0].name!]?.length || 0 - for (let index = 0; index < rowCount; index += 1) { - const rowData: DomScrapeResult = {}; - for (const keyItem of config.keys ?? []) { - const partKey = keyItem.part ?? firstPartKey; - const targetRowNode = partsNodes[partKey]?.[index]; + for (let i = 0; i < rowCount; i++) { + const rowData: any = {}; - if (!targetRowNode) { - continue; - } + //遍历keys,根据part映射,取对应的里面找 + for (const keyItem of config.keys!) { + const targetRowNode = partsNodes[keyItem.part!][i]; - if (keyItem.keys) { - rowData[keyItem.label] = await processFields(keyItem.keys, targetRowNode); - } else { - rowData[keyItem.label] = extractValue(targetRowNode.querySelector(keyItem.className), keyItem); + if (targetRowNode) { + //提取值 + if (keyItem.keys) { + rowData[keyItem.label] = await processFields(keyItem.keys, targetRowNode) + } else { + rowData[keyItem.label] = extractValue(targetRowNode.querySelector(keyItem.className), keyItem); + } } } - allTableData.push(rowData); } if (!config.pagination) { - console.log('未配置分页信息,抓取单页后结束。'); + console.log("未配置分页信息,抓取单页后结束。"); break; } if (config.pagination.maxPage && pageCount >= config.pagination.maxPage) { - console.log('已达到配置的最大页数,停止。'); + console.log("已达到配置的最大页数,停止。"); break; } @@ -231,4 +207,4 @@ async function processTable(config: PlatformFieldConfig, rootDom: ParentNode): P } return allTableData; -} +} \ No newline at end of file