Files
store_ai_extension/message.js
2026-04-30 10:55:03 +08:00

269 lines
8.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
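/**
 * Column configuration fields:
 * type: 0 = plain element (default), 1 = list, 2 = table with pagination
 * condition: {
 *   list: []    selectors to click before extracting
 *   time: 2000  wait time (ms) after each click
 * }
 * keys: child entries; when type is 0 they are plain key/value fields, otherwise they describe each array item
 * tableParts: tables only; supports multiple tables or a table split into segments
 * pagination: paging configuration
 */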
/**
 * Data types
 * 1. Plain text or image
 * 2. List type
 * 3. Child elements in a row layout (a combination of 1 and 2)
 * 4. List
 */
(async function () {
// Selectors clicked, in order, before the review list is read.
// NOTE(review): presumably these open the rating filter and tick the low-star
// checkboxes — confirm against the target page.
const lowStarFilterClicks = [
  ".flex.items-center.mt-6 div:nth-child(3)",
  ".eds-react-checkbox-group label:nth-child(2)",
  ".eds-react-checkbox-group label:nth-child(3)",
  ".eds-react-checkbox-group label:nth-child(4)",
];

// Fields read from each review card; every selector is resolved inside the card.
const reviewFields = [
  { label: "用户", className: ".flex.items-center.justify-start .ml-2" },
  { label: "订单编号", className: ".underline.px-1" },
  { label: "商品名称", className: ".min-w-0.font-medium.break-all" },
  { label: "规格", className: ".min-w-0.font-medium.break-all + div" },
  {
    label: "评价内容",
    className: ".min-w-0.overflow-hidden",
    // Clicked before the review text is read.
    condition: { list: ["span.cursor-pointer"], time: 200 },
  },
];

// Extraction plan handed to process(): one paginated list of review cards.
const column = [
  {
    label: "低星评论",
    className: ".border-solid.rounded",
    condition: { list: lowStarFilterClicks, time: 200 },
    type: 1,
    keys: reviewFields,
    pagination: {
      nextBtn: ".eds-react-pagination-pager__button-next",
      maxPage: 2, // maximum number of pages to crawl
      delay: 2000, // wait (ms) for the next page to load after clicking
    },
  },
];
/**
 * Resolves after a delay.
 *
 * @param {number} [ms] - Delay in milliseconds. Only a missing value
 *   (undefined/null) falls back to 1500; an explicit 0 is honoured.
 * @returns {Promise<void>} Settles once the timer fires.
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms ?? 1500));
/**
 * Recursively extracts one value per column definition from a DOM subtree.
 *
 * @param {Array<object>} columns - Column definitions (label, className and
 *   optional condition / type / keys).
 * @param {Element} dom - Parent node every selector is resolved against.
 * @returns {Promise<object|null>} Map of label -> extracted value, or null
 *   when dom is falsy.
 */
async function process(columns, dom) {
  if (!dom) {
    return null;
  }

  const extracted = {};
  for (const entry of columns) {
    // Fire the entry's configured clicks inside the parent before the lookup.
    await autoClick(entry, dom);

    const node = dom.querySelector(entry.className);
    if (!node) {
      extracted[entry.label] = "没找到该元素";
      continue;
    }

    if (!entry.type) {
      // Plain element. NOTE(review): the clicks run a second time here, scoped
      // to the matched node, so a target inside the node is clicked twice —
      // confirm that is intended.
      await autoClick(entry, node);
      const hasChildren = Boolean(entry.keys && entry.keys.length > 0);
      // Row layout recurses into the child keys; otherwise read the value.
      extracted[entry.label] = hasChildren
        ? await process(entry.keys, node)
        : extractValue(node, entry);
      continue;
    }

    // Loose equality is kept so a numeric string type still matches.
    if (entry.type == 1) {
      // Lists are collected from the parent, not from the first match.
      extracted[entry.label] = await processList(entry, dom);
      continue;
    }
    if (entry.type == 2) {
      extracted[entry.label] = await processTable(entry, node);
    }
  }
  return extracted;
}
/**
 * Clicks every element matched by the entry's condition selectors, pausing
 * after each click.
 *
 * @param {object} config - Column entry; nothing happens unless it carries a
 *   `condition` of the form { list: string[], time: number }.
 * @param {Element} rootDom - Node the selectors are resolved against.
 * @returns {Promise<void>}
 */
async function autoClick(config, rootDom) {
  const clickPlan = config?.condition;
  if (!clickPlan) {
    return;
  }

  for (const selector of clickPlan.list) {
    const matches = rootDom.querySelectorAll(selector);
    for (const node of matches) {
      node.click();
      // Pause after each click before moving on to the next target.
      await sleep(clickPlan.time);
    }
  }
}
/**
 * Reads the value of a single element according to its column entry.
 *
 * Resolution order:
 *   1. missing element      -> "未找到"
 *   2. config.attr is set   -> that attribute, trimmed ("" when absent)
 *   3. <img>                -> raw `src` attribute
 *   4. <a>                  -> `href`; non-http values are resolved against
 *                              the current page URL
 *   5. anything else        -> text content with newlines removed, trimmed
 *
 * @param {Element|null|undefined} el - Element to read; may be missing.
 * @param {object} config - Column entry; `attr` selects an attribute
 *   (class, href, src, data-*, ...).
 * @returns {string|null} Extracted value; null when an <img>/<a> lacks the
 *   attribute.
 */
function extractValue(el, config) {
  // Guard first: every branch below dereferences el, including the attr one.
  if (el == null) {
    return "未找到";
  }

  if (config?.attr) {
    return (el.getAttribute(config.attr) || "").trim();
  }

  const tagName = el.tagName;
  if (tagName === "IMG") {
    return el.getAttribute("src");
  }

  if (tagName === "A") {
    const href = el.getAttribute("href");
    // Missing/empty hrefs and already-absolute http(s) links pass through.
    if (!href || href.startsWith("http")) {
      return href;
    }
    try {
      // Resolves "/path", "path", "../path" and "//host/path" alike; plain
      // origin + href concatenation only works for the first form.
      return new URL(href, window.location.href).href;
    } catch {
      // Unparseable value: hand back what the page contained.
      return href;
    }
  }

  // Default: text content, cleaned. textContent covers nodes without innerText.
  const text = el.innerText ?? el.textContent ?? "";
  return text.replace(/\n/g, "").trim();
}
/**
 * Collects every list item matching config.className, following pagination
 * when it is configured.
 *
 * Paging stops when any of these holds: no pagination config, maxPage reached,
 * the next-page button is missing, or the button is disabled (checked through
 * pagination.disabledClass when given, otherwise the button's `disabled`
 * property). The disabled check mirrors processTable; without it the last
 * page would be clicked and re-scraped again and again.
 *
 * @param {object} config - List entry: className, keys and optional
 *   pagination { nextBtn, maxPage, delay, disabledClass }.
 * @param {Element} rootDom - Node the item selector is resolved against.
 * @returns {Promise<Array<object>>} One extracted record per item, across all
 *   visited pages.
 */
async function processList(config, rootDom) {
  const allList = [];
  const { pagination } = config;
  let pageCount = 0;

  while (true) {
    pageCount++;

    // querySelectorAll returns a static NodeList, so clicks fired while
    // extracting cannot change what is iterated.
    for (const element of rootDom.querySelectorAll(config.className)) {
      allList.push(await process(config.keys, element));
    }

    // 1. No pagination configured: a single page is all there is.
    if (!pagination) {
      console.log("未配置分页信息,抓取单页后结束。");
      break;
    }
    // 2. Hard stop at the configured page limit.
    if (pagination.maxPage && pageCount >= pagination.maxPage) {
      console.log("已达到配置的最大页数,停止。");
      break;
    }
    // 3. The button is looked up document-wide, not inside rootDom.
    const nextBtn = document.querySelector(pagination.nextBtn);
    if (!nextBtn) {
      console.log("未找到下一页按钮,抓取结束。");
      break;
    }
    // 4. A disabled button means the last page has been reached.
    const isDisabled = pagination.disabledClass
      ? nextBtn.classList.contains(pagination.disabledClass)
      : nextBtn.disabled;
    if (isDisabled) {
      console.log("下一页按钮已禁用,抓取结束。");
      break;
    }

    nextBtn.click();
    await sleep(pagination.delay);
  }
  return allList;
}
/**
 * Extracts table rows, optionally across several pages.
 *
 * A table may be rendered as several parts (config.tableParts). Rows are
 * scanned by index, the first part deciding how many rows a page has; each key
 * reads its value from row i of the part it names.
 *
 * Robustness:
 *   - no tableParts configured  -> warns and returns [] instead of throwing
 *   - key without `part`        -> read from the first table part
 *   - key naming an unknown part, or a part shorter than the first
 *                               -> that key is left out of the row
 *
 * @param {object} config - Table entry: tableParts [{ name, select }], keys
 *   [{ label, className, part, keys? }] and optional pagination
 *   { nextBtn, maxPage, delay, disabledClass }.
 * @param {Element} rootDom - Node the part selectors are resolved against.
 * @returns {Promise<Array<object>>} One record per scanned row.
 */
async function processTable(config, rootDom) {
  const allTableData = [];
  const tableParts = config.tableParts ?? [];
  if (tableParts.length === 0) {
    console.warn("未配置 tableParts,无法提取表格数据。");
    return allTableData;
  }

  const firstPartName = tableParts[0].name;
  const { pagination } = config;
  let pageCount = 0;

  while (true) {
    pageCount++;

    // Re-query the rows of every part on each page. A Map keeps part names
    // from colliding with Object.prototype members.
    const partsNodes = new Map();
    for (const part of tableParts) {
      partsNodes.set(part.name, rootDom.querySelectorAll(`${part.select} tr`));
    }

    // The first part decides how many rows this page has.
    const rowCount = partsNodes.get(firstPartName)?.length || 0;
    for (let i = 0; i < rowCount; i++) {
      const rowData = {};
      for (const keyItem of config.keys) {
        const partRows = partsNodes.get(keyItem.part ?? firstPartName);
        const targetRowNode = partRows?.[i];
        if (!targetRowNode) {
          continue;
        }
        rowData[keyItem.label] = keyItem.keys
          ? await process(keyItem.keys, targetRowNode)
          : extractValue(targetRowNode.querySelector(keyItem.className), keyItem);
      }
      allTableData.push(rowData);
    }

    // 1. No pagination configured: a single page is all there is.
    if (!pagination) {
      console.log("未配置分页信息,抓取单页后结束。");
      break;
    }
    // 2. Hard stop at the configured page limit.
    if (pagination.maxPage && pageCount >= pagination.maxPage) {
      console.log("已达到配置的最大页数,停止。");
      break;
    }
    // 3. The button is looked up document-wide, not inside rootDom.
    const nextBtn = document.querySelector(pagination.nextBtn);
    if (!nextBtn) {
      console.log("未找到下一页按钮,抓取结束。");
      break;
    }
    // 4. A disabled button means the last page has been reached.
    const isDisabled = pagination.disabledClass
      ? nextBtn.classList.contains(pagination.disabledClass)
      : nextBtn.disabled;
    if (isDisabled) {
      console.log("下一页按钮已禁用,抓取结束。");
      break;
    }

    nextBtn.click();
    await sleep(pagination.delay);
  }
  return allTableData;
}
let data = await process(column, document.body)
console.log("==== 提取成功 ====");
console.log(data);
return data
})()