// 263 lines · 6.2 KiB · JavaScript
// Maximum number of rows to return, taken from the caller-supplied `args.top_n`.
// A missing or zero value falls back to 10. A non-numeric value also falls back
// to 10: without this guard `Math.max(1, NaN)` is NaN, which makes every
// `rows.length >= limit` comparison false and `slice(0, limit)` return nothing.
// Fractions are floored so both extraction paths cap at the same count; the
// minimum is 1.
const requestedTopN = Number(args.top_n || 10);
const limit = Number.isNaN(requestedTopN) ? 10 : Math.max(1, Math.floor(requestedTopN));
|
|
|
|
/**
 * Normalizes a value into trimmed, single-spaced text.
 *
 * Falsy values (null, undefined, '', 0, false) become the empty string.
 *
 * @param {*} value - Anything convertible with String().
 * @returns {string} Text with zero-width spaces removed, whitespace runs
 *   collapsed to one space, and leading/trailing whitespace trimmed.
 */
function cleanText(value) {
  return String(value || '')
    // Strip zero-width spaces before collapsing whitespace: `\s` does not
    // match U+200B, so removing it afterwards could leave two adjacent spaces.
    .replace(/\u200b/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
|
|
/**
 * Returns the cleaned text of the first selector that yields non-empty text.
 *
 * Selectors are tried in order; for each one only the first matching
 * descendant of `root` is inspected.
 *
 * @param {{querySelector: Function}} root - Node to search within.
 * @param {string[]} selectors - CSS selectors in priority order.
 * @returns {string} The cleaned text, or '' when no selector produced any.
 */
function pickText(root, selectors) {
  for (const query of selectors) {
    const element = root.querySelector(query);
    if (!element) {
      continue;
    }
    const content = cleanText(element.textContent);
    if (content.length > 0) {
      return content;
    }
  }
  return '';
}
|
|
|
|
/**
 * Guesses a heat value from free-form text.
 *
 * Prefers the first number followed by a unit (万, 亿, k, K, m, M), returned
 * as number + unit with K/M lower-cased. Otherwise returns the first bare
 * number found in the text.
 *
 * @param {*} text - Raw text; it is cleaned before matching.
 * @returns {string} The heat token, or '' when the text contains no number.
 */
function inferHeat(text) {
  const normalized = cleanText(text);

  const withUnit = /(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?/.exec(normalized);
  if (withUnit !== null) {
    const [, amount, unit] = withUnit;
    return `${amount}${unit}`.replace('K', 'k').replace('M', 'm');
  }

  const bare = /(\d+(?:\.\d+)?)(?:热度)?/.exec(normalized);
  return bare === null ? '' : bare[1];
}
|
|
|
|
/**
 * Extracts a heat token that sits at the very end of the text.
 *
 * The token is a number followed by a unit (万, 亿, k, K, m, M) and an
 * optional "热度" suffix; K/M are lower-cased in the result.
 *
 * @param {*} text - Raw text; it is cleaned before matching.
 * @returns {string} Number + unit, or '' when the text does not end in one.
 */
function extractHeatToken(text) {
  const trailing = /(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?$/.exec(cleanText(text));
  if (trailing === null) {
    return '';
  }
  const [, amount, unit] = trailing;
  return `${amount}${unit}`.replace('K', 'k').replace('M', 'm');
}
|
|
|
|
/**
 * Determines the rank of a hot-list item.
 *
 * Resolution order:
 *   1. a positive integer parsed from the text of a dedicated rank element;
 *   2. a one- or two-digit number at the very start of the item's own text;
 *   3. the item's position, `index + 1`.
 *
 * @param {Element} item - The candidate node for one list entry.
 * @param {number} index - Zero-based position used as the last resort.
 * @returns {number} The inferred rank.
 */
function inferRank(item, index) {
  const rankSelectors = [
    '.HotList-item-index',
    '.HotItem-index',
    '[data-rank]',
    '.RankingIndex',
  ];
  const explicit = Number.parseInt(pickText(item, rankSelectors), 10);
  if (Number.isFinite(explicit) && explicit > 0) {
    return explicit;
  }

  const prefix = /^(\d{1,2})\b/.exec(cleanText(item.textContent));
  return prefix === null ? index + 1 : Number.parseInt(prefix[1], 10);
}
|
|
|
|
/**
 * Builds result rows from DOM candidates.
 *
 * A candidate yields a row only when it has a title not seen before and a
 * heat value, taken from a metrics element or, failing that, inferred from
 * the candidate's full text. Collection stops once `limit` rows exist.
 *
 * @returns {Array<[number, string, string]>} Rows of [rank, title, heat].
 */
function collectRows() {
  const titleSelectors = [
    '.HotList-item-title',
    '.HotList-item-title a',
    '.HotItem-content a',
    'h2 a',
    'h2',
    'a[href*="/question/"]',
  ];
  const heatSelectors = [
    '.HotList-item-metrics',
    '.HotList-item-heat',
    '.HotItem-metrics',
    '.HotItem-hot',
    '[data-heat]',
  ];
  const usedTitles = new Set();
  const result = [];

  for (const element of collectDomCandidates()) {
    const title = pickText(element, titleSelectors);
    if (title === '' || usedTitles.has(title)) {
      continue;
    }

    // Dedicated metrics element first, whole-item text as the fallback.
    const heat = pickText(element, heatSelectors) || inferHeat(element.textContent);
    if (heat === '') {
      continue;
    }

    usedTitles.add(title);
    // The positional fallback passed to inferRank is the count of rows so far.
    const rank = inferRank(element, result.length);
    result.push([rank, title, heat]);

    if (result.length >= limit) {
      break;
    }
  }

  return result;
}
|
|
|
|
/**
 * Gathers every DOM node that might be a hot-list entry.
 *
 * Selectors are queried in the listed order and each node appears once, at
 * the position where it was first matched.
 *
 * @returns {Element[]} Unique candidate nodes in first-match order.
 */
function collectDomCandidates() {
  const candidateSelectors = [
    '.HotList-item',
    '.HotItem',
    '.HotList-list > *',
    '[data-hot-item]',
    'section ol li',
    'main li',
    'main article',
    'main [class*="Hot"]',
  ];

  // A Set keeps insertion order and ignores repeats, which is exactly the
  // "first occurrence wins" de-duplication needed here.
  const unique = new Set();
  for (const selector of candidateSelectors) {
    for (const node of Array.from(document.querySelectorAll(selector))) {
      unique.add(node);
    }
  }
  return Array.from(unique);
}
|
|
|
|
/**
 * Collects the visible text of several page containers for text parsing.
 *
 * Each selector contributes the text of its first match (innerText, else
 * textContent). Containers whose cleaned text is empty or repeats an earlier
 * one are skipped. The raw texts are returned longest first.
 *
 * @returns {string[]} Raw (uncleaned) text blobs, sorted by length descending.
 */
function collectTextSources() {
  const containerSelectors = ['.HotList-list', '.HotList', '#root', 'main', 'body'];
  const seenKeys = new Set();
  const texts = [];

  for (const selector of containerSelectors) {
    const element = document.querySelector(selector);
    const raw = String(element ? (element.innerText || element.textContent || '') : '');
    // Compare on cleaned text so whitespace-only differences do not count.
    const key = cleanText(raw);
    if (key && !seenKeys.has(key)) {
      seenKeys.add(key);
      texts.push(raw);
    }
  }

  texts.sort((first, second) => second.length - first.length);
  return texts;
}
|
|
|
|
/**
 * Reports whether the text contains one of the phrases the script treats as
 * a sign of a login wall or verification page.
 *
 * @param {string} text - Page text to inspect.
 * @returns {boolean} True when any marker phrase occurs in the text.
 */
function looksLikeBlockedPage(text) {
  const blockedMarkers = /安全验证|异常访问|请完成验证|登录后继续|登录即可查看|验证码|访问受限/;
  return blockedMarkers.exec(text) !== null;
}
|
|
|
|
/**
 * Decides whether a text line should be skipped by the text parser.
 *
 * Skipped lines are: empty lines, a fixed set of exact page headings, and
 * lines starting with one of a fixed set of prefixes.
 *
 * @param {string} line - A single cleaned line of page text.
 * @returns {boolean} True when the line should be skipped.
 */
function shouldIgnoreTextLine(line) {
  if (!line) {
    return true;
  }

  const exactNoise = ['知乎热榜', '首页 - 知乎', '首页-知乎'];
  if (exactNoise.includes(line)) {
    return true;
  }

  const noisePrefixes = ['/ ', '当前页面 ·', '继续输入任务'];
  return noisePrefixes.some((prefix) => line.startsWith(prefix));
}
|
|
|
|
/**
 * Text-based extraction: parses rows out of the page's visible text.
 *
 * Text sources are tried in the order collectTextSources() returns them; the
 * first one that parses into at least one row wins, capped at `limit` rows.
 *
 * @returns {Array<[number, string, string]>} Rows of [rank, title, heat], or
 *   an empty array when no source produced any.
 * @throws {Error} When a text source looks like a login or verification page.
 */
function collectRowsFromText() {
  for (const pageText of collectTextSources()) {
    if (!pageText) {
      continue;
    }
    if (looksLikeBlockedPage(pageText)) {
      throw new Error('知乎页面当前需要登录或完成安全验证，无法读取热榜条目');
    }

    const parsed = parseRowsFromText(pageText);
    if (parsed.length > 0) {
      return parsed.slice(0, limit);
    }
  }
  return [];
}
|
|
|
|
/**
 * Parses hot-list rows out of a block of visible page text.
 *
 * The text is split into cleaned, non-ignored lines and scanned in order.
 * Recognized layouts:
 *   - a line holding only a 1-2 digit rank, followed by title line(s) and a
 *     heat line;
 *   - "<rank>[.、 ]<title> <heat>" on a single line;
 *   - title line(s) followed by a line that is only a heat token.
 * Titles are de-duplicated, and parsing stops once `limit` rows exist.
 *
 * @param {*} text - Raw text (for example innerText of a container).
 * @returns {Array<[number, string, string]>} Rows of [rank, title, heat];
 *   rank falls back to the row's 1-based position when none was seen.
 */
function parseRowsFromText(text) {
  const lines = String(text || '')
    .split(/\n+/)
    .map(cleanText)
    .filter((line) => !!line && !shouldIgnoreTextLine(line));
  // Matches a line that consists of nothing but a heat token.
  const wholeLineHeat = /^\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)(?:热度)?$/;
  const seenTitles = new Set();
  const rows = [];
  let pendingRank = null;
  let titleParts = [];

  // Consumes the pending rank/title state and appends a row unless the title
  // is empty, the heat is missing, or the title was already emitted.
  function pushRow(title, heat) {
    const normalizedTitle = cleanText(title);
    const rank = pendingRank || rows.length + 1;
    // Always clear the pending state, including when the row is skipped;
    // otherwise a duplicate's rank and title fragments would be carried over
    // and prepended to the following entry.
    pendingRank = null;
    titleParts = [];
    if (!normalizedTitle || !heat || seenTitles.has(normalizedTitle)) {
      return;
    }
    seenTitles.add(normalizedTitle);
    rows.push([rank, normalizedTitle, heat]);
  }

  for (const rawLine of lines) {
    let line = rawLine;

    // A bare small number before any title text is the rank of the next row.
    const rankOnly = line.match(/^(\d{1,2})$/);
    if (rankOnly && !titleParts.length) {
      pendingRank = Number(rankOnly[1]);
      continue;
    }

    // With a title pending, a line that is entirely a heat token closes the
    // row. This must run before the ranked-line check below, which would
    // otherwise misread heats such as "52 万热度" or "3.5万" as
    // "<rank><separator><text>".
    if (titleParts.length && wholeLineHeat.test(line)) {
      pushRow(titleParts.join(' '), extractHeatToken(line));
      if (rows.length >= limit) {
        break;
      }
      continue;
    }

    // "<rank>. rest" / "<rank>、rest" / "<rank> rest": split off the rank.
    const rankedLine = line.match(/^(\d{1,2})[.、\s]+(.+)$/);
    if (rankedLine) {
      pendingRank = Number(rankedLine[1]);
      line = cleanText(rankedLine[2]);
    }

    // Title and heat on the same line.
    const inlineMatch = line.match(/^(.*?)(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?$/);
    if (inlineMatch && cleanText(inlineMatch[1])) {
      pushRow(cleanText(inlineMatch[1]), `${inlineMatch[2]}${inlineMatch[3]}`.replace('K', 'k').replace('M', 'm'));
      if (rows.length >= limit) {
        break;
      }
      continue;
    }

    // What remains after a rank prefix is only a heat token: it closes the
    // pending title.
    const heatOnly = extractHeatToken(line);
    if (heatOnly && titleParts.length) {
      pushRow(titleParts.join(' '), heatOnly);
      if (rows.length >= limit) {
        break;
      }
      continue;
    }

    // Anything else is treated as (part of) a title.
    titleParts.push(line);
  }

  return rows;
}
|
|
|
|
const domRows = collectRows();
|
|
const rows = domRows.length ? domRows : collectRowsFromText();
|
|
if (!rows.length) {
|
|
throw new Error('未能从页面 DOM 中提取到知乎热榜条目');
|
|
}
|
|
|
|
return {
|
|
source: `${location.origin}${location.pathname}`,
|
|
sheet_name: '知乎热榜',
|
|
columns: ['rank', 'title', 'heat'],
|
|
rows,
|
|
};
|