chore: initial commit

This commit is contained in:
zhaoyilun
2026-04-15 01:17:01 +08:00
commit ff748ebbeb
1162 changed files with 6107 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
---
name: demo-skill
description: demo-skill browser_script tool
version: 0.1.0
author: sgclaw-skill-converter
tags:
- sgclaw
- browser_script
- converter
---
# Use Cases
- 输入 JS 脚本并生成可被 sgclaw 加载的技能包
- 适配 browser_script 执行模型,优先返回结构化结果
- 与运行时兼容,遵循 expected_domain 与 CSS 选择器约束
# Workflow
1. 解析脚本参数与返回结构
2. 生成 SKILL.toml 与 SKILL.md
3. 保持 JS 逻辑原样,落盘到 scripts/<tool>.js
4. 输出 references 与 assets 文档
# Runtime Contract
- expected_domain: `example.com`
- tool name: `extract_demo_skill`
- tool description: 基于现有脚本生成结构化结果
- args:
-
- kind: browser_script
- command: scripts/extract_demo_skill.js
# Blocked-Page Rule
- 遇到登录、验证码、权限不足时必须 throw Error,不得返回空数组或空对象。
- 明确报错文案,例如:登录/验证码拦截/权限不足。
# Output Contract
- structured-first:返回对象或对象数组,避免 getText 二次采集。
- artifact_fields: []
- superrpa_browser 优先;使用 CSS 选择器,禁止 XPath 与 jQuery :contains
# Partial/Fallback Rule
- blocked_page: 登录/验证码/反爬场景要抛错
- partial_data: 数据不足要在注释或结果中说明
# References
- `references/implementation-notes.md`
- `assets/notes.md`
## 模式
- preferred_mode: compact

View File

@@ -0,0 +1,12 @@
[skill]
name = "demo-skill"
description = "demo-skill browser_script tool"
version = "0.1.0"
author = "sgclaw-skill-converter"
tags = ["sgclaw", "browser_script", "converter"]
[[tools]]
name = "extract_demo_skill"
description = "基于现有脚本生成结构化结果"
kind = "browser_script"
command = "scripts/extract_demo_skill.js"

View File

@@ -0,0 +1,6 @@
## Asset Notes
- skill_name: demo-skill
- expected_domain: example.com
- mode: compact
- 资源仅使用本地相对文档,不包含远程 markdown 链接。

View File

@@ -0,0 +1,8 @@
## Implementation Notes
- skill_name: demo-skill
- tool: extract_demo_skill
- args:
- artifact_fields:
- generator: main.go
- 说明: 保持脚本业务逻辑不变,仅包装与文档产出。

View File

@@ -0,0 +1 @@
return {a:1};

View File

@@ -0,0 +1,51 @@
---
name: extract_hotlist
description: extract_hotlist browser_script tool
version: 0.1.0
author: sgclaw-skill-converter
tags:
- sgclaw
- browser_script
- converter
---
# Use Cases
- 输入 JS 脚本并生成可被 sgclaw 加载的技能包
- 适配 browser_script 执行模型,优先返回结构化结果
- 与运行时兼容,遵循 expected_domain 与 CSS 选择器约束
# Workflow
1. 解析脚本参数与返回结构
2. 生成 SKILL.toml 与 SKILL.md
3. 保持 JS 逻辑原样,落盘到 scripts/<tool>.js
4. 输出 references 与 assets 文档
# Runtime Contract
- expected_domain: `未设置`
- tool name: `extract_hotlist`
- tool description: 基于现有脚本生成结构化结果
- args:
- top_n: 脚本参数 top_n
- kind: browser_script
- command: scripts/extract_hotlist.js
# Blocked-Page Rule
- 遇到登录、验证码、权限不足时必须 throw Error,不得返回空数组或空对象。
- 明确报错文案,例如:登录/验证码拦截/权限不足。
# Output Contract
- structured-first:返回对象或对象数组,避免 getText 二次采集。
- artifact_fields: [source sheet_name columns rows data]
- superrpa_browser 优先;使用 CSS 选择器,禁止 XPath 与 jQuery :contains
# Partial/Fallback Rule
- blocked_page: 登录/验证码/反爬场景要抛错
- partial_data: 数据不足要在注释或结果中说明
# References
- `references/implementation-notes.md`
- `assets/notes.md`
## 模式
- preferred_mode: compact

View File

@@ -0,0 +1,15 @@
[skill]
name = "extract_hotlist"
description = "extract_hotlist browser_script tool"
version = "0.1.0"
author = "sgclaw-skill-converter"
tags = ["sgclaw", "browser_script", "converter"]
[[tools]]
name = "extract_hotlist"
description = "基于现有脚本生成结构化结果"
kind = "browser_script"
command = "scripts/extract_hotlist.js"
[tools.args]
top_n = "脚本参数 top_n"

View File

@@ -0,0 +1,6 @@
## Asset Notes
- skill_name: extract_hotlist
- expected_domain: 未设置
- mode: compact
- 资源仅使用本地相对文档,不包含远程 markdown 链接。

View File

@@ -0,0 +1,8 @@
## Implementation Notes
- skill_name: extract_hotlist
- tool: extract_hotlist
- args: top_n
- artifact_fields: source, sheet_name, columns, rows, data
- generator: main.go
- 说明: 保持脚本业务逻辑不变,仅包装与文档产出。

View File

@@ -0,0 +1,262 @@
// Maximum number of rows to return, read from the `top_n` script argument.
// A missing or falsy `top_n` defaults to 10 and values below 1 are clamped to 1.
// A non-numeric `top_n` would convert to NaN, and `Math.max(1, NaN)` is NaN,
// which makes every `rows.length >= limit` comparison false and silently
// removes the cap — so NaN also falls back to the default of 10.
const requestedLimit = Number(args.top_n || 10);
const limit = Math.max(1, Number.isNaN(requestedLimit) ? 10 : requestedLimit);
/**
 * Normalizes a text fragment for comparison and output.
 *
 * Zero-width spaces (U+200B) are stripped first, then every run of whitespace
 * is collapsed to a single space and both ends are trimmed. The zero-width
 * space is removed before collapsing because `\s` does not match it: doing it
 * afterwards would leave a double space wherever one sat between two spaces.
 *
 * @param {*} value - Text to normalize; any falsy value is treated as ''.
 * @returns {string} The normalized text.
 */
function cleanText(value) {
  return String(value || '')
    .replace(/\u200b/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Returns the cleaned text of the first selector that yields non-empty text.
 *
 * Selectors are tried in order against `root`; a selector that matches no
 * node, or a node whose cleaned text is empty, is skipped.
 *
 * @param {{querySelector: Function}} root - Node to search within.
 * @param {string[]} selectors - CSS selectors, in priority order.
 * @returns {string} The first non-empty cleaned text, or '' if none is found.
 */
function pickText(root, selectors) {
  for (const css of selectors) {
    const found = root.querySelector(css);
    if (!found) {
      continue;
    }
    const content = cleanText(found.textContent);
    if (content) {
      return content;
    }
  }
  return '';
}
/**
 * Extracts a heat value from free text.
 *
 * The first number followed by a unit (万, 亿, k, K, m, M) wins and is returned
 * as number + unit with K/M lower-cased. If no such pair exists, the first
 * bare number in the text is returned instead.
 *
 * @param {*} text - Text to scan.
 * @returns {string} The heat token, or '' when the text contains no number.
 */
function inferHeat(text) {
  const normalized = cleanText(text);
  const withUnit = /(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?/.exec(normalized);
  if (withUnit) {
    const [, amount, unit] = withUnit;
    return `${amount}${unit}`.replace('K', 'k').replace('M', 'm');
  }
  const bare = /(\d+(?:\.\d+)?)(?:热度)?/.exec(normalized);
  if (bare) {
    return bare[1];
  }
  return '';
}
/**
 * Extracts a heat value that sits at the very end of the text.
 *
 * Only a number followed by a unit (万, 亿, k, K, m, M), optionally followed by
 * "热度", at the end of the cleaned text is accepted. K/M are lower-cased.
 *
 * @param {*} text - Text to inspect.
 * @returns {string} Number + unit, or '' when the text does not end in one.
 */
function extractHeatToken(text) {
  const trailing = /(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?$/.exec(cleanText(text));
  if (!trailing) {
    return '';
  }
  const [, amount, unit] = trailing;
  return `${amount}${unit}`.replace('K', 'k').replace('M', 'm');
}
/**
 * Determines the rank of a hot-list item.
 *
 * Resolution order:
 *   1. a positive integer parsed from the text of a dedicated rank node;
 *   2. a 1–2 digit number at the very start of the item's cleaned text;
 *   3. the positional fallback `index + 1`.
 *
 * @param {{querySelector: Function, textContent: string}} item - Item node.
 * @param {number} index - Zero-based position used as the fallback.
 * @returns {number} The resolved rank.
 */
function inferRank(item, index) {
  const rankSelectors = [
    '.HotList-item-index',
    '.HotItem-index',
    '[data-rank]',
    '.RankingIndex',
  ];
  const explicit = Number.parseInt(pickText(item, rankSelectors), 10);
  if (Number.isFinite(explicit) && explicit > 0) {
    return explicit;
  }
  const prefix = /^(\d{1,2})\b/.exec(cleanText(item.textContent));
  return prefix ? Number.parseInt(prefix[1], 10) : index + 1;
}
/**
 * Builds `[rank, title, heat]` rows from the DOM candidate nodes.
 *
 * A candidate is skipped when it has no title, repeats a title already used,
 * or yields no heat (neither from a dedicated heat node nor inferred from its
 * full text). Collection stops as soon as `limit` rows have been gathered.
 *
 * @returns {Array<Array>} Rows in candidate order.
 */
function collectRows() {
  const titleSelectors = [
    '.HotList-item-title',
    '.HotList-item-title a',
    '.HotItem-content a',
    'h2 a',
    'h2',
    'a[href*="/question/"]',
  ];
  const heatSelectors = [
    '.HotList-item-metrics',
    '.HotList-item-heat',
    '.HotItem-metrics',
    '.HotItem-hot',
    '[data-heat]',
  ];
  const usedTitles = new Set();
  const collected = [];
  for (const candidate of collectDomCandidates()) {
    const title = pickText(candidate, titleSelectors);
    if (title && !usedTitles.has(title)) {
      // Prefer a dedicated heat node; otherwise scan the whole item text.
      const heat = pickText(candidate, heatSelectors) || inferHeat(candidate.textContent);
      if (heat) {
        usedTitles.add(title);
        collected.push([inferRank(candidate, collected.length), title, heat]);
        if (collected.length >= limit) {
          break;
        }
      }
    }
  }
  return collected;
}
/**
 * Gathers the DOM nodes that may represent hot-list items.
 *
 * Each selector is queried against `document` in the listed order. A node
 * matched by more than one selector is kept once, at the position of its
 * first match.
 *
 * @returns {Array} Unique candidate nodes in first-match order.
 */
function collectDomCandidates() {
  const selectorList = [
    '.HotList-item',
    '.HotItem',
    '.HotList-list > *',
    '[data-hot-item]',
    'section ol li',
    'main li',
    'main article',
    'main [class*="Hot"]',
  ];
  // A Set keeps insertion order and ignores repeated additions.
  const unique = new Set();
  for (const css of selectorList) {
    for (const element of Array.from(document.querySelectorAll(css))) {
      unique.add(element);
    }
  }
  return Array.from(unique);
}
/**
 * Collects raw text from a fixed list of page containers.
 *
 * For each container the first match in `document` is read (`innerText`,
 * falling back to `textContent`). Containers whose cleaned text is empty or
 * equal to one already collected are dropped.
 *
 * @returns {string[]} Raw (uncleaned) texts, longest first.
 */
function collectTextSources() {
  const containerSelectors = ['.HotList-list', '.HotList', '#root', 'main', 'body'];
  const seenKeys = new Set();
  const texts = [];
  for (const css of containerSelectors) {
    const element = document.querySelector(css);
    const raw = element ? String(element.innerText || element.textContent || '') : '';
    // Deduplicate on the cleaned text but keep the raw text (with newlines).
    const key = cleanText(raw);
    if (key && !seenKeys.has(key)) {
      seenKeys.add(key);
      texts.push(raw);
    }
  }
  texts.sort((first, second) => second.length - first.length);
  return texts;
}
/**
 * Tells whether the text contains one of the known login / verification /
 * access-restriction phrases.
 *
 * @param {string} text - Page text to inspect.
 * @returns {boolean} True when any of the phrases occurs in the text.
 */
function looksLikeBlockedPage(text) {
  const blockedMarkers = /安全验证|异常访问|请完成验证|登录后继续|登录即可查看|验证码|访问受限/;
  return blockedMarkers.test(text);
}
/**
 * Tells whether a text line should be dropped before row parsing.
 *
 * A line is ignored when it is empty, equals one of the fixed page-heading
 * strings, or starts with one of the fixed prefixes.
 *
 * @param {string} line - A single cleaned line of page text.
 * @returns {boolean} True when the line should be ignored.
 */
function shouldIgnoreTextLine(line) {
  if (!line) {
    return true;
  }
  const exactNoise = ['知乎热榜', '首页 - 知乎', '首页-知乎'];
  const noisePrefixes = ['/ ', '当前页面 ·', '继续输入任务'];
  return exactNoise.includes(line) || noisePrefixes.some((prefix) => line.startsWith(prefix));
}
/**
 * Text fallback: parses rows out of the page's visible text.
 *
 * Text sources are tried in the order `collectTextSources` returns them. The
 * first source that produces any rows wins, truncated to `limit` rows.
 *
 * @returns {Array<Array>} Parsed rows, or [] when no source produced any.
 * @throws {Error} When a source examined before any rows are found looks like
 *   a login or verification page.
 */
function collectRowsFromText() {
  for (const pageText of collectTextSources()) {
    if (!pageText) {
      continue;
    }
    if (looksLikeBlockedPage(pageText)) {
      throw new Error('知乎页面当前需要登录或完成安全验证,无法读取热榜条目');
    }
    const parsed = parseRowsFromText(pageText);
    if (parsed.length > 0) {
      return parsed.slice(0, limit);
    }
  }
  return [];
}
/**
 * Parses `[rank, title, heat]` rows out of plain page text.
 *
 * The text is split into lines, each line is cleaned, and empty or ignorable
 * lines are dropped. Lines are then consumed in order:
 *   - a bare 1–2 digit line (with no title pending) sets the next row's rank;
 *   - a "rank + separator + rest" line sets the rank and continues with rest;
 *   - a line ending in a heat value with text before it is a complete row;
 *   - a heat-only line closes the row whose title lines were accumulated;
 *   - anything else is accumulated as part of the pending title.
 * Rows with a title already seen are dropped. Parsing stops once `limit` rows
 * have been collected.
 *
 * @param {*} text - Raw page text; falsy values are treated as ''.
 * @returns {Array<Array>} Parsed rows in document order.
 */
function parseRowsFromText(text) {
  // A line that is nothing but a heat value, e.g. "98 万热度" or "3.5k".
  const bareHeatPattern = /^\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)(?:热度)?$/;
  const lines = String(text || '')
    .split(/\n+/)
    .map(cleanText)
    .filter((line) => !!line && !shouldIgnoreTextLine(line));
  const seenTitles = new Set();
  const rows = [];
  let pendingRank = null;
  let titleParts = [];

  // Records the row when it is valid and new. The pending rank and title are
  // cleared either way: a heat value always terminates the current entry, and
  // keeping a rejected duplicate's title around would glue it onto the next
  // row's title.
  function pushRow(title, heat) {
    const normalizedTitle = cleanText(title);
    if (normalizedTitle && heat && !seenTitles.has(normalizedTitle)) {
      seenTitles.add(normalizedTitle);
      rows.push([
        pendingRank || rows.length + 1,
        normalizedTitle,
        heat,
      ]);
    }
    pendingRank = null;
    titleParts = [];
  }

  for (const rawLine of lines) {
    let line = rawLine;
    const rankOnly = line.match(/^(\d{1,2})$/);
    if (rankOnly && !titleParts.length) {
      pendingRank = Number(rankOnly[1]);
      continue;
    }
    // A heat-only line such as "98 万热度" also fits the "rank + title" shape
    // (1–2 digits, a space, more text); it must not be split into rank 98 and
    // title "万热度", so the rank pattern is skipped for such lines.
    const rankedLine = bareHeatPattern.test(line)
      ? null
      : line.match(/^(\d{1,2})[.、\s]+(.+)$/);
    if (rankedLine) {
      pendingRank = Number(rankedLine[1]);
      line = cleanText(rankedLine[2]);
    }
    const inlineMatch = line.match(/^(.*?)(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?$/);
    if (inlineMatch && cleanText(inlineMatch[1])) {
      pushRow(
        cleanText(inlineMatch[1]),
        `${inlineMatch[2]}${inlineMatch[3]}`.replace('K', 'k').replace('M', 'm'),
      );
      if (rows.length >= limit) {
        break;
      }
      continue;
    }
    const heatOnly = extractHeatToken(line);
    if (heatOnly && titleParts.length) {
      pushRow(titleParts.join(' '), heatOnly);
      if (rows.length >= limit) {
        break;
      }
      continue;
    }
    titleParts.push(line);
  }
  return rows;
}
const domRows = collectRows();
const rows = domRows.length ? domRows : collectRowsFromText();
if (!rows.length) {
throw new Error('未能从页面 DOM 中提取到知乎热榜条目');
}
return {
source: `${location.origin}${location.pathname}`,
sheet_name: '知乎热榜',
columns: ['rank', 'title', 'heat'],
rows,
};

View File

@@ -0,0 +1,262 @@
// Maximum number of rows to return, read from the `top_n` script argument.
// A missing or falsy `top_n` defaults to 10 and values below 1 are clamped to 1.
// A non-numeric `top_n` would convert to NaN, and `Math.max(1, NaN)` is NaN,
// which makes every `rows.length >= limit` comparison false and silently
// removes the cap — so NaN also falls back to the default of 10.
const requestedLimit = Number(args.top_n || 10);
const limit = Math.max(1, Number.isNaN(requestedLimit) ? 10 : requestedLimit);
/**
 * Normalizes a text fragment for comparison and output.
 *
 * Zero-width spaces (U+200B) are stripped first, then every run of whitespace
 * is collapsed to a single space and both ends are trimmed. The zero-width
 * space is removed before collapsing because `\s` does not match it: doing it
 * afterwards would leave a double space wherever one sat between two spaces.
 *
 * @param {*} value - Text to normalize; any falsy value is treated as ''.
 * @returns {string} The normalized text.
 */
function cleanText(value) {
  return String(value || '')
    .replace(/\u200b/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Returns the cleaned text of the first selector that yields non-empty text.
 *
 * Selectors are tried in order against `root`; a selector that matches no
 * node, or a node whose cleaned text is empty, is skipped.
 *
 * @param {{querySelector: Function}} root - Node to search within.
 * @param {string[]} selectors - CSS selectors, in priority order.
 * @returns {string} The first non-empty cleaned text, or '' if none is found.
 */
function pickText(root, selectors) {
  for (const css of selectors) {
    const found = root.querySelector(css);
    if (!found) {
      continue;
    }
    const content = cleanText(found.textContent);
    if (content) {
      return content;
    }
  }
  return '';
}
/**
 * Extracts a heat value from free text.
 *
 * The first number followed by a unit (万, 亿, k, K, m, M) wins and is returned
 * as number + unit with K/M lower-cased. If no such pair exists, the first
 * bare number in the text is returned instead.
 *
 * @param {*} text - Text to scan.
 * @returns {string} The heat token, or '' when the text contains no number.
 */
function inferHeat(text) {
  const normalized = cleanText(text);
  const withUnit = /(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?/.exec(normalized);
  if (withUnit) {
    const [, amount, unit] = withUnit;
    return `${amount}${unit}`.replace('K', 'k').replace('M', 'm');
  }
  const bare = /(\d+(?:\.\d+)?)(?:热度)?/.exec(normalized);
  if (bare) {
    return bare[1];
  }
  return '';
}
/**
 * Extracts a heat value that sits at the very end of the text.
 *
 * Only a number followed by a unit (万, 亿, k, K, m, M), optionally followed by
 * "热度", at the end of the cleaned text is accepted. K/M are lower-cased.
 *
 * @param {*} text - Text to inspect.
 * @returns {string} Number + unit, or '' when the text does not end in one.
 */
function extractHeatToken(text) {
  const trailing = /(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?$/.exec(cleanText(text));
  if (!trailing) {
    return '';
  }
  const [, amount, unit] = trailing;
  return `${amount}${unit}`.replace('K', 'k').replace('M', 'm');
}
/**
 * Determines the rank of a hot-list item.
 *
 * Resolution order:
 *   1. a positive integer parsed from the text of a dedicated rank node;
 *   2. a 1–2 digit number at the very start of the item's cleaned text;
 *   3. the positional fallback `index + 1`.
 *
 * @param {{querySelector: Function, textContent: string}} item - Item node.
 * @param {number} index - Zero-based position used as the fallback.
 * @returns {number} The resolved rank.
 */
function inferRank(item, index) {
  const rankSelectors = [
    '.HotList-item-index',
    '.HotItem-index',
    '[data-rank]',
    '.RankingIndex',
  ];
  const explicit = Number.parseInt(pickText(item, rankSelectors), 10);
  if (Number.isFinite(explicit) && explicit > 0) {
    return explicit;
  }
  const prefix = /^(\d{1,2})\b/.exec(cleanText(item.textContent));
  return prefix ? Number.parseInt(prefix[1], 10) : index + 1;
}
/**
 * Builds `[rank, title, heat]` rows from the DOM candidate nodes.
 *
 * A candidate is skipped when it has no title, repeats a title already used,
 * or yields no heat (neither from a dedicated heat node nor inferred from its
 * full text). Collection stops as soon as `limit` rows have been gathered.
 *
 * @returns {Array<Array>} Rows in candidate order.
 */
function collectRows() {
  const titleSelectors = [
    '.HotList-item-title',
    '.HotList-item-title a',
    '.HotItem-content a',
    'h2 a',
    'h2',
    'a[href*="/question/"]',
  ];
  const heatSelectors = [
    '.HotList-item-metrics',
    '.HotList-item-heat',
    '.HotItem-metrics',
    '.HotItem-hot',
    '[data-heat]',
  ];
  const usedTitles = new Set();
  const collected = [];
  for (const candidate of collectDomCandidates()) {
    const title = pickText(candidate, titleSelectors);
    if (title && !usedTitles.has(title)) {
      // Prefer a dedicated heat node; otherwise scan the whole item text.
      const heat = pickText(candidate, heatSelectors) || inferHeat(candidate.textContent);
      if (heat) {
        usedTitles.add(title);
        collected.push([inferRank(candidate, collected.length), title, heat]);
        if (collected.length >= limit) {
          break;
        }
      }
    }
  }
  return collected;
}
/**
 * Gathers the DOM nodes that may represent hot-list items.
 *
 * Each selector is queried against `document` in the listed order. A node
 * matched by more than one selector is kept once, at the position of its
 * first match.
 *
 * @returns {Array} Unique candidate nodes in first-match order.
 */
function collectDomCandidates() {
  const selectorList = [
    '.HotList-item',
    '.HotItem',
    '.HotList-list > *',
    '[data-hot-item]',
    'section ol li',
    'main li',
    'main article',
    'main [class*="Hot"]',
  ];
  // A Set keeps insertion order and ignores repeated additions.
  const unique = new Set();
  for (const css of selectorList) {
    for (const element of Array.from(document.querySelectorAll(css))) {
      unique.add(element);
    }
  }
  return Array.from(unique);
}
/**
 * Collects raw text from a fixed list of page containers.
 *
 * For each container the first match in `document` is read (`innerText`,
 * falling back to `textContent`). Containers whose cleaned text is empty or
 * equal to one already collected are dropped.
 *
 * @returns {string[]} Raw (uncleaned) texts, longest first.
 */
function collectTextSources() {
  const containerSelectors = ['.HotList-list', '.HotList', '#root', 'main', 'body'];
  const seenKeys = new Set();
  const texts = [];
  for (const css of containerSelectors) {
    const element = document.querySelector(css);
    const raw = element ? String(element.innerText || element.textContent || '') : '';
    // Deduplicate on the cleaned text but keep the raw text (with newlines).
    const key = cleanText(raw);
    if (key && !seenKeys.has(key)) {
      seenKeys.add(key);
      texts.push(raw);
    }
  }
  texts.sort((first, second) => second.length - first.length);
  return texts;
}
/**
 * Tells whether the text contains one of the known login / verification /
 * access-restriction phrases.
 *
 * @param {string} text - Page text to inspect.
 * @returns {boolean} True when any of the phrases occurs in the text.
 */
function looksLikeBlockedPage(text) {
  const blockedMarkers = /安全验证|异常访问|请完成验证|登录后继续|登录即可查看|验证码|访问受限/;
  return blockedMarkers.test(text);
}
/**
 * Tells whether a text line should be dropped before row parsing.
 *
 * A line is ignored when it is empty, equals one of the fixed page-heading
 * strings, or starts with one of the fixed prefixes.
 *
 * @param {string} line - A single cleaned line of page text.
 * @returns {boolean} True when the line should be ignored.
 */
function shouldIgnoreTextLine(line) {
  if (!line) {
    return true;
  }
  const exactNoise = ['知乎热榜', '首页 - 知乎', '首页-知乎'];
  const noisePrefixes = ['/ ', '当前页面 ·', '继续输入任务'];
  return exactNoise.includes(line) || noisePrefixes.some((prefix) => line.startsWith(prefix));
}
/**
 * Text fallback: parses rows out of the page's visible text.
 *
 * Text sources are tried in the order `collectTextSources` returns them. The
 * first source that produces any rows wins, truncated to `limit` rows.
 *
 * @returns {Array<Array>} Parsed rows, or [] when no source produced any.
 * @throws {Error} When a source examined before any rows are found looks like
 *   a login or verification page.
 */
function collectRowsFromText() {
  for (const pageText of collectTextSources()) {
    if (!pageText) {
      continue;
    }
    if (looksLikeBlockedPage(pageText)) {
      throw new Error('知乎页面当前需要登录或完成安全验证,无法读取热榜条目');
    }
    const parsed = parseRowsFromText(pageText);
    if (parsed.length > 0) {
      return parsed.slice(0, limit);
    }
  }
  return [];
}
/**
 * Parses `[rank, title, heat]` rows out of plain page text.
 *
 * The text is split into lines, each line is cleaned, and empty or ignorable
 * lines are dropped. Lines are then consumed in order:
 *   - a bare 1–2 digit line (with no title pending) sets the next row's rank;
 *   - a "rank + separator + rest" line sets the rank and continues with rest;
 *   - a line ending in a heat value with text before it is a complete row;
 *   - a heat-only line closes the row whose title lines were accumulated;
 *   - anything else is accumulated as part of the pending title.
 * Rows with a title already seen are dropped. Parsing stops once `limit` rows
 * have been collected.
 *
 * @param {*} text - Raw page text; falsy values are treated as ''.
 * @returns {Array<Array>} Parsed rows in document order.
 */
function parseRowsFromText(text) {
  // A line that is nothing but a heat value, e.g. "98 万热度" or "3.5k".
  const bareHeatPattern = /^\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)(?:热度)?$/;
  const lines = String(text || '')
    .split(/\n+/)
    .map(cleanText)
    .filter((line) => !!line && !shouldIgnoreTextLine(line));
  const seenTitles = new Set();
  const rows = [];
  let pendingRank = null;
  let titleParts = [];

  // Records the row when it is valid and new. The pending rank and title are
  // cleared either way: a heat value always terminates the current entry, and
  // keeping a rejected duplicate's title around would glue it onto the next
  // row's title.
  function pushRow(title, heat) {
    const normalizedTitle = cleanText(title);
    if (normalizedTitle && heat && !seenTitles.has(normalizedTitle)) {
      seenTitles.add(normalizedTitle);
      rows.push([
        pendingRank || rows.length + 1,
        normalizedTitle,
        heat,
      ]);
    }
    pendingRank = null;
    titleParts = [];
  }

  for (const rawLine of lines) {
    let line = rawLine;
    const rankOnly = line.match(/^(\d{1,2})$/);
    if (rankOnly && !titleParts.length) {
      pendingRank = Number(rankOnly[1]);
      continue;
    }
    // A heat-only line such as "98 万热度" also fits the "rank + title" shape
    // (1–2 digits, a space, more text); it must not be split into rank 98 and
    // title "万热度", so the rank pattern is skipped for such lines.
    const rankedLine = bareHeatPattern.test(line)
      ? null
      : line.match(/^(\d{1,2})[.、\s]+(.+)$/);
    if (rankedLine) {
      pendingRank = Number(rankedLine[1]);
      line = cleanText(rankedLine[2]);
    }
    const inlineMatch = line.match(/^(.*?)(\d+(?:\.\d+)?)\s*(万|亿|k|K|m|M)(?:热度)?$/);
    if (inlineMatch && cleanText(inlineMatch[1])) {
      pushRow(
        cleanText(inlineMatch[1]),
        `${inlineMatch[2]}${inlineMatch[3]}`.replace('K', 'k').replace('M', 'm'),
      );
      if (rows.length >= limit) {
        break;
      }
      continue;
    }
    const heatOnly = extractHeatToken(line);
    if (heatOnly && titleParts.length) {
      pushRow(titleParts.join(' '), heatOnly);
      if (rows.length >= limit) {
        break;
      }
      continue;
    }
    titleParts.push(line);
  }
  return rows;
}
const domRows = collectRows();
const rows = domRows.length ? domRows : collectRowsFromText();
if (!rows.length) {
throw new Error('未能从页面 DOM 中提取到知乎热榜条目');
}
return {
source: `${location.origin}${location.pathname}`,
sheet_name: '知乎热榜',
columns: ['rank', 'title', 'heat'],
rows,
};

51
skills/test-cli/SKILL.md Normal file
View File

@@ -0,0 +1,51 @@
---
name: test-cli
description: 测试转化
version: 0.1.0
author: sgclaw-skill-converter
tags:
- sgclaw
- browser_script
- converter
---
# Use Cases
- 输入 JS 脚本并生成可被 sgclaw 加载的技能包
- 适配 browser_script 执行模型,优先返回结构化结果
- 与运行时兼容,遵循 expected_domain 与 CSS 选择器约束
# Workflow
1. 解析脚本参数与返回结构
2. 生成 SKILL.toml 与 SKILL.md
3. 保持 JS 逻辑原样,落盘到 scripts/<tool>.js
4. 输出 references 与 assets 文档
# Runtime Contract
- expected_domain: `www.example.com`
- tool name: `extract_test_cli`
- tool description: 测试转化
- args:
-
- kind: browser_script
- command: scripts/extract_test_cli.js
# Blocked-Page Rule
- 遇到登录、验证码、权限不足时必须 throw Error,不得返回空数组或空对象。
- 明确报错文案,例如:登录/验证码拦截/权限不足。
# Output Contract
- structured-first:返回对象或对象数组,避免 getText 二次采集。
- artifact_fields: [source sheet_name columns rows]
- superrpa_browser 优先;使用 CSS 选择器,禁止 XPath 与 jQuery :contains
# Partial/Fallback Rule
- blocked_page: 登录/验证码/反爬场景要抛错
- partial_data: 数据不足要在注释或结果中说明
# References
- `references/implementation-notes.md`
- `assets/notes.md`
## 模式
- preferred_mode: compact

View File

@@ -0,0 +1,12 @@
[skill]
name = "test-cli"
description = "测试转化"
version = "0.1.0"
author = "sgclaw-skill-converter"
tags = ["sgclaw", "browser_script", "converter"]
[[tools]]
name = "extract_test_cli"
description = "测试转化"
kind = "browser_script"
command = "scripts/extract_test_cli.js"

View File

@@ -0,0 +1,6 @@
## Asset Notes
- skill_name: test-cli
- expected_domain: www.example.com
- mode: compact
- 资源仅使用本地相对文档,不包含远程 markdown 链接。

View File

@@ -0,0 +1,8 @@
## Implementation Notes
- skill_name: test-cli
- tool: extract_test_cli
- args:
- artifact_fields: source, sheet_name, columns, rows
- generator: main.go
- 说明: 保持脚本业务逻辑不变,仅包装与文档产出。

View File

@@ -0,0 +1 @@
return {source:'x', columns:['a'], rows:[{a:1}], sheet_name:'demo'};

51
skills/test-cli2/SKILL.md Normal file
View File

@@ -0,0 +1,51 @@
---
name: test-cli2
description: test
version: 0.1.0
author: sgclaw-skill-converter
tags:
- sgclaw
- browser_script
- converter
---
# Use Cases
- 输入 JS 脚本并生成可被 sgclaw 加载的技能包
- 适配 browser_script 执行模型,优先返回结构化结果
- 与运行时兼容,遵循 expected_domain 与 CSS 选择器约束
# Workflow
1. 解析脚本参数与返回结构
2. 生成 SKILL.toml 与 SKILL.md
3. 保持 JS 逻辑原样,落盘到 scripts/<tool>.js
4. 输出 references 与 assets 文档
# Runtime Contract
- expected_domain: `www.example.com`
- tool name: `extract_test_cli2`
- tool description: test
- args:
-
- kind: browser_script
- command: scripts/extract_test_cli2.js
# Blocked-Page Rule
- 遇到登录、验证码、权限不足时必须 throw Error,不得返回空数组或空对象。
- 明确报错文案,例如:登录/验证码拦截/权限不足。
# Output Contract
- structured-first:返回对象或对象数组,避免 getText 二次采集。
- artifact_fields: [source sheet_name columns rows]
- superrpa_browser 优先;使用 CSS 选择器,禁止 XPath 与 jQuery :contains
# Partial/Fallback Rule
- blocked_page: 登录/验证码/反爬场景要抛错
- partial_data: 数据不足要在注释或结果中说明
# References
- `references/implementation-notes.md`
- `assets/notes.md`
## 模式
- preferred_mode: compact

View File

@@ -0,0 +1,12 @@
[skill]
name = "test-cli2"
description = "test"
version = "0.1.0"
author = "sgclaw-skill-converter"
tags = ["sgclaw", "browser_script", "converter"]
[[tools]]
name = "extract_test_cli2"
description = "test"
kind = "browser_script"
command = "scripts/extract_test_cli2.js"

View File

@@ -0,0 +1,6 @@
## Asset Notes
- skill_name: test-cli2
- expected_domain: www.example.com
- mode: compact
- 资源仅使用本地相对文档,不包含远程 markdown 链接。

View File

@@ -0,0 +1,8 @@
## Implementation Notes
- skill_name: test-cli2
- tool: extract_test_cli2
- args:
- artifact_fields: source, sheet_name, columns, rows
- generator: main.go
- 说明: 保持脚本业务逻辑不变,仅包装与文档产出。

View File

@@ -0,0 +1 @@
return { source: 'x', columns: ['a'], rows: [{a:1}], sheet_name: 'demo' };