feat: refactor sgclaw around zeroclaw compat runtime

zyl
2026-03-26 16:23:31 +08:00
parent bca5b75801
commit ff0771a83f
1059 changed files with 409460 additions and 23 deletions

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Check binary file size against safeguard thresholds.
#
# Usage: check_binary_size.sh <binary_path> [label]
#
# Arguments:
# binary_path Path to the binary to check (required)
# label Optional label for step summary (e.g. target triple)
#
# Thresholds:
# >20MB — hard error (safeguard)
# >15MB — warning (advisory)
# >5MB — warning (target)
#
# Writes to GITHUB_STEP_SUMMARY when the variable is set and label is provided.
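#
# Example (hypothetical binary path and label):
#   ./check_binary_size.sh target/release/sgclaw x86_64-unknown-linux-gnu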
set -euo pipefail
BIN="${1:?Usage: check_binary_size.sh <binary_path> [label]}"
LABEL="${2:-}"
if [ ! -f "$BIN" ]; then
  echo "::error::Binary not found at $BIN"
  exit 1
fi
# macOS stat uses -f%z, Linux stat uses -c%s
SIZE=$(stat -f%z "$BIN" 2>/dev/null || stat -c%s "$BIN")
SIZE_MB=$((SIZE / 1024 / 1024))
echo "Binary size: ${SIZE_MB}MB ($SIZE bytes)"
if [ -n "$LABEL" ] && [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
echo "### Binary Size: $LABEL" >> "$GITHUB_STEP_SUMMARY"
echo "- Size: ${SIZE_MB}MB ($SIZE bytes)" >> "$GITHUB_STEP_SUMMARY"
fi
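# Threshold constants in bytes: 20971520 = 20*1024*1024, 15728640 = 15*1024*1024, 5242880 = 5*1024*1024.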
if [ "$SIZE" -gt 20971520 ]; then
echo "::error::Binary exceeds 20MB safeguard (${SIZE_MB}MB)"
exit 1
elif [ "$SIZE" -gt 15728640 ]; then
echo "::warning::Binary exceeds 15MB advisory target (${SIZE_MB}MB)"
elif [ "$SIZE" -gt 5242880 ]; then
echo "::warning::Binary exceeds 5MB target (${SIZE_MB}MB)"
else
echo "Binary size within target."
fi

View File

@@ -0,0 +1,178 @@
#!/usr/bin/env python3
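"""Collect HTTP(S) links added on changed docs lines and write them to an output file, one URL per line."""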
from __future__ import annotations
import argparse
import os
import re
import subprocess
import sys
from pathlib import Path
DOC_PATH_RE = re.compile(r"\.mdx?$")
URL_RE = re.compile(r"https?://[^\s<>'\"]+")
INLINE_LINK_RE = re.compile(r"!?\[[^\]]*\]\(([^)]+)\)")
REF_LINK_RE = re.compile(r"^\s*\[[^\]]+\]:\s*(\S+)")
TRAILING_PUNCTUATION = ").,;:!?]}'\""
def run_git(args: list[str]) -> subprocess.CompletedProcess[str]:
    return subprocess.run(["git", *args], check=False, capture_output=True, text=True)


def commit_exists(rev: str) -> bool:
    if not rev:
        return False
    return run_git(["cat-file", "-e", f"{rev}^{{commit}}"]).returncode == 0


def normalize_docs_files(raw: str) -> list[str]:
    if not raw:
        return []
    files: list[str] = []
    for line in raw.splitlines():
        path = line.strip()
        if path:
            files.append(path)
    return files


def infer_base_sha(provided: str) -> str:
    if commit_exists(provided):
        return provided
    if run_git(["rev-parse", "--verify", "origin/master"]).returncode != 0:
        return ""
    proc = run_git(["merge-base", "origin/master", "HEAD"])
    candidate = proc.stdout.strip()
    return candidate if commit_exists(candidate) else ""


def infer_docs_files(base_sha: str, provided: list[str]) -> list[str]:
    if provided:
        return provided
    if not base_sha:
        return []
    diff = run_git(["diff", "--name-only", base_sha, "HEAD"])
    files: list[str] = []
    for line in diff.stdout.splitlines():
        path = line.strip()
        if not path:
            continue
        if DOC_PATH_RE.search(path) or path in {"LICENSE", ".github/pull_request_template.md"}:
            files.append(path)
    return files


def normalize_link_target(raw_target: str, source_path: str) -> str | None:
    target = raw_target.strip()
    if target.startswith("<") and target.endswith(">"):
        target = target[1:-1].strip()
    if not target:
        return None
    if " " in target:
        target = target.split()[0].strip()
    if not target or target.startswith("#"):
        return None
    lower = target.lower()
    if lower.startswith(("mailto:", "tel:", "javascript:")):
        return None
    if target.startswith(("http://", "https://")):
        return target.rstrip(TRAILING_PUNCTUATION)
    path_without_fragment = target.split("#", 1)[0].split("?", 1)[0]
    if not path_without_fragment:
        return None
    if path_without_fragment.startswith("/"):
        resolved = path_without_fragment.lstrip("/")
    else:
        resolved = os.path.normpath(
            os.path.join(os.path.dirname(source_path) or ".", path_without_fragment)
        )
    if not resolved or resolved == ".":
        return None
    return resolved


def extract_links(text: str, source_path: str) -> list[str]:
    links: list[str] = []
    for match in URL_RE.findall(text):
        url = match.rstrip(TRAILING_PUNCTUATION)
        if url:
            links.append(url)
    for match in INLINE_LINK_RE.findall(text):
        normalized = normalize_link_target(match, source_path)
        if normalized:
            links.append(normalized)
    ref_match = REF_LINK_RE.match(text)
    if ref_match:
        normalized = normalize_link_target(ref_match.group(1), source_path)
        if normalized:
            links.append(normalized)
    return links


def added_lines_for_file(base_sha: str, path: str) -> list[str]:
    if base_sha:
        diff = run_git(["diff", "--unified=0", base_sha, "HEAD", "--", path])
        lines: list[str] = []
        for raw_line in diff.stdout.splitlines():
            if raw_line.startswith("+++"):
                continue
            if raw_line.startswith("+"):
                lines.append(raw_line[1:])
        return lines
    file_path = Path(path)
    if not file_path.is_file():
        return []
    return file_path.read_text(encoding="utf-8", errors="ignore").splitlines()


def main() -> int:
    parser = argparse.ArgumentParser(description="Collect HTTP(S) links added in changed docs lines")
    parser.add_argument("--base", default="", help="Base commit SHA")
    parser.add_argument(
        "--docs-files",
        default="",
        help="Newline-separated docs files list",
    )
    parser.add_argument("--output", required=True, help="Output file for unique URLs")
    args = parser.parse_args()
    base_sha = infer_base_sha(args.base)
    docs_files = infer_docs_files(base_sha, normalize_docs_files(args.docs_files))
    existing_files = [path for path in docs_files if Path(path).is_file()]
    if not existing_files:
        Path(args.output).write_text("", encoding="utf-8")
        print("No docs files available for link collection.")
        return 0
    unique_urls: list[str] = []
    seen: set[str] = set()
    for path in existing_files:
        for line in added_lines_for_file(base_sha, path):
            for link in extract_links(line, path):
                if link not in seen:
                    seen.add(link)
                    unique_urls.append(link)
    Path(args.output).write_text("\n".join(unique_urls) + ("\n" if unique_urls else ""), encoding="utf-8")
    print(f"Collected {len(unique_urls)} added link(s) from {len(existing_files)} docs file(s).")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
BASE_SHA="${BASE_SHA:-}"
DOCS_FILES_RAW="${DOCS_FILES:-}"
LINKS_FILE="$(mktemp)"
trap 'rm -f "$LINKS_FILE"' EXIT
python3 ./scripts/ci/collect_changed_links.py \
  --base "$BASE_SHA" \
  --docs-files "$DOCS_FILES_RAW" \
  --output "$LINKS_FILE"
if [ ! -s "$LINKS_FILE" ]; then
  echo "No added links detected in changed docs lines."
  exit 0
fi
if ! command -v lychee >/dev/null 2>&1; then
  echo "lychee is required to run docs link gate locally."
  echo "Install via: cargo install lychee"
  exit 1
fi
echo "Checking added links with lychee (offline mode)..."
lychee --offline --no-progress --format detailed "$LINKS_FILE"

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env bash
set -euo pipefail
BASE_SHA="${BASE_SHA:-}"
DOCS_FILES_RAW="${DOCS_FILES:-}"
if [ -z "$BASE_SHA" ] && git rev-parse --verify origin/master >/dev/null 2>&1; then
BASE_SHA="$(git merge-base origin/master HEAD)"
fi
if [ -z "$DOCS_FILES_RAW" ] && [ -n "$BASE_SHA" ] && git cat-file -e "$BASE_SHA^{commit}" 2>/dev/null; then
DOCS_FILES_RAW="$(git diff --name-only "$BASE_SHA" HEAD | awk '
/\.md$/ || /\.mdx$/ || $0 == "LICENSE" || $0 == ".github/pull_request_template.md" {
print
}
')"
fi
if [ -z "$DOCS_FILES_RAW" ]; then
echo "No docs files detected; skipping docs quality gate."
exit 0
fi
if [ -z "$BASE_SHA" ] || ! git cat-file -e "$BASE_SHA^{commit}" 2>/dev/null; then
echo "BASE_SHA is missing or invalid; falling back to full-file markdown lint."
BASE_SHA=""
fi
ALL_FILES=()
while IFS= read -r file; do
  if [ -n "$file" ]; then
    ALL_FILES+=("$file")
  fi
done < <(printf '%s\n' "$DOCS_FILES_RAW")
if [ "${#ALL_FILES[@]}" -eq 0 ]; then
  echo "No docs files detected after normalization; skipping docs quality gate."
  exit 0
fi
EXISTING_FILES=()
for file in "${ALL_FILES[@]}"; do
  if [ -f "$file" ]; then
    EXISTING_FILES+=("$file")
  fi
done
if [ "${#EXISTING_FILES[@]}" -eq 0 ]; then
  echo "No existing docs files to lint; skipping docs quality gate."
  exit 0
fi
if command -v npx >/dev/null 2>&1; then
  MD_CMD=(npx --yes markdownlint-cli2@0.20.0)
elif command -v markdownlint-cli2 >/dev/null 2>&1; then
  MD_CMD=(markdownlint-cli2)
else
  echo "markdownlint-cli2 is required (via npx or local binary)."
  exit 1
fi
echo "Linting docs files: ${EXISTING_FILES[*]}"
LINT_OUTPUT_FILE="$(mktemp)"
set +e
"${MD_CMD[@]}" "${EXISTING_FILES[@]}" >"$LINT_OUTPUT_FILE" 2>&1
LINT_EXIT=$?
set -e
if [ "$LINT_EXIT" -eq 0 ]; then
cat "$LINT_OUTPUT_FILE"
rm -f "$LINT_OUTPUT_FILE"
exit 0
fi
if [ -z "$BASE_SHA" ]; then
cat "$LINT_OUTPUT_FILE"
rm -f "$LINT_OUTPUT_FILE"
exit "$LINT_EXIT"
fi
CHANGED_LINES_JSON_FILE="$(mktemp)"
python3 - "$BASE_SHA" "${EXISTING_FILES[@]}" >"$CHANGED_LINES_JSON_FILE" <<'PY'
import json
import re
import subprocess
import sys
base = sys.argv[1]
files = sys.argv[2:]
changed = {}
hunk = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")
for path in files:
    proc = subprocess.run(
        ["git", "diff", "--unified=0", base, "HEAD", "--", path],
        check=False,
        capture_output=True,
        text=True,
    )
    ranges = []
    for line in proc.stdout.splitlines():
        m = hunk.match(line)
        if not m:
            continue
        start = int(m.group(1))
        count = int(m.group(2) or "1")
        if count > 0:
            ranges.append([start, start + count - 1])
    changed[path] = ranges
print(json.dumps(changed))
PY
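# The JSON above maps each docs file to its added-line ranges, e.g. {"README.md": [[12, 14]]} (illustrative values).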
FILTERED_OUTPUT_FILE="$(mktemp)"
set +e
python3 - "$LINT_OUTPUT_FILE" "$CHANGED_LINES_JSON_FILE" >"$FILTERED_OUTPUT_FILE" <<'PY'
import json
import re
import sys
lint_file = sys.argv[1]
changed_file = sys.argv[2]
with open(changed_file, "r", encoding="utf-8") as f:
    changed = json.load(f)
line_re = re.compile(r"^(.+?):(\d+)\s+error\s+(MD\d+(?:/[^\s]+)?)\s+(.*)$")
blocking = []
baseline = []
other_lines = []
with open(lint_file, "r", encoding="utf-8") as f:
    for raw_line in f:
        line = raw_line.rstrip("\n")
        m = line_re.match(line)
        if not m:
            other_lines.append(line)
            continue
        path, line_no_s, rule, msg = m.groups()
        line_no = int(line_no_s)
        ranges = changed.get(path, [])
        is_changed_line = any(start <= line_no <= end for start, end in ranges)
        entry = f"{path}:{line_no} {rule} {msg}"
        if is_changed_line:
            blocking.append(entry)
        else:
            baseline.append(entry)
if baseline:
    print("Existing markdown issues outside changed lines (non-blocking):")
    for entry in baseline:
        print(f" - {entry}")
if blocking:
    print("Markdown issues introduced on changed lines (blocking):")
    for entry in blocking:
        print(f" - {entry}")
    print(f"Blocking markdown issues: {len(blocking)}")
    sys.exit(1)
if baseline:
    print("No blocking markdown issues on changed lines.")
    sys.exit(0)
for line in other_lines:
    print(line)
if any(line.strip() for line in other_lines):
    print("markdownlint exited non-zero with unclassified output; failing safe.")
    sys.exit(2)
print("No blocking markdown issues on changed lines.")
PY
SCRIPT_EXIT=$?
set -e
cat "$FILTERED_OUTPUT_FILE"
rm -f "$LINT_OUTPUT_FILE" "$CHANGED_LINES_JSON_FILE" "$FILTERED_OUTPUT_FILE"
exit "$SCRIPT_EXIT"

View File

@@ -0,0 +1,209 @@
#!/usr/bin/env python3
"""Fetch GitHub Actions workflow runs for a given date and summarize costs.
Usage:
python fetch_actions_data.py [OPTIONS]
Options:
--date YYYY-MM-DD Date to query (default: yesterday)
--mode brief|full Output mode (default: full)
brief: billable minutes/hours table only
full: detailed breakdown with per-run list
--repo OWNER/NAME Repository (default: zeroclaw-labs/zeroclaw)
-h, --help Show this help message
"""
import argparse
import json
import subprocess
from datetime import datetime, timedelta, timezone
def parse_args():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Fetch GitHub Actions workflow runs and summarize costs.",
    )
    yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%d")
    parser.add_argument(
        "--date",
        default=yesterday,
        help="Date to query in YYYY-MM-DD format (default: yesterday)",
    )
    parser.add_argument(
        "--mode",
        choices=["brief", "full"],
        default="full",
        help="Output mode: 'brief' for billable hours only, 'full' for detailed breakdown (default: full)",
    )
    parser.add_argument(
        "--repo",
        default="zeroclaw-labs/zeroclaw",
        help="Repository in OWNER/NAME format (default: zeroclaw-labs/zeroclaw)",
    )
    return parser.parse_args()
def fetch_runs(repo, date_str, page=1, per_page=100):
    """Fetch one page of workflow runs created on a given date."""
    url = (
        f"https://api.github.com/repos/{repo}/actions/runs"
        f"?created={date_str}&per_page={per_page}&page={page}"
    )
    result = subprocess.run(
        ["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
        capture_output=True, text=True
    )
    return json.loads(result.stdout)


def fetch_jobs(repo, run_id):
    """Fetch jobs for a specific run."""
    url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/jobs?per_page=100"
    result = subprocess.run(
        ["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
        capture_output=True, text=True
    )
    return json.loads(result.stdout)
def parse_duration(started, completed):
    """Return duration in seconds between two ISO timestamps."""
    if not started or not completed:
        return 0
    try:
        s = datetime.fromisoformat(started.replace("Z", "+00:00"))
        c = datetime.fromisoformat(completed.replace("Z", "+00:00"))
        return max(0, (c - s).total_seconds())
    except Exception:
        return 0
def main():
    args = parse_args()
    repo = args.repo
    date_str = args.date
    brief = args.mode == "brief"
    print(f"Fetching workflow runs for {repo} on {date_str}...")
    print("=" * 100)
    all_runs = []
    for page in range(1, 5):  # up to 400 runs
        data = fetch_runs(repo, date_str, page=page)
        runs = data.get("workflow_runs", [])
        if not runs:
            break
        all_runs.extend(runs)
        if len(runs) < 100:
            break
    print(f"Total workflow runs found: {len(all_runs)}")
    print()
    # Group by workflow name
    workflow_stats = {}
    for run in all_runs:
        name = run.get("name", "Unknown")
        event = run.get("event", "unknown")
        # In-progress runs report a null conclusion; fall back to "unknown" so formatting stays safe.
        conclusion = run.get("conclusion") or "unknown"
        run_id = run.get("id")
        if name not in workflow_stats:
            workflow_stats[name] = {
                "count": 0,
                "events": {},
                "conclusions": {},
                "total_job_seconds": 0,
                "total_jobs": 0,
                "run_ids": [],
            }
        workflow_stats[name]["count"] += 1
        workflow_stats[name]["events"][event] = workflow_stats[name]["events"].get(event, 0) + 1
        workflow_stats[name]["conclusions"][conclusion] = workflow_stats[name]["conclusions"].get(conclusion, 0) + 1
        workflow_stats[name]["run_ids"].append(run_id)
    # For each workflow, sample up to 3 runs to get job-level timing
    print("Sampling job-level timing (up to 3 runs per workflow)...")
    print()
    for name, stats in workflow_stats.items():
        sample_ids = stats["run_ids"][:3]
        for run_id in sample_ids:
            jobs_data = fetch_jobs(repo, run_id)
            jobs = jobs_data.get("jobs", [])
            for job in jobs:
                started = job.get("started_at")
                completed = job.get("completed_at")
                duration = parse_duration(started, completed)
                stats["total_job_seconds"] += duration
                stats["total_jobs"] += 1
        # Extrapolate: if we sampled N runs but there are M total, scale up
        # (e.g. 3 sampled runs totalling 30 job-minutes across 12 runs -> ~120 estimated job-minutes).
        sampled = len(sample_ids)
        total = stats["count"]
        if sampled > 0 and sampled < total:
            scale = total / sampled
            stats["estimated_total_seconds"] = stats["total_job_seconds"] * scale
        else:
            stats["estimated_total_seconds"] = stats["total_job_seconds"]
    # Print summary sorted by estimated cost (descending)
    sorted_workflows = sorted(
        workflow_stats.items(),
        key=lambda x: x[1]["estimated_total_seconds"],
        reverse=True
    )
    if brief:
        # Brief mode: compact billable hours table
        print(f"{'Workflow':<40} {'Runs':>5} {'Est.Mins':>9} {'Est.Hours':>10}")
        print("-" * 68)
        grand_total_minutes = 0
        for name, stats in sorted_workflows:
            est_mins = stats["estimated_total_seconds"] / 60
            grand_total_minutes += est_mins
            print(f"{name:<40} {stats['count']:>5} {est_mins:>9.1f} {est_mins/60:>10.2f}")
        print("-" * 68)
        print(f"{'TOTAL':<40} {len(all_runs):>5} {grand_total_minutes:>9.0f} {grand_total_minutes/60:>10.1f}")
        print(f"\nProjected monthly: ~{grand_total_minutes/60*30:.0f} hours")
    else:
        # Full mode: detailed breakdown with per-run list
        print("=" * 100)
        print(f"{'Workflow':<40} {'Runs':>5} {'SampledJobs':>12} {'SampledMins':>12} {'Est.TotalMins':>14} {'Events'}")
        print("-" * 100)
        grand_total_minutes = 0
        for name, stats in sorted_workflows:
            sampled_mins = stats["total_job_seconds"] / 60
            est_total_mins = stats["estimated_total_seconds"] / 60
            grand_total_minutes += est_total_mins
            events_str = ", ".join(f"{k}={v}" for k, v in stats["events"].items())
            conclusions_str = ", ".join(f"{k}={v}" for k, v in stats["conclusions"].items())
            print(
                f"{name:<40} {stats['count']:>5} {stats['total_jobs']:>12} "
                f"{sampled_mins:>12.1f} {est_total_mins:>14.1f} {events_str}"
            )
            print(f"{'':>40} {'':>5} {'':>12} {'':>12} {'':>14} outcomes: {conclusions_str}")
        print("-" * 100)
        print(f"{'GRAND TOTAL':>40} {len(all_runs):>5} {'':>12} {'':>12} {grand_total_minutes:>14.1f}")
        print(f"\nEstimated total billable minutes on {date_str}: {grand_total_minutes:.0f} min ({grand_total_minutes/60:.1f} hours)")
        print()
        # Also show raw run list
        print("\n" + "=" * 100)
        print("DETAILED RUN LIST")
        print("=" * 100)
        for run in all_runs:
            name = run.get("name", "Unknown")
            event = run.get("event", "unknown")
            conclusion = run.get("conclusion") or "unknown"
            run_id = run.get("id")
            started = run.get("run_started_at", "?")
            print(f" [{run_id}] {name:<40} conclusion={conclusion:<12} event={event:<20} started={started}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
set -euo pipefail
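# Default run denies only clippy's correctness lints; pass --strict to deny all warnings.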
MODE="correctness"
if [ "${1:-}" = "--strict" ]; then
MODE="strict"
fi
echo "==> rust quality: cargo fmt --all -- --check"
cargo fmt --all -- --check
if [ "$MODE" = "strict" ]; then
echo "==> rust quality: cargo clippy --locked --all-targets -- -D warnings"
cargo clippy --locked --all-targets -- -D warnings
else
echo "==> rust quality: cargo clippy --locked --all-targets -- -D clippy::correctness"
cargo clippy --locked --all-targets -- -D clippy::correctness
fi

View File

@@ -0,0 +1,237 @@
#!/usr/bin/env bash
set -euo pipefail
BASE_SHA="${BASE_SHA:-}"
RUST_FILES_RAW="${RUST_FILES:-}"
if [ -z "$BASE_SHA" ] && git rev-parse --verify origin/master >/dev/null 2>&1; then
BASE_SHA="$(git merge-base origin/master HEAD)"
fi
if [ -z "$BASE_SHA" ] && git rev-parse --verify HEAD~1 >/dev/null 2>&1; then
BASE_SHA="$(git rev-parse HEAD~1)"
fi
if [ -z "$BASE_SHA" ] || ! git cat-file -e "$BASE_SHA^{commit}" 2>/dev/null; then
echo "BASE_SHA is missing or invalid for strict delta gate."
echo "Set BASE_SHA explicitly or ensure origin/master is available."
exit 1
fi
if [ -z "$RUST_FILES_RAW" ]; then
RUST_FILES_RAW="$(git diff --name-only "$BASE_SHA" HEAD | awk '/\.rs$/ { print }')"
fi
ALL_FILES=()
while IFS= read -r file; do
  if [ -n "$file" ]; then
    ALL_FILES+=("$file")
  fi
done < <(printf '%s\n' "$RUST_FILES_RAW")
if [ "${#ALL_FILES[@]}" -eq 0 ]; then
  echo "No Rust source files changed; skipping strict delta gate."
  exit 0
fi
EXISTING_FILES=()
for file in "${ALL_FILES[@]}"; do
  if [ -f "$file" ]; then
    EXISTING_FILES+=("$file")
  fi
done
if [ "${#EXISTING_FILES[@]}" -eq 0 ]; then
  echo "No existing changed Rust files to lint; skipping strict delta gate."
  exit 0
fi
echo "Strict delta linting changed Rust files: ${EXISTING_FILES[*]}"
CHANGED_LINES_JSON_FILE="$(mktemp)"
CLIPPY_JSON_FILE="$(mktemp)"
CLIPPY_STDERR_FILE="$(mktemp)"
FILTERED_OUTPUT_FILE="$(mktemp)"
trap 'rm -f "$CHANGED_LINES_JSON_FILE" "$CLIPPY_JSON_FILE" "$CLIPPY_STDERR_FILE" "$FILTERED_OUTPUT_FILE"' EXIT
python3 - "$BASE_SHA" "${EXISTING_FILES[@]}" >"$CHANGED_LINES_JSON_FILE" <<'PY'
import json
import re
import subprocess
import sys
base = sys.argv[1]
files = sys.argv[2:]
hunk = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")
changed = {}
for path in files:
    proc = subprocess.run(
        ["git", "diff", "--unified=0", base, "HEAD", "--", path],
        check=False,
        capture_output=True,
        text=True,
    )
    ranges = []
    for line in proc.stdout.splitlines():
        match = hunk.match(line)
        if not match:
            continue
        start = int(match.group(1))
        count = int(match.group(2) or "1")
        if count > 0:
            ranges.append([start, start + count - 1])
    changed[path] = ranges
print(json.dumps(changed))
PY
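# The JSON above maps each changed Rust file to its added-line ranges, e.g. {"src/lib.rs": [[40, 52]]} (illustrative values).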
set +e
cargo clippy --quiet --locked --all-targets --message-format=json -- -D warnings >"$CLIPPY_JSON_FILE" 2>"$CLIPPY_STDERR_FILE"
CLIPPY_EXIT=$?
set -e
if [ "$CLIPPY_EXIT" -eq 0 ]; then
echo "Strict delta gate passed: no strict warnings/errors."
exit 0
fi
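# Filter the JSON diagnostics below: warnings/errors whose spans (preferring primary spans) overlap a
# changed-line range are blocking; everything else is reported as non-blocking baseline noise.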
set +e
python3 - "$CLIPPY_JSON_FILE" "$CHANGED_LINES_JSON_FILE" >"$FILTERED_OUTPUT_FILE" <<'PY'
import json
import sys
from pathlib import Path
messages_file = sys.argv[1]
changed_file = sys.argv[2]
with open(changed_file, "r", encoding="utf-8") as f:
    changed = json.load(f)
cwd = Path.cwd().resolve()
def normalize_path(path_value: str) -> str:
    path = Path(path_value)
    if path.is_absolute():
        try:
            return path.resolve().relative_to(cwd).as_posix()
        except Exception:
            return path.as_posix()
    return path.as_posix()
blocking = []
baseline = []
unclassified = []
classified_count = 0
with open(messages_file, "r", encoding="utf-8", errors="ignore") as f:
    for raw_line in f:
        line = raw_line.strip()
        if not line:
            continue
        try:
            payload = json.loads(line)
        except json.JSONDecodeError:
            continue
        if payload.get("reason") != "compiler-message":
            continue
        message = payload.get("message", {})
        level = message.get("level")
        if level not in {"warning", "error"}:
            continue
        code_obj = message.get("code") or {}
        code = code_obj.get("code") if isinstance(code_obj, dict) else None
        text = message.get("message", "")
        spans = message.get("spans") or []
        candidate_spans = [span for span in spans if span.get("is_primary")]
        if not candidate_spans:
            candidate_spans = spans
        span_entries = []
        for span in candidate_spans:
            file_name = span.get("file_name")
            line_start = span.get("line_start")
            line_end = span.get("line_end")
            if not file_name or line_start is None:
                continue
            norm_path = normalize_path(file_name)
            span_entries.append((norm_path, int(line_start), int(line_end or line_start)))
        if not span_entries:
            unclassified.append(f"{level.upper()} {code or '-'} {text}")
            continue
        is_changed_line = False
        best_path, best_line, _ = span_entries[0]
        for path, line_start, line_end in span_entries:
            ranges = changed.get(path)
            if ranges is None:
                continue
            for start, end in ranges:
                if line_end >= start and line_start <= end:
                    is_changed_line = True
                    best_path, best_line = path, line_start
                    break
            if is_changed_line:
                break
        entry = f"{best_path}:{best_line} {level.upper()} {code or '-'} {text}"
        classified_count += 1
        if is_changed_line:
            blocking.append(entry)
        else:
            baseline.append(entry)
if baseline:
    print("Existing strict lint issues outside changed Rust lines (non-blocking):")
    for entry in baseline:
        print(f" - {entry}")
if blocking:
    print("Strict lint issues introduced on changed Rust lines (blocking):")
    for entry in blocking:
        print(f" - {entry}")
    print(f"Blocking strict lint issues: {len(blocking)}")
    sys.exit(1)
if classified_count > 0:
    print("No blocking strict lint issues on changed Rust lines.")
    sys.exit(0)
if unclassified:
    print("Strict lint exited non-zero with unclassified diagnostics; failing safe:")
    for entry in unclassified[:20]:
        print(f" - {entry}")
    sys.exit(2)
print("Strict lint exited non-zero without parsable diagnostics; failing safe.")
sys.exit(2)
PY
FILTER_EXIT=$?
set -e
cat "$FILTERED_OUTPUT_FILE"
if [ "$FILTER_EXIT" -eq 0 ]; then
if [ -s "$CLIPPY_STDERR_FILE" ]; then
echo "clippy stderr summary (informational):"
cat "$CLIPPY_STDERR_FILE"
fi
exit 0
fi
if [ -s "$CLIPPY_STDERR_FILE" ]; then
echo "clippy stderr summary:"
cat "$CLIPPY_STDERR_FILE"
fi
exit "$FILTER_EXIT"