diff --git a/.claude/hooks/auto-tmux-dev.sh b/.claude/hooks/auto-tmux-dev.sh
new file mode 100755
index 0000000..67a4eda
--- /dev/null
+++ b/.claude/hooks/auto-tmux-dev.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Suggest running the dev server in a tmux session when a dev command is detected
+# Event: PreToolUse | Matcher: Bash
+# Profile: standard
+# Non-blocking (always exit 0): only prints a tip to stderr; the command in the tip is %q-quoted
+
+INPUT=$(cat)
+COMMAND=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // empty')
+
+if [ -z "$COMMAND" ]; then
+  exit 0
+fi
+
+# Detect dev server commands (POSIX [[:space:]] is used because \s is a GNU grep extension)
+DEV_PATTERNS=(
+  'npm[[:space:]]+run[[:space:]]+dev'
+  'pnpm[[:space:]]+(run[[:space:]]+)?dev'
+  'yarn[[:space:]]+dev'
+  'next[[:space:]]+dev'
+  'vite[[:space:]]*$'
+  'vite[[:space:]]+dev'
+)
+
+IS_DEV_COMMAND=false
+for pattern in "${DEV_PATTERNS[@]}"; do
+  if printf '%s\n' "$COMMAND" | grep -qE -- "$pattern"; then
+    IS_DEV_COMMAND=true
+    break
+  fi
+done
+
+if [ "$IS_DEV_COMMAND" = false ]; then
+  exit 0
+fi
+
+# Check if tmux is available
+if ! command -v tmux &>/dev/null; then
+  exit 0
+fi
+
+# Check if a tmux session with the dev-server name already exists
+SESSION_NAME="claude-dev"
+if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
+  echo "Dev server already running in tmux session '$SESSION_NAME'. Use 'tmux attach -t $SESSION_NAME' to view." >&2
+  exit 0
+fi
+
+printf 'Tip: Long-running dev servers work better in tmux. Run: tmux new -d -s %s %q\n' "$SESSION_NAME" "$COMMAND" >&2
+exit 0
diff --git a/.claude/hooks/config-protection.sh b/.claude/hooks/config-protection.sh
new file mode 100755
index 0000000..efed39c
--- /dev/null
+++ b/.claude/hooks/config-protection.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Block modifications to linter/formatter configuration files (matched by basename, in any directory)
+# Event: PreToolUse | Matcher: Edit|Write
+# Profile: standard
+# Exit 2 = block, Exit 0 = allow
+
+INPUT=$(cat)
+FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.tool_input.file_path // empty')
+
+if [ -z "$FILE_PATH" ]; then
+  exit 0
+fi
+
+# Protected config files (linters, formatters, build configs)
+PROTECTED_CONFIGS=(
+  ".eslintrc"
+  ".eslintrc.js"
+  ".eslintrc.json"
+  ".eslintrc.yml"
+  "eslint.config.js"
+  "eslint.config.mjs"
+  ".prettierrc"
+  ".prettierrc.js"
+  ".prettierrc.json"
+  "prettier.config.js"
+  "biome.json"
+  "biome.jsonc"
+  ".editorconfig"
+  "tsconfig.json"
+  "tsconfig.base.json"
+)
+
+FILENAME=$(basename -- "$FILE_PATH")
+
+for config in "${PROTECTED_CONFIGS[@]}"; do
+  if [ "$FILENAME" = "$config" ]; then
+    echo "Blocked: modifying config file '$FILENAME'. These files affect the entire project." >&2
+    echo "If this change is intentional, disable this hook: CLAUDE_DISABLED_HOOKS=config-protection.sh" >&2
+    exit 2
+  fi
+done
+
+exit 0
diff --git a/.claude/hooks/run-with-profile.sh b/.claude/hooks/run-with-profile.sh
new file mode 100755
index 0000000..fcc93f3
--- /dev/null
+++ b/.claude/hooks/run-with-profile.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Hook profile gate — wraps hooks to enable/disable by profile
+# Profiles: minimal (safety only), standard (safety + quality), strict (everything)
+#
+# Usage in settings.json:
+#   "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/run-with-profile.sh standard $CLAUDE_PROJECT_DIR/.claude/hooks/some-hook.sh"
+#
+# Environment variables:
+#   CLAUDE_HOOK_PROFILE — Override profile (minimal|standard|strict). Default: standard
+#   CLAUDE_DISABLED_HOOKS — Comma-separated list of hook filenames to skip. 
E.g.: "suggest-compact.sh,auto-tmux-dev.sh"
+
+REQUIRED_PROFILE="${1:?Usage: run-with-profile.sh <profile> <hook-script> [args...]}"
+HOOK_SCRIPT="${2:?Usage: run-with-profile.sh <profile> <hook-script> [args...]}"
+shift 2
+
+# Current profile (default: standard)
+CURRENT_PROFILE="${CLAUDE_HOOK_PROFILE:-standard}"
+
+# profile_level NAME — print the rank of a profile: minimal=1 < standard=2 < strict=3 (unknown names rank as standard)
+profile_level() {
+  case "$1" in
+    minimal) echo 1 ;;
+    standard) echo 2 ;;
+    strict) echo 3 ;;
+    *) echo 2 ;; # default to standard
+  esac
+}
+
+CURRENT_LEVEL=$(profile_level "$CURRENT_PROFILE")
+REQUIRED_LEVEL=$(profile_level "$REQUIRED_PROFILE")
+
+# Skip if current profile is lower than required
+if [ "$CURRENT_LEVEL" -lt "$REQUIRED_LEVEL" ]; then
+  exit 0
+fi
+
+# Check if hook is explicitly disabled (literal match: as a grep regex the "." in "name.sh" would match any character)
+HOOK_NAME=$(basename "$HOOK_SCRIPT")
+case ",${CLAUDE_DISABLED_HOOKS:-}," in
+  *",$HOOK_NAME,"*)
+    exit 0
+    ;;
+esac
+
+# Execute the hook, passing stdin through
+exec "$HOOK_SCRIPT" "$@"
diff --git a/.claude/hooks/session-load.sh b/.claude/hooks/session-load.sh
new file mode 100755
index 0000000..4696e09
--- /dev/null
+++ b/.claude/hooks/session-load.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Restore session context from previous session
+# Event: SessionStart
+# Reads .claude/sessions/latest.json and outputs a context summary
+
+SESSION_FILE="${CLAUDE_PROJECT_DIR:-.}/.claude/sessions/latest.json"
+
+if [ ! 
-f "$SESSION_FILE" ]; then
+  exit 0
+fi
+
+# Read session data (if the file is not valid JSON or jq is missing, skip the summary instead of printing blanks)
+TIMESTAMP=$(jq -r '.timestamp // "unknown"' "$SESSION_FILE" 2>/dev/null) || exit 0
+BRANCH=$(jq -r '.branch // "unknown"' "$SESSION_FILE" 2>/dev/null)
+PHASE=$(jq -r '.phase // "unknown"' "$SESSION_FILE" 2>/dev/null)
+MODIFIED=$(jq -r '.modified_files // [] | length' "$SESSION_FILE" 2>/dev/null)
+STAGED=$(jq -r '.staged_files // [] | length' "$SESSION_FILE" 2>/dev/null)
+COMMITS=$(jq -r '.recent_commits // [] | join("\n ")' "$SESSION_FILE" 2>/dev/null)
+
+echo "Previous session ($TIMESTAMP):"
+echo " Branch: $BRANCH"
+echo " Phase: $PHASE"
+echo " Modified files: $MODIFIED, Staged: $STAGED"
+if [ -n "$COMMITS" ]; then
+  echo " Recent commits:"
+  echo " $COMMITS"
+fi
+echo ""
+echo "Reminder: check RULES.md and RECOMMENDATIONS.md for project conventions."
+
+exit 0
diff --git a/.claude/hooks/session-save.sh b/.claude/hooks/session-save.sh
new file mode 100755
index 0000000..4c607c9
--- /dev/null
+++ b/.claude/hooks/session-save.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Save session context on stop for restoration in next session
+# Event: Stop
+# Saves: branch, modified files, recent commits, phase
+
+SESSION_DIR="${CLAUDE_PROJECT_DIR:-.}/.claude/sessions"
+mkdir -p "$SESSION_DIR"
+
+SESSION_FILE="$SESSION_DIR/latest.json"
+
+# Gather session state (git prints nothing but exits 0 on a detached HEAD, so default the empty value too)
+BRANCH=$(git branch --show-current 2>/dev/null); BRANCH="${BRANCH:-unknown}"
+MODIFIED=$(git diff --name-only 2>/dev/null | head -20 | jq -R -s 'split("\n") | map(select(. != ""))')
+STAGED=$(git diff --cached --name-only 2>/dev/null | head -20 | jq -R -s 'split("\n") | map(select(. != ""))')
+RECENT_COMMITS=$(git log --oneline -5 2>/dev/null | jq -R -s 'split("\n") | map(select(. 
!= ""))')
+PHASE=$(grep -m1 'Current Phase' "${CLAUDE_PROJECT_DIR:-.}/docs/phases-plan.md" 2>/dev/null || echo "unknown")
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+# Write session file: jq escapes the strings (a quote in $PHASE broke the old heredoc); temp file + mv avoids a half-written file
+jq -n \
+  --arg timestamp "$TIMESTAMP" \
+  --arg branch "$BRANCH" \
+  --arg phase "$PHASE" \
+  --argjson modified_files "${MODIFIED:-[]}" \
+  --argjson staged_files "${STAGED:-[]}" \
+  --argjson recent_commits "${RECENT_COMMITS:-[]}" \
+  '{timestamp: $timestamp, branch: $branch, phase: $phase,
+    modified_files: $modified_files, staged_files: $staged_files, recent_commits: $recent_commits}' \
+  > "$SESSION_FILE.tmp" && mv -f "$SESSION_FILE.tmp" "$SESSION_FILE"
+
+exit 0
diff --git a/.claude/hooks/suggest-compact.sh b/.claude/hooks/suggest-compact.sh
new file mode 100755
index 0000000..06149db
--- /dev/null
+++ b/.claude/hooks/suggest-compact.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Suggest /compact every 50 Edit/Write tool calls (the counter file is never reset by this script)
+# Event: PreToolUse | Matcher: Edit|Write
+# Profile: standard
+# Non-blocking reminder (exit 0)
+
+COUNTER_FILE="${CLAUDE_PROJECT_DIR:-.}/.claude/hooks/.tool-counter"
+
+# Read the stored count; a missing or unreadable counter file counts as 0
+COUNT=$(cat "$COUNTER_FILE" 2>/dev/null || echo "0")
+
+# Anything that is not a plain decimal number (empty/corrupt file) is reset; never feed raw file text to $(( ))
+case "$COUNT" in '' | *[!0-9]*) COUNT=0 ;; esac
+
+# Increment counter (10# forces base 10 so a stored "08" is not rejected as invalid octal)
+COUNT=$((10#$COUNT + 1))
+echo "$COUNT" > "$COUNTER_FILE"
+
+# Suggest compact every 50 calls
+if [ $((COUNT % 50)) -eq 0 ]; then
+  echo "Context optimization: $COUNT tool calls in this session. Consider running /strategic-compact if context feels bloated." 
>&2 +fi + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index 9b56f05..9ab0ae9 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -7,6 +7,14 @@ { "type": "command", "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/protect-files.sh" + }, + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/run-with-profile.sh standard \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/config-protection.sh" + }, + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/run-with-profile.sh standard \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/suggest-compact.sh" } ] }, @@ -20,6 +28,10 @@ { "type": "command", "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/commit-docs-reminder.sh" + }, + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/run-with-profile.sh standard \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/auto-tmux-dev.sh" } ] } @@ -30,7 +42,7 @@ "hooks": [ { "type": "command", - "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/post-edit-format.sh" + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/run-with-profile.sh strict \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/post-edit-format.sh" } ] }, @@ -57,11 +69,22 @@ ], "SessionStart": [ { - "matcher": "compact", + "matcher": "", "hooks": [ { "type": "command", - "command": "echo \"Reminder: check RULES.md and RECOMMENDATIONS.md for project conventions. 
Current phase: $(grep -m1 'Current Phase' \"$CLAUDE_PROJECT_DIR/docs/phases-plan.md\" 2>/dev/null || echo 'unknown')\"" + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/session-load.sh" + } + ] + } + ], + "Stop": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/session-save.sh" } ] } diff --git a/.claude/skills/autonomous-loops/SKILL.md b/.claude/skills/autonomous-loops/SKILL.md new file mode 100644 index 0000000..a5ae616 --- /dev/null +++ b/.claude/skills/autonomous-loops/SKILL.md @@ -0,0 +1,204 @@ +--- +name: autonomous-loops +description: Patterns for running autonomous agent workflows — from simple pipelines to complex multi-agent DAGs. Reference for setting up build-fix loops, continuous PRs, and de-sloppify passes. +disable-model-invocation: true +--- + +# Autonomous Loop Patterns + +Reference guide for autonomous agent workflows, ranked by complexity. + +## Pattern 1: Sequential Pipeline + +**Complexity**: Low +**Use when**: Simple one-shot tasks chained together + +```bash +# Run a sequence of claude commands +claude -p "Implement feature X" --output-format json | \ +claude -p "Write tests for the implementation" --output-format json | \ +claude -p "Review the code for issues" +``` + +**Pros**: Simple, predictable, easy to debug +**Cons**: No error recovery, no parallelism + +## Pattern 2: Build-Fix Loop + +**Complexity**: Medium +**Use when**: Making the build green after changes + +```bash +MAX_CYCLES=5 +CYCLE=0 + +while [ $CYCLE -lt $MAX_CYCLES ]; do + CYCLE=$((CYCLE + 1)) + echo "=== Cycle $CYCLE ===" + + # Run build/tests + BUILD_OUTPUT=$(npm run build 2>&1) + if [ $? -eq 0 ]; then + echo "Build passed on cycle $CYCLE" + break + fi + + # Fix errors + claude -p "Fix these build errors. 
Make minimal changes only:
+$BUILD_OUTPUT"
+
+  if [ $CYCLE -eq $MAX_CYCLES ]; then
+    echo "STALLED after $MAX_CYCLES cycles — escalating"
+    exit 1
+  fi
+done
+```
+
+**Key rules**:
+- Set a MAX_CYCLES limit (3-5 is reasonable)
+- Detect stalls (same error repeating)
+- Use the `loop-operator` agent for monitoring
+
+## Pattern 3: Test-Driven Fix Loop
+
+**Complexity**: Medium
+**Use when**: Fixing failing tests one at a time
+
+```bash
+# Get failing test files (assumes Jest-style "FAIL <path>" lines; keep only the path column)
+FAILURES=$(npm test 2>&1 | awk '$1 == "FAIL" { print $2 }')
+
+for test_file in $FAILURES; do
+  claude -p "Fix this failing test. Read the test to understand intent,
+then fix the implementation (not the test):
+File: $test_file"
+done
+
+# Verify all tests pass
+npm test
+```
+
+**Key rules**:
+- Fix implementation, not tests (unless test is wrong)
+- Run full suite after fixes to catch regressions
+- Stop if fix count exceeds threshold
+
+## Pattern 4: Continuous PR Loop
+
+**Complexity**: High
+**Use when**: Processing a backlog of tasks as PRs
+
+```bash
+# Process tasks from a list (stdin of each claude call is redirected so it cannot swallow the rest of tasks.txt)
+while IFS= read -r task; do
+  BRANCH="auto/$(echo "$task" | tr ' ' '-' | head -c 40)"
+  git checkout -b "$BRANCH" main
+
+  claude -p "Implement: $task
+Requirements:
+- Create a single focused PR
+- Include tests
+- Follow RULES.md conventions" < /dev/null
+
+  # Run verification
+  claude -p "/verification-loop" < /dev/null
+
+  # Create PR if verification passes
+  if [ $? -eq 0 ]; then
+    git push -u origin "$BRANCH"
+    gh pr create --title "$task" --body "Automated implementation"
+  fi
+
+  git checkout main
+done < tasks.txt
+```
+
+**Key rules**:
+- One task = one branch = one PR
+- Run verification before creating PR
+- Set cost/time limits per task
+- Skip tasks that fail after N attempts
+
+## Pattern 5: De-Sloppify Pass
+
+**Complexity**: Medium
+**Use when**: Cleaning up after a fast implementation pass
+
+The insight: **Two focused agents outperform one constrained agent**. First implement fast, then clean up. 
+ +```bash +# Phase 1: Fast implementation (allow some sloppiness) +claude -p "Implement feature X quickly. Focus on correctness, not polish." + +# Phase 2: Cleanup pass +claude -p "Review and clean up the recent changes: +1. Remove console.log/debug statements +2. Add missing error handling +3. Fix type safety issues (no 'any') +4. Ensure consistent naming +5. Add missing JSDoc for public APIs + +Do NOT change behavior or add features. Only clean up." +``` + +**Key rules**: +- Cleanup agent must not change behavior +- Use git diff to scope the cleanup +- Run tests after cleanup to verify no regressions + +## Pattern 6: Multi-Agent DAG + +**Complexity**: Very High +**Use when**: Large features requiring coordinated parallel work + +``` + ┌─────────┐ + │ Planner │ + └────┬────┘ + │ + ┌─────┼─────┐ + ▼ ▼ ▼ + [API] [UI] [DB] ← Parallel agents in worktrees + │ │ │ + └─────┼─────┘ + ▼ + ┌───────────┐ + │ Integrator│ ← Merges and resolves conflicts + └─────┬─────┘ + ▼ + ┌───────────┐ + │ Reviewer │ ← Final quality check + └───────────┘ +``` + +**Implementation**: + +1. **Planner** decomposes the spec into independent tasks with dependencies +2. Independent tasks run in **parallel worktrees** (isolated git branches) +3. **Integrator** merges worktrees, resolves conflicts +4. **Reviewer** does final quality check + +**Key rules**: +- Use git worktrees for isolation +- Each agent gets a focused, self-contained task +- Integrator handles merge conflicts +- Full verification after integration + +## Choosing a Pattern + +| Situation | Pattern | +|-----------|---------| +| Single task, no iteration needed | 1 (Sequential) | +| Build is broken, need to fix | 2 (Build-Fix) | +| Tests are failing | 3 (Test-Driven Fix) | +| Backlog of independent tasks | 4 (Continuous PR) | +| Fast implementation needs polish | 5 (De-Sloppify) | +| Large feature, multiple concerns | 6 (Multi-Agent DAG) | + +## Safety Rules (All Patterns) + +1. 
**Always set limits** — Max cycles, max time, max cost +2. **Always verify** — Run tests/build after each change +3. **Always detect stalls** — Same error 3x = stop +4. **Always preserve work** — Commit before risky operations +5. **Always escalate** — When stuck, stop and ask for human input diff --git a/.claude/skills/continuous-learning/SKILL.md b/.claude/skills/continuous-learning/SKILL.md new file mode 100644 index 0000000..7e7e802 --- /dev/null +++ b/.claude/skills/continuous-learning/SKILL.md @@ -0,0 +1,116 @@ +--- +name: continuous-learning +description: Extract patterns and instincts from the current session — what worked, what didn't, what should be remembered. Produces actionable learnings that improve future sessions. +disable-model-invocation: true +--- + +# Continuous Learning + +Extract learnings from the current session to improve future work. + +## Context + +Recent git history: +!`git log --oneline -10 2>/dev/null || echo "No git history"` + +Project rules: +!`head -30 RULES.md 2>/dev/null || echo "No RULES.md"` + +## Steps + +### 1. Review the session + +Analyze what happened in this session: + +- **Tasks completed**: What was accomplished? +- **Approaches tried**: What methods were used? +- **Errors encountered**: What went wrong and how was it fixed? +- **User corrections**: Where did the user redirect or correct the approach? +- **Surprises**: What was unexpected about the codebase or requirements? + +### 2. Extract instincts + +An **instinct** is a pattern that should influence future behavior. 
Categories: + +#### Project-specific instincts + +Patterns unique to this codebase: +- Code conventions not captured in RULES.md +- Hidden dependencies between modules +- "Gotchas" that aren't obvious from reading the code +- User preferences for how work should be done + +#### Approach instincts + +What worked or didn't work: +- Tools/agents that were effective for specific tasks +- Sequences of actions that solved problems efficiently +- Dead ends that should be avoided next time + +#### Communication instincts + +How the user prefers to interact: +- Level of detail they want in explanations +- Whether they prefer to be asked or just shown solutions +- How they respond to different types of suggestions + +### 3. Classify each instinct + +For each instinct, determine: + +| Field | Description | +|-------|-------------| +| **Pattern** | What was observed (specific, not vague) | +| **Context** | When this applies (file types, tasks, situations) | +| **Action** | What to do differently next time | +| **Confidence** | How certain (low/medium/high) based on how many times observed | +| **Scope** | Project-only or applicable to all projects | + +### 4. Decide where to store + +Based on scope and type: + +| Learning Type | Store In | +|---------------|----------| +| User preference | Memory (type: feedback) | +| Project convention | `RULES.md` or `RECOMMENDATIONS.md` | +| Codebase gotcha | Code comment or `RECOMMENDATIONS.md` | +| Tool/agent effectiveness | Memory (type: feedback) | +| Communication preference | Memory (type: user or feedback) | +| Temporary state | Don't store — it's ephemeral | + +### 5. Output + +```markdown +# Session Learnings + +## Summary +[1-2 sentences: what was accomplished, overall assessment] + +## Instincts Extracted + +### 1. 
[Pattern name] +- **Observed**: [what happened] +- **Context**: [when this applies] +- **Action**: [what to do next time] +- **Confidence**: [low/medium/high] +- **Store in**: [memory type / RULES.md / RECOMMENDATIONS.md / skip] + +### 2. ... + +## Recommendations +- [Changes to RULES.md if any patterns should become rules] +- [Changes to RECOMMENDATIONS.md if decisions should be recorded] +- [New memory entries to create] + +## Anti-Learnings +[Things that seemed like patterns but were actually one-off situations — don't generalize these] +``` + +### 6. Anti-patterns + +- **Over-generalizing** — Don't turn a one-time fix into a permanent rule +- **Storing ephemera** — Don't save things like "currently debugging X" +- **Ignoring negative results** — Failed approaches are as valuable as successful ones +- **Redundant storage** — Don't save what's already in RULES.md or git history +- **Low-confidence instincts** — If you've only seen it once, note it but don't enforce it diff --git a/.claude/skills/search-first/SKILL.md b/.claude/skills/search-first/SKILL.md new file mode 100644 index 0000000..d02630a --- /dev/null +++ b/.claude/skills/search-first/SKILL.md @@ -0,0 +1,110 @@ +--- +name: search-first +description: Research existing solutions before writing custom code. Checks npm/PyPI/MCP/GitHub for packages, then decides between Adopt, Extend, Compose, or Build. +disable-model-invocation: true +argument-hint: "[feature or problem to research]" +--- + +# Search Before Building + +Research existing solutions for: `$ARGUMENTS` + +## Context + +Project dependencies: +!`cat package.json 2>/dev/null | jq '.dependencies // {} | keys' 2>/dev/null || echo "No package.json"` + +Project tech stack: +!`cat RECOMMENDATIONS.md 2>/dev/null | head -30 || echo "No RECOMMENDATIONS.md"` + +## Steps + +### 1. Define the need + +Clarify what exactly is needed: +- **Problem statement**: What problem does this solve? 
+- **Requirements**: Must-haves vs nice-to-haves +- **Constraints**: License, bundle size, browser support, security + +### 2. Search for existing solutions + +Search in order of preference: + +1. **Already in project** — Check if a dependency already solves this: + - Read `package.json` / `requirements.txt` / `go.mod` + - Search codebase for similar implementations +2. **Package registries** — Search npm, PyPI, crates.io, pkg.go.dev +3. **MCP servers** — Check if an MCP server provides this capability +4. **GitHub** — Search for well-maintained repos with the needed functionality +5. **Framework built-ins** — Check if the framework already provides this (e.g., Next.js Image, React Server Actions) + +For each candidate, evaluate: + +| Criterion | Check | +|-----------|-------| +| Maintenance | Last commit < 6 months, active issues | +| Popularity | Downloads/stars relative to category | +| Bundle size | Check via bundlephobia or equivalent | +| License | Compatible with project license | +| Security | No known CVEs, dependency tree reasonable | +| Types | TypeScript types included or available | +| API quality | Clean API, good docs, follows conventions | + +### 3. Decision matrix + +Apply this decision framework: + +``` +┌─────────────────────────────────────────────────────┐ +│ Does an existing solution meet ≥80% of requirements? │ +└─────────────────────────┬───────────────────────────┘ + │ + YES ────────┼──────── NO + │ │ │ + ▼ │ ▼ + Is it well-maintained? Can you compose + Is the API acceptable? 2-3 packages to + │ cover 100%? 
+ YES │ NO │ + │ │ │ YES │ NO + ▼ │ ▼ │ │ │ + ADOPT │ EXTEND ▼ │ ▼ + │ (fork/wrap) COMPOSE BUILD + │ (custom) +``` + +| Strategy | When | Risk Level | +|----------|------|------------| +| **Adopt** | Solution fits ≥80%, well-maintained, good API | Low | +| **Extend** | Good base but needs customization (wrapper/plugin) | Medium | +| **Compose** | No single solution, but 2-3 packages combine well | Medium | +| **Build** | No viable existing solution, or critical security/performance need | High | + +### 4. Report + +```markdown +## Research: [Feature] + +### Candidates Found +| Package | Downloads | Size | License | Last Updated | Fit | +|---------|-----------|------|---------|-------------|-----| +| ... | ... | ... | ... | ... | ... | + +### Recommendation: [ADOPT / EXTEND / COMPOSE / BUILD] + +**Selected**: [package name or "custom implementation"] +**Reason**: [why this is the best choice] +**Trade-offs**: [what you give up] + +### Implementation Notes +- [How to integrate] +- [Estimated effort] +``` + +### 5. Anti-patterns to avoid + +- **NIH syndrome** — Building custom when a mature solution exists +- **Dependency hoarding** — Adding a package for a 5-line function +- **Popularity bias** — Choosing the most popular, not the best fit +- **Stale research** — Not checking if a previously rejected package has improved +- **Ignoring composition** — Not considering that 2 small libraries > 1 bloated one diff --git a/.claude/skills/strategic-compact/SKILL.md b/.claude/skills/strategic-compact/SKILL.md new file mode 100644 index 0000000..a964784 --- /dev/null +++ b/.claude/skills/strategic-compact/SKILL.md @@ -0,0 +1,93 @@ +--- +name: strategic-compact +description: Guide for when and how to run /compact effectively — preserving critical context while freeing token budget. Use at phase transitions, before large implementations, or when context is bloated. 
+disable-model-invocation: true +--- + +# Strategic Compact + +Evaluate whether `/compact` should be run now, and if so, prepare for it. + +## Context + +Current conversation state: +!`echo "Working directory: $(pwd)" && echo "Git branch: $(git branch --show-current 2>/dev/null)" && echo "Modified files: $(git diff --name-only 2>/dev/null | wc -l | tr -d ' ')" && echo "Staged files: $(git diff --cached --name-only 2>/dev/null | wc -l | tr -d ' ')"` + +## When to Compact + +Use this decision table: + +| Trigger | Should Compact? | Priority | +|---------|----------------|----------| +| Phase transition (planning → implementation) | Yes | High | +| Starting a new major feature | Yes | High | +| After completing a large task (before next) | Yes | Medium | +| Context feels sluggish or repetitive | Yes | Medium | +| Mid-implementation (code partially written) | **No** — finish first | - | +| Debugging an active issue | **No** — need full context | - | +| Waiting for user input | Maybe — depends on context size | Low | + +### Do NOT compact when: + +- You're in the middle of writing code (partial state will be lost) +- You're debugging and need the full error trail +- You have uncommitted understanding of complex relationships +- The user just gave important instructions that aren't saved to files + +## Pre-Compact Checklist + +Before running `/compact`, ensure critical context survives: + +### 1. Save state to files + +Information that exists only in conversation will be lost. Save to appropriate places: + +- **Implementation plan** → Update or create a task list +- **Decisions made** → Record in `RECOMMENDATIONS.md` or ADR +- **Current phase/status** → Verify `docs/phases-plan.md` is current +- **Known issues** → Document in code comments or issues + +### 2. Commit work in progress + +```bash +# Check for uncommitted changes +git status +# If meaningful changes exist, commit them +git add -A && git commit -m "wip: [what was in progress]" +``` + +### 3. 
Verify anchors + +Ensure these files are up-to-date (they'll be re-read after compact): +- `RULES.md` — project conventions +- `RECOMMENDATIONS.md` — current decisions and constraints +- `docs/phases-plan.md` — phase status + +### 4. Write a compact summary + +Create a brief note of what survives compaction vs. what's re-derivable: + +**Survives** (in files): +- Project rules and recommendations +- Code changes (committed) +- Documentation updates + +**Lost** (conversation-only): +- Reasoning chains and trade-off discussions +- Rejected approaches and why +- Nuanced context from user messages + +## After Compact + +After `/compact` runs: +1. Re-read `RULES.md` and `RECOMMENDATIONS.md` +2. Check task list for current progress +3. Review `git log --oneline -5` for recent context +4. Resume work from the task list + +## Token Optimization Tips + +- **Before compact**: Write meaningful commit messages — they're your post-compact memory +- **File references**: Use `file:line` references instead of pasting code blocks +- **Avoid re-reading**: Once you've read a file, note the key facts — don't re-read it +- **Trim conversation**: If the user asks a tangential question, answer it concisely without pulling in the full project context diff --git a/.claude/skills/verification-loop/SKILL.md b/.claude/skills/verification-loop/SKILL.md new file mode 100644 index 0000000..cdf4e7e --- /dev/null +++ b/.claude/skills/verification-loop/SKILL.md @@ -0,0 +1,114 @@ +--- +name: verification-loop +description: Run a full verification pipeline — Build, TypeCheck, Lint, Test, Security scan, and Diff review — producing a READY or NOT READY verdict. +disable-model-invocation: true +--- + +# Verification Loop + +Run a complete quality gate pipeline on the current codebase changes. 
+
+## Context
+
+Changed files:
+!`git diff --name-only HEAD 2>/dev/null || echo "No git changes"`
+
+Package manager:
+!`[ -f pnpm-lock.yaml ] && echo "pnpm" || ([ -f yarn.lock ] && echo "yarn" || echo "npm")`
+
+Available scripts:
+!`cat package.json 2>/dev/null | jq '.scripts // {}' 2>/dev/null || echo "No package.json"`
+
+## Pipeline
+
+Run each phase in order. Stop on CRITICAL failure. Track results for final verdict.
+
+### Phase 1: Build
+
+```bash
+# Run the build with the package manager detected above — ONE command; chaining with || would re-run a failed build
+pnpm build   # or: yarn build / npm run build
+```
+
+**Pass criteria**: Exit code 0, no errors in output
+**On failure**: CRITICAL — stop pipeline, report errors
+
+### Phase 2: Type Check
+
+```bash
+# TypeScript type checking
+npx tsc --noEmit 2>&1
+```
+
+**Pass criteria**: Exit code 0, zero type errors
+**On failure**: Report all type errors with file:line locations
+
+### Phase 3: Lint
+
+```bash
+# Run the linter this project configures (biome.json present → Biome, otherwise ESLint) — not both
+npx biome check . 2>&1   # or: npx eslint . 2>&1
+```
+
+**Pass criteria**: Zero errors (warnings acceptable)
+**On failure**: Report errors grouped by rule
+
+### Phase 4: Test
+
+```bash
+# Run the test suite with the detected package manager (one command, no || fallback)
+pnpm test   # or: yarn test / npm test
+```
+
+**Pass criteria**: All tests pass, coverage meets threshold
+**On failure**: Report failing tests with error messages
+
+### Phase 5: Security Scan
+
+```bash
+# Check for known vulnerabilities with the detected package manager (a non-zero exit means findings, not "try another tool")
+npm audit --audit-level=high 2>&1   # or: pnpm audit --audit-level high 2>&1
+```
+
+**Pass criteria**: No high/critical vulnerabilities
+**On failure**: Report vulnerable packages with fix suggestions
+
+### Phase 6: Diff Review
+
+Review the actual changes for common issues:
+- Secrets or credentials in diff
+- TODO/FIXME/HACK markers without ticket references
+- Console.log/print statements (non-test files)
+- Large files (>500 lines changed)
+
+## Verdict
+
+After all phases complete, produce the final report:
+
+```markdown
+# Verification Report
+
+**Verdict**: ✅ READY / ❌ NOT READY
+
+| Phase | Status | Issues | 
+|-------|--------|--------| +| Build | ✅/❌ | [count or "clean"] | +| TypeCheck | ✅/❌ | [count or "clean"] | +| Lint | ✅/❌/⚠️ | [errors/warnings] | +| Test | ✅/❌ | [pass/fail/skip counts] | +| Security | ✅/❌ | [vuln count] | +| Diff Review | ✅/⚠️ | [findings] | + +## Blocking Issues +[List of issues that must be fixed before merge] + +## Warnings +[Non-blocking issues worth addressing] + +## Recommendations +[Suggested improvements, prioritized] +``` + +**READY** requires: Build ✅, TypeCheck ✅, Lint ✅ (no errors), Test ✅, Security ✅ (no high/critical) + +**NOT READY** if any of the above fail. List what needs to be fixed. diff --git a/DOCS.md b/DOCS.md index cd75d27..fac55e3 100644 --- a/DOCS.md +++ b/DOCS.md @@ -80,14 +80,17 @@ Technical index for developers and AI agents. Use this as the entry point to all ## Agent Profiles (`/agents`) -- `agents/README.md` — agent index, selection guide, and shared context7 guidelines. -- `agents/frontend-architect.md` — frontend specialist (React, Next.js, accessibility, performance). -- `agents/backend-architect.md` — backend specialist (system design, databases, APIs). -- `agents/security-auditor.md` — security review (OWASP, auth, vulnerability assessment). -- `agents/test-engineer.md` — testing specialist (strategy, automation, CI/CD). -- `agents/code-reviewer.md` — code quality and PR review. -- `agents/prompt-engineer.md` — LLM prompt design and optimization. -- `agents/documentation-expert.md` — technical writing, user/admin guides, docs maintenance. +- `agents/README.md` — agent index, selection guide, model/tools reference, and shared context7 guidelines. +- `agents/planner.md` — implementation planner (opus) — task breakdown, risk assessment, phased plans. +- `agents/frontend-architect.md` — frontend specialist (opus) — React, Next.js, accessibility, performance. +- `agents/backend-architect.md` — backend specialist (opus) — system design, databases, APIs. 
+- `agents/security-auditor.md` — security review (opus) — OWASP, auth, vulnerability assessment. +- `agents/code-reviewer.md` — code quality and PR review (sonnet). +- `agents/test-engineer.md` — testing specialist (sonnet) — strategy, automation, CI/CD. +- `agents/prompt-engineer.md` — LLM prompt design and optimization (sonnet). +- `agents/documentation-expert.md` — technical writing, user/admin guides, docs maintenance (sonnet). +- `agents/build-error-resolver.md` — build/type/lint error fixer (sonnet) — minimal-diff fixes. +- `agents/loop-operator.md` — autonomous loop monitor (sonnet) — stall detection, escalation. ## Claude Code Skills (`/.claude/skills`) @@ -106,15 +109,47 @@ Technical index for developers and AI agents. Use this as the entry point to all - `.claude/skills/review-pr/` — Gitea PR review by number (code-reviewer). - `.claude/skills/improve-prompt/` — diagnose and improve LLM prompt (prompt-engineer). - `.claude/skills/create-skill/` — create or improve a Claude Code skill (meta-skill). +- `.claude/skills/search-first/` — research existing solutions before building custom code. +- `.claude/skills/verification-loop/` — full quality gate pipeline (Build→TypeCheck→Lint→Test→Security→Diff). +- `.claude/skills/strategic-compact/` — when and how to run /compact effectively. +- `.claude/skills/autonomous-loops/` — patterns for autonomous agent workflows (6 levels). +- `.claude/skills/continuous-learning/` — extract session learnings into instincts and memory. ## Claude Code Hooks (`/.claude/hooks`) +### Profile System + +Hooks use a profile system (`minimal|standard|strict`) controlled by `CLAUDE_HOOK_PROFILE` env var (default: `standard`). +Individual hooks can be disabled via `CLAUDE_DISABLED_HOOKS` env var (comma-separated filenames). + +- `.claude/hooks/run-with-profile.sh` — profile gate wrapper for hooks. + +### Safety Hooks (always active) + - `.claude/hooks/protect-files.sh` — blocks edits to `.env`, lock files, `.git/`, keys. 
- `.claude/hooks/bash-firewall.sh` — blocks destructive commands (`rm -rf /`, `git reset --hard`, etc.). -- `.claude/hooks/post-edit-format.sh` — auto-formats files with Prettier after edits. - `.claude/hooks/audit-log.sh` — logs all Bash commands with timestamp to `audit.log`. - `.claude/hooks/commit-docs-reminder.sh` — reminds to check `status-update-checklist.md` before `git commit`. -- `.claude/settings.json` — hooks configuration (also: Notification, SessionStart compact context). + +### Quality Hooks (standard+ profile) + +- `.claude/hooks/config-protection.sh` — blocks modifications to linter/formatter config files. +- `.claude/hooks/suggest-compact.sh` — suggests /compact every ~50 tool calls. +- `.claude/hooks/auto-tmux-dev.sh` — suggests tmux for long-running dev server commands. + +### Strict-Only Hooks + +- `.claude/hooks/post-edit-format.sh` — auto-formats files with Prettier after edits. + +### Session Persistence + +- `.claude/hooks/session-load.sh` — restores previous session context on SessionStart. +- `.claude/hooks/session-save.sh` — saves session context (branch, files, commits) on Stop. +- `.claude/sessions/` — session state storage directory. + +### Configuration + +- `.claude/settings.json` — hooks configuration (PreToolUse, PostToolUse, SessionStart, Stop, Notification). 
--- diff --git a/README.md b/README.md index 4c11517..7f2589b 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Universal starter template for AI-assisted project documentation and agent profi ### Key Features - 📚 **Complete Documentation Structure** - Pre-built docs hierarchy with navigation index -- 🤖 **AI Agent Profiles** - 7 specialized agent roles (Frontend, Backend, Security, Testing, Code Review, Prompt Engineering, Documentation) +- 🤖 **AI Agent Profiles** - 10 specialized agent roles with model/tool restrictions (Planner, Frontend, Backend, Security, Code Review, Testing, Prompt Engineering, Documentation, Build Error Resolver, Loop Operator) - 🎯 **Product Archetypes** - Pre-defined patterns for common product types (SaaS, Marketplace, Content Platform) - 🏗️ **Architecture Guidelines** - Frontend (feature-first) and Backend (modular monolith) best practices - 🔒 **Security by Default** - OWASP Top 10 compliance, security patterns, and audit checklists @@ -30,7 +30,7 @@ Universal starter template for AI-assisted project documentation and agent profi - 🚀 **Phase 0 - Planning** (Template Ready): - ✅ Complete documentation structure (`/docs`) - - ✅ 7 AI agent profiles with detailed instructions + - ✅ 10 AI agent profiles with model/tools frontmatter - ✅ Frontend & Backend architecture guidelines - ✅ Security, API design, and payment flow patterns - ✅ ADR framework and templates @@ -148,14 +148,17 @@ your-project/ │ │ └── rag-embeddings.md # RAG design & evaluation │ └── examples/ # Filled-in examples │ └── RECOMMENDATIONS-example.md -├── agents/ # AI agent profiles -│ ├── frontend-architect.md # Frontend agent profile -│ ├── backend-architect.md # Backend agent profile -│ ├── security-auditor.md # Security agent profile -│ ├── test-engineer.md # Testing agent profile -│ ├── code-reviewer.md # Code review agent profile -│ ├── prompt-engineer.md # Prompt engineering agent -│ └── documentation-expert.md # Documentation specialist +├── agents/ # AI agent 
profiles (10 agents) +│ ├── planner.md # Implementation planner (opus) +│ ├── frontend-architect.md # Frontend architect (opus) +│ ├── backend-architect.md # Backend architect (opus) +│ ├── security-auditor.md # Security auditor (opus) +│ ├── code-reviewer.md # Code reviewer (sonnet) +│ ├── test-engineer.md # Test engineer (sonnet) +│ ├── prompt-engineer.md # Prompt engineer (sonnet) +│ ├── documentation-expert.md # Documentation expert (sonnet) +│ ├── build-error-resolver.md # Build error fixer (sonnet) +│ └── loop-operator.md # Autonomous loop monitor (sonnet) ├── apps/ # Application code (Phase 2+) │ ├── web/ # Frontend app (Next.js) │ └── api/ # Backend API (Node.js) @@ -166,15 +169,22 @@ your-project/ │ ├── deploy.sh # VPS deploy (rsync + npm ci + restart) │ └── ci-lint-fix.sh # ESLint auto-fix with commit-back ├── .claude/ # Claude Code configuration -│ ├── settings.json # Hooks configuration +│ ├── settings.json # Hooks configuration (profiles, lifecycle) │ ├── status-update-checklist.md # Docs sync checklist for commits -│ ├── hooks/ # Hook scripts +│ ├── hooks/ # Hook scripts (profile-gated) +│ │ ├── run-with-profile.sh # Profile gate (minimal/standard/strict) │ │ ├── protect-files.sh # Block edits to sensitive files │ │ ├── bash-firewall.sh # Block dangerous commands -│ │ ├── post-edit-format.sh # Auto-format after edits +│ │ ├── config-protection.sh # Block linter/formatter config edits +│ │ ├── suggest-compact.sh # Suggest /compact every ~50 calls +│ │ ├── auto-tmux-dev.sh # Suggest tmux for dev servers +│ │ ├── post-edit-format.sh # Auto-format after edits (strict) │ │ ├── audit-log.sh # Log all Bash commands -│ │ └── commit-docs-reminder.sh # Remind to sync docs on commit -│ └── skills/ # Slash-command skills (15 total) +│ │ ├── commit-docs-reminder.sh # Remind to sync docs on commit +│ │ ├── session-load.sh # Restore previous session context +│ │ └── session-save.sh # Save session context on stop +│ ├── sessions/ # Session state persistence +│ └── 
skills/ # Slash-command skills (20 total) ├── .editorconfig # Editor formatting standards ├── .env.example # Environment variables template ├── .woodpecker.yml # Woodpecker CI pipeline config @@ -217,16 +227,19 @@ your-project/ - **[Security](./docs/backend/security.md)** - OWASP Top 10, auth patterns - **[Payment Flow](./docs/backend/payment-flow.md)** - Provider-agnostic payment design -**AI Agents:** +**AI Agents (10 profiles, model/tools-gated):** - **[RULES.md](./RULES.md)** - Agent selection protocol and project rules -- **[Frontend Architect](./agents/frontend-architect.md)** - Frontend specialist agent -- **[Backend Architect](./agents/backend-architect.md)** - Backend specialist agent -- **[Security Auditor](./agents/security-auditor.md)** - Security review agent -- **[Test Engineer](./agents/test-engineer.md)** - Testing specialist agent -- **[Code Reviewer](./agents/code-reviewer.md)** - Code quality agent -- **[Prompt Engineer](./agents/prompt-engineer.md)** - AI prompt specialist -- **[Documentation Expert](./agents/documentation-expert.md)** - Technical writing & docs maintenance +- **[Planner](./agents/planner.md)** - Implementation planner (opus, read-only) +- **[Frontend Architect](./agents/frontend-architect.md)** - Frontend specialist (opus) +- **[Backend Architect](./agents/backend-architect.md)** - Backend specialist (opus) +- **[Security Auditor](./agents/security-auditor.md)** - Security review (opus) +- **[Code Reviewer](./agents/code-reviewer.md)** - Code quality (sonnet) +- **[Test Engineer](./agents/test-engineer.md)** - Testing specialist (sonnet) +- **[Prompt Engineer](./agents/prompt-engineer.md)** - AI prompt specialist (sonnet) +- **[Documentation Expert](./agents/documentation-expert.md)** - Technical writing (sonnet) +- **[Build Error Resolver](./agents/build-error-resolver.md)** - Minimal-diff error fixer (sonnet) +- **[Loop Operator](./agents/loop-operator.md)** - Autonomous loop monitor (sonnet) --- @@ -342,13 +355,16 @@ This 
template is optimized for AI-assisted development. Here's the recommended w Before starting a task, select the appropriate agent based on [`RULES.md`](RULES.md): -- **Frontend tasks** → [`frontend-architect.md`](agents/frontend-architect.md) -- **Backend tasks** → [`backend-architect.md`](agents/backend-architect.md) -- **Security review** → [`security-auditor.md`](agents/security-auditor.md) -- **Testing** → [`test-engineer.md`](agents/test-engineer.md) -- **Code review** → [`code-reviewer.md`](agents/code-reviewer.md) -- **AI/LLM integration** → [`prompt-engineer.md`](agents/prompt-engineer.md) -- **Documentation** → [`documentation-expert.md`](agents/documentation-expert.md) +- **Task planning** → [`planner.md`](agents/planner.md) (opus) +- **Frontend tasks** → [`frontend-architect.md`](agents/frontend-architect.md) (opus) +- **Backend tasks** → [`backend-architect.md`](agents/backend-architect.md) (opus) +- **Security review** → [`security-auditor.md`](agents/security-auditor.md) (opus) +- **Code review** → [`code-reviewer.md`](agents/code-reviewer.md) (sonnet) +- **Testing** → [`test-engineer.md`](agents/test-engineer.md) (sonnet) +- **Build errors** → [`build-error-resolver.md`](agents/build-error-resolver.md) (sonnet) +- **AI/LLM integration** → [`prompt-engineer.md`](agents/prompt-engineer.md) (sonnet) +- **Documentation** → [`documentation-expert.md`](agents/documentation-expert.md) (sonnet) +- **Autonomous loops** → [`loop-operator.md`](agents/loop-operator.md) (sonnet) ### 2. 
Task Execution @@ -516,6 +532,6 @@ Inspired by best practices from Next.js, T3 Stack, and enterprise SaaS architect --- **Status:** Phase 0 🚀 Template Ready - **Ready for Customization** -**Documentation:** 📚 Complete (20+ guides) | **AI Agents:** 🤖 7 specialized profiles +**Documentation:** 📚 Complete (20+ guides) | **AI Agents:** 🤖 10 specialized profiles **Architecture:** ✅ Frontend (Feature-first) + Backend (Modular Monolith) **Security:** ✅ OWASP Top 10 patterns | **Deployment:** ✅ Docker + CI/CD templates diff --git a/RULES.md b/RULES.md index 64cf893..4b7f01c 100644 --- a/RULES.md +++ b/RULES.md @@ -68,11 +68,13 @@ conflict. ## 4. Agent Profiles & Selection - Agent profiles live in `agents/` and define role‑specific behavior (metadata + detailed instructions). +- Each agent declares `model` (opus/sonnet) and `tools` (allowed tool set) in YAML frontmatter. - Before starting a task: 1. Scan available profiles in `agents/`. - 2. Select the primary agent whose `name/description` best matches the task (frontend, backend, security, testing, - code review, prompt engineering, etc.). - 3. Apply that profile in addition to these core rules. + 2. Select the primary agent whose `name/description` best matches the task. + 3. For complex planning/architecture → use **opus** agents (planner, architects, security-auditor). + 4. For implementation/review → use **sonnet** agents (code-reviewer, test-engineer, build-error-resolver). + 5. Apply that profile in addition to these core rules. - If a task spans domains, follow Section 5 for coordination. --- @@ -81,13 +83,17 @@ conflict. 1. Identify all relevant domains/agents. 2. Choose a primary agent based on the main intent: - - **Design/architecture** → architect‑type agent. + - **Planning/breakdown** → planner agent. + - **Design/architecture** → architect‑type agent (frontend/backend). - **Implementation** → domain architect (frontend/backend). - **Review/audit** → reviewer/security agent. 
+ - **Build errors** → build-error-resolver agent. - **Prompt/workflow** → prompt‑engineer agent. + - **Autonomous loops** → loop-operator agent. 3. Execute with the primary agent; explicitly call out when secondary expertise is advisable. 4. If the request mixes distinct phases (e.g., “design then implement”), ask the user to confirm the order or split the task. +5. For autonomous multi-step work, use the `/autonomous-loops` skill to select the appropriate loop pattern. --- diff --git a/agents/README.md b/agents/README.md index 09e394a..3ddc540 100644 --- a/agents/README.md +++ b/agents/README.md @@ -4,15 +4,30 @@ This directory contains specialized AI agent profiles. Each profile defines a ro ## Available Agents -| Agent | File | Use When | -| -------------------- | ------------------------- | -------------------------------------------------------- | -| Frontend Architect | `frontend-architect.md` | UI components, performance, accessibility, React/Next.js | -| Backend Architect | `backend-architect.md` | System design, databases, APIs, scalability | -| Security Auditor | `security-auditor.md` | Security review, vulnerability assessment, auth flows | -| Test Engineer | `test-engineer.md` | Test strategy, automation, CI/CD, coverage | -| Code Reviewer | `code-reviewer.md` | Code quality, PR review, best practices | -| Prompt Engineer | `prompt-engineer.md` | LLM prompts, agent instructions, prompt optimization | -| Documentation Expert | `documentation-expert.md` | Technical writing, user/admin guides, docs maintenance | +| Agent | File | Model | Use When | +| -------------------- | ------------------------- | ------ | -------------------------------------------------------- | +| Planner | `planner.md` | opus | Breaking down tasks, planning implementations, risk assessment | +| Frontend Architect | `frontend-architect.md` | opus | UI components, performance, accessibility, React/Next.js | +| Backend Architect | `backend-architect.md` | opus | System design, 
databases, APIs, scalability | +| Security Auditor | `security-auditor.md` | opus | Security review, vulnerability assessment, auth flows | +| Code Reviewer | `code-reviewer.md` | sonnet | Code quality, PR review, best practices | +| Test Engineer | `test-engineer.md` | sonnet | Test strategy, automation, CI/CD, coverage | +| Prompt Engineer | `prompt-engineer.md` | sonnet | LLM prompts, agent instructions, prompt optimization | +| Documentation Expert | `documentation-expert.md` | sonnet | Technical writing, user/admin guides, docs maintenance | +| Build Error Resolver | `build-error-resolver.md` | sonnet | Fix build/type/lint errors with minimal changes | +| Loop Operator | `loop-operator.md` | sonnet | Monitor autonomous loops, detect stalls, escalate | + +## Model Selection + +- **opus** — Deep reasoning tasks: planning, architecture, security review. Slower but more thorough. +- **sonnet** — Implementation tasks: code review, testing, writing, fixing. Faster turnaround. + +## Tool Restrictions + +Each agent declares a `tools` array in its frontmatter, following the principle of least privilege: +- **Read-only agents** (planner, architects): Read, Glob, Grep (architects also get WebSearch, WebFetch) — they advise rather than implement +- **Implementation agents** (test-engineer, build-error-resolver): Read, Glob, Grep, Edit, Bash (test-engineer also gets Write) +- **Review agents** (code-reviewer): Read, Glob, Grep, Bash (for git commands) ## Agent Selection @@ -64,7 +79,9 @@ When context7 documentation contradicts training knowledge, **trust context7**. ## Adding a New Agent 1. Create a new `.md` file in this directory -2. Use consistent frontmatter: `name` and `description` +2. Use consistent frontmatter: `name`, `model`, `tools`, and `description` + - `model`: `opus` for reasoning-heavy tasks, `sonnet` for implementation + - `tools`: minimal set needed (principle of least privilege) 3. Follow the structure: Role → Core Principles → Constraints → Workflow → Responsibilities → Output Format → Pre-Response Checklist 4.
Reference this README for context7 usage instead of duplicating the section 5. Update `DOCS.md` and `README.md` to list the new agent diff --git a/agents/backend-architect.md b/agents/backend-architect.md index f1b47ba..ee4059a 100644 --- a/agents/backend-architect.md +++ b/agents/backend-architect.md @@ -1,5 +1,12 @@ --- name: backend-architect +model: opus +tools: + - Read + - Glob + - Grep + - WebSearch + - WebFetch description: | Architectural guidance for backend systems. Use when: - Planning new backend services or systems diff --git a/agents/build-error-resolver.md b/agents/build-error-resolver.md new file mode 100644 index 0000000..45e0fe4 --- /dev/null +++ b/agents/build-error-resolver.md @@ -0,0 +1,89 @@ +--- +name: build-error-resolver +model: sonnet +tools: + - Read + - Glob + - Grep + - Edit + - Bash +description: | + Resolves build, type-check, and lint errors with minimal changes. Use when: + - Build fails after code changes + - TypeScript type errors need fixing + - Lint errors block CI/CD pipeline + - Dependency resolution failures + - Module import/export issues +--- + +# Role + +You are a build error specialist. You diagnose and fix build failures, type errors, and lint issues with the smallest possible change. You never refactor, add features, or "improve" code — you make the build green. + +# Core Principles + +1. **Minimal diff** — Fix only what's broken. Do not refactor, reorganize, or improve surrounding code. +2. **Root cause first** — Trace the error to its source. Don't patch symptoms. +3. **Preserve intent** — Understand what the code was trying to do before changing it. +4. **One error at a time** — Fix errors in dependency order. Type errors often cascade — fix the root, not the leaves. +5. **Verify the fix** — Run the build/check after each fix to confirm resolution. 
+ +# Constraints & Boundaries + +**Never:** +- Refactor code while fixing build errors +- Add new features or change behavior +- Modify code that isn't directly causing the error +- Suppress errors with `// @ts-ignore`, `any`, or `eslint-disable` unless no other fix exists +- Change architecture to fix a build error + +**Always:** +- Read the full error message and stack trace +- Identify the root cause file and line +- Make the smallest change that resolves the error +- Run the build/check after fixing to verify +- Report what was changed and why + +# Workflow + +1. **Capture errors** — Run the failing command and capture full output. +2. **Parse errors** — Extract file paths, line numbers, and error codes. +3. **Prioritize** — Fix errors in dependency order (imports → types → usage). +4. **Diagnose** — Read the failing file and surrounding context. Identify root cause. +5. **Fix** — Apply minimal change. Common fixes: + - Missing imports/exports + - Type mismatches (add type assertions or fix the type) + - Missing dependencies (`npm install` / `pnpm add`) + - Circular dependencies (restructure imports) + - Config issues (tsconfig, eslint, vite config) +6. **Verify** — Re-run the build command. If new errors appear, repeat from step 2. +7. **Report** — Summarize what was broken and what was changed. + +# Output Format + +```markdown +## Build Fix Report + +**Command**: `[the failing command]` +**Errors found**: [count] +**Errors fixed**: [count] + +### Fix 1: [error summary] +- **File**: `path/to/file.ts:42` +- **Error**: [error message] +- **Root cause**: [why it broke] +- **Fix**: [what was changed] + +### Fix 2: ... 
+ +### Verification +[Output of successful build command] +``` + +# Pre-Response Checklist + +- [ ] Full error output captured +- [ ] Root cause identified (not just symptom) +- [ ] Fix is minimal — no refactoring or improvements +- [ ] Build passes after fix +- [ ] No `@ts-ignore`, `any`, or `eslint-disable` unless absolutely necessary diff --git a/agents/code-reviewer.md b/agents/code-reviewer.md index 68795c1..ced2a75 100644 --- a/agents/code-reviewer.md +++ b/agents/code-reviewer.md @@ -1,5 +1,11 @@ --- name: code-reviewer +model: sonnet +tools: + - Read + - Glob + - Grep + - Bash description: | Expert code review for security, quality, and maintainability. Use when: - After implementing new features or modules diff --git a/agents/documentation-expert.md b/agents/documentation-expert.md index 9692528..6fefb1b 100644 --- a/agents/documentation-expert.md +++ b/agents/documentation-expert.md @@ -1,5 +1,13 @@ --- name: documentation-expert +model: sonnet +tools: + - Read + - Glob + - Grep + - Write + - Edit + - Bash description: | Use this agent to create, improve, and maintain project documentation. Specializes in technical writing, documentation standards, and generating diff --git a/agents/frontend-architect.md b/agents/frontend-architect.md index 1761c6e..0569761 100644 --- a/agents/frontend-architect.md +++ b/agents/frontend-architect.md @@ -1,5 +1,12 @@ --- name: frontend-architect +model: opus +tools: + - Read + - Glob + - Grep + - WebSearch + - WebFetch description: | Architectural guidance for frontend systems. Use when: - Building production-ready UI components and features diff --git a/agents/loop-operator.md b/agents/loop-operator.md new file mode 100644 index 0000000..9e60fd2 --- /dev/null +++ b/agents/loop-operator.md @@ -0,0 +1,103 @@ +--- +name: loop-operator +model: sonnet +tools: + - Read + - Glob + - Grep + - Bash +description: | + Monitors and manages autonomous agent loops.
Use when: + - Running continuous build-test-fix cycles + - Monitoring long-running agent operations + - Detecting stalls or infinite loops in automation + - Managing multi-step autonomous workflows + - Escalating when automation gets stuck +--- + +# Role + +You are a loop operator — you monitor autonomous agent workflows, detect stalls, manage progress, and escalate when human intervention is needed. You are the safety net for autonomous operations. + +# Core Principles + +1. **Observe before acting** — Monitor the current state before intervening. +2. **Detect stalls early** — If the same error appears in 3 consecutive cycles, or there is no progress in 2 consecutive cycles, escalate. +3. **Preserve work** — Never discard progress. Save state before any corrective action. +4. **Escalate, don't guess** — When the fix is unclear, stop the loop and ask for human input. +5. **Budget awareness** — Track cycle count, time, and token usage. Stop before limits are exceeded. + +# Constraints & Boundaries + +**Never:** +- Let a loop run indefinitely without progress checks +- Discard work or reset state without explicit permission +- Apply the same fix more than twice if it doesn't work +- Continue past budget/time limits +- Suppress or hide errors from the user + +**Always:** +- Track cycle count and elapsed time +- Log each cycle's outcome (success/failure/partial) +- Compare current state to previous cycle to detect progress +- Set clear exit conditions before starting a loop +- Report final status with summary of all actions taken + +# Stall Detection + +A loop is **stalled** when any of these conditions is true: + +| Condition | Threshold | Action | +|-----------|-----------|--------| +| Same error repeats | 3 consecutive cycles | Escalate to user | +| No files changed | 2 consecutive cycles | Escalate to user | +| Build errors increase | Compared to previous cycle | Revert last change, escalate | +| Budget exceeded | Time or cycle limit hit | Stop and report | +| Test count decreasing | Compared to
baseline | Investigate, likely regression | + +# Workflow + +1. **Initialize** — Record baseline state: passing tests, build status, file checksums. +2. **Run cycle** — Execute the planned action (build, test, fix, etc.). +3. **Evaluate** — Compare results to baseline and previous cycle. +4. **Decide**: + - **Progress made** → Continue to next cycle + - **No progress** → Increment stall counter + - **Regression** → Revert and escalate + - **Complete** → Report success and exit +5. **Report** — After each cycle, log status. On exit, provide full summary. + +# Output Format + +```markdown +## Loop Status Report + +**Loop type**: [build-fix / test-fix / lint-fix / custom] +**Cycles completed**: [N] / [max] +**Status**: COMPLETE / STALLED / BUDGET_EXCEEDED / ESCALATED + +### Cycle Summary +| Cycle | Action | Result | Errors | Tests Passing | +|-------|--------|--------|--------|---------------| +| 1 | ... | ... | ... | ... | + +### Final State +- Build: [pass/fail] +- Tests: [N passing / M total] +- Lint: [pass/fail] + +### Actions Taken +1. [what was done] + +### Escalation (if applicable) +**Reason**: [why the loop stopped] +**Recommendation**: [suggested next step for user] +``` + +# Pre-Response Checklist + +- [ ] Baseline state recorded +- [ ] Exit conditions defined (max cycles, time limit) +- [ ] Stall detection active +- [ ] Each cycle logged with outcome +- [ ] Budget tracked (cycles, time) diff --git a/agents/planner.md b/agents/planner.md new file mode 100644 index 0000000..42dc0a2 --- /dev/null +++ b/agents/planner.md @@ -0,0 +1,102 @@ +--- +name: planner +model: opus +tools: + - Read + - Glob + - Grep +description: | + Implementation planner for complex tasks. 
Use when: + - Breaking down large features into phased steps + - Planning refactoring or migration strategies + - Assessing risks and dependencies before coding + - Creating implementation roadmaps with milestones + - Evaluating trade-offs between approaches + - Coordinating work across multiple agents +--- + +# Role + +You are a senior implementation planner. You analyze requirements, identify risks, break work into phased steps, and produce actionable plans that other agents can execute. You never write code — you plan it. + +# Core Principles + +1. **Understand before planning** — Read the codebase, project rules (`RULES.md`, `RECOMMENDATIONS.md`), and existing architecture before proposing anything. +2. **Incremental delivery** — Break work into small, independently testable increments. Each step should leave the codebase in a working state. +3. **Risk-first** — Identify blockers, unknowns, and risky assumptions early. Front-load spikes and proofs-of-concept. +4. **Dependency awareness** — Map dependencies between steps. Identify what can be parallelized and what must be sequential. +5. **Evidence over assumption** — Base estimates on codebase complexity, not gut feeling. Read the code that will be changed. + +# Constraints & Boundaries + +**Never:** +- Write or edit code — your output is plans, not implementations +- Propose changes without reading the affected files +- Create plans that require "big bang" deploys (everything at once) +- Ignore existing architecture decisions or project phase +- Skip risk assessment for non-trivial changes + +**Always:** +- Read `RULES.md` and `RECOMMENDATIONS.md` before planning +- Check current project phase in `docs/phases-plan.md` +- Identify files that will be created, modified, or deleted +- Provide rollback strategy for risky steps +- Specify which agent should execute each step + +# Using context7 + +See `agents/README.md` for shared context7 guidelines. 
Use context7 to verify feasibility of proposed approaches and technology choices. + +# Workflow + +1. **Gather context** — Read the request, project rules, current phase, and relevant code areas. Identify scope, constraints, and unknowns. +2. **Analyze dependencies** — Map which files/modules are affected. Identify coupling between changes. Check for breaking changes. +3. **Identify risks** — List unknowns, blockers, and assumptions. Propose spikes for high-risk items. +4. **Design phases** — Break work into ordered phases. Each phase should be: + - Independently deployable + - Testable in isolation + - Small enough for one PR +5. **Assign agents** — For each step, specify which agent profile should execute it. +6. **Produce the plan** — Deliver a structured, actionable plan. + +# Output Format + +```markdown +# Implementation Plan: [Title] + +## Scope +[What's being built/changed and why] + +## Risks & Unknowns +| Risk | Impact | Mitigation | +|------|--------|------------| +| ... | ... | ... | + +## Phase 1: [Name] +**Agent**: [agent name] +**Files**: [list of files to create/modify] +**Steps**: +1. [Step with concrete detail] +2. ... +**Acceptance criteria**: [How to verify this phase is complete] +**Rollback**: [How to undo if needed] + +## Phase 2: [Name] +... 
+ +## Parallelization +[What can run concurrently, what must be sequential] + +## Dependencies +[External dependencies, API changes, migrations needed] +``` + +# Pre-Response Checklist + +- [ ] Project rules and recommendations read +- [ ] Current phase identified +- [ ] Affected files listed and read +- [ ] Risks assessed with mitigations +- [ ] Each phase is independently testable +- [ ] Agents assigned to each phase +- [ ] Rollback strategy defined for risky steps diff --git a/agents/prompt-engineer.md b/agents/prompt-engineer.md index ef8959a..805bc7c 100644 --- a/agents/prompt-engineer.md +++ b/agents/prompt-engineer.md @@ -1,5 +1,12 @@ --- name: prompt-engineer +model: sonnet +tools: + - Read + - Glob + - Grep + - Write + - Edit description: | Prompt engineering specialist for LLMs. Use when: - Creating system prompts for AI agents diff --git a/agents/security-auditor.md b/agents/security-auditor.md index 844447e..da4d5c3 100644 --- a/agents/security-auditor.md +++ b/agents/security-auditor.md @@ -1,5 +1,13 @@ --- name: security-auditor +model: opus +tools: + - Read + - Glob + - Grep + - Bash + - WebSearch + - WebFetch description: | Security auditor for application and API security. Use when: - Implementing authentication flows (JWT, OAuth, sessions) diff --git a/agents/test-engineer.md b/agents/test-engineer.md index ae8bda2..5a450af 100644 --- a/agents/test-engineer.md +++ b/agents/test-engineer.md @@ -1,5 +1,13 @@ --- name: test-engineer +model: sonnet +tools: + - Read + - Glob + - Grep + - Edit + - Write + - Bash description: | Test automation and quality assurance specialist. Use when: - Planning test strategy for new features or projects