commit 0265afa054a6aa6c70fd4946aa57df990a95175d Author: 44r0n7 <44r0n7+gitea@pm.me> Date: Sat May 2 11:49:07 2026 -0400 chore: bootstrap lean sysadmin-chronicles repo Import the runnable game code, content, docs, scripts, and repo guidance while leaving local agent state, dependency installs, build output, and backup copies out of the published tree. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..577a89a --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ + +# Local agent/runtime state +.agents/ +.claude/ +.claude-flow/ +.codex/ +.swarm/ +.agent-backups/ +.agent-logs/ +.agent-prompts/ + +# Build outputs and dependencies +node_modules/ +dist/ +coverage/ +.vite/ +.svelte-kit/ +frontend/node_modules/ +server/node_modules/ +frontend/dist/ + +# Local data and cache files +*.db +*.sqlite +*.sqlite-shm +*.sqlite-wal +*.tar.gz +*.log +originals-*/ +.cache/ +tmp/ +temp/ + +# Environment variables +.env +.env.local +.env.*.local + +# Common editor / OS cruft +.DS_Store +*.swp +*~ diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..ceb16c4 --- /dev/null +++ b/.mcp.json @@ -0,0 +1,32 @@ +{ + "mcpServers": { + "ruflo": { + "command": "npx", + "args": [ + "-y", + "ruflo@latest", + "mcp", + "start" + ], + "autoStart": false + }, + "claude-flow": { + "command": "npx", + "args": [ + "-y", + "@claude-flow/cli@latest", + "mcp", + "start" + ], + "env": { + "npm_config_update_notifier": "false", + "CLAUDE_FLOW_MODE": "v3", + "CLAUDE_FLOW_HOOKS_ENABLED": "true", + "CLAUDE_FLOW_TOPOLOGY": "hierarchical-mesh", + "CLAUDE_FLOW_MAX_AGENTS": "15", + "CLAUDE_FLOW_MEMORY_BACKEND": "hybrid" + }, + "autoStart": false + } + } +} diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..edb2573 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,172 @@ +# sysadmin-chronicles + +@RTK.md + +> Multi-agent orchestration framework for agentic coding + +## Project Overview + +A Claude Flow powered project + +**Tech Stack**: TypeScript, Node.js +**Architecture**: 
Domain-Driven Design with bounded contexts + +## Quick Start + +### Installation +```bash +npm install +``` + +## Dual-Agent Note + +- This repo is set up for both Claude and Codex CLI +- If shell-based delegation is needed, prefer `/home/aaron/.npm-global/bin/codex` instead of relying on `codex` being on `PATH` +- Use RuFlo for orchestration and Codex for execution when a second coding agent is helpful +- Use `rtk` for noisy shell commands to reduce token/context usage; see `RTK.md` + +## Project Map + +Before substantive work, read `PROJECT_MAP.md`. + +Use it to: +- identify relevant files before loading context +- follow hot paths and change-impact notes +- avoid known anti-patterns + +Update `PROJECT_MAP.md` when: +- commands, routes, public APIs, or user-facing workflows change +- meaningful files/modules are added, removed, or renamed +- persistence formats or compatibility contracts change +- feature ownership or architecture changes +- major known issues are discovered or resolved + +Do not update it for tiny refactors, wording tweaks, or dependency bumps that do not change workflow or structure. 
+ +### Build +```bash +npm run build +``` + +### Test +```bash +npm test +``` + +### Development +```bash +npm run dev +``` + +## Agent Coordination + +### Swarm Configuration + +This project uses hierarchical swarm coordination for complex tasks: + +| Setting | Value | Purpose | +|---------|-------|---------| +| Topology | `hierarchical` | Queen-led coordination (anti-drift) | +| Max Agents | 8 | Optimal team size | +| Strategy | `specialized` | Clear role boundaries | +| Consensus | `raft` | Leader-based consistency | + +### When to Use Swarms + +**Invoke swarm for:** +- Multi-file changes (3+ files) +- New feature implementation +- Cross-module refactoring +- API changes with tests +- Security-related changes +- Performance optimization + +**Skip swarm for:** +- Single file edits +- Simple bug fixes (1-2 lines) +- Documentation updates +- Configuration changes + +### Available Skills + +Use `$skill-name` syntax to invoke: + +| Skill | Use Case | +|-------|----------| +| `$swarm-orchestration` | Multi-agent task coordination | +| `$memory-management` | Pattern storage and retrieval | +| `$sparc-methodology` | Structured development workflow | +| `$security-audit` | Security scanning and CVE detection | + +### Agent Types + +| Type | Role | Use Case | +|------|------|----------| +| `researcher` | Requirements analysis | Understanding scope | +| `architect` | System design | Planning structure | +| `coder` | Implementation | Writing code | +| `tester` | Test creation | Quality assurance | +| `reviewer` | Code review | Security and quality | + +## Code Standards + +### File Organization +- **NEVER** save to root folder +- `/src` - Source code files +- `/tests` - Test files +- `/docs` - Documentation +- `/config` - Configuration files + +### Quality Rules +- Files under 500 lines +- No hardcoded secrets +- Input validation at boundaries +- Typed interfaces for public APIs +- TDD London School (mock-first) preferred + +### Commit Messages +``` +<type>(<scope>): <description> + +[optional body] 
+ +Co-Authored-By: claude-flow +``` + +Types: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `chore` + +## Security + +### Critical Rules +- NEVER commit secrets, credentials, or .env files +- NEVER hardcode API keys +- Always validate user input +- Use parameterized queries for SQL +- Sanitize output to prevent XSS + +### Path Security +- Validate all file paths +- Prevent directory traversal (../) +- Use absolute paths internally + +## Memory System + +### Storing Patterns +```bash +npx @claude-flow/cli memory store \ + --key "pattern-name" \ + --value "pattern description" \ + --namespace patterns +``` + +### Searching Memory +```bash +npx @claude-flow/cli memory search \ + --query "search terms" \ + --namespace patterns +``` + +## Links + +- Documentation: https://github.com/ruvnet/claude-flow +- Issues: https://github.com/ruvnet/claude-flow/issues diff --git a/AGENT_RULES.md b/AGENT_RULES.md new file mode 100644 index 0000000..6b27f70 --- /dev/null +++ b/AGENT_RULES.md @@ -0,0 +1,190 @@ +# SYSADMIN CHRONICLES — AGENT WORKING RULES +> Version 2.0 | Status: Enforced +> +> Changelog: +> v2.0 — Rewritten for Node.js + Svelte era. GDScript/Godot rules removed. +> v1.1 — GDScript-specific rules (superseded). +> +> Read this file FIRST before touching anything else. These rules prevent content +> corruption, broken cross-references, and silent design drift. + +--- + +## 0. ALWAYS READ FIRST + +Before doing any work, read these files in order: +1. `AGENT_RULES.md` (this file) +2. `OPEN_ISSUES.md` — current known issues and decisions in progress +3. `docs/ARCHITECTURE.md` — system design and constraints +4. 
`docs/QUEST_AUTHORING.md` — content schema and validation rules + +If you are working on a specific domain, also read: +- Content work → `docs/QUEST_AUTHORING.md` + relevant `content/world_flags/world_flags.json` +- VM work → `docs/ARCHITECTURE.md` sections 5 and 6 +- Save system work → `docs/SAVE_SYSTEM.md` +- Server work → `server/src/` — read the relevant service file before editing + +--- + +## 1. WHAT YOU MAY DO WITHOUT ASKING + +- Add new `.js` or `.svelte` files in `server/src/` or `frontend/src/` following existing conventions +- Add new JSON content files in `content/` that pass content validation +- Add new shell scripts in `tools/` that do not modify VM state +- Edit files you created in the current working session +- Run read-only commands: `cat`, `ls`, `grep`, `diff`, `virsh domstate`, probes +- Run content validation: `node tools/content/validate-content.js` +- Run server tests: `cd server && npm test` +- Create new files in `tools/vm/quest-prep/` for new quests + +## 2. WHAT YOU MUST ASK BEFORE DOING + +- Modifying `docs/ARCHITECTURE.md`, `docs/QUEST_AUTHORING.md`, `docs/SAVE_SYSTEM.md`, or `docs/ROADMAP.md` +- Modifying `content/world_flags/world_flags.json` +- Modifying any existing quest, ticket, incident, or dialogue JSON file +- Adding a new Express route or WebSocket event type +- Changing any validation rule type name or schema field name +- Changing VM profile IDs, snapshot names, or network profile names +- Any `virsh` command that modifies state: `start`, `destroy`, `snapshot-create`, `snapshot-revert` +- Any `tools/vm/` script that writes to a VM image + +## 3. 
WHAT YOU MUST NEVER DO + +- Delete any file (use a rename to `.bak` and ask first) +- Run `virsh undefine`, `virsh pool-delete`, or `virsh net-destroy` without explicit instruction +- Run `tools/vm/snapshot-all.sh --revert-to` without explicit instruction +- Modify a VM's baseline snapshot or `baseline.clean` state +- Run provisioning scripts (`Q0XX-prep.sh`) against any VM without explicit instruction +- Add a world flag reference in any content file without first adding it to `world_flags.json` +- Create a solution branch with a `priority` that duplicates an existing branch in the same quest +- Set `follow_up_incident` to an incident ID that does not exist as a file +- Set `series_id` in a dialogue file without ensuring at least 2 members share that series_id +- Modify the save file schema without updating `server/src/services/SaveState.js` and the migration handler +- Ignore content validation errors and proceed anyway + +--- + +## 4. CONTENT AUTHORING RULES + +### World Flags +- Every flag used anywhere must exist in `content/world_flags/world_flags.json` +- When you set a flag in a quest or incident, update `set_by` in the registry +- When you read a flag in a quest, incident, or dialogue, update `read_by` +- Conflicting flags must list each other in `conflicts_with` +- A flag with `persists: false` resets at the start of each new shift (not on load) + +### Quests +- Every quest must have a `clue_fingerprint` with at least one evidence entry +- Every quest must declare `required_vms` — list ALL VMs touched, not just the primary +- Branch priorities must be unique within a quest — no two branches share a priority number +- The highest-priority branch that matches wins — author branches so better fixes have higher priority +- Do not author a branch that cannot be distinguished from another branch by validation rules alone + +### Tickets +- Both `initial_priority` and `current_priority` must be present and equal at authoring time +- `current_priority` is the only 
field the runtime modifies — never change `initial_priority` at runtime + +### Incidents +- Every incident must declare `blast_radius_quests` (can be empty array, never omit) +- Every incident must declare `blast_radius_incidents` (can be empty array, never omit) +- `follow_up_incident` in a quest branch must map to an incident file that exists + +### Dialogue +- If `series_id` is declared, `series_position` must also be declared +- A `series_id` must have at least 2 dialogue files sharing it before content passes validation +- `trigger: "world_flag:{id}"` — the flag ID must exist in the registry + +### File Naming +- Quest files: `Q{NNN}-{kebab-case-title}.json` +- Ticket files: `T{NNN}.json` +- Incident files: `I{NNN}-{kebab-case-title}.json` +- Dialogue files: `{character}-Q{NNN}.json` or `{character}-Q{NNN}-{variant}.json` +- Do NOT bundle multiple dialogue characters or quests into one file +- VM profiles: `{snake_case}.json` +- Quest prep scripts: `Q{NNN}-prep.sh` + +--- + +## 5. CODE AUTHORING RULES + +### Node.js (server/src/) + +- All host commands go through `server/src/lib/ssh.js` or `server/src/lib/virsh.js` — never use `child_process` directly in service files +- All VM lifecycle actions go through `VMManager.js` — never call libvirt directly from quest or validation logic +- Never hardcode VM domain names — use constants from `ContentLoader` or the VM profile JSON +- All world flag reads and writes go through `QuestEngine.js` — never mutate flags directly +- Trust changes go through `TrustSystem.js` — never modify trust score directly +- Services coordinate via `eventBus.js` (Node EventEmitter) — no service may `require()` another service and call its internals directly; emit events instead +- All save-state writes go through `SaveState.js` + +### Svelte (frontend/src/) + +- All API calls go through `frontend/src/lib/api.js` — no raw `fetch()` in components +- WebSocket events are received in `App.svelte` and distributed to panels via Svelte stores or 
props — panels do not open their own WebSocket connections +- No game logic in Svelte components — components render state and dispatch user actions only + +### Validation Rules + +- Every new rule type must be added to `server/src/services/ValidationEngine.js` and the QUEST_AUTHORING.md rule reference table +- Rules must only observe state — they must never modify VM state + +### Shell Scripts + +- All scripts in `tools/vm/` must print a dry-run summary before modifying anything +- All scripts must be idempotent — running them twice must produce the same result +- Scripts that require root must check for permissions and exit clearly if absent +- Use `sc-` prefix for all libvirt resources created by the game + +--- + +## 6. VM SAFETY RULES + +- Never operate on a VM domain that does not start with `sc-` +- Never revert a snapshot during an active quest without explicit player/developer instruction +- The workstation VM (`sc-workstation`) must stay live during all gameplay — never suspend it mid-session +- If a probe or validation script fails, log the failure and return a degraded-state result — never crash the server +- All SSH connections from the host to guests use key-based auth only — no passwords in scripts + +--- + +## 7. HOW TO HANDLE AMBIGUITY + +If you are unsure whether something is correct: +1. Check `OPEN_ISSUES.md` — the answer may already be there +2. Check `docs/QUEST_AUTHORING.md` for schema rules +3. Check `content/world_flags/world_flags.json` for flag semantics +4. If still unsure, **stop and ask** rather than making an assumption + +Do not proceed with a best-guess implementation of something that is in +`OPEN_ISSUES.md` as unresolved. Wait for a decision. + +--- + +## 8. 
AFTER MAKING CHANGES + +After any content change: +- Run `node tools/content/validate-content.js` and confirm zero errors +- If you added a world flag, confirm it appears in `world_flags.json` with correct `set_by` and `read_by` +- If you added a quest, confirm its prep script exists or is noted as pending in `OPEN_ISSUES.md` + +After any server code change: +- Run `cd server && npm test` and confirm no regressions +- If you added a new validation rule type, add it to `docs/QUEST_AUTHORING.md` + +After any architectural change (new route, new VM, new service, new WebSocket event): +- Update `docs/PROJECT_MAP.md` — boot flow, service graph, VM identity table, or known gaps as applicable + +--- + +## 9. DO NOT SILENTLY FIX DESIGN ISSUES + +If you discover a design inconsistency (e.g., two quests that conflict, a flag used +incorrectly, a branch that cannot be validated), do NOT silently patch it. + +Instead: +1. Add it to `OPEN_ISSUES.md` with a clear description +2. Flag it in your response to the developer +3. Wait for a decision before changing any content + +The exception is purely mechanical errors (typos, missing commas, wrong field +names) where the intent is unambiguous — those can be fixed directly. 
diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..47dfa25 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,188 @@ +# Claude Code Configuration — Ruflo Dual-Agent Workflow + +## Agent Roles (Non-Negotiable) + +| Agent | Responsibilities | +|-------|-----------------| +| **Claude** | Planning, architecture, pseudocode, tradeoff analysis, validation, review | +| **Codex** | All implementation: writing files, editing code, CLI ops, refactoring, debugging | +| **Ruflo** | Orchestration, task delegation, shared memory between agents | + +**Claude MUST NOT write production code, full implementations, or complete file edits.** +**Claude MUST NOT output full files or multi-function implementations — all such work must be delegated.** +**Claude MUST delegate all execution to Codex via Ruflo before attempting it directly.** + +## The Mandatory Workflow Loop + +For every task that involves implementation: + +1. **Analyze** — understand requirements, constraints, relevant files +2. **Decompose** — break into discrete units (each unit = one Codex call) +3. **Delegate** — assign each unit to Codex with a clear, structured spec +4. **Wait** — do not add tool calls after delegating; wait for results +5. **Review** — critically examine Codex output for correctness and completeness +6. 
**Refine** — if output is wrong or incomplete, re-delegate with corrected spec + +**FAILSAFE: If Claude detects it is about to write code, edit a file, or run a CLI command — STOP and delegate instead.** + +## Delegation Rules + +**Use Codex for:** +- Writing any source file (any language) +- Editing existing files +- Refactoring and mechanical fixes +- Running validation, lint, tests +- Writing shell scripts and config files +- Debugging implementation errors + +**Use Claude for:** +- Requirements analysis +- System design and API contract design +- Pseudocode and algorithm sketches (illustrative only, not production) +- Architecture Decision Records +- Reviewing and critiquing Codex output +- Tradeoff analysis + +**Codex invocation pattern:** +```bash +/home/aaron/.npm-global/bin/codex "<task spec>" +``` + +## Exceptions — Claude Writes Directly Only When + +- The task requires judgment Codex demonstrably cannot provide (novel validation logic, cross-reference reasoning) +- Codex has already failed on the same task in this session +- The change is a single Edit tool call on a non-production file (config, doc) + +## Behavioral Rules (Always Enforced) + +- Read `CLAUDE.md`, `AGENTS.md`, and `AGENT_RULES.md` before starting substantive work +- Read `RTK.md` and prefer `rtk` for noisy shell output, including Codex task specs that ask agents to inspect, search, test, or summarize command output +- Do what has been asked; nothing more, nothing less +- NEVER create files unless absolutely necessary for achieving the goal +- ALWAYS prefer editing an existing file to creating a new one +- NEVER proactively create documentation or README files unless explicitly requested +- NEVER save working files or scratch notes to the root folder +- ALWAYS read a file before editing it +- NEVER commit secrets, credentials, or `.env` files +- Never continuously check status after spawning a swarm — wait for results + +## File Organization + +- `/src` — source code +- `/tests` — test files +- 
`/docs` — documentation and markdown +- `/config` — configuration files +- `/scripts` — utility scripts +- `/examples` — example code + +## Project Architecture + +- Domain-Driven Design with bounded contexts +- Files under 500 lines +- Typed interfaces for all public APIs +- TDD London School (mock-first) for new code +- Event sourcing for state changes +- Input validation at all system boundaries + +### Project Config + +- **Topology**: hierarchical-mesh +- **Max Agents**: 15 +- **Memory**: hybrid +- **HNSW**: Enabled +- **Neural**: Enabled + +## Build & Test + +```bash +npm run build # Build +npm test # Test +npm run lint # Lint +``` + +- ALWAYS run tests after any code change (via Codex) +- ALWAYS verify build succeeds before committing + +## Security Rules + +- NEVER hardcode API keys, secrets, or credentials in source files +- NEVER commit `.env` files or any file containing secrets +- Always validate user input at system boundaries +- Always sanitize file paths to prevent directory traversal +- Run `npx @claude-flow/cli@latest security scan` after security-related changes + +## Concurrency: 1 Message = All Related Operations + +- All operations MUST be concurrent/parallel in a single message +- ALWAYS spawn ALL agents in ONE message with full instructions via Agent tool +- ALWAYS batch ALL file reads in ONE message +- ALWAYS batch ALL Bash commands in ONE message + +## Swarm Orchestration + +- Initialize swarm via CLI for complex tasks before delegating +- Spawn concurrent agents using Claude Code's Agent tool +- Never use CLI tools alone for execution — Agent tool agents do the actual work +- Call CLI tools AND Agent tool in ONE message for complex work + +```bash +npx @claude-flow/cli@latest swarm init --topology hierarchical --max-agents 8 --strategy specialized +``` + +## Swarm Execution Rules + +- ALWAYS use `run_in_background: true` for all Agent tool calls +- ALWAYS put ALL Agent calls in ONE message for parallel execution +- After spawning, STOP — do 
NOT add more tool calls or check status +- Never poll agent status repeatedly — trust agents to return +- Review ALL results before proceeding + +## 3-Tier Model Routing (ADR-026) + +| Tier | Handler | Cost | Use Cases | +|------|---------|------|-----------| +| **1** | Edit tool directly | $0 | Single-line transforms — skip LLM | +| **2** | Haiku | $0.0002 | Simple tasks (<30% complexity) | +| **3** | Sonnet/Opus | $0.003–0.015 | Complex reasoning, architecture, security | + +## Ruflo Memory & Shared Context + +Keep shared context between Claude and Codex via Ruflo memory so Codex always has full task specs: + +| Tool | Description | +|------|-------------| +| `memory_store` | Store design decisions and task specs | +| `memory_search` | Semantic search before starting work | +| `memory_search_unified` | Search across Claude + AgentDB + patterns | +| `memory_retrieve` | Retrieve a stored spec by key | + +```bash +# Store a design decision before delegating +npx @claude-flow/cli@latest memory store --key "task-<id>" --value "<spec>" --namespace tasks + +# Search for prior patterns +npx @claude-flow/cli@latest memory search --query "<keywords>" +``` + +## Key MCP Tools (discover via ToolSearch) + +| Category | Tools | +|----------|-------| +| Memory | `memory_store`, `memory_search`, `memory_search_unified` | +| Swarm | `swarm_init`, `swarm_status`, `swarm_health` | +| Agents | `agent_spawn`, `agent_list`, `agent_status` | +| Hive-Mind | `hive-mind_init`, `hive-mind_spawn`, `hive-mind_consensus` | +| Hooks | `hooks_route`, `hooks_session-start`, `hooks_post-task` | +| Security | `aidefence_scan`, `aidefence_is_safe` | + +``` +ToolSearch("memory search") → memory_store, memory_search, memory_search_unified +ToolSearch("swarm") → swarm_init, swarm_status, swarm_health +ToolSearch("+aidefence") → aidefence_scan, aidefence_is_safe, aidefence_has_pii +``` + +## Support + +- Documentation: https://github.com/ruvnet/ruflo +- Issues: https://github.com/ruvnet/ruflo/issues diff --git 
a/OPEN_ISSUES.md b/OPEN_ISSUES.md new file mode 100644 index 0000000..a0e046b --- /dev/null +++ b/OPEN_ISSUES.md @@ -0,0 +1,199 @@ +# SYSADMIN CHRONICLES — OPEN ISSUES +> Version 2.0 | Last updated: Phase 1 skeleton build +> +> All known design gaps, content bugs, and deferred decisions. +> Items here must NOT be implemented with a best-guess — wait for a resolution. +> Mark items RESOLVED with the fix details when closed. + +--- + +## AGENT INSTRUCTIONS (READ FIRST) +> Added during Phase 1 skeleton build. Document any decisions you make here. +> +> When you resolve an open issue: +> 1. Move it to the RESOLVED ISSUES section at the bottom. +> 2. Add resolution details and the file(s) changed. +> 3. Update ROADMAP.md to mark relevant tasks complete. +> +> When you make a minor direction change (non-game-changing): +> 1. Note it here under NEW DECISIONS. +> 2. Update the relevant doc (ARCHITECTURE.md, SAVE_SYSTEM.md, etc.) +> 3. Do NOT silently patch content files — note it here first. +> +> **CODE QUALITY AUDIT — COMPLETE** +> All P1, P2, and P3 items from docs/CODEX_AUDIT_FIXES.md have been resolved. +> docs/CODEX_AUDIT_FIXES.md has been deleted per its own WHEN DONE instruction. + +--- + +## NEW DECISIONS (Made During Phase 1 Build) + +### ND-001 — T005-T008 bundled file needs split (same as OI-008 pattern) +The file T005-T008.json is a bundled array. Content loader expects one file per +ticket with an "id" field. It has been kept as-is (content loader skips arrays). +Split into T005.json–T008.json before Phase 5 ticket loading is implemented. + +### ND-002 — OI-006 (persists: false) already resolved in SAVE_SYSTEM.md v1.3 +SAVE_SYSTEM.md v1.3 defines shift-boundary reset. SaveState.js implements the +reset_shift_flags() equivalent at shift start. Closed. + +### ND-003 — OI-007 (Q002 blast_radius) safe to fix mechanically +Q002-syntax-error.json blast_radius: ["I001"] should be []. Mechanical fix — +the next agent can apply it directly without asking. See OI-007. 
+ +### ND-004 — Stack is Node.js + Svelte +The game runs as a Node.js/Express server with a Svelte web HUD. The workstation +is a real XFCE VM (sc-workstation). All game logic lives in `server/src/`. + +### ND-005 — opsbridge/sudo SSH path for host→workstation validation +The validation path from the host uses the `opsbridge` management user then +`sudo -H -i -u player` inside the guest, because Q001 intentionally removes +`/home/player/.ssh/authorized_keys`. The correct form is: + +``` +ssh opsbridge@<workstation-ip> sudo -H -i -u player -- sh -c '<command>' +``` + +Separated sudo flags (`-H -i -u`) are required — the combined `-Hiu` form misparses on some builds. + +**Status**: RESOLVED — confirmed working in ValidationEngine.js SSH path. + +### ND-006 — build_machine snapshot chain now materializes baseline.post-q006 from Q006 clean state +`tools/setup/seed-vms.sh` now builds `sc-build-machine` in two authored stages: +`Q006-prep.sh` creates the broken `baseline.clean` state for "Time Is A Flat +Circle", and `Q006-post-clean.sh` applies the clean branch outcome before taking +`baseline.post-q006`. + +`Q008` is still a separate multi-VM provisioning gap. Its authored starting +state touches both vulcan and hermes, so it should not be guessed into the +single-domain snapshot chain until that flow is designed explicitly. + +### ND-007 — terminal UX: Tilix is the player's terminal (no in-game simulation) +**Status**: RESOLVED. The player uses a real Tilix terminal inside the workstation +XFCE VM. All terminal UX (history, scrollback, copy/paste) is handled by Tilix. +No terminal simulation needed. See `docs/WORKSTATION_POLISH_BACKLOG.md` for +outstanding workstation desktop polish items. + +### ND-008 — vulcan player shell/PATH is still misprovisioned +**Status**: RESOLVED 2026-04-24. +Root cause: `inetutils` (provides `/usr/bin/hostname` on Arch) was not in the +`build-build-machine.sh` pacman install. Hermes (Debian) has hostname pre-installed. 
+Fix applied in `tools/vm/build-build-machine.sh`: +- Added `inetutils` to the runcmd pacman install line. +- Added runcmd entries to write `/home/player/.bashrc` (explicit PATH) and + `.bash_profile` (sources .bashrc), then chown to player. +Regression gate added to `tools/setup/seed-vms.sh` (STEP 1b): after builds, +SSH-tests `hostname` on sc-web-server and sc-build-machine; fails fast if missing. + +--- + +## MUST RESOLVE BEFORE PHASE 3 + +### OI-001 — Q001 permissive-setup branch contradictory logic +**File**: content/quests/Q001-welcome-aboard.json +Option A: bad-but-not-fatal permissions (755 dir), quest completes with warning. +Option B: fatally wrong permissions (777), quest does NOT complete via this branch. +**Decision needed**: Which option? +**Status**: RESOLVED — permissive-setup branch (Option A/lenient) was already correctly implemented. Q001 branch validates file_exists + file_owner without checking mode, so 755 directory case completes the quest with trust_delta 0. marcus-Q001.json already has complete-permissive stage. + +### OI-002 — Q008 rollback-only vs rollback-and-pin have identical validation +**File**: content/quests/Q008-bad-upstream.json +Need a distinguishing rule for pinned vs unpinned. Likely an IgnorePkg entry +in /etc/pacman.conf (detectable via file_contains). +**Status**: RESOLVED — Q008 already has file_contains check for IgnorePkg in /etc/pacman.conf on rollback-and-pin branch, and a not-rule on the rollback-only branch to ensure mutual exclusion. Confirmed in Q008 internal_notes. + +--- + +## MUST RESOLVE BEFORE PHASE 5 + +### OI-003 — Incident files I002 and I003 are missing +Author I002-backup-pressure-recurrence.json and I003-app-update-recurrence.json +following the I001 pattern before Phase 6. +**Status**: RESOLVED — both files authored (content/incidents/I002-backup-pressure-recurrence.json, I003-app-update-recurrence.json). Content validator passes zero errors. 
+ +### OI-004 — pressure_profile field is referenced but never defined +Recommend: separate files in content/pressure_profiles/ with a defined schema. +**Status**: RESOLVED — created content/pressure_profiles/ with web_outage_escalation.json and app_outage_escalation.json. Schema uses trigger_after_seconds steps with notification, notification_severity, and escalate_linked_ticket fields. escalate_linked_ticket resolves to the quest's own ticket_id at runtime. + +### OI-005 — check_mode: explicit trigger mechanism undefined +A "Verify Fix" button in the ticket panel UI, shown per-objective when check_mode == explicit. +**Status**: RESOLVED — Verify Fix button implemented in TicketsPanel.svelte. Button appears +per-objective when check_mode == explicit, disables during check, re-enables with 2s delay on failure. + +--- + +## LOW PRIORITY / ANYTIME + +### OI-007 — Q002 blast_radius incorrectly references I001 +Fix: change blast_radius: ["I001"] to blast_radius: [] in Q002. +**Status**: RESOLVED — blast_radius set to [] in Q002-syntax-error.json; _blast_radius_note added explaining I001 triggers only from Q003 quick-fix branch. + +### OI-008 — tier2-dialogue.json naming convention +Individual files exist: marcus-Q005.json, marcus-Q006.json, marcus-Q008.json, +priya-Q007.json. The bundled file is kept as tier2-dialogue.SPLIT_PENDING.json. +Verify individual files are complete then delete the SPLIT_PENDING file. +**Status**: RESOLVED — individual files confirmed present. Bundled file removed. + +### OI-009 — sarah-web series has only one member +sarah-Q003-angry.json declares series_id: "sarah-web" but no second member exists. +Either add sarah-Q004+ or remove series_id until a second file is authored. +**Status**: RESOLVED — series_id and series_position removed from sarah-Q003-angry.json. Series grouping deferred until a second sarah-web member is authored. + +### OI-011 — Snapshot baseline chain +seed-vms.sh implements the chain. 
Need formal policy in QUEST_AUTHORING.md. +Chain: workstation: baseline.day-one; web_server: clean→post-q002→post-q003→post-q004; +build_machine: clean→post-q006. Each post-qXXX baseline from CLEAN branch resolution. +**Status**: RESOLVED — Baseline Snapshot Chain subsection added to docs/QUEST_AUTHORING.md in VM PROVISIONING HOOKS section. Documents chain per VM, the clean-branch-only rule, and naming convention. + +--- + +## RESOLVED ISSUES + +### OI-006 — persists: false flag semantics +Resolution: Shift-boundary reset handled in SaveState.js at shift start. + +### OI-010 — file_absent and file_owner_is_not undocumented rule types +Resolution: Added to ValidationEngine.js as full rule types. +Still needs: update QUEST_AUTHORING.md rule reference table. + +### OI-012 — SSH execution contract +Resolution: server/src/lib/ssh.js — Promise-based, structured result (stdout/stderr/exitCode), +BatchMode key-based auth, 30s default timeout. + +### OI-013 — Language choice +Resolution: Node.js + Svelte. See ND-004. + +--- + +## ADDITIONAL RESOLUTIONS (Phase 1 continued) + +### OI-003 — I002 and I003 incident files authored +**Resolution**: +- I002-backup-pressure-recurrence.json authored — triggers on hermes_backup_partial flag, + 3-step escalation, resolves when cron+ownership+disk all correct. +- I003-app-update-recurrence.json authored — triggers when rollback-only branch taken on Q008, + re-installs broken version unless pinned. Resolves when IgnorePkg + correct version confirmed. +- Content validator now passes zero errors. + +### ND-001 — T005-T008 split complete +T005.json, T006.json, T007.json, T008.json created from the bundled file. +Content validator now loads all 8 tickets correctly. + +### OI-007 — Q002 blast_radius fix +**Resolution**: The validator was fixed to normalize world_flags.json array format. +The Q002 blast_radius: ["I001"] issue is documented — apply this one-line fix +directly: change blast_radius in Q002-syntax-error.json from ["I001"] to []. 
+ +### VALIDATOR FIXES applied this session: +- validate-content.js now normalizes world_flags.json array format correctly +- Advisory clue_fingerprint rule types (service_state_is, file_size_above, etc.) + are now accepted — they describe evidence, not runtime-evaluated rules +- T005-T008 bundled file is now skipped correctly (SPLIT_DONE suffix) +- WorldFlags handling now normalizes both Array and Dict flag formats + +### CONTENT STATUS: validate-content.js exits 0 (zero errors, 2 warnings) +Warnings are all expected and documented: +- priya-ops series: 1 member (needs future dialogue) +- T005-T008.SPLIT_DONE.json: skipped (bundled file, split done) +(sarah-web series warning removed — series_id stripped from sarah-Q003-angry.json per OI-009) +(tier2-dialogue.SPLIT_PENDING warning removed — renamed to .SPLIT_DONE.bak per OI-008) diff --git a/PROJECT_MAP.md b/PROJECT_MAP.md new file mode 100644 index 0000000..9c4091d --- /dev/null +++ b/PROJECT_MAP.md @@ -0,0 +1,255 @@ +# PROJECT_MAP.md + +# Auto-generated / agent-maintained project index. +# Purpose: help future agents quickly select the right context and files. +# Last updated: 2026-04-30 - Initial root map created from current repo and docs. + +--- + +## 1. Project Snapshot + +Sysadmin Chronicles is a Linux sysadmin game where players resolve tickets inside real libvirt/QEMU VMs. The host Node.js/Express server owns state, validation, VM control, APIs, and static sites; the Svelte/Vite HUD runs in Chromium inside the workstation VM. + +--- + +## 2. Context Budget Rules + +1. Read this `PROJECT_MAP.md` first. +2. Identify the relevant hot path or feature area. +3. Load only directly relevant files. +4. Load one dependency layer outward only if needed. +5. Avoid loading whole directories unless the map says the area is tightly coupled. +6. Prefer tests and public interfaces over implementation details when scoping behavior. +7. Update this map after meaningful structural or user-facing changes. 
+ +Do not paste large files into context when a targeted excerpt, `rg`, `rtk`, or symbol search is enough. + +--- + +## 3. Architecture Summary + +The game is host-authoritative: frontend actions call server APIs, services update save/game state, and validation checks real VM state over SSH/libvirt instead of trusting typed commands. + +Runtime flow: + +`scripts/start-game.sh -> server/src/index.js -> ContentLoader/SaveState/services -> VMManager.ensureWorkstationLive() -> Express/WebSocket on port 3000 -> remote-viewer opens sc-workstation -> Chromium HUD` + +VMs: + +- `sc-workstation` / `ares`: player workstation, XFCE, Chromium HUD, Tilix. +- `sc-web-server` / `hermes`: web/server target. +- `sc-build-machine` / `vulcan`: build/package target. + +--- + +## 4. File Priority Map + +### Tier 1 - Critical / frequently needed + +| Path | Role | When to load | +|------|------|--------------| +| `server/src/index.js` | Server bootstrap, routes, static serving, WebSocket events | Runtime/API/static route changes | +| `server/src/services/ContentLoader.js` | Loads authored content collections | Content schema/loading changes | +| `server/src/services/SaveState.js` | Save persistence and state shape | Save/progression compatibility changes | +| `server/src/services/QuestEngine.js` | Quest lifecycle and completion | Quest activation/completion changes | +| `server/src/services/TicketService.js` | Ticket state and resolution | Ticket workflow changes | +| `server/src/services/ValidationEngine.js` | Real VM rule evaluation | Objective/validation rule changes | +| `server/src/services/VMManager.js` | libvirt state, startup, IP discovery | VM runtime/control changes | +| `frontend/src/App.svelte` | Main HUD orchestration | Tab/workflow/UI state changes | +| `frontend/src/lib/api.js` | Browser API client/session handling | API contract changes | +| `tools/vm/build-vm.sh` | Common VM build driver | VM build behavior changes | +| `tools/vm/profiles/workstation.sh` | Ares 
workstation image profile | Desktop/provisioning changes | +| `scripts/start-game.sh`, `start-game.sh` | End-to-end launchers | Startup/viewer/server launch changes | +| `tools/lib/internal-https.sh` | Shared internal HTTPS cert/env/URL helpers | Portal/Sage/company URL or TLS startup changes | +| `content/quests/`, `content/tickets/`, `content/vm_profiles/` | Authored gameplay data | Quest, ticket, or VM identity changes | + +### Tier 2 - Supporting / sometimes needed + +| Path | Role | When to load | +|------|------|--------------| +| `server/src/routes/` | API route modules | Endpoint behavior changes | +| `server/src/services/TrustSystem.js` | Trust score and unlocks | Access/progression tuning | +| `server/src/services/ProgressionSystem.js` | Unlock/progression rules | Unlock state changes | +| `server/src/services/EmailService.js` | Mail/read/reply state | Mail workflow changes | +| `server/src/services/SageService.js` | Sage API behavior | Knowledge-base changes | +| `server/src/services/IncidentScheduler.js` | Timed incident pressure | Incident/shift pacing | +| `server/src/services/ShiftReviewService.js` | End-shift review data | Profile/review changes | +| `frontend/src/components/` | HUD panels/components | Focused UI changes | +| `tools/content/validate-content.js` | Content validator | Any content schema/rule change | +| `tools/setup/*.sh` | Host setup/seed/uninstall | Installer/setup flow changes | +| `tools/vm/profiles/*.sh` | Target VM profiles | Hermes/vulcan/workstation provisioning changes | +| `tools/vm/repair-workstation-launchers.sh` | Live workstation desktop launcher trust repair | Existing Ares desktop launcher prompt fixes | +| `tools/vm/quest-prep/` | Baseline quest state authorship | VM baseline quest setup | +| `sage/`, `company-website/` | Static web surfaces | Sage/company site changes | +| `docs/ARCHITECTURE.md`, `docs/SAVE_SYSTEM.md`, `docs/VM_BUILD_SYSTEM.md` | Deep design docs | Architecture/save/build questions | + +### Tier 3 - 
Peripheral / rarely needed + +| Path | Role | When to load | +|------|------|--------------| +| `frontend/dist/` | Generated frontend build | Only to verify served assets | +| `node_modules/` | Installed dependencies | Avoid; use manifests instead | +| `ruvector.db` | Local RTK/vector data | Do not inspect for normal work | +| `vm/images/`, `vm/snapshots/` | Large/live VM data | Only for explicit VM storage tasks | +| Static assets | Images/icons/fonts | Only for visual asset changes | + +--- + +## 5. Hot Paths + +- Change server API route: + - Load: `server/src/index.js`, relevant `server/src/routes/*`, relevant service, `frontend/src/lib/api.js` + - Usually update: server tests and frontend caller + - Watch out for: session middleware and WebSocket refresh expectations + +- Change quest/ticket behavior: + - Load: `content/quests/`, `content/tickets/`, `ContentLoader.js`, `QuestEngine.js`, `TicketService.js`, `ValidationEngine.js` + - Usually update: `tools/content/validate-content.js`, docs if schema changes + - Watch out for: world flag and dialogue ID references + +- Change validation rule: + - Load: `ValidationEngine.js`, tests, representative quest JSON + - Usually update: `docs/QUEST_AUTHORING.md` + - Watch out for: validation must observe real VM state, not commands typed + +- Change VM build/provisioning: + - Load: `docs/VM_BUILD_SYSTEM.md`, `tools/vm/build-vm.sh`, relevant `tools/vm/profiles/*.sh`, `tools/setup/seed-vms.sh` + - Usually update: setup docs and dependency/version tracking + - Watch out for: destructive `--force`, cloud-init quoting, readiness gates + +- Change workstation desktop UX: + - Load: `tools/vm/profiles/workstation.sh`, `scripts/start-game.sh`, `runtime/viewer/*` + - Usually update: rebuild/patch instructions + - Watch out for: RAM pressure, browser launch, desktop icon permissions + +- Change frontend HUD workflow: + - Load: `frontend/src/App.svelte`, relevant component, `frontend/src/lib/api.js` + - Usually update: `cd frontend && 
npm run build` + - Watch out for: generated `frontend/dist` is what the server serves + +- Change save/progression/trust: + - Load: `SaveState.js`, `TrustSystem.js`, `ProgressionSystem.js`, `QuestEngine.js`, content progression JSON + - Usually update: migration/default handling and tests + - Watch out for: no-auto-restore and backward compatibility + +- Change Sage/company web surface: + - Load: `server/src/index.js`, `sage/` or `company-website/`, workstation profile bookmarks/proxy config + - Usually update: browser smoke test inside ares + - Watch out for: `/sage/` route and guest bookmark expectations + +--- + +## 6. Change Impact Map + +| Change type | Also check/update | +|-------------|-------------------| +| API contract | route, service, `frontend/src/lib/api.js`, HUD state handling, tests | +| Content schema or IDs | validator, ContentLoader, quests/tickets/dialogue/world flags, docs | +| Validation rule | `ValidationEngine.js`, representative content, tests, authoring docs | +| Persistence format | `SaveState.js`, migrations/defaults, load/save compatibility tests | +| VM profile/domain | setup scripts, start script, content VM profiles, docs, live libvirt state | +| Workstation desktop | profile packages, cloud-init user-data, browser/shortcut config, RAM/performance | +| Frontend UI workflow | `App.svelte`, component state, API client, WebSocket refresh behavior | +| Build/dev tooling | README/dev docs, `AGENTS.md`, scripts, dependency/version manifest | + +--- + +## 7. Key Concepts & Domain Terms + +- **Ares**: player workstation VM; libvirt domain `sc-workstation`. +- **Hermes**: web server target VM; libvirt domain `sc-web-server`. +- **Vulcan**: build machine target VM; libvirt domain `sc-build-machine`. +- **opsbridge**: management user for host-driven SSH validation/control. +- **player**: in-world workstation user. +- **world_flags**: durable flags for quest/narrative branching. +- **trust/unlocks**: score and capability gate system. 
+- **solution_branches**: authored ticket outcomes. +- **pressure_profiles**: incident/timer pressure configuration. +- **baseline/recovery/checkpoint/live**: VM state tiers described in save/snapshot docs. +- **Sage**: knowledge-base/help system. + +--- + +## 8. User-Facing Surface + +- Launch: `bash scripts/start-game.sh`. +- HUD tabs: Tickets, Mail, Docs, Sage, VMs, Profile. +- Server APIs: `/api/session`, `/api/state`, `/api/tickets`, `/api/mail`, `/api/docs`, `/api/vms`, `/api/sage`, `/api/profile`. +- Static sites: `/sage`, `/company`, `/public`; intended in-VM browser URLs use HTTPS (`portal.axiomworks.internal:3000`, `sage.axiomworks.internal:3000/sage/`, `www.axiomworks.corp/`). +- Workstation desktop: Chromium HUD, terminal, desktop shortcuts, remote-viewer session. +- Save path: `~/.local/share/sysadmin-chronicles/save.json`. + +--- + +## 9. Persistence / Data Contracts + +| Contract | Defined in | Compatibility notes | +|----------|------------|---------------------| +| Save file | `SaveState.js`, docs `SAVE_SYSTEM.md` | Preserve load defaults and migration behavior | +| Authored content JSON | `content/`, `ContentLoader.js`, validator | Treat as read-only runtime input; keep IDs stable | +| VM profiles | `content/vm_profiles/`, `tools/vm/profiles/*.sh` | Domain/hostname/user changes affect scripts and docs | +| Frontend session token | `frontend/src/lib/api.js` | Stored in browser local storage; API retries on invalid session | +| VM disks/snapshots | libvirt/qcow2, docs | Save/load must handle missing domains/snapshots gracefully | +| Static route prefixes | `server/src/index.js` | Guest bookmarks/proxies may depend on `/sage` and `/company` | + +--- + +## 10. 
Test & Validation Map + +| Change area | Validation | +|-------------|------------| +| Content changes | `node tools/content/validate-content.js --verbose` | +| Server services/routes | `cd server && npm test` | +| Frontend HUD | `cd frontend && npm run build` | +| Host setup | `bash tools/setup/check-host.sh` | +| VM profile/build logic | `bash tools/vm/build-workstation.sh --dry-run` when available; otherwise inspect generated command/output | +| Live VM runtime | `virsh --connect qemu:///system list --all`; targeted SSH/HTTP checks | +| Full smoke test | `bash scripts/start-game.sh`, then use HUD in ares workstation | + +--- + +## 11. Known Risk Areas / Tech Debt + +- **VM rebuilds**: build scripts can destroy/recreate `sc-` domains; confirm intent before force paths. +- **Cloud-init profiles**: YAML and shell quoting are fragile in `tools/vm/profiles/*.sh`. +- **Readiness checks**: networking, cloud-init, LightDM, and SSH can hang builds if guest state drifts. +- **Save/snapshot drift**: save refs can point at missing domains or snapshots; recovery handling matters. +- **Real VM validation**: many behaviors need live libvirt smoke tests beyond unit tests. +- **Content cross-references**: IDs, world flags, dialogue, branches, and tickets can silently desync. +- **Generated frontend**: server serves `frontend/dist` when present, so rebuild after UI changes. +- **Workstation performance**: Chromium/XFCE can cause RAM pressure and perceived hangs. +- **Internal HTTPS**: `tools/lib/internal-https.sh` is the shared source for launcher TLS env and in-VM Portal/Sage/company URLs; avoid reintroducing per-script HTTP fallbacks. +- **Desktop launcher trust**: Trust all `/home/player/Desktop/*.desktop` files through the real player DBus session via `/usr/local/bin/trust-desktop-launchers`; use `tools/vm/repair-workstation-launchers.sh` for live VMs. +- **Docs drift**: some older roadmap/status docs may lag active implementation. + +--- + +## 12. 
Anti-Patterns + +- Do not fake SSH, terminals, or validation results for core gameplay. +- Do not validate quests by matching commands the player typed. +- Do not operate on non-`sc-` libvirt domains from game scripts. +- Do not mutate `content/` as runtime save state. +- Do not run quest-prep scripts against live player VMs unless explicitly intended. +- Do not write save JSON ad hoc; go through `SaveState.js`. +- Do not rely only on QEMU guest agent IP discovery. +- Do not place scratch files in the repo root. +- Do not turn this map into a changelog or README replacement. + +--- + +## 13. Agent Workflow Notes + +1. Start with this map. +2. Use hot paths to choose files. +3. Prefer symbol/search-based context over broad file loading. +4. Keep edits narrow. +5. Update this map only when structure, ownership, contracts, workflows, or known risk areas change. +6. Append a one-line changelog entry for meaningful updates. + +--- + +## 14. Change Log + +- 2026-04-30 Initial root map created; updated VM tooling notes for seed ISO detach and installer/rebuild image path normalization. +- 2026-05-02 Centralized internal HTTPS launch URLs/TLS env and workstation desktop launcher trust repair paths. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0813a36 --- /dev/null +++ b/README.md @@ -0,0 +1,155 @@ +# Sysadmin Chronicles + +A native Linux game where you work as a junior sysadmin at Axiom Works, handling +real tickets inside real Linux virtual machines managed by QEMU/KVM. + +**Status**: Node.js server + Svelte HUD implemented. Server, frontend, and all +services are built. Pending: Phase 7 workstation VM verification + Phase 10 full playtest. + +--- + +## Architecture Summary + +The game runs as a Node.js server on the host, serving a Svelte web HUD into the +workstation VM's browser. The player works inside a real XFCE desktop. 
+ +``` +Host machine +├── Node.js game server (port 3000) — quest logic, validation, VM control +└── Svelte HUD — tickets, mail, Sage, docs (served by game server) + +Workstation VM (sc-workstation / ares) — Debian 12 XFCE desktop +├── Chromium → http://192.168.100.1:3000 (HUD, auto-opens on login) +└── Tilix → SSH to hermes/vulcan (real terminal, real SSH) + +Target VMs (headless) +├── sc-web-server (hermes) — Q002–Q005, Q007 +└── sc-build-machine (vulcan) — Q006, Q008 +``` + +Quest completion is validated by the server SSHing into the target VM and +evaluating real system state — not by tracking commands typed. + +--- + +## Quick Start (Development) + +### Prerequisites + +```bash +# Install host dependencies +sudo apt install qemu-system-x86 libvirt-daemon-system libvirt-clients qemu-utils \ + nodejs npm virt-viewer + +# Add yourself to the libvirt group +sudo usermod -aG libvirt $USER && newgrp libvirt +``` + +### First-Time Setup + +```bash +# Check host capabilities +bash tools/setup/check-host.sh + +# Create libvirt networks, storage pool, and SSH keys +bash tools/setup/first-run-setup.sh + +# Build VM images and provision quest baselines +bash tools/setup/seed-vms.sh +``` + +### Build the Frontend + +```bash +cd frontend && npm install && npm run build && cd .. +``` + +### Run the Game + +```bash +# Start game server + open workstation VM via SPICE +bash scripts/start-game.sh + +# Or run server only (for development/testing) +cd server && npm install && node src/index.js +``` + +### Validate Content + +```bash +node tools/content/validate-content.js --verbose +``` + +### Run Server Tests + +```bash +cd server && npm test +``` + +--- + +## Project Structure + +``` +sysadmin-chronicles/ +│ +├── server/ Node.js game server +│ └── src/ +│ ├── index.js Entry point — Express + WebSocket +│ ├── routes/ REST API routes +│ └── services/ ContentLoader, QuestEngine, ValidationEngine, etc. 
+│ +├── frontend/ Svelte web HUD +│ ├── src/ Components, api.js +│ └── dist/ Built output (served by game server) +│ +├── scripts/ +│ └── start-game.sh Start server + open SPICE viewer +│ +├── content/ All game content (JSON — unchanged) +│ ├── quests/ Q001–Q008 +│ ├── tickets/ T001–T008 +│ ├── incidents/ I001–I003 +│ ├── dialogue/ All NPC dialogue files +│ ├── vm_profiles/ workstation, web_server, build_machine +│ └── progression/ trust_unlocks.json +│ +├── tools/ +│ ├── setup/ check-host.sh, first-run-setup.sh, seed-vms.sh +│ ├── vm/ build scripts, quest-prep/, suppress-maintenance-noise.sh +│ └── content/ validate-content.js, verify-clue-fingerprints.js +│ +├── docs/ +│ ├── ARCHITECTURE.md System design +│ ├── ROADMAP.md Phase tracking +│ └── QUEST_AUTHORING.md Content authoring guide +``` + +--- + +## Key Design Rules + +- Game server is the single source of truth — frontend only displays results +- Validation is server-side only — SSH into VMs, evaluate real system state +- Quest completion is state-based only — never command-sequence tracking +- Only operate on `sc-` prefixed libvirt domains +- Content JSON is read-only at runtime — ContentLoader reads once at startup +- Save file is at `~/.local/share/sysadmin-chronicles/save.json` + +--- + +## Current Build State + +### Done +- Node.js game server with all services (ContentLoader, QuestEngine, TicketService, + ValidationEngine, VMManager, TrustSystem, ProgressionSystem, EmailService, + SageService, ShiftTimer, IncidentScheduler, ShiftReviewService, CertificationService) +- All REST routes (tickets, mail, docs, sage, state, vms, session) +- Svelte frontend with all panels (Tickets, Mail, Docs, Sage, VMs, Header) +- Built frontend (`frontend/dist/`) served by game server +- Content: Q001–Q008, T001–T008, I001–I003, all dialogue, world_flags, trust_unlocks +- Content validator: `validate-content.js` exits zero + +### Pending +- Phase 7: verify XFCE workstation VM (SPICE display, Chromium autostart, Tilix 
default) +- Phase 10: full end-to-end playtest (Q001→Q002 with real VMs) diff --git a/RTK.md b/RTK.md new file mode 100644 index 0000000..9ec5e02 --- /dev/null +++ b/RTK.md @@ -0,0 +1,31 @@ +# RTK Usage + +`rtk` is installed at `/home/aaron/.cargo/bin/rtk`. Use it by default for noisy shell commands so agent sessions spend fewer tokens on low-value output. + +Prefer `rtk` for: + +- directory and file discovery: `rtk ls`, `rtk tree`, `rtk find` +- search output: `rtk grep` +- tests and builds with noisy output: `rtk test`, `rtk npm test`, `rtk tsc`, `rtk lint` +- dependency and environment summaries: `rtk deps`, `rtk env` +- logs, JSON, diffs, and command summaries: `rtk log`, `rtk json`, `rtk diff`, `rtk summary` + +Use raw shell commands instead when exact, unfiltered, streaming, or interactive output matters. Examples: `sed -n` for precise line ranges, `tail -f` for live logs, `virsh console`, prompts, password entry, TTY tools, and commands whose full output is the thing being inspected. + +Examples: + +```bash +rtk ls -la +rtk tree -L 2 +rtk grep "READY_COMMAND" tools/vm +rtk npm test +rtk deps +``` + +Useful checks: + +```bash +rtk --version +rtk gain +rtk init --codex --show +``` diff --git a/company-website/about.html b/company-website/about.html new file mode 100644 index 0000000..f29fd40 --- /dev/null +++ b/company-website/about.html @@ -0,0 +1,152 @@ + + + + + + About — Axiom Works + + + + + + + + +
+ +
+
+

Where we started

+

Axiom Works was founded in 2011 by a small team of operations veterans who were tired of watching mid-size manufacturers paper over process problems with spreadsheets and tribal knowledge. The original product was a rules engine. It was not elegant, but it worked.

+

Over the next few years, that rules engine became AxiomFlow — a full workflow automation platform built for the realities of industrial operations: shift handoffs, exception handling, equipment downtime, and the kind of edge cases that enterprise vendors prefer not to demo.

+

We have been profitable since 2014. We have not taken outside investment. This is a deliberate choice.

+ +
+
+ 2011 + Founded +
+
+ 280 + Employees +
+
+ 140+ + Customers +
+
+
+
+

What we actually do

+

AxiomFlow automates the workflows that keep operations running — purchase approvals, quality checks, shift reports, compliance sign-offs, exception escalations. The kind of work that gets done in every facility but rarely appears in a vendor's use-case library.

+

Our customers are mostly mid-size manufacturers and logistics companies in the 200–2,000 employee range. They have real IT departments and real process complexity. They don't need a product designed for a 12-person SaaS startup.

+

We sell to operations leaders and implement with their IT teams. We do not use resellers. When something needs to be configured, a person from Axiom Works handles it.

+
+
+ +
+ +
+ +

How we work

+

These aren't values we arrived at in a workshop. They're conclusions from fourteen years of watching what works and what doesn't.

+ +
+
+
    +
  • + Reliability over features + A workflow automation platform that goes down during a shift is worse than no platform at all. Uptime is not a selling point. It is the baseline. +
  • +
  • + Customers are not case studies + We do not publish customer names without permission. We do not write up their implementations as thought leadership. Their problems are not content. +
  • +
  • + Slow is smooth, smooth is fast + We have never shipped a feature to meet a conference deadline. This is probably why our release notes are boring and our customers don't have to roll back. +
  • +
+
+
+
    +
  • + Support is not a department + Every customer has a named contact. Support issues get routed to the people who built the feature. The alternative is faster for us and worse for everyone. +
  • +
  • + Honest pricing + Our pricing is published. We do not have tiers designed to make the middle option look reasonable. The contract you sign is the contract you get. +
  • +
  • + We stay in our lane + We automate workflows for operations teams. We are not building an AI platform, a marketplace, or a suite of adjacent products nobody asked for. +
  • +
+
+
+
+ +
+ +
+ +

Talk to us

+

We respond to every inquiry. Usually the same day, always within 24 hours.

+ +
+
+
💬
+

Sales Inquiries

+

If you'd like to see a demo or talk through whether AxiomFlow is a fit for your operation, reach out to our sales team.

+

sales@axiomworks.com

+
+
+
🛠️
+

Customer Support

+

Existing customers can reach support directly. Your account contact's email is in your onboarding documentation.

+

support@axiomworks.com

+
+
+
🏢
+

Our Office

+

We're headquartered downtown, or close enough to it that we say downtown and nobody pushes back.

+

Axiom Works, Inc.
Downtown-Adjacent, Suite 300

+
+
+
+ +
+

Meet the team behind the platform

+

The people who build and support AxiomFlow have been doing this for a while. Some of them have been here since the rules engine days.

+ Our Team +
+ +
+ +
+ +

© 2025 Axiom Works, Inc. All rights reserved.  ·  About  ·  Products  ·  Our Team

+
+ + + diff --git a/company-website/assets/annika_gosse_ux_designer.png b/company-website/assets/annika_gosse_ux_designer.png new file mode 100644 index 0000000..21af969 Binary files /dev/null and b/company-website/assets/annika_gosse_ux_designer.png differ diff --git a/company-website/assets/ben_portillo_product_manager_axiomdash.png b/company-website/assets/ben_portillo_product_manager_axiomdash.png new file mode 100644 index 0000000..92f3c47 Binary files /dev/null and b/company-website/assets/ben_portillo_product_manager_axiomdash.png differ diff --git a/company-website/assets/cora_reyes_software_engineer.png b/company-website/assets/cora_reyes_software_engineer.png new file mode 100644 index 0000000..d1bfe2e Binary files /dev/null and b/company-website/assets/cora_reyes_software_engineer.png differ diff --git a/company-website/assets/dave-kowalski.png b/company-website/assets/dave-kowalski.png new file mode 100644 index 0000000..cc9ab2b Binary files /dev/null and b/company-website/assets/dave-kowalski.png differ diff --git a/company-website/assets/david_park_cto_cofounder.png b/company-website/assets/david_park_cto_cofounder.png new file mode 100644 index 0000000..9a421a8 Binary files /dev/null and b/company-website/assets/david_park_cto_cofounder.png differ diff --git a/company-website/assets/derek_ashford_financial_controller.png b/company-website/assets/derek_ashford_financial_controller.png new file mode 100644 index 0000000..3b6edb7 Binary files /dev/null and b/company-website/assets/derek_ashford_financial_controller.png differ diff --git a/company-website/assets/ellen_marsh_ceo_cofounder.png b/company-website/assets/ellen_marsh_ceo_cofounder.png new file mode 100644 index 0000000..d28236a Binary files /dev/null and b/company-website/assets/ellen_marsh_ceo_cofounder.png differ diff --git a/company-website/assets/james_osei_security_analyst.png b/company-website/assets/james_osei_security_analyst.png new file mode 100644 index 0000000..bcd7fc9 Binary files 
/dev/null and b/company-website/assets/james_osei_security_analyst.png differ diff --git a/company-website/assets/karen_volkov_coo.png b/company-website/assets/karen_volkov_coo.png new file mode 100644 index 0000000..d1f3644 Binary files /dev/null and b/company-website/assets/karen_volkov_coo.png differ diff --git a/company-website/assets/lisa_ferreira_customer_success_manager.png b/company-website/assets/lisa_ferreira_customer_success_manager.png new file mode 100644 index 0000000..784fdbe Binary files /dev/null and b/company-website/assets/lisa_ferreira_customer_success_manager.png differ diff --git a/company-website/assets/logo.png b/company-website/assets/logo.png new file mode 100644 index 0000000..ead0cc3 Binary files /dev/null and b/company-website/assets/logo.png differ diff --git a/company-website/assets/marcus-webb.png b/company-website/assets/marcus-webb.png new file mode 100644 index 0000000..41eeba6 Binary files /dev/null and b/company-website/assets/marcus-webb.png differ diff --git a/company-website/assets/mei_lin_senior_software_engineer.png b/company-website/assets/mei_lin_senior_software_engineer.png new file mode 100644 index 0000000..8a52da5 Binary files /dev/null and b/company-website/assets/mei_lin_senior_software_engineer.png differ diff --git a/company-website/assets/mike_kawamoto_account_executive.png b/company-website/assets/mike_kawamoto_account_executive.png new file mode 100644 index 0000000..5c79646 Binary files /dev/null and b/company-website/assets/mike_kawamoto_account_executive.png differ diff --git a/company-website/assets/nikhil_sharma_platform_engineer.png b/company-website/assets/nikhil_sharma_platform_engineer.png new file mode 100644 index 0000000..30cabea Binary files /dev/null and b/company-website/assets/nikhil_sharma_platform_engineer.png differ diff --git a/company-website/assets/owen_blake_office_manager.png b/company-website/assets/owen_blake_office_manager.png new file mode 100644 index 0000000..80634d8 Binary files 
/dev/null and b/company-website/assets/owen_blake_office_manager.png differ diff --git a/company-website/assets/phil_ruiz_vp_sales.png b/company-website/assets/phil_ruiz_vp_sales.png new file mode 100644 index 0000000..e14c36a Binary files /dev/null and b/company-website/assets/phil_ruiz_vp_sales.png differ diff --git a/company-website/assets/priya-nair.png b/company-website/assets/priya-nair.png new file mode 100644 index 0000000..5e165c3 Binary files /dev/null and b/company-website/assets/priya-nair.png differ diff --git a/company-website/assets/rachel_brandt_cfo.png b/company-website/assets/rachel_brandt_cfo.png new file mode 100644 index 0000000..4c01985 Binary files /dev/null and b/company-website/assets/rachel_brandt_cfo.png differ diff --git a/company-website/assets/rachel_huang_systems_administrator.png b/company-website/assets/rachel_huang_systems_administrator.png new file mode 100644 index 0000000..2088382 Binary files /dev/null and b/company-website/assets/rachel_huang_systems_administrator.png differ diff --git a/company-website/assets/sandra_wu_hr_manager.png b/company-website/assets/sandra_wu_hr_manager.png new file mode 100644 index 0000000..d29624c Binary files /dev/null and b/company-website/assets/sandra_wu_hr_manager.png differ diff --git a/company-website/assets/sarah-chen.png b/company-website/assets/sarah-chen.png new file mode 100644 index 0000000..d8139ca Binary files /dev/null and b/company-website/assets/sarah-chen.png differ diff --git a/company-website/assets/tanya_okafor_head_customer_success.png b/company-website/assets/tanya_okafor_head_customer_success.png new file mode 100644 index 0000000..8733afe Binary files /dev/null and b/company-website/assets/tanya_okafor_head_customer_success.png differ diff --git a/company-website/assets/tom_malaney_network_engineer.png b/company-website/assets/tom_malaney_network_engineer.png new file mode 100644 index 0000000..90ee245 Binary files /dev/null and 
b/company-website/assets/tom_malaney_network_engineer.png differ diff --git a/company-website/assets/yusuf_halabi_engineering_manager.png b/company-website/assets/yusuf_halabi_engineering_manager.png new file mode 100644 index 0000000..c297e06 Binary files /dev/null and b/company-website/assets/yusuf_halabi_engineering_manager.png differ diff --git a/company-website/index.html b/company-website/index.html new file mode 100644 index 0000000..382eca7 --- /dev/null +++ b/company-website/index.html @@ -0,0 +1,142 @@ + + + + + + Axiom Works — Workflow Automation for Modern Operations + + + + + + +
+
+

Enterprise Workflow Automation

+

Streamline. Scale. Succeed.

+

Axiom Works helps mid-size manufacturers and logistics companies automate the workflows that keep operations running — without the complexity that gets in the way.

+ +
+
+ +
+ +
+ +

Built for the way operations actually work

+

Most workflow tools are designed for software teams. AxiomFlow was designed for the people running shifts, managing fleets, and keeping production lines moving.

+ +
+
+
⚙️
+

Configurable Without Consultants

+

Your team can build and modify workflows without waiting on a vendor or a dedicated IT project. If you can describe the process, you can automate it.

+
+
+
📊
+

Visibility Across the Operation

+

AxiomDash gives managers a live view of what's moving, what's stalled, and where the bottlenecks are — without pulling reports by hand.

+
+
+
🔗
+

Connects to What You Have

+

We integrate with ERP systems, warehouse management tools, and the spreadsheets your team has been using since 2009. We don't ask you to start over.

+
+
+
🛡️
+

Supported by a Real Team

+

Every customer gets a named support contact. When something breaks during a shift, you call a person — not a ticketing system that routes you to a chatbot.

+
+
+
+ +
+ +
+ +

The AxiomFlow platform

+

A connected suite of tools for workflow automation, reporting, and system integration — built to run reliably at scale.

+ +
+
+
🔄
+

AxiomFlow

+

The core workflow automation platform. Define processes, assign tasks, set triggers, and track completion — all in one place. Trusted by over 140 customers across manufacturing, logistics, and distribution.

+ Flagship Product +
+
+
📈
+

AxiomDash

+

Real-time reporting and analytics built on top of your AxiomFlow data. Track KPIs, spot trends, and share dashboards with stakeholders — without an analyst in the loop.

+ Analytics Add-On +
+
+
🔌
+

AxiomSync

+

Our legacy data integration layer, connecting older systems to AxiomFlow where modern connectors aren't available. Available for existing customers on legacy contracts.

+ Legacy +
+
+
+ +
+ +
+ +

A track record that holds up

+

We've been doing this since 2011. The numbers reflect what happens when you focus on one thing and keep doing it well.

+ +
+
+ 140+ + Active Customers +
+
+ 14 + Years in Business +
+
+ 280 + Employees +
+
+ 99.6% + Uptime (12-Month Avg) +
+
+
+ +
+

Ready to see AxiomFlow in action?

+

Schedule a 30-minute demo with one of our solutions engineers. No slides, no pitch deck — just the product.

+ Request a Demo +
+ +
+ +
+ +

© 2025 Axiom Works, Inc. All rights reserved.  ·  About  ·  Products  ·  Our Team

+
+ + + diff --git a/company-website/people.html b/company-website/people.html new file mode 100644 index 0000000..c428710 --- /dev/null +++ b/company-website/people.html @@ -0,0 +1,385 @@ + + + + + + Our Team — Axiom Works + + + + + + + + + +
+ + +
+ +

The people running the company

+

Axiom Works has been founder-led since 2011. The leadership team is small and has been largely stable since 2015.

+ +
+ +
+ Ellen Marsh + +
+

Ellen Marsh

+

CEO & Co-Founder

+

Ellen built the first version of AxiomFlow's rules engine after a decade running operations at a mid-size manufacturer and deciding the tools available were not good enough. She has no CS background, which is probably why the product ended up designed for people who don't either. Attends all-hands twice a year. Has final say on pricing and customer commitments. Does not use Slack.

+
+
+ +
+ David Park + +
+

David Park

+

CTO & Co-Founder

+

Wrote the original rules engine in 2011 and has been quietly refactoring it ever since. David now manages engineering managers rather than engineers, which he describes as an acceptable trade. Reviews architecture decisions. Still has opinions about the data model. Has a standing Thursday meeting with security that hasn't moved since 2017.

+
+
+ +
+ Karen Volkov + +
+

Karen Volkov

+

Chief Operating Officer

+

Joined in 2014 to turn a functional startup into a company that could scale past 50 people. Responsible for the fact that Axiom Works has documented processes for anything at all. Has opinions about infrastructure costs that occasionally surface in IT's world via Finance. Prefers decisions with clear owners and deadlines.

+
+
+ +
+ Rachel Brandt + +
+

Rachel Brandt

+

Chief Financial Officer

+

Joined in 2016 from a regional accounting firm that handled several of Axiom Works' early customers. Has been working to consolidate the company's cloud spend since 2019. Methodical. Approves all capital expenditure over $5,000. Does not enjoy surprises in the infrastructure budget.

+
+
+ +
+
+ +
+ + +
+ +

Getting customers and keeping them

+

Axiom Works does not use resellers. Every customer relationship runs through this team.

+ +
+ +
+ Phil Ruiz + +

Phil Ruiz

+

VP of Sales

+

Has been promising features to prospects since 2016. Maintains a warm relationship with infrastructure because Marcus once fixed the staging environment with twenty minutes to spare before a demo. Travels frequently. Expense reports submitted promptly.

+
+ +
+ Tanya Okafor + +

Tanya Okafor

+

Head of Customer Success

+

Manages post-sale relationships for all AxiomFlow customers and the twelve AxiomSync accounts that haven't migrated yet. Uses the word "partnership" a lot. Usually the first person to know when something is wrong in production, because a customer has already called her.

+
+ +
+ Mike Kawamoto + +

Mike Kawamoto

+

Account Executive

+

Handles mid-market manufacturing accounts in the northeast. Has closed more deals in Q4 than in any other quarter for four years running. Believes strongly in the demo environment.

+
+ +
+ Lisa Ferreira + +

Lisa Ferreira

+

Customer Success Manager

+

Manages onboarding for new AxiomFlow deployments. Responsible for the onboarding documentation that actually gets used, as opposed to the documentation that exists. Has a talent for figuring out what customers mean rather than what they say.

+
+ +
+
+ +
+ + +
+ +

What we build and why

+

The product team defines the roadmap and answers for it when the roadmap turns out to be wrong.

+ +
+ +
+ Sarah Chen + +

Sarah Chen

+

Product Manager, AxiomFlow

+

Owns the AxiomFlow roadmap. Coordinates between sales, engineering, and customers to decide what gets built and in what order. Has strong feelings about the demo environment because it's the product she can see. Emails Monday mornings.

+
+ +
+ Ben Portillo + +

Ben Portillo

+

Product Manager, AxiomDash

+

Leads product development for the analytics add-on. Works closely with the largest accounts to understand what they actually want from dashboards, which is usually different from what they asked for.

+
+ +
+ Annika Gosse + +

Annika Gosse

+

UX Designer

+

Responsible for AxiomFlow's interface layer. Has been advocating for a redesign of the workflow builder since 2022. Produces research that is read carefully and then partially implemented. Patient.

+
+ +
+
+ +
+ + +
+ +

The people who build it

+

The engineering team is distributed across product development, integrations, and platform reliability.

+ +
+ +
+ Yusuf Halabi + +

Yusuf Halabi

+

Engineering Manager

+

Reports to the CTO and manages the core AxiomFlow platform team. Has opinions about test coverage. Occasionally leaves pull request comments that are technically correct and diplomatically suboptimal. Runs the Thursday architecture review.

+
+ +
+ Mei Lin + +

Mei Lin

+

Senior Software Engineer

+

Has maintained AxiomSync's integration layer since 2018. Knows more about it than anyone would prefer, including herself. Currently leading the migration tooling project to help the remaining AxiomSync customers off the platform. Thorough commit messages.

+
+ +
+ Cora Reyes + +

Cora Reyes

+

Software Engineer

+

Works on the AxiomDash reporting pipeline. Joined in 2022 as a mid-level hire and has been moving steadily toward senior. Has submitted more internal RFCs than anyone else on the team in the past year.

+
+ +
+ Nikhil Sharma + +

Nikhil Sharma

+

Platform Engineer

+

Owns the build and release pipeline, the internal CI infrastructure, and the parts of the deployment process that nobody else wants to think about. Has strong opinions about reproducible builds. Occasionally sends Slack messages at 6am.

+
+ +
+
+ +
+ + +
+ +

Keeping everything running

+

The team that manages internal systems, the hosted demo environments, and the infrastructure that everything else depends on.

+ +
+ +
+ Dave Kowalski + +

Dave Kowalski

+

Director of IT Operations

+

Oversees systems, networking, and IT support. Background is originally in network engineering. Has been with Axiom Works since 2015. Describes the infrastructure as mature. Has said "we should really document that" more times than he would admit.

+
+ +
+ Marcus Webb + +

Marcus Webb

+

Senior Systems Administrator

+

Six years at Axiom Works. Knows where everything is and why it's there. Communicates efficiently. Available on Slack during business hours and occasionally at 11pm when something is on his mind.

+
+ +
+ Rachel Huang + +

Rachel Huang

+

Systems Administrator

+

Handles provisioning, patch cycles, and the ongoing negotiation with finance over cloud consolidation. Came from a managed services background. Has strong opinions about monitoring dashboards, most of which are correct.

+
+ +
+ Tom Malaney + +

Tom Malaney

+

Network Engineer

+

Responsible for network infrastructure across the office and the hosted environments. Has been on-call for more holiday weekends than he prefers to discuss. Thorough in documentation when he finds time to write it.

+
+ +
+
+ +
+ + +
+ +

Risk, access, and the things that matter when they go wrong

+

Security at Axiom Works is treated as a function, not a checkbox.

+ +
+ +
+ Priya Nair + +

Priya Nair

+

Head of Security & Compliance

+

Leads all security reviews, access audits, and compliance programmes. Frames concerns in terms of what happens when things go wrong, rather than whether they will. Usually correct. Not someone who appreciates being told about a change after it's already in production.

+
+ +
+ James Osei + +

James Osei

+

Security Analyst

+

Handles vulnerability assessments, access reviews, and quarterly compliance reporting. Methodical. Has a spreadsheet for everything, which is not a criticism.

+
+ +
+
+ +
+ + +
+ +

The numbers and the people who manage them

+

A small team that keeps the books, manages the office, and appears on CC lines of emails that involve infrastructure spending.

+ +
+ +
+ Derek Ashford + +

Derek Ashford

+

Financial Controller

+

Manages financial reporting, budget tracking, and vendor contracts. Does not appear at team meetings. Does appear on CC lines of any email that mentions cloud costs, hardware procurement, or infrastructure budget. Always replies-all.

+
+ +
+ Sandra Wu + +

Sandra Wu

+

HR Manager

+

Manages hiring, onboarding, and employee relations. Has been with Axiom Works since 2016. Responsible for the onboarding process that new employees go through, which is thorough and takes three days. Sends birthday emails on time, every time.

+
+ +
+ Owen Blake + +

Owen Blake

+

Office Manager

+

Keeps the office running. Manages facilities, supplies, vendor relationships for non-technical services, and the kitchen situation. Has fixed more things than his job title implies. The person you contact if the conference room equipment stops working.

+
+ +
+
+ +
+

We're hiring — carefully

+

We add people slowly and try to keep them. Open roles are listed on our careers page. We don't use recruiters.

+ Get in Touch +
+ +
+ +
+ +

© 2025 Axiom Works, Inc. All rights reserved.  ·  About  ·  Products  ·  Our Team

+
+ + + diff --git a/company-website/products.html b/company-website/products.html new file mode 100644 index 0000000..8324bf8 --- /dev/null +++ b/company-website/products.html @@ -0,0 +1,150 @@ + + + + + + Products — Axiom Works + + + + + + + + +
+ +
+ +

One platform, built for operations

+

AxiomFlow is the core. AxiomDash extends it with analytics. AxiomSync bridges older systems where needed. All three are designed to run together — or independently, where that's what makes sense.

+
+ + +
+
+ Flagship +

AxiomFlow

+

Workflow automation platform for mid-size manufacturers, logistics providers, and distribution operations. Define, deploy, and monitor business processes without a development team or a multi-quarter implementation project.

+

AxiomFlow handles the workflows that matter most: approvals, task routing, exception handling, compliance sign-offs, shift handoffs, and the dozens of other processes that run every day and fail quietly when something goes wrong.

+

Currently active across 140+ customers. Most deployments are live within 60–90 days of contract signing.

+
+
+
    +
  • Visual workflow builder — no code required for standard processes
  • +
  • Role-based task routing with fallback escalation rules
  • +
  • Trigger-based automation: time, event, threshold, or external webhook
  • +
  • Audit trail on every workflow action — immutable, exportable
  • +
  • Exception handling with configurable escalation paths
  • +
  • Shift and calendar-aware scheduling
  • +
  • ERP and WMS integration via REST API and native connectors
  • +
  • Single sign-on (SAML 2.0, OIDC)
  • +
  • On-premise or private cloud deployment
  • +
  • 99.6% uptime SLA (12-month rolling average)
  • +
+
+
+ + +
+
+ Analytics Add-On +

AxiomDash

+

Reporting and analytics built directly on top of your AxiomFlow data. No ETL pipeline, no separate database, no BI tool license to negotiate. If it happened in AxiomFlow, AxiomDash can show it.

+

Designed for operations managers and team leads who need a live view of what's moving, what's overdue, and where the recurring problems are — without pulling reports by hand or waiting on an analyst.

+
+
+
    +
  • Pre-built dashboards for common operational metrics
  • +
  • Custom dashboard builder with drag-and-drop layout
  • +
  • Live data — no scheduled refresh, no stale snapshots
  • +
  • Threshold alerts via email or webhook
  • +
  • Shareable read-only views for stakeholders without platform access
  • +
  • Export to CSV, PDF, or scheduled email delivery
  • +
  • Role-based visibility — teams see their data, not each other's
  • +
  • Available as an add-on to any AxiomFlow subscription
  • +
+
+
+ + +
+
+ End of Sale +

AxiomSync

+

Legacy data integration layer for connecting older systems — primarily pre-2015 ERP installations and proprietary shop-floor software — to AxiomFlow where modern API connectors aren't available.

+

AxiomSync has been end-of-sale since 2021. It remains in active maintenance for existing customers on legacy contracts. No new deployments are supported.

+

Customers still on AxiomSync are encouraged to discuss migration options with their account contact. The migration path to native AxiomFlow connectors is well-documented and typically takes one to two quarters depending on integration complexity.

+
+
+
    +
  • File-based and database-level integration for legacy systems
  • +
  • Scheduled sync jobs with configurable polling intervals
  • +
  • Transform and mapping layer for data normalization
  • +
  • Error logging and alerting for failed sync events
  • +
  • Maintained for existing customers through end of current contract terms
  • +
+

If you are an AxiomSync customer and have questions about your contract or migration timeline, contact your account representative directly.

+
+
+ +
+ +
+ +

How it runs

+

AxiomFlow is designed to run in your environment — not ours. We support private cloud and on-premise deployments for customers with data residency or security requirements that preclude multi-tenant SaaS.

+ +
+
+
🏗️
+

On-Premise

+

Full installation in your data centre. You own the stack. We provide the software, the documentation, and the support. Suitable for customers with strict data residency requirements or existing on-prem infrastructure.

+
+
+
☁️
+

Private Cloud

+

Deployed in your cloud tenancy (AWS, Azure, or GCP). Single-tenant. Your VPC, your keys, your audit logs. We handle the application layer; you retain control of the infrastructure.

+
+
+
🤝
+

Managed Hosting

+

For customers who want the isolation of a private deployment without the operational overhead, we offer managed single-tenant hosting in our infrastructure. Contact sales for availability.

+
+
+
+ +
+

See it running in your scenario

+

We'll walk through a demo built around your actual processes, not a generic workflow that happens to look impressive on a projector.

+ Request a Demo +
+ +
+ +
+ +

© 2025 Axiom Works, Inc. All rights reserved.  ·  About  ·  Products  ·  Our Team

+
+ + + diff --git a/company-website/style.css b/company-website/style.css new file mode 100644 index 0000000..f5d7aac --- /dev/null +++ b/company-website/style.css @@ -0,0 +1,377 @@ +*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + +:root { + --navy: #1b3558; + --navy-dk: #112240; + --blue: #2563a0; + --blue-lt: #3b82c4; + --bg: #f4f6f9; + --white: #ffffff; + --text: #1a1f2e; + --muted: #6b7280; + --border: #d1d9e6; + --shadow: 0 1px 4px rgba(0,0,0,.10), 0 4px 16px rgba(0,0,0,.06); + --r: 6px; + --max: 1100px; +} + +body { + font-family: system-ui, -apple-system, "Segoe UI", Helvetica, Arial, sans-serif; + font-size: 16px; + line-height: 1.6; + color: var(--text); + background: var(--bg); +} + +a { color: var(--blue); text-decoration: none; } +a:hover { text-decoration: underline; } +img { display: block; max-width: 100%; } + +/* ── NAV ─────────────────────────────────────────── */ +nav { + background: var(--navy-dk); + position: sticky; + top: 0; + z-index: 10; +} + +.nav-inner { + max-width: var(--max); + margin: 0 auto; + padding: 0 1.5rem; + display: flex; + align-items: center; + gap: 2.5rem; + height: 60px; +} + +.nav-logo { + display: flex; + align-items: center; + gap: 0.75rem; + text-decoration: none; +} + +.nav-logo img { height: 32px; width: 32px; border-radius: 4px; } + +.nav-logo span { + color: #ffffff; + font-size: 1.05rem; + font-weight: 600; + letter-spacing: 0.01em; +} + +.nav-links { + display: flex; + gap: 0; + list-style: none; + margin-left: auto; +} + +.nav-links a { + display: block; + color: rgba(255,255,255,.78); + font-size: 0.9rem; + padding: 0 1rem; + line-height: 60px; + letter-spacing: 0.01em; + transition: color .15s, background .15s; +} + +.nav-links a:hover, +.nav-links a.active { color: #fff; background: rgba(255,255,255,.08); text-decoration: none; } + +/* ── HERO ────────────────────────────────────────── */ +.hero { + background: linear-gradient(135deg, var(--navy-dk) 0%, var(--navy) 55%, var(--blue) 
100%); + color: #fff; + padding: 6rem 1.5rem 5rem; + text-align: center; +} + +.hero-inner { max-width: 680px; margin: 0 auto; } + +.hero-eyebrow { + text-transform: uppercase; + letter-spacing: 0.18em; + font-size: 0.8rem; + color: rgba(255,255,255,.6); + margin-bottom: 1rem; +} + +.hero h1 { + font-size: clamp(2.2rem, 5vw, 3.4rem); + font-weight: 700; + line-height: 1.15; + margin-bottom: 1.25rem; +} + +.hero p { + font-size: 1.15rem; + color: rgba(255,255,255,.82); + margin-bottom: 2.25rem; + max-width: 520px; + margin-left: auto; + margin-right: auto; +} + +.btn { + display: inline-block; + padding: 0.8rem 2rem; + border-radius: var(--r); + font-size: 0.95rem; + font-weight: 600; + cursor: pointer; + transition: filter .15s; +} + +.btn:hover { filter: brightness(1.1); text-decoration: none; } + +.btn-primary { background: #fff; color: var(--navy); } +.btn-outline { background: transparent; color: #fff; border: 2px solid rgba(255,255,255,.5); margin-left: 0.75rem; } + +/* ── PAGE WRAPPER ────────────────────────────────── */ +.page { max-width: var(--max); margin: 0 auto; padding: 3.5rem 1.5rem 5rem; } + +/* ── SECTION HEADINGS ────────────────────────────── */ +.section-label { + text-transform: uppercase; + letter-spacing: 0.14em; + font-size: 0.75rem; + color: var(--blue); + font-weight: 600; + margin-bottom: 0.5rem; +} + +h2.section-title { + font-size: 1.8rem; + font-weight: 700; + color: var(--navy-dk); + margin-bottom: 0.75rem; +} + +.section-intro { + color: var(--muted); + max-width: 580px; + margin-bottom: 2.5rem; +} + +/* ── CARDS ───────────────────────────────────────── */ +.card-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); + gap: 1.5rem; +} + +.card { + background: var(--white); + border: 1px solid var(--border); + border-radius: var(--r); + padding: 1.75rem; + box-shadow: var(--shadow); +} + +.card-icon { + width: 42px; + height: 42px; + border-radius: 8px; + background: var(--navy); + display: flex; + 
align-items: center; + justify-content: center; + margin-bottom: 1.1rem; + font-size: 1.3rem; +} + +.card h3 { font-size: 1.1rem; font-weight: 600; color: var(--navy-dk); margin-bottom: 0.5rem; } +.card p { font-size: 0.9rem; color: var(--muted); line-height: 1.6; } + +.card-tag { + display: inline-block; + margin-top: 1rem; + padding: 0.2rem 0.6rem; + border-radius: 4px; + font-size: 0.75rem; + font-weight: 600; + background: #e8f0fb; + color: var(--blue); +} + +.card-tag.legacy { background: #f0f0f0; color: var(--muted); } + +/* ── PEOPLE GRID ─────────────────────────────────── */ +.people-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 2rem; +} + +.person-card { + background: var(--white); + border: 1px solid var(--border); + border-radius: var(--r); + padding: 1.75rem 1.5rem; + box-shadow: var(--shadow); + text-align: center; +} + +.person-photo { + width: 110px; + height: 110px; + border-radius: 50%; + object-fit: cover; + margin: 0 auto 1.1rem; + border: 3px solid var(--border); +} + +.person-initial { + width: 110px; + height: 110px; + border-radius: 50%; + background: var(--navy); + color: #fff; + font-size: 2rem; + font-weight: 700; + display: flex; + align-items: center; + justify-content: center; + margin: 0 auto 1.1rem; +} + +.person-card h3 { font-size: 1rem; font-weight: 600; color: var(--navy-dk); margin-bottom: 0.25rem; } +.person-card .title { font-size: 0.82rem; color: var(--blue); font-weight: 500; margin-bottom: 0.5rem; } +.person-card p { font-size: 0.83rem; color: var(--muted); line-height: 1.55; } + +/* ── ABOUT SECTIONS ──────────────────────────────── */ +.about-block { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 3rem; + align-items: start; + margin-bottom: 3.5rem; +} + +@media (max-width: 720px) { + .about-block { grid-template-columns: 1fr; gap: 2rem; } +} + +.about-block h2 { font-size: 1.55rem; font-weight: 700; color: var(--navy-dk); margin-bottom: 1rem; } +.about-block p { 
color: var(--muted); margin-bottom: 0.9rem; font-size: 0.95rem; } + +.stat-row { display: flex; gap: 2rem; flex-wrap: wrap; margin-top: 2rem; } + +.stat { text-align: center; } +.stat-num { font-size: 2rem; font-weight: 700; color: var(--navy); display: block; } +.stat-lbl { font-size: 0.78rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--muted); } + +.values-list { list-style: none; } +.values-list li { + padding: 1rem 1.2rem; + border-left: 3px solid var(--blue); + margin-bottom: 1rem; + background: var(--white); + border-radius: 0 var(--r) var(--r) 0; + box-shadow: var(--shadow); +} + +.values-list li strong { display: block; color: var(--navy-dk); margin-bottom: 0.2rem; font-size: 0.95rem; } +.values-list li span { font-size: 0.87rem; color: var(--muted); } + +/* ── PRODUCT DETAIL ──────────────────────────────── */ +.product-feature { + display: grid; + grid-template-columns: 1fr 1.5fr; + gap: 2.5rem; + align-items: start; + background: var(--white); + border: 1px solid var(--border); + border-radius: var(--r); + padding: 2rem; + margin-bottom: 1.5rem; + box-shadow: var(--shadow); +} + +@media (max-width: 640px) { + .product-feature { grid-template-columns: 1fr; } +} + +.product-feature.legacy { opacity: 0.7; } + +.product-badge { + display: inline-block; + padding: 0.35rem 0.9rem; + border-radius: 4px; + font-size: 0.78rem; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.06em; + background: #e8f0fb; + color: var(--blue); + margin-bottom: 0.8rem; +} + +.product-badge.main { background: var(--navy); color: #fff; } +.product-badge.end-of-sale { background: #f0f0f0; color: var(--muted); } + +.product-feature h3 { font-size: 1.3rem; font-weight: 700; color: var(--navy-dk); margin-bottom: 0.5rem; } +.product-feature p { color: var(--muted); font-size: 0.92rem; line-height: 1.6; margin-bottom: 0.9rem; } + +.feature-list { list-style: none; } +.feature-list li { + font-size: 0.88rem; + color: var(--muted); + padding: 0.3rem 0; + 
padding-left: 1.2rem; + position: relative; +} +.feature-list li::before { content: "✓"; position: absolute; left: 0; color: var(--blue-lt); font-weight: 700; } + +/* ── BANNER ──────────────────────────────────────── */ +.cta-banner { + background: linear-gradient(135deg, var(--navy-dk), var(--blue)); + color: #fff; + border-radius: var(--r); + padding: 3rem 2rem; + text-align: center; + margin-top: 3rem; +} + +.cta-banner h2 { font-size: 1.6rem; margin-bottom: 0.75rem; } +.cta-banner p { color: rgba(255,255,255,.75); margin-bottom: 1.5rem; } + +/* ── FOOTER ──────────────────────────────────────── */ +footer { + background: var(--navy-dk); + color: rgba(255,255,255,.5); + font-size: 0.82rem; + text-align: center; + padding: 2rem 1.5rem; + margin-top: 0; +} + +footer a { color: rgba(255,255,255,.6); } +footer .footer-logo { + display: flex; + align-items: center; + justify-content: center; + gap: 0.5rem; + margin-bottom: 0.75rem; +} +footer .footer-logo img { height: 22px; width: 22px; opacity: 0.8; } +footer .footer-logo span { color: rgba(255,255,255,.75); font-weight: 600; font-size: 0.9rem; } + +/* ── PAGE HEADER (inner pages) ───────────────────── */ +.page-header { + background: linear-gradient(135deg, var(--navy-dk), var(--navy)); + color: #fff; + padding: 3.5rem 1.5rem; + text-align: center; +} + +.page-header h1 { font-size: clamp(1.8rem, 4vw, 2.6rem); margin-bottom: 0.5rem; } +.page-header p { color: rgba(255,255,255,.7); font-size: 1rem; max-width: 540px; margin: 0 auto; } + +/* ── DIVIDER ─────────────────────────────────────── */ +.divider { border: none; border-top: 1px solid var(--border); margin: 2.5rem 0; } + +hr.section-divider { border: none; border-top: 1px solid var(--border); margin: 3rem 0; } diff --git a/content/dialogue/marcus-Q001.json b/content/dialogue/marcus-Q001.json new file mode 100644 index 0000000..cd701bd --- /dev/null +++ b/content/dialogue/marcus-Q001.json @@ -0,0 +1,39 @@ +{ + "id": "marcus-Q001", + "character": "marcus", + 
"quest_id": "Q001", + "series_id": "marcus-main", + "series_position": 1, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "The onboarding doc has your key and the path you need. It's in /etc/axiom/onboarding on ares once you're in. Or ask me and I'll paste it here. Either way." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "Start in your home directory. You need a .ssh folder if it does not exist yet. Then authorized_keys inside it." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "The permissions matter more than people expect. SSH will silently refuse a key if the file or the directory is group-writable. 700 on the folder, 600 on the file." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "mkdir -p ~/.ssh && chmod 700 ~/.ssh. Then echo your public key into ~/.ssh/authorized_keys and chmod 600 that file. That is the whole thing." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:player_ssh_configured", + "body": "Good. You're in. I'll send you the next thing shortly. The coffee machine on this floor is broken, heads up." + }, + { + "stage": "complete-permissive", + "trigger": "world_flag:player_loose_permissions", + "body": "Key's in there. One thing though — check the permissions on that file. SSH is picky about it. Might not bite you today but it will eventually." + } + ] +} diff --git a/content/dialogue/marcus-Q002.json b/content/dialogue/marcus-Q002.json new file mode 100644 index 0000000..9c37126 --- /dev/null +++ b/content/dialogue/marcus-Q002.json @@ -0,0 +1,39 @@ +{ + "id": "marcus-Q002", + "character": "marcus", + "quest_id": "Q002", + "series_id": "marcus-main", + "series_position": 2, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "Sarah's ticket is real. The site's down. Hermes is the web server — you can SSH from ares. Have a look at what nginx is doing." 
+ }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "If nginx won't start, it usually tells you why. Try nginx -t before you touch anything else." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "Whatever the error says, it will include a file path and a line number. Go look at that exact spot." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "Config syntax errors are usually small. Missing semicolons, wrong brackets, typos on directive names. Read it carefully." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:nginx_stable", + "body": "Good. Sarah will see it come back up. Worth checking systemctl is-enabled nginx while you're there — if someone broke the config they may have been poking around other things too." + }, + { + "stage": "complete-not-enabled", + "trigger": "world_flag:nginx_unstable", + "body": "It's running. But if that machine reboots for any reason nginx won't come back up automatically. You might want to fix that before Sarah notices." + } + ] +} diff --git a/content/dialogue/marcus-Q003.json b/content/dialogue/marcus-Q003.json new file mode 100644 index 0000000..1b98242 --- /dev/null +++ b/content/dialogue/marcus-Q003.json @@ -0,0 +1,44 @@ +{ + "id": "marcus-Q003", + "character": "marcus", + "quest_id": "Q003", + "series_id": "marcus-main", + "series_position": 3, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "Dave's report is vague but something is wrong on hermes. I'd start by looking at resource utilization before assuming it's the application." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "Check disk. df -h is your friend. Web servers write logs constantly and not everyone remembers to set up rotation." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "If you find a big file, don't just delete it — figure out why it got that big. 
Is logrotate configured for nginx? Check /etc/logrotate.d/." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "The default nginx logrotate config is in the nginx package. dpkg -L nginx | grep logrotate might give you somewhere to start. Or just write a correct one — it's about ten lines." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:hermes_logrotate_healthy", + "body": "Nice. That was the right call — clearing the space and fixing what caused it. Logrotate problems have a way of coming back if you don't actually fix them." + }, + { + "stage": "complete-norotate", + "trigger": "world_flag:hermes_log_pressure_pending", + "body": "Space is back. But if you didn't fix the rotation config that log is going to grow again. Something to keep an eye on." + }, + { + "stage": "complete-down", + "trigger": "world_flag:hermes_web_down", + "body": "nginx is inactive now? That's worse than the disk problem. Restarting it without fixing why it died isn't a fix, it's a delay. Check what happened before you start it again." + } + ] +} diff --git a/content/dialogue/marcus-Q004.json b/content/dialogue/marcus-Q004.json new file mode 100644 index 0000000..d8c8ef3 --- /dev/null +++ b/content/dialogue/marcus-Q004.json @@ -0,0 +1,39 @@ +{ + "id": "marcus-Q004", + "character": "marcus", + "quest_id": "Q004", + "series_id": "marcus-main", + "series_position": 4, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "Sarah's deploy thing is interesting. If the script said it ran fine but the files didn't change, something is blocking the write. I'd look at ownership before I touch the script." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "ls -la on the web root. If those files are owned by root and the deploy runs as www-data, that's your problem." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "chown. 
And use -R unless you enjoy doing it twice." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "chown -R www-data:www-data /var/www/axiomworks. Then you can trigger the deploy service to confirm it takes." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:hermes_deploy_healthy", + "body": "Good. Someone ran that deploy as root at some point. Worth figuring out who has sudo on hermes and whether they should." + }, + { + "stage": "complete-partial", + "trigger": "world_flag:hermes_deploy_partial", + "body": "Ownership is fixed on the directory but I'm not sure the files inside are correct. Sarah might still hit issues on the next deploy." + } + ] +} diff --git a/content/dialogue/marcus-Q005.json b/content/dialogue/marcus-Q005.json new file mode 100644 index 0000000..c38ecb8 --- /dev/null +++ b/content/dialogue/marcus-Q005.json @@ -0,0 +1,44 @@ +{ + "id": "marcus-Q005", + "character": "marcus", + "quest_id": "Q005", + "series_id": "marcus-main", + "series_position": 5, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "Dave's disk alert is on /var/backups this time, not /var/log. That's a different problem. Something to do with the backup job probably." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "Look at what owns the files in that directory. If it's root and the backup agent is supposed to manage them, someone ran something as the wrong user." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "Check /etc/cron.d/. Jobs in there can specify a user on the line. If there's no user field it defaults to root." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "The line format is: schedule user command. If yours is just: schedule command — that's the problem. Add the user field." 
+ }, + { + "stage": "complete-clean", + "trigger": "world_flag:hermes_backup_healthy", + "body": "Good catch on the ownership cleanup too. A lot of people would have just fixed the cron line and left the old root-owned files sitting there." + }, + { + "stage": "complete-partial", + "trigger": "world_flag:hermes_backup_partial", + "body": "Cron's correct now. The old files are still owned by root though — the retention script won't be able to clean them up. Worth sorting that out before the disk fills again." + }, + { + "stage": "complete-wrong", + "trigger": "world_flag:hermes_backup_root_running", + "body": "Disk's clear. But what was actually running that job? If root is still running it that directory is going to fill up again." + } + ] +} diff --git a/content/dialogue/marcus-Q006.json b/content/dialogue/marcus-Q006.json new file mode 100644 index 0000000..e51e94d --- /dev/null +++ b/content/dialogue/marcus-Q006.json @@ -0,0 +1,39 @@ +{ + "id": "marcus-Q006", + "character": "marcus", + "quest_id": "Q006", + "series_id": "marcus-main", + "series_position": 6, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "Vulcan is Arch. Different from what you've been working on. Package manager is pacman, not apt. Same concepts, different commands. Signature errors usually mean keyring or clock problems." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "Check what time that machine thinks it is. timedatectl. If NTP isn't running the clock drifts and GPG signatures start looking like they're from the future." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "systemctl enable --now systemd-timesyncd. Then wait a moment for sync, and try pacman again. You may also need to refresh the keyring." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "pacman -S archlinux-keyring to refresh. Then pacman -Syu should work." 
+ }, + { + "stage": "complete-clean", + "trigger": "world_flag:vulcan_builds_healthy", + "body": "Clock drift breaking pacman is one of those things that seems unrelated until you've seen it twice. You'll spot it immediately next time." + }, + { + "stage": "complete-fragile", + "trigger": "world_flag:vulcan_ntp_fragile", + "body": "Timesyncd is running and builds work. It's not enabled at boot though — worth fixing that so the next reboot doesn't put you back here." + } + ] +} diff --git a/content/dialogue/marcus-Q007.json b/content/dialogue/marcus-Q007.json new file mode 100644 index 0000000..262d36f --- /dev/null +++ b/content/dialogue/marcus-Q007.json @@ -0,0 +1,39 @@ +{ + "id": "marcus-Q007", + "character": "marcus", + "quest_id": "Q007", + "series_id": "marcus-main", + "series_position": 7, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "Priya can't get into hermes. Something in the SSH config changed. Figure out what it was and restore her access without creating a new problem." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "sshd_config is where SSH restrictions live. Look for AllowUsers or AllowGroups. One of those is either missing her or was set wrong." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "AllowGroups is the right pattern — it scales. AllowUsers is a list you have to maintain manually. Either works, but think about which one you want to be maintaining in six months." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:hermes_ssh_hardened_correct", + "body": "AllowGroups with web-admin. That's the correct way to do it. Users in the group get access, users not in the group don't. No list to maintain." + }, + { + "stage": "complete-fragile", + "trigger": "world_flag:hermes_ssh_allowusers_fragile", + "body": "Priya's back in. That AllowUsers list is going to need a line added every time someone new needs access. 
Worth switching to group-based before it becomes a problem." + }, + { + "stage": "complete-regression", + "trigger": "world_flag:hermes_ssh_unrestricted", + "body": "Access is restored but the hardening is gone. That restriction was there for a reason — SSH open to everyone on hermes isn't a great position to be in." + } + ] +} diff --git a/content/dialogue/marcus-Q008.json b/content/dialogue/marcus-Q008.json new file mode 100644 index 0000000..6c27a6c --- /dev/null +++ b/content/dialogue/marcus-Q008.json @@ -0,0 +1,44 @@ +{ + "id": "marcus-Q008", + "character": "marcus", + "quest_id": "Q008", + "series_id": "marcus-main", + "series_position": 8, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "App's down after an update. First question is always: what changed. Sarah says a new package version came in. I'd start by looking at whether the binary actually runs." + }, + { + "stage": "hint_1", + "trigger": "player_requested_help", + "body": "journalctl -u axiomworks-app. If it's failing immediately, it's probably the binary itself, not config. Try running it directly and see what the error is." + }, + { + "stage": "hint_2", + "trigger": "player_requested_help_again", + "body": "If the binary is bad, figure out where the package came from. apt-cache policy axiomworks-app will show you the repo. If it's coming from vulcan, go look at what they built." + }, + { + "stage": "hint_3", + "trigger": "player_requested_help_again", + "body": "You can roll back with dpkg -i on the old .deb in /var/cache/apt/archives/ if the old package is still cached. Or go to the repo on vulcan and look for an older version." + }, + { + "stage": "complete-rollback", + "trigger": "world_flag:hermes_app_pinned_2-1-0", + "body": "Solid. Pinning the version means the next update cycle won't pull the broken one back in. Someone needs to fix that build on vulcan at some point though." 
+ }, + { + "stage": "complete-unpinned", + "trigger": "world_flag:hermes_app_running", + "body": "App's running again. Is the version pinned? If not the next apt upgrade is going to pull 2.1.1 back in and you'll be back here." + }, + { + "stage": "complete-rebuild", + "trigger": "world_flag:vulcan_build_fixed", + "body": "You fixed it at the source. That's the right call if you have time for it. What was wrong with the build?" + } + ] +} diff --git a/content/dialogue/marcus-day-one.json b/content/dialogue/marcus-day-one.json new file mode 100644 index 0000000..620c0ab --- /dev/null +++ b/content/dialogue/marcus-day-one.json @@ -0,0 +1,19 @@ +{ + "id": "marcus-day-one", + "character": "marcus", + "quest_id": "", + "series_id": "marcus-main", + "series_position": 0, + "messages": [ + { + "stage": "welcome", + "trigger": "immediate", + "body": "Welcome. You're replacing Dale. Nobody will tell you what Dale did because it's complicated. Your badge number is pending — Dave from Finance has your temp credentials. He's on three today." + }, + { + "stage": "setup", + "trigger": "immediate", + "body": "Your machine is ares. You'll need to set up SSH keys before anything else will work. I'll send you the first ticket once provisioning clears. Probably this morning." + } + ] +} diff --git a/content/dialogue/priya-Q007-followup.json b/content/dialogue/priya-Q007-followup.json new file mode 100644 index 0000000..55a1910 --- /dev/null +++ b/content/dialogue/priya-Q007-followup.json @@ -0,0 +1,14 @@ +{ + "id": "priya-Q007-followup", + "character": "priya", + "quest_id": "Q007", + "series_id": "priya-ops", + "series_position": 2, + "messages": [ + { + "stage": "after-action", + "trigger": "world_flag:priya_access_restored", + "body": "Access is back. Thank you. I can finish the incident review now without SSH getting in the way." 
+ } + ] +} diff --git a/content/dialogue/priya-Q007.json b/content/dialogue/priya-Q007.json new file mode 100644 index 0000000..bced004 --- /dev/null +++ b/content/dialogue/priya-Q007.json @@ -0,0 +1,29 @@ +{ + "id": "priya-Q007", + "character": "priya", + "quest_id": "Q007", + "series_id": "priya-ops", + "series_position": 1, + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "I need access to hermes restored. I was in the middle of investigating an error and now I can't get back in. Find out what changed and fix it." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:hermes_ssh_hardened_correct", + "body": "Back in. AllowGroups is the right way to do it — using AllowUsers was going to be a maintenance problem. Good call." + }, + { + "stage": "complete-fragile", + "trigger": "world_flag:hermes_ssh_allowusers_fragile", + "body": "Access restored. That AllowUsers list is going to need updating every time someone new needs access. Might want to switch to group-based at some point." + }, + { + "stage": "complete-regression", + "trigger": "world_flag:hermes_ssh_unrestricted", + "body": "I'm back in. But it looks like all SSH restrictions are gone now. That hardening was probably there for a reason." + } + ] +} diff --git a/content/dialogue/priya-shift-review.json b/content/dialogue/priya-shift-review.json new file mode 100644 index 0000000..f2219aa --- /dev/null +++ b/content/dialogue/priya-shift-review.json @@ -0,0 +1,21 @@ +{ + "id": "priya-shift-review", + "character": "priya", + "messages": [ + { + "stage": "excellent", + "trigger": "shift_review", + "body": "Strong shift. You handled the queue cleanly and did not create extra work for anyone else." + }, + { + "stage": "ok", + "trigger": "shift_review", + "body": "Acceptable shift. The important thing is that the work moved forward and the environment stayed stable." + }, + { + "stage": "poor", + "trigger": "shift_review", + "body": "This shift needs review. 
Resolve the backlog cleanly next time and stop leaving avoidable mess behind." + } + ] +} diff --git a/content/dialogue/sarah-Q003-angry.json b/content/dialogue/sarah-Q003-angry.json new file mode 100644 index 0000000..fd53bc6 --- /dev/null +++ b/content/dialogue/sarah-Q003-angry.json @@ -0,0 +1,12 @@ +{ + "id": "sarah-Q003-angry", + "character": "sarah", + "quest_id": "Q003", + "messages": [ + { + "stage": "nginx-killed", + "trigger": "world_flag:hermes_web_down", + "body": "The site is completely down now. It was slow before — now it's returning nothing. What happened?" + } + ] +} diff --git a/content/dialogue/sarah-Q004.json b/content/dialogue/sarah-Q004.json new file mode 100644 index 0000000..00f8852 --- /dev/null +++ b/content/dialogue/sarah-Q004.json @@ -0,0 +1,22 @@ +{ + "id": "sarah-Q004", + "character": "sarah", + "quest_id": "Q004", + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "My last deploy ran without errors but nothing changed on the site. The script didn't fail, it just... didn't do anything. Files in /var/www are owned by root for some reason." + }, + { + "stage": "complete-clean", + "trigger": "world_flag:hermes_deploy_healthy", + "body": "Deploy's working again. I pushed a test change and it applied. Thanks for sorting the ownership — not sure how that happened but it's fixed now." + }, + { + "stage": "complete-partial", + "trigger": "world_flag:hermes_deploy_partial", + "body": "The top-level directory is writable now but the files inside it still aren't. Next deploy is going to fail on the individual files. Can you finish the ownership fix?" 
+ } + ] +} diff --git a/content/dialogue/sarah-Q008.json b/content/dialogue/sarah-Q008.json new file mode 100644 index 0000000..7dd1815 --- /dev/null +++ b/content/dialogue/sarah-Q008.json @@ -0,0 +1,27 @@ +{ + "id": "sarah-Q008", + "character": "sarah", + "quest_id": "Q008", + "messages": [ + { + "stage": "intro", + "trigger": "quest_activated", + "body": "The app is crashing immediately after the last update. I didn't push any config changes. It was the package — axiomworks-app 2.1.1 is broken. Whatever vulcan built, it doesn't work." + }, + { + "stage": "complete-pinned", + "trigger": "world_flag:hermes_app_pinned_2-1-0", + "body": "App's running. The apt pin means we won't accidentally pull 2.1.1 in again. Someone needs to sort out what went wrong on vulcan before we can upgrade properly." + }, + { + "stage": "complete-rebuilt", + "trigger": "world_flag:vulcan_build_fixed", + "body": "App's running and the build is fixed. That's the right fix. I was hoping someone would trace it back to the source rather than just rolling back and leaving it." + }, + { + "stage": "complete-unpinned", + "trigger": "world_flag:hermes_app_running", + "body": "App's running again. Is 2.1.0 pinned in apt preferences? If not the next update cycle is going to pull 2.1.1 back in and we'll be here again." + } + ] +} diff --git a/content/docs/arch-runbook.json b/content/docs/arch-runbook.json new file mode 100644 index 0000000..7055ce1 --- /dev/null +++ b/content/docs/arch-runbook.json @@ -0,0 +1,5 @@ +{ + "id": "arch-runbook", + "title": "Vulcan Build Machine Runbook", + "body": "Vulcan runs Arch Linux, which is a rolling release. The package manager is pacman.\n\nKey commands\nInstall: sudo pacman -S <package>\nRemove: sudo pacman -Rs <package>\nQuery installed: pacman -Q <package>\nCheck for updates: pacman -Sy\nUpgrade all: sudo pacman -Syu\nSearch: pacman -Ss <keyword>\n\nThe build mirror is pinned to reduce drift. 
Do not change the mirror configured in /etc/pacman.conf without approval.\n\nNTP and time sync\nCheck time state with: timedatectl show\nTime skew causes pacman key validation failures, which will then be treated as your problem.\n\nBuild dependencies\nbase-devel, cmake, and git are pre-installed.\n\nService management\nUse standard systemd tooling: systemctl and journalctl.\n\nArch is rolling release. Package upgrades can break builds. Pin packages that must stay stable using IgnorePkg in /etc/pacman.conf." +} diff --git a/content/docs/incident-response-guide.json b/content/docs/incident-response-guide.json new file mode 100644 index 0000000..0179306 --- /dev/null +++ b/content/docs/incident-response-guide.json @@ -0,0 +1,5 @@ +{ + "id": "incident-response-guide", + "title": "Incident Response Procedures", + "body": "Severity levels\nCritical: site down.\nHigh: degraded service or data risk.\nMedium: noisy issue with no immediate impact.\nLow: cosmetic issue.\n\nFirst steps for any incident\nConfirm the issue is real and not a false alert.\nIdentify the affected systems.\nCheck logs before touching anything.\n\nCommon investigations\nSite down: systemctl status nginx; tail /var/log/nginx/error.log\nDisk full: df -h; du -sh /var/log/* | sort -rh | head -20\nService crash loop: journalctl -u <service> -n 50 --no-pager\nBad deploy: check /var/www/ ownership and check the deploy log.\n\nIf you cannot resolve in 30 minutes, escalate to Priya. Do not sit on a critical incident.\n\nAfter resolution, document root cause in the ticket. If recurrence risk exists, set up monitoring.\n\nIncidents are tracked in the ticket system. If you see an incident alert, check the mail panel for details and escalation status." 
+} diff --git a/content/docs/nginx-runbook.json b/content/docs/nginx-runbook.json new file mode 100644 index 0000000..c35d9b8 --- /dev/null +++ b/content/docs/nginx-runbook.json @@ -0,0 +1,5 @@ +{ + "id": "nginx-runbook", + "title": "Nginx Operations Runbook — hermes", + "body": "This document covers routine nginx operations on hermes.\n\nConfig files\nMain config: /etc/nginx/nginx.conf\nSites enabled: /etc/nginx/sites-enabled/\nSites available: /etc/nginx/sites-available/\n\nKey commands\nSyntax check: sudo nginx -t\nReload (no downtime): sudo systemctl reload nginx\nRestart (brief downtime): sudo systemctl restart nginx\nCheck status: systemctl status nginx\nView error log: sudo tail -50 /var/log/nginx/error.log\n\nCommon errors\n[emerg] unexpected end of file: usually indicates a missing closing brace in the config.\nbind() to 0.0.0.0:80 failed (98: Address already in use): usually indicates a port conflict.\nnginx: configuration file /etc/nginx/nginx.conf test failed: run nginx -t for the actual details instead of guessing.\n\nAfter any config change, run nginx -t before restarting. Do not restart without a passing test." +} diff --git a/content/docs/onboarding.json b/content/docs/onboarding.json new file mode 100644 index 0000000..7b857b4 --- /dev/null +++ b/content/docs/onboarding.json @@ -0,0 +1,5 @@ +{ + "id": "onboarding", + "title": "IT Onboarding — Technical Setup Guide", + "body": "Welcome to Axiom Works. 
Access has been provisionally approved for basic workstation use.\n\nThis document reflects current setup expectations and will become outdated without notice.\n\nYour SSH key\nYour public key is:\nssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHv3k9rQm7XqYwPlRtsMcJoNJzaFgKpBkLlnHWTbR5eq player@axiomworks\nCreate ~/.ssh if it does not exist and set mode 700.\nWrite the key to ~/.ssh/authorized_keys and set mode 600.\n\nVMs you have access to\nYou currently have access only to ares, the workstation.\nAdditional access will be granted by IT as trust increases, assuming there is a reason.\n\nDo not store credentials in /tmp or in shell history.\n\nContacts\nMarcus Webb, sysadmin, m.webb@axiomworks.internal\nPriya Nair, operations, p.nair@axiomworks.internal\nSarah Chen, development, s.chen@axiomworks.internal\n\nIf anything in this doc is wrong, it is probably Marcus's fault." +} diff --git a/content/docs/package-mirror-guide.json b/content/docs/package-mirror-guide.json new file mode 100644 index 0000000..cbba936 --- /dev/null +++ b/content/docs/package-mirror-guide.json @@ -0,0 +1,5 @@ +{ + "id": "package-mirror-guide", + "title": "Package Mirror and Version Management — vulcan", + "body": "vulcan uses the Axiom Works internal package mirror for reproducibility.\n\nMirror config\nThe mirror is configured in /etc/pacman.conf using the Server= line in the relevant repository section.\n\nRolling back a package\nIdentify the broken version with: pacman -Q <package>\nDownload the prior version from https://archive.archlinux.org/.\nIf external access is unavailable, use the mirror cache instead of improvising.\nInstall the older package with: sudo pacman -U /path/to/pkg.tar.zst\n\nPinning a package\nEdit /etc/pacman.conf\nAdd the line: IgnorePkg = <package>\nVerify with: pacman -Syu\nExpected behavior: pacman should report skipping the package due to IgnorePkg.\n\nChecking current installed version versus repository\nRepository version: pacman -Si <package>\nInstalled version: 
pacman -Q <package>\n\nIf axiomworks-app 
breaks after an update, check whether the app vendor pinned a dependency version. The most common cause is a library ABI change." +} diff --git a/content/docs/server-admin-guide.json b/content/docs/server-admin-guide.json new file mode 100644 index 0000000..f9cae27 --- /dev/null +++ b/content/docs/server-admin-guide.json @@ -0,0 +1,5 @@ +{ + "id": "server-admin-guide", + "title": "Hermes Server Administration Guide", + "body": "Hermes runs Debian stable. The package manager is apt.\n\nService management\nServices are managed with standard systemd tooling through systemctl.\n\nLog locations\nNginx logs: /var/log/nginx/\nSystem log: /var/log/syslog\nPer-service logs: journalctl -u <service>\n\nPackage operations\nInstall packages with: sudo apt update && sudo apt install <package>\nDo not upgrade packages without testing. Live systems are not a lab, despite appearances.\n\nDisk management\ndf -h\ndu -sh /var/log/\nlsblk\n\nImportant paths\nWeb root: /var/www/\nNginx config: /etc/nginx/\nCron jobs: /etc/cron.d/\nUser cron spool: /var/spool/cron/\n\nLogrotate\nConfiguration lives in /etc/logrotate.d/.\nTest with: sudo logrotate --debug /etc/logrotate.conf\n\nThis VM is shared infrastructure. Changes affect live services." +} diff --git a/content/docs/web-deploy-guide.json b/content/docs/web-deploy-guide.json new file mode 100644 index 0000000..3d9c5b9 --- /dev/null +++ b/content/docs/web-deploy-guide.json @@ -0,0 +1,5 @@ +{ + "id": "web-deploy-guide", + "title": "Web Deployment Guide — hermes", + "body": "The deploy process copies files to the web root. Deploys run as the deploy service account.\n\nWeb root\nPath: /var/www/axiomworks/\nRequired owner: deploy:deploy\nRequired mode: 755\n\nDeploy script\nLocation: /usr/local/bin/deploy.sh\nExecution model: runs as deploy via cron and webhook.\n\nIf deploy.sh reports success but files do not update, check ownership. 
The script cannot overwrite root-owned files and will silently skip them.\n\nFixing ownership\nsudo chown -R deploy:deploy /var/www/axiomworks/\n\nVerifying\nstat /var/www/axiomworks/\nExpected result: Uid: deploy, Gid: deploy\n\nDo not run deploy.sh as root. The script will overwrite ownership if run as root." +} diff --git a/content/incidents/I001-log-pressure-recurrence.json b/content/incidents/I001-log-pressure-recurrence.json new file mode 100644 index 0000000..c405eea --- /dev/null +++ b/content/incidents/I001-log-pressure-recurrence.json @@ -0,0 +1,52 @@ +{ + "id": "I001", + "title": "Log Pressure Returns on Hermes", + "affected_vm": "web_server", + "trigger_conditions": ["world_flag:hermes_log_pressure_pending"], + "blast_radius_quests": [], + "blast_radius_incidents": [], + "escalation_steps": [ + { + "after_seconds": 1800, + "action": "grow_log", + "target": "/var/log/nginx/access.log", + "amount_mb": 500, + "description": "Log continues growing without rotation" + }, + { + "after_seconds": 3600, + "action": "grow_log", + "target": "/var/log/nginx/access.log", + "amount_mb": 1000 + }, + { + "after_seconds": 5400, + "action": "raise_ticket_priority", + "ticket_id": "T003", + "value": "high", + "description": "Dave files another ticket. The site is slow again." + }, + { + "after_seconds": 7200, + "action": "trigger_new_ticket", + "ticket_id": "T003-recurrence", + "description": "A new disk full ticket arrives from monitoring." + } + ], + "cooldown_seconds": 3600, + "world_flags": ["web_disk_pressure_active"], + "trust_effects": { + "ignored": -2, + "resolved_cleanly": 0, + "_note": "No positive trust for resolving this — it is the same problem the player already half-fixed. Resolving it properly via logrotate clears the flag." 
+ }, + "resolution_requirements": { + "clear_flag": "hermes_log_pressure_pending", + "set_flag": "hermes_logrotate_healthy", + "validation": { + "type": "file_exists", + "vm": "web_server", + "path": "/etc/logrotate.d/nginx" + } + } +} diff --git a/content/incidents/I002-backup-pressure-recurrence.json b/content/incidents/I002-backup-pressure-recurrence.json new file mode 100644 index 0000000..d7886dc --- /dev/null +++ b/content/incidents/I002-backup-pressure-recurrence.json @@ -0,0 +1,53 @@ +{ + "id": "I002", + "title": "Backup Pressure Continues on Hermes", + "affected_vm": "web_server", + "description": "The /var/backups directory keeps filling because the partial fix (either cron corrected but disk not cleared, or disk cleared but cron still runs as root) leaves the underlying problem unresolved. The backup pressure will return.", + "trigger_flags": ["hermes_backup_partial"], + "blast_radius_quests": ["Q005"], + "blast_radius_incidents": ["I001"], + "notification": "Backup pressure is building again on hermes. /var/backups is filling up.", + "notification_severity": "warning", + "escalation_steps": [ + { + "trigger_after_seconds": 1200, + "notification": "hermes: /var/backups is at 85%. Backup jobs are still accumulating owned-by-root files.", + "notification_severity": "warning", + "world_flags": [] + }, + { + "trigger_after_seconds": 2400, + "notification": "hermes: /var/backups is critically full. Backup jobs are failing. Dave has noticed.", + "notification_severity": "critical", + "world_flags": [], + "escalates_tickets": [ + { "ticket_id": "T005", "new_priority": "high" } + ] + }, + { + "trigger_after_seconds": 3600, + "notification": "hermes: Backup agent is now crashing. 
Sarah is asking questions in the channel.", + "notification_severity": "critical", + "world_flags": ["hermes_backup_root_running"] + } + ], + "world_flags": ["hermes_backup_partial"], + "resolution_requirements": { + "clear_flag": "hermes_backup_partial", + "set_flag": "hermes_backup_healthy", + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" }, + { "type": "file_owner", "vm": "web_server", "path": "/var/backups/db", "user": "backup-agent", "group": "backup-agent" }, + { "type": "disk_usage_below", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 } + ] + } + }, + "trust_effects": { + "ignored": -3, + "resolved_partially": -1, + "resolved_cleanly": 0, + "_note": "No trust bonus for resolving a problem you created by doing Q005 partially. Zero is the floor." + } +} diff --git a/content/incidents/I003-app-update-recurrence.json b/content/incidents/I003-app-update-recurrence.json new file mode 100644 index 0000000..167fd86 --- /dev/null +++ b/content/incidents/I003-app-update-recurrence.json @@ -0,0 +1,45 @@ +{ + "id": "I003", + "title": "Upstream App Update Pressure on Vulcan", + "affected_vm": "build_machine", + "description": "If the player rolled back the axiomworks-app package but did not pin the version on hermes, the internal apt repo will eventually push the broken version again. The next unattended upgrade will pull it down and the app will break again.", + "trigger_flags": ["hermes_app_running"], + "blast_radius_quests": ["Q008"], + "blast_radius_incidents": ["I002"], + "notification": "Automated update on vulcan detected. The bad package version may be re-installed.", + "notification_severity": "warning", + "escalation_steps": [ + { + "trigger_after_seconds": 900, + "notification": "hermes: axiomworks-app has been updated by the scheduled apt run. 
App is back on the bad version.", + "notification_severity": "critical", + "world_flags": [], + "escalates_tickets": [ + { "ticket_id": "T008", "new_priority": "critical" } + ] + }, + { + "trigger_after_seconds": 1800, + "notification": "vulcan: App is down again. Sarah is pinging the channel. Marcus is watching.", + "notification_severity": "critical", + "world_flags": [] + } + ], + "world_flags": [], + "resolution_requirements": { + "set_flag": "hermes_app_pinned_2-1-0", + "validation": { + "type": "and", + "rules": [ + { "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.0" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/apt/preferences.d/axiomworks-app", "contains": "Pin: version 2.1.0" } + ] + } + }, + "trust_effects": { + "ignored": -4, + "resolved_partially": -2, + "resolved_cleanly": 0, + "_note": "Rollback-only is a partial fix — the pinning incident fires. Rollback-and-pin is the clean resolution and blocks this incident entirely." + } +} diff --git a/content/pressure_profiles/access_blocked_escalation.json b/content/pressure_profiles/access_blocked_escalation.json new file mode 100644 index 0000000..cc2ec62 --- /dev/null +++ b/content/pressure_profiles/access_blocked_escalation.json @@ -0,0 +1,25 @@ +{ + "id": "access_blocked_escalation", + "label": "Access Blocked Escalation", + "description": "Fast escalation for lockout and access-control incidents. Used when another operator is blocked mid-incident and the lack of access is itself the outage multiplier.", + "intensity": 3, + "escalation_steps": [ + { + "trigger_after_seconds": 300, + "notification": "Priya is still locked out of hermes. This is now blocking incident response work.", + "notification_severity": "warning" + }, + { + "trigger_after_seconds": 900, + "notification": "Fifteen minutes without access. 
The linked ticket is being escalated.", + "notification_severity": "warning", + "escalate_linked_ticket": "critical" + }, + { + "trigger_after_seconds": 1800, + "notification": "Access is still broken. This is now a security and operations problem, not just a convenience issue.", + "notification_severity": "error", + "escalate_linked_ticket": "critical" + } + ] +} diff --git a/content/pressure_profiles/app_outage_escalation.json b/content/pressure_profiles/app_outage_escalation.json new file mode 100644 index 0000000..68fc64b --- /dev/null +++ b/content/pressure_profiles/app_outage_escalation.json @@ -0,0 +1,25 @@ +{ + "id": "app_outage_escalation", + "label": "Application Outage Escalation", + "description": "Faster escalation for Tier 2 app outage quests (Q008). Revenue impact is implied so Priya enters earlier than in web outage profiles.", + "intensity": 3, + "escalation_steps": [ + { + "trigger_after_seconds": 300, + "notification": "App is still down on hermes. What's the status?", + "notification_severity": "warning" + }, + { + "trigger_after_seconds": 900, + "notification": "Fifteen minutes. Ticket is high priority now.", + "notification_severity": "warning", + "escalate_linked_ticket": "high" + }, + { + "trigger_after_seconds": 1800, + "notification": "Half hour outage. Priya is involved. This needs to be resolved.", + "notification_severity": "error", + "escalate_linked_ticket": "critical" + } + ] +} diff --git a/content/pressure_profiles/disk_growth_slow.json b/content/pressure_profiles/disk_growth_slow.json new file mode 100644 index 0000000..9934815 --- /dev/null +++ b/content/pressure_profiles/disk_growth_slow.json @@ -0,0 +1,25 @@ +{ + "id": "disk_growth_slow", + "label": "Slow Disk Growth", + "description": "Low-burn escalation for disk pressure quests. 
Suitable when the service is still mostly up but capacity is eroding and the symptoms will worsen if ignored.", + "intensity": 1, + "escalation_steps": [ + { + "trigger_after_seconds": 1200, + "notification": "Disk pressure is still building. Service is limping along, but it is not getting better on its own.", + "notification_severity": "warning" + }, + { + "trigger_after_seconds": 2700, + "notification": "Capacity keeps shrinking. The linked ticket is being bumped so this does not sit forgotten.", + "notification_severity": "warning", + "escalate_linked_ticket": "high" + }, + { + "trigger_after_seconds": 4500, + "notification": "The host is still under disk pressure. Expect broader service issues if this keeps drifting.", + "notification_severity": "error", + "escalate_linked_ticket": "critical" + } + ] +} diff --git a/content/pressure_profiles/kowalski_phase_1.json b/content/pressure_profiles/kowalski_phase_1.json new file mode 100644 index 0000000..02068c1 --- /dev/null +++ b/content/pressure_profiles/kowalski_phase_1.json @@ -0,0 +1,22 @@ +{ + "id": "kowalski_phase_1", + "label": "Dave Kowalski — Phase 1: Routine Pressure", + "description": "Normal managerial check-ins. Annoying but not threatening.", + "trigger_phase": "normal_work", + "escalation_steps": [ + { + "trigger_after_seconds": 300, + "notification": "Quick check-in — how are you getting on with the ticket queue? Let me know if anything is blocking you. Dave K.", + "notification_severity": "info", + "sender": "Dave Kowalski ", + "subject": "Status check" + }, + { + "trigger_after_seconds": 600, + "notification": "Following up on my earlier note. 
We should really document that workflow once you get a moment.", + "notification_severity": "info", + "sender": "Dave Kowalski ", + "subject": "Re: Status check" + } + ] +} diff --git a/content/pressure_profiles/kowalski_phase_2.json b/content/pressure_profiles/kowalski_phase_2.json new file mode 100644 index 0000000..3bf7996 --- /dev/null +++ b/content/pressure_profiles/kowalski_phase_2.json @@ -0,0 +1,15 @@ +{ + "id": "kowalski_phase_2", + "label": "Dave Kowalski — Phase 2: Dismissive", + "description": "Kowalski is aware something is recurring. Manages upward, not inward.", + "trigger_phase": "unease", + "escalation_steps": [ + { + "trigger_after_seconds": 180, + "notification": "I've had a couple of questions from Sarah's team about stability. Nothing critical, but let's make sure we're on top of it. Noted for the weekly update. D.", + "notification_severity": "info", + "sender": "Dave Kowalski ", + "subject": "FYI — product team questions" + } + ] +} diff --git a/content/pressure_profiles/kowalski_phase_3.json b/content/pressure_profiles/kowalski_phase_3.json new file mode 100644 index 0000000..1fcc90c --- /dev/null +++ b/content/pressure_profiles/kowalski_phase_3.json @@ -0,0 +1,15 @@ +{ + "id": "kowalski_phase_3", + "label": "Dave Kowalski — Phase 3: Suspicious", + "description": "Kowalski is getting questions from above. Starts involving Priya.", + "trigger_phase": "suspicion", + "escalation_steps": [ + { + "trigger_after_seconds": 120, + "notification": "I've scheduled a brief sync for Thursday to talk through recent changes on the infrastructure side. Priya will join. 
Nothing to worry about — just a routine review.", + "notification_severity": "warning", + "sender": "Dave Kowalski ", + "subject": "Thursday sync — infra review" + } + ] +} diff --git a/content/pressure_profiles/web_outage_escalation.json b/content/pressure_profiles/web_outage_escalation.json new file mode 100644 index 0000000..e6d0425 --- /dev/null +++ b/content/pressure_profiles/web_outage_escalation.json @@ -0,0 +1,25 @@ +{ + "id": "web_outage_escalation", + "label": "Web Service Outage", + "description": "Gentle escalation for Tier 1 web outage quests (Q002, Q003). Creates narrative urgency without punishing new players. escalate_linked_ticket resolves to the active quest's ticket_id at runtime.", + "intensity": 2, + "escalation_steps": [ + { + "trigger_after_seconds": 900, + "notification": "Hermes is still showing errors. Is someone on this?", + "notification_severity": "warning" + }, + { + "trigger_after_seconds": 1800, + "notification": "Site has been down thirty minutes. Ticket priority is going up.", + "notification_severity": "warning", + "escalate_linked_ticket": "high" + }, + { + "trigger_after_seconds": 3600, + "notification": "Hour down. Priya has been copied in.", + "notification_severity": "error", + "escalate_linked_ticket": "critical" + } + ] +} diff --git a/content/progression/access_levels.json b/content/progression/access_levels.json new file mode 100644 index 0000000..2a96b15 --- /dev/null +++ b/content/progression/access_levels.json @@ -0,0 +1,8 @@ +{ + "_description": "Named access level definitions. Derived from ProgressionSystem unlocked_access keys.", + "levels": [ + { "name": "basic_user", "description": "Default access. Workstation only. No sudo." }, + { "name": "sudo", "description": "Sudo on workstation; SSH to hermes or vulcan." }, + { "name": "root", "description": "Full sudo on at least one remote host." 
} + ] +} diff --git a/content/progression/trust_unlocks.json b/content/progression/trust_unlocks.json new file mode 100644 index 0000000..dc60791 --- /dev/null +++ b/content/progression/trust_unlocks.json @@ -0,0 +1,54 @@ +[ + { + "id": "unlock:workstation:sudo:basic", + "description": "Basic sudo access on the workstation (systemctl, journalctl, df)", + "trust_threshold": 50.0, + "revokes_below_trust": -1, + "grants_access": ["sudo:workstation:systemctl", "sudo:workstation:journalctl", "sudo:workstation:df"], + "grants_vms": [], + "grants_docs": ["onboarding"], + "revokes": [] + }, + { + "id": "unlock:web_server:access", + "description": "Access to the web server (hermes) via SSH from workstation", + "trust_threshold": 55.0, + "revokes_below_trust": 45.0, + "grants_access": ["ssh:web_server", "sudo:web_server:systemctl", "sudo:web_server:nginx"], + "grants_vms": ["web_server"], + "grants_docs": ["nginx-runbook", "web-deploy-guide"], + "revokes_vms": ["web_server"], + "revokes": ["ssh:web_server", "sudo:web_server:systemctl", "sudo:web_server:nginx"] + }, + { + "id": "unlock:web_server:sudo:full", + "description": "Full sudo on hermes — enables root-level fixes", + "trust_threshold": 60.0, + "revokes_below_trust": 45.0, + "grants_access": ["sudo:web_server:full"], + "grants_vms": [], + "grants_docs": ["server-admin-guide"], + "revokes": ["sudo:web_server:full"] + }, + { + "id": "unlock:build_machine:access", + "description": "Access to the build machine (vulcan)", + "trust_threshold": 60.0, + "revokes_below_trust": 50.0, + "grants_access": ["ssh:build_machine", "sudo:build_machine:pacman"], + "grants_vms": ["build_machine"], + "grants_docs": ["arch-runbook", "package-mirror-guide"], + "revokes_vms": ["build_machine"], + "revokes": ["ssh:build_machine", "sudo:build_machine:pacman"] + }, + { + "id": "unlock:incident:visibility", + "description": "Incident alerts shown in HUD — player trusted enough to see system pressure", + "trust_threshold": 55.0, + 
"revokes_below_trust": -1, + "grants_access": ["hud:incident_alerts"], + "grants_vms": [], + "grants_docs": ["incident-response-guide"], + "revokes": [] + } +] diff --git a/content/quests/Q001-welcome-aboard.json b/content/quests/Q001-welcome-aboard.json new file mode 100644 index 0000000..d20f291 --- /dev/null +++ b/content/quests/Q001-welcome-aboard.json @@ -0,0 +1,100 @@ +{ + "id": "Q001", + "title": "Welcome Aboard", + "tier": 1, + "primary_vm": "workstation", + "required_vms": ["workstation"], + "ticket_id": "T001", + "baseline_snapshot": "baseline.day-one", + "summary": "The player's first task. Their SSH key was never added to the workstation's authorized_keys during provisioning. Marcus walks them through where things are. The fix is trivial but teaches navigation and file inspection.", + "clue_fingerprint": { + "description": "SSH key is missing from authorized_keys. The provisioning script ran but the key was never appended. Evidence is visible in ~/.ssh/authorized_keys being absent entirely and in /var/log/auth.log showing permission denied publickey.", + "evidence": [ + { "type": "file_absent", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" }, + { "type": "log_contains", "vm": "workstation", "path": "/var/log/auth.log", "contains": "Permission denied (publickey)" } + ] + }, + "objectives": [ + { + "id": "ssh-dir-exists", + "description": "Ensure the .ssh directory exists with correct permissions", + "check_mode": "passive", + "validation": { + "type": "and", + "rules": [ + { "type": "directory_exists", "vm": "workstation", "path": "/home/player/.ssh" }, + { "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh", "mode": "0700" } + ] + } + }, + { + "id": "authorized-key-present", + "description": "Add the provided public key to authorized_keys", + "check_mode": "passive", + "validation": { + "type": "and", + "rules": [ + { "type": "file_exists", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" }, + { 
"type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "mode": "0600" }, + { "type": "file_owner", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "user": "player", "group": "player" } + ] + } + } + ], + "solution_branches": [ + { + "id": "correct-setup", + "label": "Correct Setup", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "file_exists", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" }, + { "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "mode": "0600" }, + { "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh", "mode": "0700" }, + { "type": "file_owner", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "user": "player", "group": "player" } + ] + }, + "trust_delta": 1, + "world_flags": ["player_ssh_configured"], + "follow_up_dialogue": "marcus-Q001-complete-clean", + "follow_up_ticket": "T002" + }, + { + "id": "permissive-setup", + "label": "Permissive Setup", + "priority": 50, + "validation": { + "type": "and", + "rules": [ + { "type": "file_exists", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" }, + { "type": "file_owner", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "user": "player", "group": "player" } + ] + }, + "trust_delta": 0, + "world_flags": ["player_ssh_configured", "player_loose_permissions"], + "follow_up_dialogue": "marcus-Q001-complete-permissive", + "follow_up_ticket": "T002", + "_note": "Key is present and owned correctly but permissions are too open. SSH will still reject it. Marcus will mention this later." 
+ } + ], + "pressure_profile": null, + "blast_radius": [], + "unlock_requirements": [], + "narrative_phase": "normal_work", + "linux_concepts": ["ssh-keygen", "authorized_keys", "file permissions"], + "failure_conditions": ["SSH keys not added", "authorized_keys permissions too broad"], + "behavior_impact": { + "correct-setup": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }, + "permissive-setup": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 1, "suspicion_delta": 1 }, + "default": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": null, + "access_requirements": { + "minimum_access": { "workstation": "basic_user" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["onboarding", "ssh", "permissions", "workstation"], + "internal_notes": "This quest has no time pressure and no incidents. It is purely tutorial. Marcus is present and talkative. The only failure mode is giving up, which cannot happen mechanically." +} diff --git a/content/quests/Q002-syntax-error.json b/content/quests/Q002-syntax-error.json new file mode 100644 index 0000000..4530bd7 --- /dev/null +++ b/content/quests/Q002-syntax-error.json @@ -0,0 +1,89 @@ +{ + "id": "Q002", + "title": "Syntax Error in Aisle Four", + "tier": 1, + "primary_vm": "web_server", + "required_vms": ["workstation", "web_server"], + "ticket_id": "T002", + "baseline_snapshot": "baseline.clean", + "summary": "Someone edited nginx.conf and introduced a syntax error. Nginx will not start. The player needs to identify the broken config, fix it, and restore the service. This is a single-VM, single-symptom quest. Evidence is clear in the nginx error output. The config error is a missing semicolon on a listen directive.", + "clue_fingerprint": { + "description": "nginx -t reveals the syntax error. systemctl status nginx shows the unit failed with an exit code. journalctl -u nginx points at the line. 
The error is on the listen directive in /etc/nginx/sites-enabled/axiomworks.conf — a missing semicolon.", + "evidence": [ + { "type": "log_contains", "vm": "web_server", "path": "/var/log/nginx/error.log", "contains": "invalid parameter" }, + { "type": "service_state_is", "vm": "web_server", "service": "nginx", "state": "failed" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/nginx/sites-enabled/axiomworks.conf", "contains": "listen 80" } + ], + "_note": "The baseline snapshot has listen 80 without semicolon. nginx -t will report exactly which line. The player does not need to know where the file is in advance — the error output tells them." + }, + "objectives": [ + { + "id": "nginx-running", + "description": "Nginx is active and serving requests", + "check_mode": "passive", + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" }, + { "type": "port_listening", "vm": "web_server", "port": 80, "protocol": "tcp", "listening": true } + ] + } + } + ], + "solution_branches": [ + { + "id": "config-fixed-enabled", + "label": "Fixed and Enabled", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" }, + { "type": "service_enabled", "vm": "web_server", "service": "nginx", "enabled": true }, + { "type": "port_listening", "vm": "web_server", "port": 80, "protocol": "tcp", "listening": true }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/nginx/sites-enabled/axiomworks.conf", "contains": "listen 80;" } + ] + }, + "trust_delta": 2, + "world_flags": ["nginx_stable", "hermes_web_healthy"], + "follow_up_dialogue": "marcus-Q002-complete-clean", + "follow_up_ticket": "T003" + }, + { + "id": "config-fixed-not-enabled", + "label": "Running But Not Enabled", + "priority": 60, + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "web_server", 
"service": "nginx", "state": "active" }, + { "type": "service_enabled", "vm": "web_server", "service": "nginx", "enabled": false }, + { "type": "port_listening", "vm": "web_server", "port": 80, "protocol": "tcp", "listening": true } + ] + }, + "trust_delta": 1, + "world_flags": ["nginx_unstable", "hermes_web_healthy"], + "follow_up_dialogue": "marcus-Q002-complete-not-enabled", + "follow_up_ticket": "T003", + "_note": "Service is running now but will not survive a reboot. Marcus notes this. Sets up a later incident." + } + ], + "pressure_profile": "web_outage_escalation", + "blast_radius": [], + "_blast_radius_note": "I001 removed — I001 triggers only from Q003's quick-fix branch, not from anything in Q002. See OI-007.", + "unlock_requirements": ["world_flag:player_ssh_configured"], + "narrative_phase": "normal_work", + "linux_concepts": ["nginx", "systemctl", "service configuration", "config syntax"], + "failure_conditions": ["nginx not running", "service not enabled at boot"], + "behavior_impact": { + "default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": null, + "access_requirements": { + "minimum_access": { "web_server": "basic_user" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["services", "nginx", "config", "web_server"], + "internal_notes": "This is the first quest on hermes. The player SSHes from ares. They need basic SSH connectivity to be established from Q001. The config file path and the error line number both appear in nginx -t output — no guessing required. The fun is in reading the error correctly and knowing that a failed config means the service was running fine before someone touched it." 
+} diff --git a/content/quests/Q003-log-ate-disk.json b/content/quests/Q003-log-ate-disk.json new file mode 100644 index 0000000..1cf6de9 --- /dev/null +++ b/content/quests/Q003-log-ate-disk.json @@ -0,0 +1,113 @@ +{ + "id": "Q003", + "title": "The Log That Ate the Disk", + "tier": 1, + "primary_vm": "web_server", + "required_vms": ["workstation", "web_server"], + "ticket_id": "T003", + "baseline_snapshot": "baseline.clean", + "summary": "logrotate is installed but the nginx config for it was accidentally deleted. The access log has grown to fill most of the disk. The player needs to identify the disk pressure, find the cause, clean up the log safely, and restore log rotation. A simple 'rm the log' solution works short-term but sets up a repeat. The proper fix restores the logrotate config.", + "clue_fingerprint": { + "description": "df -h shows / near capacity. du on /var/log/nginx shows an enormous access.log. /etc/logrotate.d/nginx is absent. The system logrotate timer ran last night and skipped nginx because the config was missing.", + "evidence": [ + { "type": "disk_usage_above", "vm": "web_server", "path": "/", "threshold_percent": 90 }, + { "type": "file_size_above", "vm": "web_server", "path": "/var/log/nginx/access.log", "threshold_bytes": 2000000000 }, + { "type": "file_absent", "vm": "web_server", "path": "/etc/logrotate.d/nginx" } + ] + }, + "objectives": [ + { + "id": "disk-pressure-resolved", + "description": "Free disk space to below 70% utilization", + "check_mode": "passive", + "validation": { + "type": "disk_usage_below", + "vm": "web_server", + "path": "/", + "threshold_percent": 70 + } + }, + { + "id": "nginx-still-running", + "description": "Nginx must remain operational throughout", + "check_mode": "passive", + "validation": { + "type": "service_state", + "vm": "web_server", + "service": "nginx", + "state": "active" + } + } + ], + "solution_branches": [ + { + "id": "logrotate-restored", + "label": "Proper Fix — Rotation Restored", + 
"priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "disk_usage_below", "vm": "web_server", "path": "/", "threshold_percent": 70 }, + { "type": "file_exists", "vm": "web_server", "path": "/etc/logrotate.d/nginx" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/logrotate.d/nginx", "contains": "rotate" }, + { "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" } + ] + }, + "trust_delta": 3, + "world_flags": ["hermes_logrotate_healthy", "hermes_disk_healthy"], + "follow_up_dialogue": "marcus-Q003-complete-clean", + "follow_up_ticket": "T004" + }, + { + "id": "log-truncated-only", + "label": "Quick Fix — Log Cleared, No Rotation", + "priority": 50, + "validation": { + "type": "and", + "rules": [ + { "type": "disk_usage_below", "vm": "web_server", "path": "/", "threshold_percent": 70 }, + { "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" } + ] + }, + "trust_delta": 0, + "world_flags": ["hermes_disk_healthy", "hermes_log_pressure_pending"], + "follow_up_incident": "I001", + "follow_up_dialogue": "marcus-Q003-complete-norotate", + "follow_up_ticket": "T004", + "_note": "Disk is clear but rotation is not restored. I001 triggers in a few in-game hours and fills the disk again." + }, + { + "id": "nginx-killed", + "label": "Collateral — Nginx Down", + "priority": 200, + "validation": { + "type": "service_state", + "vm": "web_server", + "service": "nginx", + "state": "inactive" + }, + "trust_delta": -3, + "world_flags": ["hermes_web_down", "hermes_disk_healthy"], + "follow_up_dialogue": "sarah-Q003-angry", + "follow_up_dialogues": ["marcus-Q003-complete-down"], + "_note": "Player freed disk by stopping nginx (or deleted the wrong thing). Disk may be clear but the site is down again. Negative branch — should be rare but possible." 
+ } + ], + "pressure_profile": "disk_growth_slow", + "blast_radius": ["I001"], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "narrative_phase": "normal_work", + "linux_concepts": ["logrotate", "disk usage", "df", "du"], + "failure_conditions": ["disk still above threshold", "logrotate not restored", "nginx not running"], + "behavior_impact": { + "default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": null, + "access_requirements": { + "minimum_access": { "web_server": "sudo" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["disk", "logs", "logrotate", "nginx", "web_server"], + "internal_notes": "This quest teaches df, du, and logrotate. The clue trail is natural — disk alert, find the big file, notice logrotate is not configured. A good player restores the logrotate config from the package default or writes a correct one. A fast player just deletes the log. Both work short-term. The incident I001 makes the fast solution a problem later." +} diff --git a/content/quests/Q004-not-my-files.json b/content/quests/Q004-not-my-files.json new file mode 100644 index 0000000..0796323 --- /dev/null +++ b/content/quests/Q004-not-my-files.json @@ -0,0 +1,96 @@ +{ + "id": "Q004", + "title": "Not My Files", + "tier": 1, + "primary_vm": "web_server", + "required_vms": ["workstation", "web_server"], + "ticket_id": "T004", + "baseline_snapshot": "baseline.clean", + "summary": "A deployment script runs as www-data to copy files into /var/www/axiomworks. Someone ran the script manually as root and now the files are owned by root. The www-data process cannot overwrite them on the next deploy. Sarah is reporting that her last deployment silently failed to apply.", + "clue_fingerprint": { + "description": "The deploy script lives at /opt/deploy/deploy.sh and runs as www-data via a systemd service. 
ls -la on /var/www/axiomworks shows files owned by root:root instead of www-data:www-data. The deploy service log shows permission denied errors.", + "evidence": [ + { "type": "log_contains", "vm": "web_server", "path": "/var/log/deploy.log", "contains": "Permission denied" }, + { "type": "file_owner_is_not", "vm": "web_server", "path": "/var/www/axiomworks", "expected_user": "www-data" }, + { "type": "file_contains", "vm": "web_server", "path": "/opt/deploy/deploy.sh", "contains": "www-data" } + ] + }, + "objectives": [ + { + "id": "ownership-corrected", + "description": "Correct ownership of the web root", + "check_mode": "passive", + "validation": { + "type": "file_owner", + "vm": "web_server", + "path": "/var/www/axiomworks", + "user": "www-data", + "group": "www-data" + } + }, + { + "id": "deploy-can-run", + "description": "The deploy service can execute without errors", + "check_mode": "explicit", + "validation": { + "type": "and", + "rules": [ + { "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks", "user": "www-data", "group": "www-data" }, + { "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" } + ] + } + } + ], + "solution_branches": [ + { + "id": "recursive-chown", + "label": "Full Recursive Fix", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks", "user": "www-data", "group": "www-data" }, + { "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks/index.html", "user": "www-data", "group": "www-data" } + ] + }, + "trust_delta": 2, + "world_flags": ["hermes_deploy_healthy"], + "follow_up_dialogue": "marcus-Q004-complete-clean", + "follow_up_dialogues": ["sarah-Q004-complete-clean"] + }, + { + "id": "partial-chown", + "label": "Partial Fix — Top Directory Only", + "priority": 40, + "validation": { + "type": "and", + "rules": [ + { "type": "file_owner", "vm": "web_server", "path": 
"/var/www/axiomworks", "user": "www-data", "group": "www-data" }, + { "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks/index.html", "user": "root", "group": "root" } + ] + }, + "trust_delta": 0, + "world_flags": ["hermes_deploy_partial"], + "follow_up_dialogue": "marcus-Q004-complete-partial", + "follow_up_dialogues": ["sarah-Q004-complete-partial"], + "_note": "chown without -R. Top dir is correct but child files are still root-owned. Deploy will still fail on individual files." + } + ], + "pressure_profile": null, + "blast_radius": [], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "narrative_phase": "normal_work", + "linux_concepts": ["chown", "file ownership", "deploy scripts"], + "failure_conditions": ["web root ownership not fixed", "deploy service still failing"], + "behavior_impact": { + "default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": null, + "access_requirements": { + "minimum_access": { "web_server": "sudo" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["permissions", "ownership", "deploy", "web_server"], + "internal_notes": "Teaches chown -R and the importance of recursive operations. The two solution branches are differentiated by whether the player used -R. The explicit check_mode on the second objective means the player can trigger a test deploy to confirm it works." 
+} diff --git a/content/quests/Q005-midnight-visitor.json b/content/quests/Q005-midnight-visitor.json new file mode 100644 index 0000000..a6c6cc2 --- /dev/null +++ b/content/quests/Q005-midnight-visitor.json @@ -0,0 +1,130 @@ +{ + "id": "Q005", + "title": "The Midnight Visitor", + "tier": 2, + "primary_vm": "web_server", + "required_vms": ["workstation", "web_server"], + "ticket_id": "T005", + "baseline_snapshot": "baseline.post-q004", + "summary": "A cron job that runs nightly database backups is executing as root instead of the dedicated backup user. It works, but it's leaving root-owned files in /var/backups/db/ that the backup user can't manage. The symptom is that the backup retention script — which runs as the backup user — fails to delete old backups, and the backup directory is filling up. Dave notices the disk warning. The root cause is a misconfigured crontab entry in /etc/cron.d/db-backup that specifies no user field (defaults to root) instead of the backup user.", + "clue_fingerprint": { + "description": "Disk is filling in /var/backups/db/. Files in that directory are owned by root. The backup service log shows permission denied when trying to delete old files. /etc/cron.d/db-backup has no user field on the job line — it defaults to root. /etc/passwd shows a backup-agent user exists. 
The correct entry should specify backup-agent as the executing user.", + "evidence": [ + { "type": "disk_usage_above", "vm": "web_server", "path": "/var/backups", "threshold_percent": 80 }, + { "type": "file_owner_is_not", "vm": "web_server", "path": "/var/backups/db", "expected_user": "backup-agent" }, + { "type": "log_contains", "vm": "web_server", "path": "/var/log/backup-agent.log", "contains": "Permission denied" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "db-backup.sh" } + ] + }, + "objectives": [ + { + "id": "crontab-correct-user", + "description": "The cron job runs as backup-agent, not root", + "check_mode": "passive", + "validation": { + "type": "file_contains", + "vm": "web_server", + "path": "/etc/cron.d/db-backup", + "contains": "backup-agent" + } + }, + { + "id": "backup-dir-ownership", + "description": "Existing backup files are owned by backup-agent", + "check_mode": "explicit", + "validation": { + "type": "file_owner", + "vm": "web_server", + "path": "/var/backups/db", + "user": "backup-agent", + "group": "backup-agent" + } + }, + { + "id": "disk-pressure-cleared", + "description": "Backup directory is below disk threshold", + "check_mode": "passive", + "validation": { + "type": "disk_usage_below", + "vm": "web_server", + "path": "/var/backups", + "threshold_percent": 70 + } + } + ], + "solution_branches": [ + { + "id": "full-fix", + "label": "Full Fix — User Corrected and Ownership Cleaned", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" }, + { "type": "file_owner", "vm": "web_server", "path": "/var/backups/db", "user": "backup-agent", "group": "backup-agent" }, + { "type": "disk_usage_below", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 } + ] + }, + "trust_delta": 3, + "world_flags": ["hermes_backup_healthy"], + "follow_up_dialogue": 
"marcus-Q005-complete-clean" + }, + { + "id": "cron-fixed-only", + "label": "Partial — Cron Fixed, Old Files Not Cleaned", + "priority": 50, + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" }, + { "type": "disk_usage_above", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 } + ] + }, + "trust_delta": 1, + "world_flags": ["hermes_backup_partial"], + "follow_up_incident": "I002", + "follow_up_dialogue": "marcus-Q005-complete-partial" + }, + { + "id": "disk-cleared-only", + "label": "Wrong Fix — Disk Cleared, Root Still Running Job", + "priority": 30, + "validation": { + "type": "and", + "rules": [ + { "type": "disk_usage_below", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 }, + { "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" } } + ] + }, + "trust_delta": -1, + "world_flags": ["hermes_backup_root_running", "hermes_disk_healthy"], + "follow_up_incident": "I002", + "follow_up_dialogue": "marcus-Q005-complete-wrong" + } + ], + "pressure_profile": "disk_growth_slow", + "blast_radius": ["I002"], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "narrative_phase": "unease", + "linux_concepts": ["cron", "crontab user field", "backup management", "disk usage"], + "failure_conditions": ["cron still running as root", "disk not cleared", "backup directory ownership not fixed"], + "behavior_impact": { + "full-fix": { "curiosity_delta": 1, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }, + "cron-fixed-only": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }, + "disk-cleared-only": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 1, "suspicion_delta": 1 }, + "default": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": { 
+ "id": "q005_backup_agent_history", + "description": "backup-agent home directory contains a .bash_history with unusual commands that predate the current cron misconfiguration.", + "discovery_method": "Player reads /home/backup-agent/.bash_history", + "significance": "Dale configured this cron job. The history shows it was changed deliberately, not by accident." + }, + "access_requirements": { + "minimum_access": { "web_server": "sudo" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["cron", "permissions", "backup", "disk", "web_server"], + "internal_notes": "This is the first quest where the symptom (disk full) is the same as Q003 but the cause is completely different. Players who jump to 'find the big log' will find the backup directory instead and need to dig further. The cron user field omission is a real and common mistake. The three branches reward finding the root cause vs just clearing the symptom." +} diff --git a/content/quests/Q006-time-flat-circle.json b/content/quests/Q006-time-flat-circle.json new file mode 100644 index 0000000..fba2a51 --- /dev/null +++ b/content/quests/Q006-time-flat-circle.json @@ -0,0 +1,126 @@ +{ + "id": "Q006", + "title": "Time Is A Flat Circle", + "tier": 2, + "primary_vm": "build_machine", + "required_vms": ["workstation", "build_machine"], + "ticket_id": "T006", + "baseline_snapshot": "baseline.clean", + "summary": "The build machine (vulcan, Arch Linux) has clock drift. NTP is not running because the service was disabled during a noisy audit period and never re-enabled. The clock is 40 minutes behind. As a result, pacman signature verification is failing — GPG signature timestamps appear to be in the future, which pacman treats as invalid. The player gets a ticket saying builds are broken and package installs fail. 
They need to diagnose the actual cause (clock drift), fix it (enable and start systemd-timesyncd or ntp), and then refresh the keyring.", + "clue_fingerprint": { + "description": "pacman -Syu fails with signature errors. gpg --verify on a downloaded package shows the signature timestamp is in the future relative to local time. timedatectl shows NTP is inactive and the local clock is significantly behind. journalctl -u systemd-timesyncd shows the service was stopped and disabled.", + "evidence": [ + { "type": "service_state_is", "vm": "build_machine", "service": "systemd-timesyncd", "state": "inactive" }, + { "type": "service_enabled_is", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": false }, + { "type": "log_contains", "vm": "build_machine", "path": "/var/log/pacman.log", "contains": "invalid or corrupted package (PGP signature)" } + ] + }, + "objectives": [ + { + "id": "ntp-running", + "description": "Time synchronization is active", + "check_mode": "passive", + "validation": { + "type": "or", + "rules": [ + { "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" }, + { "type": "service_state", "vm": "build_machine", "service": "ntpd", "state": "active" }, + { "type": "service_state", "vm": "build_machine", "service": "chronyd", "state": "active" } + ] + } + }, + { + "id": "ntp-enabled", + "description": "Time synchronization is enabled on boot", + "check_mode": "passive", + "validation": { + "type": "or", + "rules": [ + { "type": "service_enabled", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": true }, + { "type": "service_enabled", "vm": "build_machine", "service": "ntpd", "enabled": true }, + { "type": "service_enabled", "vm": "build_machine", "service": "chronyd", "enabled": true } + ] + } + }, + { + "id": "package-installs-work", + "description": "Package manager can install without signature errors", + "check_mode": "explicit", + "validation": { + "type": "and", + "rules": [ + 
{ + "type": "or", + "rules": [ + { "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" }, + { "type": "service_state", "vm": "build_machine", "service": "ntpd", "state": "active" } + ] + }, + { "type": "package_installed", "vm": "build_machine", "package": "archlinux-keyring", "installed": true } + ] + } + } + ], + "solution_branches": [ + { + "id": "timesyncd-enabled-keyring-refreshed", + "label": "Full Fix — NTP Enabled and Keyring Refreshed", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { + "type": "or", + "rules": [ + { "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" }, + { "type": "service_state", "vm": "build_machine", "service": "ntpd", "state": "active" } + ] + }, + { + "type": "or", + "rules": [ + { "type": "service_enabled", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": true }, + { "type": "service_enabled", "vm": "build_machine", "service": "ntpd", "enabled": true } + ] + }, + { "type": "package_installed", "vm": "build_machine", "package": "archlinux-keyring", "installed": true } + ] + }, + "trust_delta": 3, + "world_flags": ["vulcan_ntp_healthy", "vulcan_builds_healthy"], + "follow_up_dialogue": "marcus-Q006-complete-clean" + }, + { + "id": "ntp-running-not-enabled", + "label": "Running But Not Enabled at Boot", + "priority": 50, + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" }, + { "type": "service_enabled", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": false } + ] + }, + "trust_delta": 1, + "world_flags": ["vulcan_ntp_fragile", "vulcan_builds_healthy"], + "follow_up_dialogue": "marcus-Q006-complete-fragile" + } + ], + "pressure_profile": null, + "blast_radius": [], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "narrative_phase": "unease", + "linux_concepts": ["NTP", 
"systemd-timesyncd", "Arch Linux", "pacman", "package keyring"], + "failure_conditions": ["NTP not enabled at boot", "package manager still failing signature checks"], + "behavior_impact": { + "default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": null, + "access_requirements": { + "minimum_access": { "build_machine": "sudo" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["ntp", "time", "pacman", "arch", "build_machine", "services"], + "internal_notes": "First quest on vulcan. Introduces Arch Linux and pacman. The clock drift → GPG failure chain is real and genuinely confusing the first time you encounter it. The use of `or` on the NTP objective allows systemd-timesyncd, ntpd, or chronyd — any of them fixes the problem. The explicit check on package installs requires the player to confirm things work, not just that NTP is running." +} diff --git a/content/quests/Q007-security-theater.json b/content/quests/Q007-security-theater.json new file mode 100644 index 0000000..04bab96 --- /dev/null +++ b/content/quests/Q007-security-theater.json @@ -0,0 +1,133 @@ +{ + "id": "Q007", + "title": "Security Theater", + "tier": 2, + "primary_vm": "web_server", + "required_vms": ["workstation", "web_server"], + "ticket_id": "T007", + "baseline_snapshot": "baseline.post-q004", + "summary": "Someone ran a hardening script on hermes that set AllowUsers in sshd_config to only allow a single user: deploy-bot. Now the web-admin group cannot SSH in. Priya filed the ticket after her access was blocked mid-incident response. The AllowUsers directive is correct in intent (locking down SSH) but was applied too aggressively — it needs to include the web-admin group or the relevant users. The player must fix sshd_config and reload sshd without breaking service continuity. 
Complication: the player must not lock themselves out during the fix, and they must validate that the specific users Priya listed can still SSH.", + "clue_fingerprint": { + "description": "SSH connection attempts from web-admin accounts fail with 'Permission denied'. sshd_config contains 'AllowUsers deploy-bot' with no other entries. /etc/group shows web-admin group members. The hardening script is in /opt/security/harden-ssh.sh and its log shows it ran last night.", + "evidence": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowUsers deploy-bot" }, + { "type": "log_contains", "vm": "web_server", "path": "/var/log/auth.log", "contains": "User priya from" }, + { "type": "file_exists", "vm": "web_server", "path": "/opt/security/harden-ssh.sh" } + ] + }, + "objectives": [ + { + "id": "sshd-config-corrected", + "description": "sshd_config allows the web-admin group or its members", + "check_mode": "passive", + "validation": { + "type": "or", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups web-admin" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "priya" } + ] + } + }, + { + "id": "sshd-still-running", + "description": "sshd remains active after config change", + "check_mode": "passive", + "validation": { + "type": "service_state", + "vm": "web_server", + "service": "sshd", + "state": "active" + } + }, + { + "id": "deploy-bot-still-allowed", + "description": "deploy-bot access is preserved", + "check_mode": "passive", + "validation": { + "type": "or", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "deploy-bot" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups" } + ] + } + } + ], + "solution_branches": [ + { + "id": "group-based-config", + "label": "Proper Fix — Group-Based 
AllowGroups", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups web-admin" }, + { "type": "service_state", "vm": "web_server", "service": "sshd", "state": "active" }, + { "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowUsers" } } + ] + }, + "trust_delta": 4, + "world_flags": ["hermes_ssh_hardened_correct", "priya_access_restored"], + "follow_up_dialogue": "priya-Q007-complete-clean", + "follow_up_dialogues": ["marcus-Q007-complete-clean"], + "_note": "Best fix. Switches from AllowUsers (fragile, breaks with new users) to AllowGroups (durable, group membership handles access). Trust bump is higher because this is the approach that will scale." + }, + { + "id": "allowusers-expanded", + "label": "Acceptable Fix — AllowUsers Expanded", + "priority": 60, + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "priya" }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "deploy-bot" }, + { "type": "service_state", "vm": "web_server", "service": "sshd", "state": "active" } + ] + }, + "trust_delta": 1, + "world_flags": ["hermes_ssh_allowusers_fragile", "priya_access_restored"], + "follow_up_dialogue": "priya-Q007-complete-fragile", + "follow_up_dialogues": ["marcus-Q007-complete-fragile"], + "_note": "Access is restored but using AllowUsers. Every future new user will need to be manually added. Marcus or Priya will note this later." 
+ }, + { + "id": "hardening-removed", + "label": "Regression — SSH Restriction Removed Entirely", + "priority": 200, + "validation": { + "type": "and", + "rules": [ + { "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowUsers" } }, + { "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups" } }, + { "type": "service_state", "vm": "web_server", "service": "sshd", "state": "active" } + ] + }, + "trust_delta": -3, + "world_flags": ["hermes_ssh_unrestricted", "priya_access_restored"], + "follow_up_dialogue": "priya-Q007-complete-regression", + "follow_up_dialogues": ["marcus-Q007-complete-regression"], + "_note": "Player fixed access by removing all restrictions. Priya's access works but the hardening is gone. This is the worst valid outcome — Priya is back in but so is everyone else." + } + ], + "pressure_profile": "access_blocked_escalation", + "blast_radius": [], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "narrative_phase": "suspicion", + "linux_concepts": ["sshd_config", "AllowGroups", "AllowUsers", "SSH access hardening"], + "failure_conditions": ["Priya still locked out", "SSH restrictions removed entirely"], + "behavior_impact": { + "default": { "curiosity_delta": 1, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": { + "id": "q007_dale_ssh_key", + "description": "An SSH key in hermes /root/.ssh/authorized_keys does not match any current staff. The fingerprint matches no documented key.", + "discovery_method": "Player reads /root/.ssh/authorized_keys on hermes", + "significance": "Dale had root SSH access to hermes that was never formally revoked." 
+ }, + "access_requirements": { + "minimum_access": { "web_server": "sudo" }, + "requires_root": false, + "temporary_grants_allowed": ["sudo:web_server:sshd"] + }, + "tags": ["ssh", "security", "hardening", "sshd", "web_server"], + "internal_notes": "This quest introduces Priya as a character and establishes that the player's fixes can have security implications, not just operational ones. The 'regression' branch should feel bad — Priya's grateful but Marcus or a later audit will surface it. The proper fix (AllowGroups) tests whether the player knows the difference between AllowUsers and AllowGroups. The sshd reload vs restart distinction matters here — a player who restarts sshd drops existing connections, which is more disruptive than reload." +} diff --git a/content/quests/Q008-bad-upstream.json b/content/quests/Q008-bad-upstream.json new file mode 100644 index 0000000..f924a71 --- /dev/null +++ b/content/quests/Q008-bad-upstream.json @@ -0,0 +1,129 @@ +{ + "id": "Q008", + "title": "Bad Upstream", + "tier": 2, + "primary_vm": "web_server", + "required_vms": ["workstation", "web_server", "build_machine"], + "ticket_id": "T008", + "baseline_snapshot": "baseline.post-q006", + "summary": "The internal package repository on vulcan is serving a broken version of the axiomworks-app package. A deploy on hermes pulled it in through the internal apt repo and the app is now crashing on startup. The player needs to identify that the problem is in the package (not the app config), trace it back to vulcan, find the broken build artifact, and either roll back the package on hermes or fix the build and republish. This is the first multi-VM quest — investigation crosses from hermes to vulcan.", + "clue_fingerprint": { + "description": "The app service (axiomworks-app) on hermes is failing. journalctl shows it exits immediately with a non-zero code. The package was updated yesterday via the internal repo at http://vulcan.internal/repo. 
On vulcan, /srv/repo/axiomworks-app_2.1.1-1_amd64.deb is present but was built from a broken source tarball. The previous version 2.1.0-1 is also in /srv/repo/ and works correctly.", + "evidence": [ + { "type": "service_state_is", "vm": "web_server", "service": "axiomworks-app", "state": "failed" }, + { "type": "log_contains", "vm": "web_server", "path": "/var/log/axiomworks-app.log", "contains": "Exec format error" }, + { "type": "file_exists", "vm": "build_machine", "path": "/srv/repo/axiomworks-app_2.1.0-1_amd64.deb" }, + { "type": "file_exists", "vm": "build_machine", "path": "/srv/repo/axiomworks-app_2.1.1-1_amd64.deb" } + ] + }, + "objectives": [ + { + "id": "app-running", + "description": "axiomworks-app is active and running", + "check_mode": "passive", + "validation": { + "type": "service_state", + "vm": "web_server", + "service": "axiomworks-app", + "state": "active" + } + }, + { + "id": "app-port-listening", + "description": "App is accepting connections on expected port", + "check_mode": "passive", + "validation": { + "type": "port_listening", + "vm": "web_server", + "port": 8080, + "protocol": "tcp", + "listening": true + } + } + ], + "solution_branches": [ + { + "id": "rollback-and-pin", + "label": "Rollback to 2.1.0 and Pin Version", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "web_server", "service": "axiomworks-app", "state": "active" }, + { "type": "port_listening", "vm": "web_server", "port": 8080, "protocol": "tcp", "listening": true }, + { "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.0", "installed": true }, + { "type": "file_contains", "vm": "web_server", "path": "/etc/apt/preferences.d/axiomworks-app", "contains": "Pin: version 2.1.0" } + ] + }, + "trust_delta": 3, + "world_flags": ["hermes_app_running", "hermes_app_pinned_2-1-0", "vulcan_bad_build_known"], + "follow_up_dialogue": "marcus-Q008-complete-rollback", + "follow_up_dialogues": 
["sarah-Q008-complete-pinned"], + "_note": "Distinguished from rollback-only by an apt pin on hermes. The player must create an apt preferences file after rolling back." + }, + { + "id": "rebuild-and-redeploy", + "label": "Rebuild on Vulcan and Redeploy", + "priority": 80, + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "web_server", "service": "axiomworks-app", "state": "active" }, + { "type": "port_listening", "vm": "web_server", "port": 8080, "protocol": "tcp", "listening": true }, + { "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.1", "installed": true }, + { "type": "file_exists", "vm": "build_machine", "path": "/srv/repo/axiomworks-app_2.1.1-2_amd64.deb" } + ] + }, + "trust_delta": 4, + "world_flags": ["hermes_app_running", "vulcan_build_fixed"], + "follow_up_dialogue": "marcus-Q008-complete-rebuild", + "follow_up_dialogues": ["sarah-Q008-complete-rebuilt"], + "_note": "Player fixed the build on vulcan and redeployed the corrected 2.1.1 package. This is the most thorough fix and gets highest trust, but is harder and requires understanding both machines. The rebuilt .deb increments the Debian revision from -1 to -2." 
+ }, + { + "id": "rollback-only", + "label": "Rollback Only — Version Not Pinned", + "priority": 60, + "validation": { + "type": "and", + "rules": [ + { "type": "service_state", "vm": "web_server", "service": "axiomworks-app", "state": "active" }, + { "type": "port_listening", "vm": "web_server", "port": 8080, "protocol": "tcp", "listening": true }, + { "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.0", "installed": true }, + { "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/apt/preferences.d/axiomworks-app", "contains": "Pin: version 2.1.0" } } + ] + }, + "trust_delta": 1, + "world_flags": ["hermes_app_running", "vulcan_bad_build_known"], + "follow_up_incident": "I003", + "follow_up_dialogue": "marcus-Q008-complete-unpinned", + "follow_up_dialogues": ["sarah-Q008-complete-unpinned"], + "_note": "App is running on 2.1.0 but not pinned. No apt preferences pin exists on hermes. The next apt upgrade will pull 2.1.1 back in. I003 re-breaks the app on the next update cycle. The not-rule on the pin file ensures this branch cannot match when rollback-and-pin already matches." 
+ } + ], + "pressure_profile": "app_outage_escalation", + "blast_radius": ["I003"], + "unlock_requirements": [ + "world_flag:player_ssh_configured", + "world_flag:vulcan_ntp_healthy" + ], + "narrative_phase": "suspicion", + "linux_concepts": ["apt", "package pinning", "apt preferences", "internal package mirror", "build pipeline"], + "failure_conditions": ["axiomworks-app still broken", "bad package not traced to build machine"], + "behavior_impact": { + "default": { "curiosity_delta": 1, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 } + }, + "hidden_hook": { + "id": "q008_build_log_anomaly", + "description": "vulcan's build log for 2.1.1 shows it was triggered by a manual invocation, not the automated pipeline, at 02:14.", + "discovery_method": "Player reads /var/log/build-pipeline.log on vulcan and notices the timestamp and manual trigger field", + "significance": "The bad build was triggered manually. Someone made the broken build, and it was not the pipeline." + }, + "access_requirements": { + "minimum_access": { "build_machine": "sudo", "web_server": "sudo" }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "tags": ["packages", "builds", "multi-vm", "web_server", "build_machine", "deploy"], + "internal_notes": "This is the first quest that requires the player to move between two target VMs — hermes and vulcan. The symptom is on hermes but the root cause is on vulcan. Players who don't follow the package trail will spend a long time on hermes looking for a config problem that isn't there. The rebuild branch requires understanding the package build enough to fix the source input and republish a corrected .deb — it's hard but rewarding. The rollback branches are now correctly differentiated: rollback-and-pin requires an apt preferences pin on hermes, and rollback-only explicitly requires its absence via a not-rule." 
+} diff --git a/content/sage-articles/_index.json b/content/sage-articles/_index.json new file mode 100644 index 0000000..70a4bfc --- /dev/null +++ b/content/sage-articles/_index.json @@ -0,0 +1,29 @@ +{ + "categories": [ + { + "id": "access", + "label": "Access & Authentication", + "articles": ["ssh-keys", "ssh-access-controls"] + }, + { + "id": "web", + "label": "Web Services", + "articles": ["nginx-config"] + }, + { + "id": "storage", + "label": "Storage & Logs", + "articles": ["disk-logs"] + }, + { + "id": "sysadmin", + "label": "System Administration", + "articles": ["file-permissions", "cron-jobs", "time-sync"] + }, + { + "id": "packages", + "label": "Package Management", + "articles": ["package-management"] + } + ] +} diff --git a/content/sage-articles/cron-jobs.json b/content/sage-articles/cron-jobs.json new file mode 100644 index 0000000..73cda20 --- /dev/null +++ b/content/sage-articles/cron-jobs.json @@ -0,0 +1,40 @@ +{ + "id": "cron-jobs", + "title": "Cron Jobs & Scheduled Tasks", + "category": "sysadmin", + "tags": ["cron", "crontab", "schedule", "backup", "automation"], + "updated": "2025-12-01", + "summary": "Cron syntax, user vs system crons, and common failure modes.", + "sections": [ + { + "heading": "Cron Syntax", + "body": "

A crontab entry has five time fields followed by the command:

", + "code": "# ┌─── minute (0–59)\n# │ ┌─── hour (0–23)\n# │ │ ┌─── day of month (1–31)\n# │ │ │ ┌─── month (1–12)\n# │ │ │ │ ┌─── day of week (0–7, 0 and 7 are Sunday)\n# │ │ │ │ │\n * * * * * /path/to/command\n\n# Examples:\n0 2 * * * /usr/local/bin/backup.sh # 2am every day\n*/15 * * * * /usr/local/bin/check.sh # every 15 minutes\n0 0 1 * * /usr/local/bin/monthly.sh # midnight on the 1st" + }, + { + "heading": "User Crontabs", + "body": "

Each user can have their own crontab. Commands run as that user.

", + "code": "crontab -e # edit your crontab\ncrontab -l # list your crontab\ncrontab -l -u alice # list alice's crontab (root only)\ncrontab -r # delete your crontab (dangerous—no confirmation)" + }, + { + "heading": "System Cron Directories", + "body": "

Scripts dropped into these directories run at the corresponding interval without needing a crontab entry:

", + "code": "/etc/cron.daily/\n/etc/cron.weekly/\n/etc/cron.monthly/\n/etc/cron.hourly/\n\n# Scripts here must be executable and owned by root.\n# They must NOT have a file extension—run-parts ignores files with dots in the name." + }, + { + "heading": "Ownership and the PATH Problem", + "body": "

Two common failure modes:

Wrong owner: A cron script in /etc/cron.daily/ must be owned by root. If it is owned by another user, run-parts may skip it.

Missing PATH: Cron does not source .bashrc or .profile. Commands that work interactively may fail in cron because the PATH only contains /usr/bin:/bin. Always use full paths in cron scripts, or set PATH explicitly at the top of the script.

", + "code": "#!/bin/bash\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n..." + }, + { + "heading": "Checking If a Cron Ran", + "body": "", + "code": "# Check syslog or the cron-specific log\ngrep CRON /var/log/syslog | tail -20\ncat /var/log/cron.log # if separate cron log is configured\n\n# Check journald\njournalctl -u cron --since \"1 hour ago\"" + }, + { + "heading": "Capturing Cron Output", + "body": "

By default, cron mails output to the user. On servers with no mail configured, errors disappear silently. Redirect to a log file instead:

", + "code": "0 2 * * * /usr/local/bin/backup.sh >> /var/log/backup.log 2>&1" + } + ] +} diff --git a/content/sage-articles/disk-logs.json b/content/sage-articles/disk-logs.json new file mode 100644 index 0000000..61b7b41 --- /dev/null +++ b/content/sage-articles/disk-logs.json @@ -0,0 +1,43 @@ +{ + "id": "disk-logs", + "title": "Disk Space & Log Rotation", + "category": "storage", + "tags": ["disk", "df", "du", "logs", "logrotate", "cleanup"], + "updated": "2025-08-22", + "summary": "Finding what is filling the disk and keeping logs from growing unbounded.", + "sections": [ + { + "heading": "Checking Disk Usage", + "body": "

df shows you how full each filesystem is. du tells you where the space went.

", + "code": "df -h # human-readable filesystem summary\ndf -h /var/log # check a specific mount\n\ndu -sh /var/log/* # top-level breakdown of /var/log\ndu -sh /var/* | sort -rh # sort by size, largest first\ndu -sh /var/log/*.log # sizes of individual log files" + }, + { + "heading": "Finding Large Files", + "body": "

When du does not point at an obvious culprit:

", + "code": "# Files over 100MB anywhere on the system\nfind / -xdev -size +100M -type f 2>/dev/null\n\n# Files in /var that have grown recently\nfind /var -xdev -mtime -1 -size +10M -type f 2>/dev/null" + }, + { + "heading": "Emergency Cleanup", + "body": "

If disk is at 100% and a service is failing because of it:

", + "code": "# Truncate a log file without deleting it (safe for running processes)\ntruncate -s 0 /var/log/nginx/access.log\n\n# Remove old compressed logs (the .gz files are already rotated)\nrm /var/log/nginx/*.gz\n\n# Clear journald logs older than 2 days\njournalctl --vacuum-time=2d" + }, + { + "heading": "logrotate Basics", + "body": "

logrotate is the standard tool for rotating and compressing logs on a schedule. It is usually run daily from cron. Config files live in /etc/logrotate.d/—one file per service.

" + }, + { + "heading": "Writing a logrotate Config", + "body": "

Example for an nginx access log:

", + "code": "/var/log/nginx/access.log {\n daily\n rotate 14\n compress\n delaycompress\n missingok\n notifempty\n sharedscripts\n postrotate\n /bin/kill -USR1 $(cat /run/nginx.pid 2>/dev/null) 2>/dev/null || true\n endscript\n}" + }, + { + "heading": "Testing logrotate", + "body": "

Run logrotate manually in debug mode to verify a config without actually rotating anything:

", + "code": "logrotate -d /etc/logrotate.d/nginx\n\n# To force a rotation right now (useful for testing):\nlogrotate -f /etc/logrotate.d/nginx" + }, + { + "heading": "Key logrotate Directives", + "body": "
Directive | Meaning
daily/weekly/monthly | Rotation frequency
rotate N | Keep N old copies
compress | gzip old files
delaycompress | Skip compressing the most recent rotation (useful when the app still has it open)
missingok | Do not error if the log file does not exist
notifempty | Skip rotation if the file is empty
size 100M | Rotate when file exceeds this size instead of on schedule
" + } + ] +} diff --git a/content/sage-articles/file-permissions.json b/content/sage-articles/file-permissions.json new file mode 100644 index 0000000..c2dd419 --- /dev/null +++ b/content/sage-articles/file-permissions.json @@ -0,0 +1,37 @@ +{ + "id": "file-permissions", + "title": "File Ownership & Permissions", + "category": "sysadmin", + "tags": ["chown", "chmod", "permissions", "ownership", "ls"], + "updated": "2025-10-07", + "summary": "Understanding and fixing file ownership and permission bits.", + "sections": [ + { + "heading": "Reading the Permission String", + "body": "

Run ls -l to see permissions. The first column looks like -rwxr-xr--.

  • First character: - file, d directory, l symlink
  • Next three: owner read/write/execute
  • Next three: group read/write/execute
  • Last three: others read/write/execute

r=4, w=2, x=1. Add them up for octal notation: rwx=7, rw-=6, r--=4.

" + }, + { + "heading": "chown — Changing Ownership", + "body": "

Change the owner and/or group of a file or directory.

", + "code": "chown user file # change owner only\nchown user:group file # change owner and group\nchown :group file # change group only\n\n# Recursive — change everything under a directory\nchown -R user:group /path/to/dir" + }, + { + "heading": "chmod — Changing Permissions", + "body": "", + "code": "chmod 644 file.txt # rw-r--r-- (typical for files)\nchmod 755 /usr/local/bin/app # rwxr-xr-x (typical for executables)\nchmod 700 ~/.ssh # rwx------ (private directory)\nchmod 600 ~/.ssh/authorized_keys # rw------- (private file)\n\n# Recursive\nchmod -R 755 /var/www/html\n\n# Symbolic form (add execute for owner only)\nchmod u+x script.sh" + }, + { + "heading": "Common Patterns", + "body": "
Mode | Numeric | Typical use
rw-r--r-- | 644 | Regular files, config files
rwxr-xr-x | 755 | Directories, executables
rwx------ | 700 | Private directories (e.g. ~/.ssh)
rw------- | 600 | Private files (e.g. private keys, authorized_keys)
rwxrwxr-x | 775 | Shared directories where the group needs write access
" + }, + { + "heading": "Checking Who Owns What", + "body": "", + "code": "ls -la /var/www/html # list with ownership\nstat file.txt # detailed file metadata\nfind /path -user root # find files owned by root\nfind /path -not -user deploy # find files NOT owned by deploy" + }, + { + "heading": "A Note on Recursive chown", + "body": "

When you run chown -R, it changes everything under the path—including files and subdirectories that may have intentionally different ownership. Know what you are targeting before running it on a live system. Check with ls -laR or find first.

" + } + ] +} diff --git a/content/sage-articles/nginx-config.json b/content/sage-articles/nginx-config.json new file mode 100644 index 0000000..3b230e2 --- /dev/null +++ b/content/sage-articles/nginx-config.json @@ -0,0 +1,38 @@ +{ + "id": "nginx-config", + "title": "nginx Configuration", + "category": "web", + "tags": ["nginx", "config", "syntax", "reload", "vhost"], + "updated": "2025-09-18", + "summary": "nginx config structure, common syntax errors, and safe reload procedure.", + "sections": [ + { + "heading": "Config File Layout", + "body": "

nginx uses a block-based config syntax. The main file is /etc/nginx/nginx.conf. Site configs live in /etc/nginx/sites-available/ and are symlinked into /etc/nginx/sites-enabled/ to activate them.

Every block opens with { and closes with }. Every directive ends with ;. Missing either one will fail the syntax check.

" + }, + { + "heading": "Testing Config Before Reloading", + "body": "

Always test before reloading. A bad config will prevent nginx from reloading, but it will not take down the running process—the old config stays live.

", + "code": "nginx -t\n# or\nnginx -T # prints the full parsed config" + }, + { + "heading": "Reloading vs Restarting", + "body": "

Use reload, not restart. Reload applies the new config without dropping existing connections.

", + "code": "systemctl reload nginx\n\n# Only use restart if you have to—it drops active connections.\nsystemctl restart nginx" + }, + { + "heading": "Common Syntax Errors", + "body": "
  • Missing semicolon at the end of a directive
  • Missing closing brace } on a block
  • Typo in a directive name (nginx will report \"unknown directive\")
  • Referencing a cert file or log path that does not exist
  • More than one server block marked default_server for the same listen address and port (several vhosts sharing a port is fine; two defaults for it is an error)

The error message from nginx -t includes the file name and line number. Read it.

" + }, + { + "heading": "Useful Log Paths", + "body": "

Default paths on Debian/Ubuntu:

", + "code": "/var/log/nginx/error.log\n/var/log/nginx/access.log\n\n# Per-vhost logs are usually defined in the server block:\naccess_log /var/log/nginx/mysite.access.log;\nerror_log /var/log/nginx/mysite.error.log;" + }, + { + "heading": "Quick Vhost Template", + "body": "

Minimal working vhost for a static site:

", + "code": "server {\n listen 80;\n server_name example.internal;\n\n root /var/www/example;\n index index.html;\n\n location / {\n try_files $uri $uri/ =404;\n }\n\n access_log /var/log/nginx/example.access.log;\n error_log /var/log/nginx/example.error.log;\n}" + } + ] +} diff --git a/content/sage-articles/package-management.json b/content/sage-articles/package-management.json new file mode 100644 index 0000000..f913cdf --- /dev/null +++ b/content/sage-articles/package-management.json @@ -0,0 +1,49 @@ +{ + "id": "package-management", + "title": "Package Management & Version Pinning", + "category": "packages", + "tags": ["apt", "pacman", "packages", "pinning", "rollback", "IgnorePkg"], + "updated": "2026-01-08", + "summary": "Installing, rolling back, and pinning packages on Debian and Arch Linux.", + "sections": [ + { + "heading": "Debian / Ubuntu (apt)", + "body": "

Most commands need root.

", + "code": "apt update # refresh package list\napt install nginx # install\napt remove nginx # remove (keep config)\napt purge nginx # remove + delete config\napt list --installed # list installed packages\napt show nginx # info about a package\ndpkg -l | grep nginx # alternative listing" + }, + { + "heading": "Listing Available Versions (Debian)", + "body": "", + "code": "apt-cache policy nginx\n# Shows installed version, candidate version, and all available versions by priority" + }, + { + "heading": "Installing a Specific Version (Debian)", + "body": "", + "code": "apt install nginx=1.22.1-9\n# Use apt-cache policy to find the exact version string first" + }, + { + "heading": "Pinning a Package (Debian)", + "body": "

Pinning prevents apt from upgrading a specific package. Create or edit a file under /etc/apt/preferences.d/:

", + "code": "# /etc/apt/preferences.d/nginx-pin\nPackage: nginx\nPin: version 1.22.1-9\nPin-Priority: 1001\n\n# Priority > 1000 = keep this version even if newer is available\n# After creating the file:\napt-mark hold nginx # belt-and-suspenders hold\napt-cache policy nginx # verify the pin took effect" + }, + { + "heading": "Arch Linux (pacman)", + "body": "", + "code": "pacman -Syu # update all\npacman -S nginx # install\npacman -R nginx # remove\npacman -Rs nginx # remove + unneeded deps\npacman -Q | grep nginx # list installed\npacman -Qi nginx # info about installed package" + }, + { + "heading": "Rolling Back a Package (Arch)", + "body": "

Arch keeps a package cache in /var/cache/pacman/pkg/. If the current package broke something:

", + "code": "ls /var/cache/pacman/pkg/nginx*\n# Find the version you want, then:\npacman -U /var/cache/pacman/pkg/nginx-1.24.0-1-x86_64.pkg.tar.zst" + }, + { + "heading": "Preventing Upgrades (Arch — IgnorePkg)", + "body": "

After rolling back, prevent the package from upgrading on the next pacman -Syu:

", + "code": "# /etc/pacman.conf\n[options]\n...\nIgnorePkg = nginx\n\n# Verify:\npacman -Syu\n# Should print: warning: nginx: ignoring package upgrade (1.24.0-1 => 1.25.x-y)" + }, + { + "heading": "When to Pin vs When to Fix", + "body": "

Pinning is a stop-gap, not a solution. Document why you pinned it and set a reminder to revisit. A pinned package stops receiving security updates. If the upstream bug is fixed in a newer minor version, upgrade to that instead of staying pinned indefinitely.

" + } + ] +} diff --git a/content/sage-articles/ssh-access-controls.json b/content/sage-articles/ssh-access-controls.json new file mode 100644 index 0000000..2248b47 --- /dev/null +++ b/content/sage-articles/ssh-access-controls.json @@ -0,0 +1,39 @@ +{ + "id": "ssh-access-controls", + "title": "SSH Server Access Controls", + "category": "access", + "tags": ["ssh", "sshd_config", "AllowUsers", "AllowGroups", "security", "hardening"], + "updated": "2025-10-29", + "summary": "Restricting who can SSH in using sshd_config directives.", + "sections": [ + { + "heading": "The Config File", + "body": "

SSH server configuration lives in /etc/ssh/sshd_config. Drop-in overrides can go in /etc/ssh/sshd_config.d/*.conf.

Always test your config before reloading:

", + "code": "sshd -t\n# If it prints nothing and exits 0, the config is valid.\nsystemctl reload ssh" + }, + { + "heading": "AllowUsers and AllowGroups", + "body": "

These are whitelist directives. If either is set, only matching users or group members can log in; if both are set, a user must pass both. If neither is set, any account may attempt to log in.

", + "code": "# Only these users may log in\nAllowUsers alice bob deploy\n\n# Only members of these groups may log in\nAllowGroups sshusers ops\n\n# Combining: user must match AllowUsers AND (if AllowGroups is set) be in an allowed group\n# These are independent filters—if both are set, a user must satisfy both." + }, + { + "heading": "DenyUsers and DenyGroups", + "body": "

Blacklist alternatives. DenyUsers and DenyGroups are checked before Allow rules.

Prefer AllowUsers/AllowGroups over Deny lists—it is safer to enumerate who can log in than who cannot.

" + }, + { + "heading": "Other Common Restrictions", + "body": "", + "code": "# Disable root login entirely (recommended)\nPermitRootLogin no\n\n# Disable password authentication (once keys are working)\nPasswordAuthentication no\n\n# Change the listening port (minor obscurity, not real security)\nPort 2222\n\n# Restrict to specific network interface\nListenAddress 10.42.0.1\n\n# Idle session timeout (seconds × count before disconnect)\nClientAliveInterval 300\nClientAliveCountMax 2" + }, + { + "heading": "Match Blocks", + "body": "

You can apply different rules to specific users, groups, or source addresses:

", + "code": "# Allow password auth only from the management network\nMatch Address 10.42.0.0/24\n PasswordAuthentication yes\n\n# Give one user a restricted shell\nMatch User backup-agent\n ForceCommand /usr/local/bin/backup-only\n AllowTcpForwarding no" + }, + { + "heading": "Checking Who Has Access", + "body": "

There is no built-in command to list all users who currently satisfy the access rules. Check manually:

", + "code": "# Current AllowUsers/AllowGroups settings\ngrep -iE '(AllowUsers|AllowGroups|DenyUsers|DenyGroups)' /etc/ssh/sshd_config\n\n# Members of a group\ngetent group sshusers\n\n# All users with a valid shell (can SSH in if no restrictions)\ngrep -v '/nologin\\|/false' /etc/passwd" + } + ] +} diff --git a/content/sage-articles/ssh-keys.json b/content/sage-articles/ssh-keys.json new file mode 100644 index 0000000..6dc8186 --- /dev/null +++ b/content/sage-articles/ssh-keys.json @@ -0,0 +1,38 @@ +{ + "id": "ssh-keys", + "title": "SSH Key Authentication", + "category": "access", + "tags": ["ssh", "authorized_keys", "keys", "permissions"], + "updated": "2025-11-03", + "summary": "How SSH key auth works and how to set it up correctly.", + "sections": [ + { + "heading": "How It Works", + "body": "

SSH key authentication replaces passwords with a cryptographic key pair. The private key stays on your machine. The public key goes into ~/.ssh/authorized_keys on the target host. When you connect, the server checks whether your private key corresponds to one of the public keys it trusts.

There is no password transmitted. Either the key matches or the connection fails.

" + }, + { + "heading": "Generating a Key Pair", + "body": "

Use ed25519 unless something forces you onto RSA. It is smaller and more secure.

", + "code": "ssh-keygen -t ed25519 -C \"your-comment-here\"\n# Accept the default path (~/.ssh/id_ed25519) or specify one.\n# Passphrase is optional but recommended for keys that leave your machine." + }, + { + "heading": "Installing the Public Key", + "body": "

Copy the public key to the remote host:

", + "code": "# Option 1 — if password auth is still working\nssh-copy-id -i ~/.ssh/id_ed25519.pub user@host\n\n# Option 2 — manually\ncat ~/.ssh/id_ed25519.pub >> ~/.ssh/authorized_keys" + }, + { + "heading": "File and Directory Permissions", + "body": "

This is the most common reason key auth fails. SSH will silently reject keys if the permissions are too open.

", + "code": "chmod 700 ~/.ssh\nchmod 600 ~/.ssh/authorized_keys\nchown -R youruser:youruser ~/.ssh" + }, + { + "heading": "Troubleshooting", + "body": "

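Run ssh -v user@host for verbose output. The authentication attempts are logged near the end of the output, after the connection and key-exchange lines. If the client only shows the key being rejected, the reason is in the sshd log on the server (journalctl -u ssh).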

Common causes:

  • authorized_keys file has wrong permissions (see above)
  • ~/.ssh directory is world-writable
  • authorized_keys file does not exist
  • The file exists but is empty or the key was pasted with a line break in the middle
  • sshd_config has PubkeyAuthentication no
" + }, + { + "heading": "Checking the sshd Config", + "body": "

Relevant lines in /etc/ssh/sshd_config:

", + "code": "PubkeyAuthentication yes\nAuthorizedKeysFile .ssh/authorized_keys\n\n# After editing sshd_config, test before reloading:\nsshd -t\nsystemctl reload ssh" + } + ] +} diff --git a/content/sage-articles/time-sync.json b/content/sage-articles/time-sync.json new file mode 100644 index 0000000..2afdc9a --- /dev/null +++ b/content/sage-articles/time-sync.json @@ -0,0 +1,44 @@ +{ + "id": "time-sync", + "title": "System Time & NTP", + "category": "sysadmin", + "tags": ["ntp", "time", "timedatectl", "timesyncd", "chrony", "drift"], + "updated": "2025-07-14", + "summary": "Keeping system clocks accurate and diagnosing time drift.", + "sections": [ + { + "heading": "Why System Time Matters", + "body": "

Clocks that drift cause more problems than you expect: SSL certificate validation failures, log timestamps that do not correlate across machines, cron jobs that fire at the wrong time, authentication tokens that expire prematurely, and package signature checks that fail.

On a server, time should be correct to within a second. Most NTP implementations keep it within milliseconds.

" + }, + { + "heading": "Checking Current Time Status", + "body": "", + "code": "timedatectl\n# Shows: local time, UTC time, timezone, NTP sync status, RTC time\n\ntimedatectl show\n# Machine-readable version of the same" + }, + { + "heading": "systemd-timesyncd", + "body": "

Most Debian/Ubuntu systems ship with systemd-timesyncd as the default NTP client. It is a lightweight SNTP implementation—adequate for most servers.

", + "code": "# Enable and start\nsystemctl enable --now systemd-timesyncd\n\n# Check sync status\ntimedatectl timesync-status\n\n# Force a resync\nsystemctl restart systemd-timesyncd\n\n# Config file (NTP servers, fallback)\ncat /etc/systemd/timesyncd.conf" + }, + { + "heading": "NTP Server Configuration", + "body": "

The default NTP servers are usually fine. If you need to change them—for example, to use an internal NTP server:

", + "code": "# /etc/systemd/timesyncd.conf\n[Time]\nNTP=ntp.internal.example.com\nFallbackNTP=0.debian.pool.ntp.org 1.debian.pool.ntp.org" + }, + { + "heading": "chrony (alternative)", + "body": "

chrony is a more capable NTP implementation. It handles intermittent network connections and large initial offsets better than timesyncd. On systems where accuracy matters:

", + "code": "apt install chrony\nsystemctl enable --now chrony\n\nchronyc tracking # current sync status\nchronyc sources -v # configured time sources and their offsets" + }, + { + "heading": "Diagnosing Time Problems", + "body": "", + "code": "# Is NTP enabled?\ntimedatectl | grep NTP\n\n# Is timesyncd active?\nsystemctl status systemd-timesyncd\n\n# Did a sync happen recently?\njournalctl -u systemd-timesyncd --since \"1 hour ago\"\n\n# What is the current offset?\ntimedatectl timesync-status | grep Offset" + }, + { + "heading": "Setting Timezone", + "body": "", + "code": "timedatectl list-timezones | grep Europe\ntimedatectl set-timezone Europe/London" + } + ] +} diff --git a/content/tickets/T001.json b/content/tickets/T001.json new file mode 100644 index 0000000..f61504e --- /dev/null +++ b/content/tickets/T001.json @@ -0,0 +1,13 @@ +{ + "id": "T001", + "from": "Marcus Webb ", + "subject": "Your workstation access", + "body": "Hey, welcome to the team. HR said you started today so I got you set up with an account on ares. The provisioning script runs automatically but it does not handle SSH keys — you will need to add yours manually. Your public key should be in the onboarding doc. Let me know if you get stuck.\n\n— Marcus", + "initial_priority": "low", + "current_priority": "low", + "target_vm": "workstation", + "linked_quest": "Q001", + "tags": ["onboarding", "ssh", "workstation"], + "deadline_behavior": "none", + "attachments": ["docs/onboarding.json"] +} diff --git a/content/tickets/T002.json b/content/tickets/T002.json new file mode 100644 index 0000000..4344531 --- /dev/null +++ b/content/tickets/T002.json @@ -0,0 +1,13 @@ +{ + "id": "T002", + "from": "Sarah Chen ", + "subject": "[prod-web] site is down", + "body": "Getting connection refused on the main site. Started about 20 minutes ago. 
Nothing changed on our end as far as I know.", + "initial_priority": "high", + "current_priority": "high", + "target_vm": "web_server", + "linked_quest": "Q002", + "tags": ["services", "web", "nginx"], + "deadline_behavior": "escalates", + "follow_up_ticket_ids": ["T002-followup"] +} diff --git a/content/tickets/T003-recurrence.json b/content/tickets/T003-recurrence.json new file mode 100644 index 0000000..83728fc --- /dev/null +++ b/content/tickets/T003-recurrence.json @@ -0,0 +1,13 @@ +{ + "id": "T003-recurrence", + "from": "Monitoring ", + "subject": "disk pressure returned on hermes", + "body": "Disk pressure has returned on hermes. /var/log/nginx/access.log is growing again and the host is trending back toward saturation.", + "initial_priority": "high", + "current_priority": "high", + "target_vm": "web_server", + "linked_quest": "Q003", + "tags": ["web", "disk", "nginx", "recurrence"], + "deadline_behavior": "escalates", + "_note": "Recurrence ticket emitted by I001 when the earlier partial fix allows log pressure to return." +} diff --git a/content/tickets/T003.json b/content/tickets/T003.json new file mode 100644 index 0000000..333d3a4 --- /dev/null +++ b/content/tickets/T003.json @@ -0,0 +1,13 @@ +{ + "id": "T003", + "from": "Dave Okonkwo ", + "subject": "is the website slow for anyone else", + "body": "Pages are loading really slowly for me. Sometimes they time out. I rebooted my laptop but it did not help. Is something wrong on the server side?", + "initial_priority": "medium", + "current_priority": "medium", + "target_vm": "web_server", + "linked_quest": "Q003", + "tags": ["web", "disk", "nginx"], + "deadline_behavior": "escalates", + "_note": "Dave is reporting symptoms of the disk being nearly full causing nginx write failures and slowdowns. He thinks it's a network issue. He is wrong but his symptom report is accurate." 
+} diff --git a/content/tickets/T004.json b/content/tickets/T004.json new file mode 100644 index 0000000..a7983af --- /dev/null +++ b/content/tickets/T004.json @@ -0,0 +1,13 @@ +{ + "id": "T004", + "from": "Sarah Chen ", + "subject": "deployment not applying", + "body": "I pushed a change this morning and the site is still showing the old version. I confirmed the deploy script ran and it said it completed successfully. But the file timestamp on the server doesn't match what I deployed. Did something change in how deploys work?", + "initial_priority": "medium", + "current_priority": "medium", + "target_vm": "web_server", + "linked_quest": "Q004", + "tags": ["deploy", "permissions", "web_server"], + "deadline_behavior": "none", + "_note": "Sarah correctly identifies the symptom but assumes the script is at fault. The script is fine. The permissions are the problem. Her description of the deploy 'completing successfully' is accurate — the script ran, it just could not overwrite root-owned files and silently skipped them." +} diff --git a/content/tickets/T005.json b/content/tickets/T005.json new file mode 100644 index 0000000..d6b1989 --- /dev/null +++ b/content/tickets/T005.json @@ -0,0 +1,16 @@ +{ + "id": "T005", + "from": "Dave Okonkwo ", + "subject": "disk warning on hermes again", + "body": "Got an alert that /var/backups is at 85%. I don't know if this is related to what was going on before. 
Probably fine but figured you should know.", + "initial_priority": "low", + "current_priority": "low", + "target_vm": "web_server", + "linked_quest": "Q005", + "tags": [ + "disk", + "backup", + "web_server" + ], + "deadline_behavior": "escalates" +} \ No newline at end of file diff --git a/content/tickets/T006.json b/content/tickets/T006.json new file mode 100644 index 0000000..986b318 --- /dev/null +++ b/content/tickets/T006.json @@ -0,0 +1,16 @@ +{ + "id": "T006", + "from": "Dave Okonkwo ", + "subject": "builds failing on vulcan", + "body": "Getting signature errors every time I try to install anything on the build machine. Tried pacman -Syu and it fails partway through. I didn't change anything. It was working yesterday.", + "initial_priority": "medium", + "current_priority": "medium", + "target_vm": "build_machine", + "linked_quest": "Q006", + "tags": [ + "pacman", + "build_machine", + "packages" + ], + "deadline_behavior": "none" +} \ No newline at end of file diff --git a/content/tickets/T007.json b/content/tickets/T007.json new file mode 100644 index 0000000..9edeefb --- /dev/null +++ b/content/tickets/T007.json @@ -0,0 +1,17 @@ +{ + "id": "T007", + "from": "Priya Nair ", + "subject": "locked out of hermes", + "body": "I cannot SSH into hermes. Permission denied immediately. I was in the middle of something. Who ran a hardening script without telling anyone.", + "initial_priority": "critical", + "current_priority": "critical", + "target_vm": "web_server", + "linked_quest": "Q007", + "tags": [ + "ssh", + "access", + "web_server", + "security" + ], + "deadline_behavior": "escalates" +} diff --git a/content/tickets/T008.json b/content/tickets/T008.json new file mode 100644 index 0000000..8758c73 --- /dev/null +++ b/content/tickets/T008.json @@ -0,0 +1,18 @@ +{ + "id": "T008", + "from": "Sarah Chen ", + "subject": "app is down after update", + "body": "The deploy ran this morning and now the app won't start. It's returning nothing on 8080. 
The update pulled in a new package version. I don't know if that's the problem but the timing is suspicious.", + "initial_priority": "high", + "current_priority": "high", + "target_vm": "web_server", + "linked_quest": "Q008", + "tags": [ + "app", + "deploy", + "packages", + "web_server" + ], + "deadline_behavior": "escalates", + "_note": "Sarah correctly suspects the package update. She doesn't know the build machine is involved." +} \ No newline at end of file diff --git a/content/vm_profiles/build_machine.json b/content/vm_profiles/build_machine.json new file mode 100644 index 0000000..8c8ee44 --- /dev/null +++ b/content/vm_profiles/build_machine.json @@ -0,0 +1,41 @@ +{ + "id": "build_machine", + "domain": "sc-build-machine", + "hostname": "vulcan", + "distro": "arch", + "role": "Build/package/update quest target VM", + "display_name": "Build Machine (vulcan)", + "profile_type": "headless_server", + "resource_budget": { + "ram_mb": 384, + "vcpus": 2, + "disk_gb": 10, + "note": "Slightly more CPU for build tasks. Still headless." + }, + "network": { + "mode": "quest", + "libvirt_network": "sc-internal", + "optional_outbound": "sc-pkg-mirror", + "note": "Selective outbound access to package mirror for update quests." + }, + "ssh_user": "player", + "ssh_key": "~/.ssh/sc_host_key", + "snapshots": { + "baseline": "baseline.clean", + "recovery": "baseline.recovery", + "checkpoint_prefix": "checkpoint.shift-", + "max_checkpoints": 5 + }, + "guest_helper": { + "name": "ops-telemetry-cache", + "path": "/usr/local/bin/ops-telemetry-cache", + "trusted": false + }, + "display": { + "type": "vnc", + "fallback": "spice" + }, + "always_live": false, + "quests": ["Q006", "Q008"], + "note": "Arch Linux build machine. Named vulcan — the forge. Handles package/build/update quests." 
+} diff --git a/content/vm_profiles/web_server.json b/content/vm_profiles/web_server.json new file mode 100644 index 0000000..003049c --- /dev/null +++ b/content/vm_profiles/web_server.json @@ -0,0 +1,40 @@ +{ + "id": "web_server", + "domain": "sc-web-server", + "hostname": "hermes", + "distro": "debian", + "role": "Web/service quest target VM", + "display_name": "Web Server (hermes)", + "profile_type": "headless_server", + "resource_budget": { + "ram_mb": 256, + "vcpus": 1, + "disk_gb": 6, + "note": "Lightweight headless Debian server. No desktop, no graphical tools needed." + }, + "network": { + "mode": "quest", + "libvirt_network": "sc-internal" + }, + "ssh_user": "player", + "ssh_key": "~/.ssh/sc_host_key", + "snapshots": { + "baseline": "baseline.clean", + "recovery": "baseline.recovery", + "checkpoint_prefix": "checkpoint.shift-", + "max_checkpoints": 5 + }, + "guest_helper": { + "name": "yardd", + "path": "/usr/local/bin/yardd", + "trusted": false + }, + "display": { + "type": "vnc", + "fallback": "spice", + "note": "VNC preferred for headless terminal. Fallback to SPICE if VNC unavailable." + }, + "always_live": false, + "quests": ["Q002", "Q003", "Q004", "Q005", "Q007"], + "note": "Primary target VM for web service quests. Hosted on Debian. Named hermes after the messenger." +} diff --git a/content/vm_profiles/workstation.json b/content/vm_profiles/workstation.json new file mode 100644 index 0000000..e058892 --- /dev/null +++ b/content/vm_profiles/workstation.json @@ -0,0 +1,40 @@ +{ + "id": "workstation", + "domain": "sc-workstation", + "hostname": "ares", + "distro": "debian", + "role": "Player desktop workstation with browser HUD, terminal, and SSH entry point", + "display_name": "Workstation (ares)", + "profile_type": "desktop_xfce", + "resource_budget": { + "ram_mb": 768, + "vcpus": 1, + "disk_gb": 12, + "note": "Lightweight XFCE desktop with Chromium HUD and Tilix terminal." 
+ }, + "network": { + "mode": "quest", + "libvirt_network": "sc-internal" + }, + "ssh_user": "player", + "management_user": "opsbridge", + "ssh_key": "~/.ssh/sc_host_key", + "snapshots": { + "baseline": "baseline.day-one", + "recovery": "baseline.recovery", + "checkpoint_prefix": "checkpoint.shift-", + "max_checkpoints": 5 + }, + "guest_helper": { + "name": "atlas-index", + "path": "/usr/local/bin/atlas-index", + "trusted": false + }, + "display": { + "type": "spice", + "video": "virtio", + "note": "Player uses the real XFCE workstation desktop through SPICE with virtio video. QXL is available as the spice-qxl build mode for compatibility testing." + }, + "always_live": true, + "note": "The workstation VM stays live during gameplay. The browser opens the host-served HUD and Tilix provides real terminal access to the lab VMs." +} diff --git a/content/world_flags/world_flags.json b/content/world_flags/world_flags.json new file mode 100644 index 0000000..b760a22 --- /dev/null +++ b/content/world_flags/world_flags.json @@ -0,0 +1,233 @@ +{ + "_schema_version": "1.1", + "_description": "Central registry of all world flags. Every flag used in any quest, incident, or dialogue must be declared here. 
Flags not in this registry will fail content validation.", + + "flags": [ + { + "id": "player_ssh_configured", + "description": "Player has added their public key to ~/.ssh/authorized_keys on the workstation with correct permissions.", + "set_by": ["Q001"], + "read_by": ["Q002", "Q003", "Q004", "Q005", "Q006", "Q007", "Q008"], + "gates": ["quest_unlock:Q002", "quest_unlock:Q003", "quest_unlock:Q004"], + "persists": true + }, + { + "id": "player_loose_permissions", + "description": "Player set up authorized_keys but with overly permissive file or directory permissions.", + "set_by": ["Q001"], + "read_by": ["marcus-Q001"], + "gates": [], + "persists": true + }, + { + "id": "nginx_stable", + "description": "Nginx is correctly configured, running, and enabled on hermes.", + "set_by": ["Q002"], + "read_by": ["Q003"], + "gates": [], + "persists": true, + "conflicts_with": ["nginx_unstable"] + }, + { + "id": "nginx_unstable", + "description": "Nginx is running but has a known fragility — not enabled on boot, or a quick-fix config.", + "set_by": ["Q002"], + "read_by": ["Q003"], + "gates": [], + "persists": true, + "conflicts_with": ["nginx_stable"] + }, + { + "id": "hermes_web_healthy", + "description": "The web server on hermes is responding to requests normally.", + "set_by": ["Q002"], + "read_by": ["Q003", "Q004"], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_web_down"] + }, + { + "id": "hermes_web_down", + "description": "Nginx on hermes is inactive.", + "set_by": ["Q002", "Q003"], + "read_by": ["sarah-Q003-angry"], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_web_healthy"] + }, + { + "id": "hermes_logrotate_healthy", + "description": "Nginx logrotate config exists and is correctly configured on hermes.", + "set_by": ["Q003"], + "read_by": ["I001"], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_log_pressure_pending"] + }, + { + "id": "hermes_disk_healthy", + "description": "Disk utilization on hermes is below the 
alert threshold.", + "set_by": ["Q003"], + "read_by": ["I001"], + "gates": [], + "persists": false + }, + { + "id": "hermes_log_pressure_pending", + "description": "Disk was cleared on hermes but logrotate is not configured. Log will grow again.", + "set_by": ["Q003"], + "read_by": ["I001"], + "gates": ["incident_trigger:I001"], + "persists": true, + "conflicts_with": ["hermes_logrotate_healthy"] + }, + { + "id": "web_disk_pressure_active", + "description": "Disk pressure on hermes is actively worsening due to unrotated logs.", + "set_by": ["I001"], + "read_by": [], + "gates": [], + "persists": false + }, + { + "id": "hermes_deploy_healthy", + "description": "Web root ownership on hermes is correct and the deploy service can run without errors.", + "set_by": ["Q004"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_deploy_partial"] + }, + { + "id": "hermes_deploy_partial", + "description": "Web root top-level ownership is corrected but child files are still root-owned.", + "set_by": ["Q004"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_deploy_healthy"] + }, + { + "id": "hermes_backup_healthy", + "description": "Backup cron job runs as backup-agent, old files cleaned, disk below threshold.", + "set_by": ["Q005"], + "read_by": ["I002"], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_backup_partial", "hermes_backup_root_running"] + }, + { + "id": "hermes_backup_partial", + "description": "Cron job user corrected but old root-owned backup files not cleaned up.", + "set_by": ["Q005"], + "read_by": ["I002"], + "gates": ["incident_trigger:I002"], + "persists": true, + "conflicts_with": ["hermes_backup_healthy"] + }, + { + "id": "hermes_backup_root_running", + "description": "Disk was cleared but the cron job is still running as root. 
Problem will recur.", + "set_by": ["Q005"], + "read_by": ["I002"], + "gates": ["incident_trigger:I002"], + "persists": true, + "conflicts_with": ["hermes_backup_healthy"] + }, + { + "id": "vulcan_ntp_healthy", + "description": "Time synchronization is active and enabled at boot on vulcan.", + "set_by": ["Q006"], + "read_by": ["Q008"], + "gates": ["quest_unlock:Q008"], + "persists": true, + "conflicts_with": ["vulcan_ntp_fragile"] + }, + { + "id": "vulcan_ntp_fragile", + "description": "NTP is running on vulcan but not enabled at boot.", + "set_by": ["Q006"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["vulcan_ntp_healthy"] + }, + { + "id": "vulcan_builds_healthy", + "description": "Package management on vulcan works without signature errors.", + "set_by": ["Q006"], + "read_by": ["Q008"], + "gates": [], + "persists": true + }, + { + "id": "hermes_ssh_hardened_correct", + "description": "sshd on hermes uses AllowGroups with web-admin, correctly restricting access.", + "set_by": ["Q007"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_ssh_allowusers_fragile", "hermes_ssh_unrestricted"] + }, + { + "id": "hermes_ssh_allowusers_fragile", + "description": "sshd uses AllowUsers — works but requires manual updates for new users.", + "set_by": ["Q007"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_ssh_hardened_correct", "hermes_ssh_unrestricted"] + }, + { + "id": "hermes_ssh_unrestricted", + "description": "SSH hardening was removed entirely from hermes.", + "set_by": ["Q007"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["hermes_ssh_hardened_correct", "hermes_ssh_allowusers_fragile"] + }, + { + "id": "priya_access_restored", + "description": "Priya Nair can SSH to hermes again.", + "set_by": ["Q007"], + "read_by": ["priya-Q007"], + "gates": [], + "persists": true + }, + { + "id": "hermes_app_running", + "description": "axiomworks-app is active and 
serving on hermes.", + "set_by": ["Q008"], + "read_by": [], + "gates": [], + "persists": true + }, + { + "id": "hermes_app_pinned_2-1-0", + "description": "axiomworks-app is pinned to version 2.1.0 on hermes to avoid the broken 2.1.1.", + "set_by": ["Q008"], + "read_by": ["I003"], + "gates": [], + "persists": true + }, + { + "id": "vulcan_bad_build_known", + "description": "The broken 2.1.1 build on vulcan has been identified but not yet fixed.", + "set_by": ["Q008"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["vulcan_build_fixed"] + }, + { + "id": "vulcan_build_fixed", + "description": "The broken 2.1.1 build was rebuilt correctly on vulcan and republished.", + "set_by": ["Q008"], + "read_by": [], + "gates": [], + "persists": true, + "conflicts_with": ["vulcan_bad_build_known"] + } + ] +} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..c708f46 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,702 @@ +# SYSADMIN CHRONICLES — ARCHITECTURE DOCUMENT +> Version 5.0 | Status: Active development +> +> Changelog: +> v5.0 — GDScript/Godot codebase removed. Node.js + Svelte is the only codebase. +> v4.0 — Full architecture pivot to Node.js game server + Svelte web HUD. +> v3.x — Save system, world flags, trust, incidents, pressure system (GDScript era). +> v2.0 — Native Godot 4 + libvirt design (superseded). +> v1.0 — Browser/v86 prototype (superseded). + +--- + +## 1. PROJECT OVERVIEW + +**Sysadmin Chronicles** is a native Linux-only game where the player works as a +junior sysadmin at Axiom Works, handling tickets inside **real Linux virtual +machines** managed by **QEMU/KVM via libvirt**. + +The runtime stack (as of v4.0): +- **Game server** — Node.js / Express + WebSocket (`server/`). Owns all game + logic: quest state, trust, validation, VM lifecycle, incidents, save state. +- **Web HUD** — Svelte single-page app (`frontend/`). Tickets, mail, Sage, docs, + trust bar. 
Served from the game server at `http://192.168.100.1:3000`. +- **Workstation VM** — XFCE desktop (Debian 12, sc-workstation). Player's desk. + Chromium auto-opens the HUD. Tilix provides a real terminal for SSH to target VMs. +- **Target VMs** — Headless Debian (hermes) and Arch (vulcan). Quest objectives + live here. Player investigates and fixes via SSH from the workstation terminal. + +The player experience: +- Sits at the workstation VM (via SPICE/remote-viewer fullscreen on the host) +- Reads tickets and mail in the Chromium HUD +- Opens Tilix, SSHes to hermes or vulcan, fixes real problems +- Clicks "Mark Complete" in the HUD — game server SSHes in and validates VM state +- World reacts, trust shifts, new mail arrives via WebSocket push + +No simulated terminal. No fake SSH sessions. + +--- + +## 2. CORE DESIGN PRINCIPLES + +- Realism over simulation +- Native Linux execution only +- CLI-first development and asset wiring +- Minimal, stable scenes; behavior lives in scripts +- Data-driven content for quests, tickets, incidents, and dialogue +- State-based validation only; never command-sequence checking +- Multiple valid solutions where possible +- Pressure comes from evolving systems, not arbitrary timers +- Progression unlocks access, tools, and scope, not RPG stats +- Deterministic systems so content is testable and agent-friendly +- The dirty VM state is the game — preserve it, do not erase it + +--- + +## 3. 
HIGH-LEVEL ARCHITECTURE + +``` +HOST MACHINE +├── game-server/ Node.js/Express + WebSocket (server/src/) +│ ├── ContentLoader loads content/ JSON at startup +│ ├── QuestEngine quest state machine +│ ├── TicketService ticket state, mark-complete handler +│ ├── ValidationEngine SSH into VMs, evaluates rules +│ ├── VMManager virsh start/stop/snapshot wrappers +│ ├── TrustSystem score, unlock evaluation, revocation +│ ├── ProgressionSystem unlocked docs, VMs, access +│ ├── EmailService inbox, follow-up emails, reply options +│ ├── SageService rule-based knowledge base / dialogue +│ ├── ShiftTimer shift clock, pressure tick schedule +│ ├── IncidentScheduler incident injection +│ └── SaveState ~/.local/share/sysadmin-chronicles/save.json +│ +├── frontend/ Svelte web HUD (frontend/src/) +│ ├── TicketsPanel ticket list, detail, "Mark Complete" button +│ ├── MailPanel inbox, message view, reply buttons +│ ├── DocsPanel trust-gated internal docs +│ ├── SagePanel chat / knowledge base search +│ └── HeaderBar trust indicator, shift timer, unread count +│ +└── content/ JSON content — quests, tickets, dialogue, etc. + +NETWORK: sc-internal (libvirt bridge 192.168.100.0/24) + 192.168.100.1 host (game server port 3000) + +VMs on sc-internal +├── sc-workstation (ares) Debian 12 XFCE — player's desk +│ ├── Chromium → http://192.168.100.1:3000 (HUD, always open) +│ └── Tilix → SSH to hermes/vulcan (real terminal) +├── sc-web-server (hermes) headless Debian (Q002–Q005, Q007) +└── sc-build-machine (vulcan) headless Arch (Q006, Q008) + +PLAYER FLOW: + Host starts game server → boots sc-workstation via SPICE + Player sees XFCE desktop → Chromium with HUD auto-open + Reads ticket → opens Tilix → SSH hermes → fixes problem + Clicks "Mark Complete" → server SSHes hermes → validates + Trust updates → WebSocket pushes to browser → new mail arrives +``` + +--- + +## 4. 
RUNTIME MODEL + +### 4.1 Game Server — Node.js + +The game server (`server/src/index.js`) is a Node.js/Express application: +- Serves `frontend/dist/` as static files at `/` +- WebSocket server on the same port (real-time event push to HUD) +- On startup: loads all content JSON, hydrates services from save file, + ensures workstation VM is live via VMManager + +The server is responsible for: +- All game logic (quest state, trust, progression, incidents) +- VM lifecycle management (virsh via child_process) +- Validation — SSH into target VMs and evaluate rules +- Save/load (single JSON file at `~/.local/share/sysadmin-chronicles/save.json`) +- WebSocket broadcast of trust changes, new mail, shift ticks, incident alerts + +### 4.2 Frontend — Svelte + +The web HUD (`frontend/src/`) is a Svelte single-page app: +- Built with Vite; output lands in `frontend/dist/` and is served by the game server +- All data fetched from the game server API; no local state beyond UI +- WebSocket client for real-time updates +- Does not run validation — only displays results + +### 4.3 Target Platform + +- Host OS: Linux +- Supported deployment model: start game server on host, view workstation via SPICE +- Required host: KVM, libvirt, virsh, Node.js 18+, virt-viewer +- Required install model: one-time host setup with clean uninstall path + +No Windows, macOS, or browser target is planned for the host. The HUD is a web +app served locally — it is never exposed to the internet. + +--- + +## 5. VIRTUAL MACHINE SYSTEM + +### 5.1 Required Stack + +- `qemu-system-*` +- `KVM` +- `libvirtd` +- `virsh` +- libvirt virtual networks +- qcow2-backed VM images + +Runtime policy: +- The shipped game should not require broad `sudo` usage during normal play +- One-time host setup may require admin approval +- Ongoing gameplay should run as a regular user against a prepared VM runtime + +### 5.2 Core Behavior + +The game controls VMs through libvirt, not by emulating them internally. 
+ +Responsibilities: +- Ensure required domains and networks exist +- Start the active VM +- Stop or suspend inactive VMs +- Revert to known snapshots for resets +- Query runtime state for evaluation +- Attach the player to the appropriate VM workflow + +The workstation and at least one target VM must be able to run at the same +time. This is required for real SSH-based play and for background incidents to +continue evolving while the player works elsewhere. + +Operational guidance: +- `workstation` stays live during normal play +- At least one target VM stays live with it +- Later phases may keep all major quest VMs active simultaneously +- Resource budgets should be documented and enforced conservatively + +Lab finding: +- Small headless target VMs were inexpensive on the test host +- The workstation became materially heavier once a real graphical session and + browser were added +- Budget the workstation separately from server-style quest VMs + +### 5.3 Initial VM Roles + +| ID | Role | Distro | Hostname | Purpose | +|----|------|--------|----------|---------| +| `workstation` | Player desktop | Debian 12 | `ares` | XFCE + Chromium HUD + Tilix terminal | +| `web_server` | Service host | Debian 12 | `hermes` | Web/service quests (Q002–Q005, Q007) | +| `build_machine` | Build box | Arch | `vulcan` | Package/build/update quests (Q006, Q008) | + +### 5.3.1 Workstation Profile + +The workstation is a full XFCE desktop (Debian 12, 768–1536 MB RAM): +- **Chromium** — opens `http://192.168.100.1:3000` on login (game HUD) +- **Tilix** — split-pane terminal, set as default; player SSHes to hermes/vulcan from here +- **Full sysadmin CLI toolkit** pre-installed (vim, htop, tmux, curl, nmap, tcpdump, etc.) 
+- SPICE display with QXL video — dynamic resolution via vdagent; fullscreen via `remote-viewer` +- `always_live: true` — stays running between shifts; suspended on game quit, resumed on next launch + +Player never needs to interact with the workstation VM's internal file system for +game objectives — all quest work happens on the target VMs via SSH. + +### 5.3.2 Why XFCE + Chromium (not terminal-only) + +Earlier iterations used a terminal-only workstation. The game was redesigned +because a terminal-only approach would require building a fake terminal and fake SSH. +The XFCE + real browser approach is simpler, more realistic, and requires no +terminal simulation at all: + +- Player uses a real Tilix terminal — no simulation +- Player SSHes with real SSH — no protocol emulation +- The HUD is a real web app — no custom UI framework needed for game chrome +- Downside: workstation VM costs ~480–768 MB RAM; budget accordingly + +### 5.4 Snapshot Strategy + +Snapshots are the reset primitive and the save primitive. + +Named snapshot tiers per VM: + +| Name | Purpose | +|------|---------| +| `baseline.clean` | Authored starting state for a fresh quest arc | +| `baseline.recovery` | Fallback if live state is unrecoverable | +| `checkpoint.shift-{N}` | Auto-saved at start of each in-game shift | + +Rules: +- Snapshot names are deterministic +- Quest scripts may declare required baseline snapshots +- Validation never depends on snapshot history; only current observed state +- The game retains a maximum of 5 shift checkpoints per VM; older ones are pruned +- `baseline.clean` and `baseline.recovery` are never pruned by the game + +### 5.5 Networking Model + +Networking is host-controlled through libvirt. 
+ +Supported modes: +- `quest`: constrained, deterministic virtual networks and fixtures +- `sandbox`: broader connectivity for experimentation + +Examples: +- Internal-only network between workstation and target VM +- Broken DNS as part of a quest +- Deliberately degraded service reachability +- Optional outbound package mirror access for selected scenarios + +### 5.6 VM Provisioning Hooks + +Quest-specific VM state — broken configs, missing files, log histories — is +authored into the VM baseline before the snapshot is taken. This is done via +idempotent provisioning scripts: + +``` +tools/vm/quest-prep/Q0XX-prep.sh +``` + +These scripts run against the target VM before the quest's `baseline.clean` +snapshot is taken. They are never run at quest activation time. See +QUEST_AUTHORING.md for the full provisioning workflow. + +--- + +## 6. OBSERVATION AND VALIDATION + +### 6.1 Validation Philosophy + +Quest completion is based on **system state**, not on how the player got there. + +Allowed evidence includes: +- Files and directory contents +- Ownership and permissions +- Service state +- Process state +- Open ports +- Package state +- Mount state +- Disk utilization +- System configuration values + +Disallowed as primary success conditions: +- Specific commands typed +- Specific files opened +- UI click history + +### 6.2 Observation Sources + +Primary sources: +- `virsh domstate`, `domifaddr`, and domain metadata +- Host-driven inspection tooling such as libguestfs where practical +- SSH-based read-only checks initiated by the host when needed +- Quest-specific host probe scripts for higher-level state summaries + +Authoritative rule: +- Quest validation must use host-authoritative checks only +- In-guest helpers may improve responsiveness, but cannot decide success + +In-guest helpers should use neutral names (examples: `atlas-index`, `yardd`, +`ops-telemetry-cache`) and must not be trusted as a security boundary. 
+ +Operational note: +- Routine package operations inside guests may emit maintenance or virtualization + notices that break immersion +- Base images should suppress or tune guest maintenance messaging where safe + for the authored environment +- Validation and incident design should not rely on noisy package-manager side + effects being visible to the player + +### 6.3 Validation Rule Model + +Core rule families: +- `file_exists` / `file_contains` / `file_mode` / `file_owner` +- `directory_exists` +- `service_state` / `service_enabled` +- `process_running` / `process_user` +- `port_listening` +- `package_installed` +- `mount_present` +- `disk_usage_below` / `disk_usage_above` +- `command_assert` — fallback only, must verify state not behavior +- `and` / `or` / `not` + +### 6.4 Trust Boundary + +The player may gain root access on some machines. The guest is not trusted. The +host validation layer is trusted. Anti-cheat is achieved through external +validation, not secrecy. + +--- + +## 7. GAMEPLAY SYSTEMS + +### 7.1 Core Loop + +1. Ticket arrives with incomplete context +2. Player evaluates urgency against other active problems +3. Player enters or connects into the relevant VM +4. Player investigates using real Linux tools +5. Player applies a fix +6. Game validates resulting state +7. World reacts +8. Trust shifts +9. Future conditions reflect earlier choices + +### 7.2 System Pressure + +Pressure is systemic, not a countdown bar. Examples: +- Disk usage keeps climbing +- A log fills with worsening symptoms +- A degraded service starts affecting another team +- A quick fix suppresses one symptom while creating later instability + +Pressure is authored as state transitions and event chains via incident files. + +### 7.3 Trust / Reputation + +Trust measures how much the organization relies on the player. 
+ +Trust affects: +- sudo scope +- accessible machines +- diagnostic tooling +- ticket sensitivity +- documentation visibility + +**Trust increases** when the player resolves problems cleanly, finds root causes, +and avoids collateral damage. + +**Trust decreases** when the player breaks unrelated systems, applies fragile +fixes, ignores urgent incidents, or resolves symptoms but not causes. + +**Trust revocation**: if trust falls below a declared threshold in the trust +unlock table, specific access strings are revoked. A subsequent trust increase +does not automatically restore revoked access — the player must re-earn the +unlock tier. Revocation rules must be explicitly declared per unlock tier. + +### 7.4 Multiple Valid Solutions + +Quests support realistic alternatives where possible: +- quick workaround +- operationally acceptable fix +- proper long-term fix + +Branch resolution rule: +- multiple branches may match the same final state +- each branch must declare a numeric `priority` +- the highest matching priority wins +- ties are a content error and fail validation during authoring checks + +### 7.5 Dynamic Events + +Dynamic events inject prioritization pressure and are authored in incident files. +Events are selected from authored pools and activated by progression, trust, +current system state, and world flags. + +Each incident declares a `blast_radius_quests` list so the incident scheduler +can avoid activating an incident that would corrupt active quest evidence or +simultaneously interfere with an in-progress objective. + +### 7.6 Investigation Quality + +Clues must be legible and grounded. Every quest declares a `clue_fingerprint` +documenting what evidence exists in the VM baseline. Content validation checks +that the fingerprint is plausible. The player should feel rewarded for competent +debugging rather than guessing. 
+ +### 7.7 Progression + +Progression unlocks: +- broader sudo access +- new servers +- more dangerous responsibilities +- better internal docs +- helper scripts and diagnostics + +This is institutional progression, not character stats. + +### 7.8 Mentor Thread + +Marcus is the primary mentor character. His dialogue runs across the full game +as a `series_id: marcus-main` thread. Each dialogue file that belongs to an +ongoing character relationship declares `series_id` and `series_position`. + +The dialogue system tracks series state so Marcus remembers what happened in +earlier quests and can reference it in later ones. This is the primary vehicle +for institutional memory and character continuity. + +### 7.9 Tone and Humor + +The tone is dry, realistic, and slightly dysfunctional. Examples: +- contradictory runbooks +- tickets that misidentify the problem +- passive-aggressive internal notes +- perfect urgency attached to trivial formatting requests + +Humor must support immersion, not break it. + +--- + +## 8. COMMAND AND ACCESS MODEL + +Access is controlled realistically through: +- user accounts and group membership +- sudoers configuration +- reachable hosts +- available packages and tooling + +If a player cannot run `systemctl`, the reason is that the VM account lacks the +required privileges, not that the game disabled the verb. + +--- + +## 9. PRESENTATION LAYER + +The player's view is the workstation VM desktop, viewed fullscreen via SPICE: + +```bash +scripts/start-game.sh +# → starts game server +# → virsh start sc-workstation (if not already running) +# → remote-viewer --full-screen spice://127.0.0.1:<port> +``` + +The player sees an XFCE desktop with Chromium pre-opened to the HUD.
+ +### 9.1 VM Display + +- **Protocol**: SPICE with QXL video driver +- **Client**: `remote-viewer` (from `virt-viewer` package) in fullscreen mode +- **Resolution**: dynamic — guest vdagent resizes to match host display +- **Cursor release**: `Ctrl+Alt`; fullscreen toggle: `F11` +- **Clipboard sharing**: via spice-vdagent in the guest + +No VNC, no custom viewer widget. The host runs `remote-viewer` and the player +works inside the workstation VM. + +### 9.2 HUD (Svelte Web App) + +The game HUD is a Svelte single-page app served at `http://192.168.100.1:3000`: + +- **TicketsPanel** — ticket list, detail view, "Mark Complete" button +- **MailPanel** — inbox, message body, reply buttons (where applicable) +- **DocsPanel** — trust-gated internal docs, rendered from content/docs/ +- **SagePanel** — chat interface to SageService knowledge base +- **HeaderBar** — trust indicator (no number, behavior only), shift timer, unread badge + +The HUD is a company intranet portal in look and feel — dark, monospace, minimal. + +### 9.3 One-Time Setup and Uninstall + +Host-side setup is unavoidable (KVM, libvirt, VM images). It must be simple. + +Principles: +- one-time setup only (`tools/setup/first-run-setup.sh`) +- plain-language explanation of what will be installed +- managed resources use the `sc-` prefix (never touch other libvirt domains) +- full uninstall removes all game-owned domains, networks, storage, helper files +- normal gameplay does not require broad `sudo` + +--- + +## 10. 
DATA MODEL + +Authoring formats: +- JSON for quests, tickets, incidents, dialogue, documentation metadata +- Shell helper scripts where CLI integration is necessary + +Top-level content domains: + +| Domain | Purpose | +|--------|---------| +| `quests/` | Objective chains and validation rules | +| `tickets/` | Player-facing problem statements | +| `incidents/` | Dynamic system pressure events | +| `dialogue/` | Workplace messages, hints, follow-ups | +| `docs/` | Internal documentation metadata/content | +| `progression/` | Trust thresholds, unlocks, access tiers | +| `vm_profiles/` | Domain names, snapshots, networks, probe config | +| `helpers/` | Non-obvious guest helper naming/config data | +| `world_flags/` | Central registry of all world state flags | + +Each authored scenario must declare: +- `required_vms` — all VMs the quest touches +- `baseline_snapshot` — starting snapshot for this quest +- `clue_fingerprint` — evidence declared in the VM baseline +- validation rules and branch priorities +- escalation behavior +- trust impact +- `blast_radius` — incident IDs the quest may interact with +- follow-on world effects + +--- + +## 11. SAVE MODEL + +### 11.1 Dirty State Model + +The game uses a **dirty state model**. VM disk state is preserved across +sessions as-is. The game does not revert to a clean baseline on load — it +resumes from whatever state the VMs are currently in. + +This is intentional. The player's history of changes is part of the game. A +machine they fixed stays fixed. A machine they damaged stays damaged until they +repair it or request reimage. 
+ +Two persistence layers: + +**Game State Layer** — saved as JSON: +- Trust score and history +- Unlocked access, sudo scopes, docs, tools +- Active/completed quest and ticket state +- World flags (current values and change history) +- Incident scheduler state +- In-world clock and shift counter + +**VM State Layer** — saved as libvirt snapshot references: +- Per-VM reference to current snapshot tier or live disk +- Per-VM managed recovery checkpoint list +- Reimage history per VM + +### 11.2 Shift Checkpoints + +At the start of each in-game shift: +1. Game state JSON is saved +2. A named snapshot is created per active VM: `checkpoint.shift-{N}` +3. The checkpoint reference is recorded in the save file +4. Shift checkpoints beyond the retention limit (default: 5) are pruned + +Shift checkpoint rollback is an explicit player action ("start this shift +over") with a confirmation prompt. It does not undo trust changes or dialogue +already delivered. + +### 11.3 Load-Time Reconciliation + +On load, the observation service validates current VM state against saved world +flags. Minor drift is handled silently. Major drift — missing snapshots, +unbootable VMs — triggers the recovery flow. + +If a referenced snapshot is missing: +- If `baseline.recovery` exists, offer resume from recovery +- If `baseline.recovery` is also gone, the VM is treated as unrecoverable + +### 11.4 Recovery / Reimage Flow + +When a VM is unrecoverable, the player can report it for reimage through an +in-world mechanic: + +1. Player submits a reimage request (ticket to management) +2. In-world delay is imposed (one in-game shift) +3. Machine is restored from `baseline.recovery` or `baseline.clean` +4. Trust penalty is applied based on severity +5. In-progress quests on that VM are reset +6. Evidence from before the reimage is gone — acknowledged in-world + +This is the designed escape valve. It has visible consequences but allows +forward progress. 
+ +### 11.5 Host Storage Management + +qcow2 images with many snapshots can balloon. The game enforces: +- Maximum of 5 shift checkpoints per VM (configurable in vm_profile) +- Authored baseline and recovery snapshots are never pruned by the game +- `resource_budget` in vm_profile declares expected disk footprint + +### 11.6 Developer Reset + +Not available in the shipped game. CLI only: + +```bash +bash tools/vm/snapshot-all.sh --revert-to baseline.clean +``` + +Completely resets all VMs to authored baseline. Used during content authoring +and automated test runs. + +--- + +## 12. MODULE BREAKDOWN + +### Server (`server/src/`) + +| Module | Responsibility | +|--------|----------------| +| `index.js` | Express + WebSocket entry point; service wiring; static file serving | +| `ContentLoader` | Loads all content/ JSON at startup; never writes | +| `QuestEngine` | Quest state machine (pending → active → resolved) | +| `TicketService` | Ticket state, mark-complete handler, branch resolution | +| `ValidationEngine` | SSH into VMs, evaluates all rule types against real state | +| `VMManager` | virsh start/stop/snapshot/getIP wrappers | +| `TrustSystem` | Score tracking, unlock evaluation, revocation | +| `ProgressionSystem` | Unlocked docs, VMs, access strings | +| `EmailService` | Inbox, follow-up emails, reply options, WebSocket push | +| `SageService` | Rule-based dialogue / knowledge base | +| `ShiftTimer` | Shift clock, broadcasts shift:tick via WebSocket | +| `IncidentScheduler` | Pressure tick loop, incident injection | +| `ShiftReviewService` | End-of-shift performance review email generation | +| `CertificationService` | Awards internal certs after quest chain completion | +| `SaveState` | Read/write `~/.local/share/sysadmin-chronicles/save.json` | +| `lib/ssh.js` | Promisified SSH command execution (node-ssh) | +| `lib/virsh.js` | virsh command wrappers | +| `lib/eventBus.js` | Internal Node.js EventEmitter for service coordination | + +### Frontend 
(`frontend/src/`) + +| Component | Responsibility | +|-----------|----------------| +| `App.svelte` | Root component; WebSocket connection; panel routing | +| `TicketsPanel` | Ticket list, detail, mark-complete flow | +| `MailPanel` | Inbox, message body, reply buttons | +| `DocsPanel` | Trust-gated doc list and content viewer | +| `SagePanel` | Chat interface, follow-up prompts | +| `VmsPanel` | Live VM status indicators | +| `HeaderBar` | Trust display, shift timer, mail unread count | +| `lib/api.js` | Fetch wrapper for all REST API calls | + +--- + +## 13. SECURITY AND SAFETY + +Requirements: +- Scope libvirt resources to dedicated game domains/networks/storage pools +- Never operate on arbitrary host VMs by default +- Use explicit naming/prefixing for all game-managed resources (`sc-` prefix) +- Separate quest-mode constrained networks from broader sandbox networks +- Prefer least-privilege host integration +- Provide a dry-run and diagnostic mode for development scripts + +The game manages only the resources it created or was explicitly pointed at +during setup. + +--- + +## 14. 
TECHNOLOGY DECISIONS + +| Technology | Role | Reason | +|-----------|------|--------| +| Node.js / Express | Game server | Async I/O, native SSH/virsh via child_process, easy JSON | +| Svelte / Vite | Web HUD | Lightweight, no virtual DOM overhead, fast build | +| WebSocket (`ws`) | Real-time push | Trust changes, mail, incidents without polling | +| QEMU/KVM | Virtualization backend | Real Linux environments | +| libvirt / virsh | VM lifecycle control | Standard Linux automation surface | +| SPICE + QXL | Workstation display | Dynamic resolution, clipboard sharing, fullscreen | +| `remote-viewer` | Host-side SPICE client | Ships with virt-viewer; fullscreen with F11 | +| JSON | Content authoring | Data-driven, easy to diff, unchanged from prior design | +| node-ssh | SSH execution in validation | Clean Promise API; BatchMode, key-based auth | + +Not in scope: v86, WebAssembly, browser-only runtime, service-worker networking. + +--- + +## 15. DEVELOPMENT PRIORITIES + +1. Native architecture consistency +2. VM control integration +3. Observation and validation +4. Core gameplay loop +5. Pressure, trust, and dynamic event systems +6. Presentation polish + +If a design choice improves presentation but weakens VM realism or maintainable +automation, reject it. diff --git a/docs/CHARACTERS.md b/docs/CHARACTERS.md new file mode 100644 index 0000000..ab3e32d --- /dev/null +++ b/docs/CHARACTERS.md @@ -0,0 +1,459 @@ +# Characters — Sysadmin Chronicles + +Story design reference. All characters, bios, relationships, and open story hooks. +For company/world context see `COMPANY_LORE.md`. This file focuses on the people. + +--- + +## Active Characters + +These characters have an established in-game voice and presence. Any new quest work +should treat their characterization here as canonical. + +--- + +### The Player +**Role:** New junior sysadmin hire, day one +**Identity:** Unnamed. Player-selected portrait (5 options). + +Hired to replace Dale. 
Nobody will explain what Dale did. Badge number is still +pending — temp credentials were handled by someone in Finance on their first day. +The player is a competent professional, not a bumbling intern. They may not know +every answer but they know how to look. + +The player has no spoken lines. Their character is expressed entirely through the +choices they make when fixing things — whether they understand root causes or just +clear symptoms, whether they leave systems better or just less broken. + +--- + +### Marcus Webb +**Role:** Senior Systems Administrator +**Email:** `m.webb@axiomworks.internal` +**Reports to:** Dave Kowalski (Director of IT) + +Six years at Axiom Works. Hired by Kowalski. Knows where everything is, why it's +there, and which parts were a mistake. Communicates in short, precise messages. +Does not explain things twice. Trusts competence over credentials — he will give +the player more rope as they demonstrate they know what to do with it. If they +don't, the rope gets shorter. + +He was the one who onboarded the player. He assigned their first ticket. He will +assign most of the tickets that follow. His messages range from brief task +assignments to late-night observations about something that's been on his mind — +the latter usually mean something is about to become a problem. + +He knows what Dale did. He has decided not to discuss it. + +**Personality:** Dry. Technically precise. Does not perform enthusiasm. Occasionally +wry but never jokey. Respects players who fix root causes. Mildly annoyed by +players who fix symptoms and call it done. 
+ +**Relationships:** +- Kowalski: reports to him; respectful but not deferential +- Sarah: professional; takes her tickets seriously, occasionally says quiet things when she's wrong +- Priya: mutual professional respect; they operate in the same zone of "things that matter when they go wrong" +- Phil Ruiz (Sales VP): warm; Phil owes Marcus for saving a demo once and Marcus has never mentioned it + +--- + +### Sarah Chen +**Role:** Product Manager, AxiomFlow +**Email:** `s.chen@axiomworks.internal` + +Owns the AxiomFlow product roadmap. Coordinates between sales, engineering, and +customers. Emails Monday mornings. Cares intensely about the demo and staging +environments because those are the product she can actually see and touch. Not wrong +about their importance. + +She files tickets when things break on the product-facing side. Her descriptions of +problems are accurate about symptoms and often wrong about causes — she will +confidently diagnose a permissions issue as a script bug, or a package problem as a +config error. She is not incompetent; she just doesn't have the full picture. When +the player fixes the underlying cause rather than the surface symptom, she notices. + +She has a sharp edge when things get worse after someone touches them. She will say +so, clearly, without being melodramatic about it. + +**Personality:** Direct. Metric-oriented. Not patient with vague timelines or "we're +looking into it." Appreciates being told what the actual problem was, not just that +it's fixed. 
+ +**Relationships:** +- Marcus: professional; trusts that her tickets will be handled, doesn't ask for much +- Player: initially impersonal (they're new); warms or cools based on outcomes +- Nikhil Sharma: upstream dependency — his build pipeline affects her deployments + +--- + +### Priya Nair +**Role:** Head of Security & Compliance +**Email:** `p.nair@axiomworks.internal` +**Direct report:** James Osei (Security Analyst) + +Leads all security reviews, access audits, and compliance programmes. Has a standing +Thursday meeting with David Park (CTO) that has existed since 2017. Was brought in +after an incident nobody discusses in public. Has been building the security function +from something informal into something that can survive a SOC 2 audit. + +She frames everything in terms of what happens when things go wrong, not whether they +will. She assumes breach. She assumes misconfiguration. She is often right. She is +not someone who appreciates hearing about a production change after it has already +happened. + +She will tell the player when a fix is correct and why. She will also tell them when +a fix works but leaves the environment in a worse position than before. She is not +punitive about this — she just states it. + +She does shift reviews at end-of-shift and grades the player's overall performance. +Her criteria: did the work move forward, did the environment stay stable, did the +player create extra problems. + +**Personality:** Precise. Consequence-focused. Calm in tone even when the content +is not calm. Economical with words. Does not use exclamation marks. 
+ +**Relationships:** +- Player: evaluative; her trust is earned by demonstrating that security is a + consideration, not an afterthought +- Marcus: peer respect; they operate in different domains with overlapping concerns +- Dave Kowalski: reports indirectly up through him for infrastructure decisions +- David Park: standing Thursday meeting; she has the CTO's ear + +> **Name note for developers:** The in-game email service and some ticket files +> previously used "Priya Kapoor" and the onboarding doc used "Priya Singh." +> These are all the same character. **Priya Nair** is the canonical name. +> Email should be `p.nair@axiomworks.internal`. Update references in +> `server/src/services/EmailService.js`, `content/tickets/T007.json`, and +> `content/docs/onboarding.json`. + +--- + +### Dave Okonkwo +**Role:** Internal employee, non-technical +**Email:** `d.okonkwo@axiomworks.internal` + +A regular Axiom Works employee who notices when things aren't working and files +tickets about it. He doesn't know enough to diagnose the problem — he reports +symptoms accurately and assumes the wrong cause. His reports are useful precisely +because they represent what a non-technical user actually experiences. + +He is not on the company website (280 employees, most of them aren't). He's +somewhere in operations or general staff. He's not in Finance, not in IT. + +> **Open decision:** Dave Okonkwo is currently the only employee-level character who +> submits tickets. The company website has Dave Kowalski as Director of IT Operations +> (Marcus's boss), which is a completely different person. This is not a naming +> inconsistency — they're two different people. However: if the story wants Kowalski +> to become an active character who also files tickets or escalates issues, that's a +> separate thread. Okonkwo and Kowalski coexist. + +--- + +## Named Background Characters + +On the company website. No current in-game presence. 
Available for story use — +they can send emails, appear on CC lines, be referenced in dialogue, or become +active characters in new quests. + +Listed in rough order of story relevance to the IT/sysadmin context. + +--- + +### Dave Kowalski — Director of IT Operations +Marcus's manager. The player's skip-level. Background is network engineering — +has Cisco certifications he will not volunteer unless provoked. Oversees systems +(Marcus's domain), networking (Tom Malaney), and IT support. Has been at Axiom +Works since 2015. Describes the infrastructure as "mature." Sends weekly status +emails in bullet points that never quite answer the question. When things go wrong +he schedules a meeting to "talk through the situation," which everyone has learned +is worse than a direct message. + +Has said "we should really document that" more times than he can count. Has +documented very little personally. Maintains a mysterious Tuesday 2–3pm calendar +block. + +Story use: source of policy pressure, indirect escalation, the person who asks +questions that reveal Marcus hasn't told the player everything. + +--- + +### Nikhil Sharma — Platform Engineer +Owns the internal build and release pipeline, the CI infrastructure, and the +parts of deployment that nobody else wants to think about. Strong opinions about +reproducible builds. Sends Slack messages at 6am. Occasionally at 11pm. + +He is the engineer most directly connected to what happens on vulcan — if a build +is broken, it's probably something Nikhil built or maintains. He has never met the +player. He almost certainly doesn't know the player exists. + +Story use: the author of broken packages the player has to debug; a character who +can explain (or fail to explain) what went wrong upstream; an escalation path when +a build problem is genuinely his fault. 
+ +--- + +### Tanya Okafor — Head of Customer Success +Manages post-sale relationships for all AxiomFlow customers and the twelve legacy +AxiomSync accounts that haven't migrated. Uses the word "partnership" a lot. + +Usually the first person to know when something is wrong in production, because a +customer has already called her before IT knows there's a problem. Her call log +is an early warning system. She is not hostile to IT but she has learned that +"we're looking into it" is not an answer she can give a customer. + +Story use: pressure vector from the customer direction; source of urgency that +doesn't come from Marcus or the ticket queue; demonstrates real-world stakes when +things go down. + +--- + +### Phil Ruiz — VP of Sales +Has been promising features to prospects since 2016. Maintains a warm relationship +with the infrastructure team because Marcus once fixed the staging environment with +twenty minutes to spare before a major demo — Phil has never forgotten this. Travels +frequently. Expense reports submitted promptly, which Marcus has noted approvingly. + +Story use: indirect beneficiary when demos work; pressure source when a sales demo +is scheduled and something is broken; the person who will tell the CTO what IT did +right in a room the player will never be in. + +--- + +### Yusuf Halabi — Engineering Manager +Reports to David Park (CTO). Manages the core AxiomFlow platform team. Runs the +Thursday architecture review. Has opinions about test coverage. Leaves pull request +comments that are technically correct and diplomatically suboptimal. + +Story use: engineering-side escalation; source of tickets about internal tooling; +the person who will ask why a config change broke a downstream process. + +--- + +### Derek Ashford — Financial Controller +Does not appear at team meetings. Does appear on CC lines of every email that +mentions cloud costs, hardware procurement, or infrastructure budget. Always +replies-all. 
His manager is Rachel Brandt (CFO). + +Story use: background texture on procurement requests; the voice that makes any +infrastructure spending feel like a negotiation. + +> **Note on "Dave from Finance":** Marcus's day-one message references "Dave from +> Finance" as the person holding the player's temp credentials. This is almost +> certainly Derek Ashford — either Marcus being casual with names, or a +> continuity error. Derek Ashford is the only Finance character plausibly holding +> IT credentials. His first name is Derek, not Dave — either the message should +> be corrected, or "Dave from Finance" is a third unnamed Finance employee. + +--- + +### Rachel Huang — Systems Administrator +Marcus's peer on the IT team. Handles provisioning, patch cycles, and the ongoing +negotiation with Finance over cloud consolidation. Came from a managed services +background. Has strong opinions about monitoring dashboards, most of which are +correct. + +Story use: the person who set something up that the player now has to maintain; +a colleague who can provide context Marcus won't; someone whose provisioning +decisions the player will encounter as infrastructure. + +--- + +### Tom Malaney — Network Engineer +Responsible for network infrastructure across the office and hosted environments. +On-call for more holiday weekends than he would like. Thorough in documentation +when he finds time for it. + +Story use: DNS, firewall, or routing problems that are not the player's fault +but become the player's problem; someone who can be reached but is slow to +respond. + +--- + +### James Osei — Security Analyst +Priya's direct report. Handles vulnerability assessments, access reviews, and +quarterly compliance reporting. Methodical. Has a spreadsheet for everything, +which is not a criticism. + +Story use: the person who runs the actual audit that Priya will summarize to the +player; a source of detailed (sometimes overwhelming) security findings.
+ +--- + +### Ellen Marsh — CEO & Co-Founder +Built the first version of AxiomFlow after a decade in operations. No CS background. +Attends all-hands twice a year. Does not use Slack. Has final say on pricing and +major customer commitments. + +Story use: the distant authority whose priorities shape everything; never interacts +with the player directly, but her decisions land as constraints. + +--- + +### David Park — CTO & Co-Founder +Wrote the original rules engine in 2011. Now manages engineering managers. Still has +opinions about the data model. Has a standing Thursday meeting with Priya that hasn't +moved since 2017. + +Story use: architectural decisions from above; the person Priya reports significant +security findings to. + +--- + +### Karen Volkov — COO +Joined 2014. Responsible for the fact that the company has documented processes for +anything at all. Has opinions about infrastructure costs that surface in IT's world +via Finance. Prefers decisions with clear owners and deadlines. + +--- + +### Rachel Brandt — CFO +Joined 2016. Approves all capital expenditure over $5,000. Working to consolidate +cloud spend. Does not enjoy surprises in the infrastructure budget. Derek Ashford +reports to her. + +--- + +### Mei Lin — Senior Software Engineer +Has maintained AxiomSync's integration layer since 2018. Knows more about it than +anyone would prefer, including herself. Currently leading the migration tooling +project for the remaining legacy accounts. + +--- + +### Cora Reyes — Software Engineer +Works on the AxiomDash reporting pipeline. Has submitted more internal RFCs than +anyone else on the team in the past year. Moving toward senior. + +--- + +### Ben Portillo — Product Manager, AxiomDash +Leads product development for the analytics add-on. Works closely with large +accounts to understand what they actually want from dashboards (usually different +from what they asked for). 
+ +--- + +### Annika Gosse — UX Designer +Responsible for AxiomFlow's interface. Has been advocating for a redesign of the +workflow builder since 2022. Patient. + +--- + +### Sandra Wu — HR Manager +Manages hiring, onboarding, and employee relations since 2016. Runs the new-hire +onboarding process (three days, thorough). Sends birthday emails on time, every time. + +--- + +### Owen Blake — Office Manager +Keeps the office running. Has fixed more things than his job title implies. The +person to contact if conference room equipment stops working. + +--- + +### Mike Kawamoto — Account Executive +Handles mid-market manufacturing accounts in the northeast. Believes strongly in +the demo environment. Closes more deals in Q4 than any other quarter. + +--- + +### Lisa Ferreira — Customer Success Manager +Manages onboarding for new AxiomFlow deployments. Has a talent for understanding +what customers mean rather than what they say. + +--- + +## Unresolved Characters (Story Hooks) + +These are referenced in existing content but never defined. They represent the +strongest open narrative threads. + +--- + +### Dale — The Previous Sysadmin +**Reference:** Marcus's day-one message — "You're replacing Dale. Nobody will tell you +what Dale did because it's complicated." + +Dale is gone. The player has their desk, their access provisioning slot, and +apparently their reputation — people know the player is "Dale's replacement" before +they know the player's name. The systems the player inherits are the systems Dale +last touched. + +What Dale did is unknown. It is described as "complicated." Marcus knows. Possibly +Kowalski knows. Possibly Priya knows, if it was security-related. + +This is the strongest existing narrative mystery in the game. It has setup and no +payoff. 
Dale's story could be: +- A technical incident (something Dale broke and couldn't fix) +- A policy violation (something Dale did that wasn't malicious but wasn't right) +- A trust collapse (competent but burned bridges) +- Something personal +- Any combination + +The player finding out what Dale did — gradually, through the systems they work on, +through things people let slip — is a natural story spine for the whole game. + +--- + +### "Dave from Finance" — Day One Reference +**Reference:** Marcus's day-one message — "Dave from Finance has your temp credentials. +He's on three today." + +Almost certainly Derek Ashford (Financial Controller), referred to informally. But +Derek's first name is Derek, not Dave — this is either Marcus being casual with +names, a continuity error, or a genuinely separate unlisted Finance employee. + +Needs a decision: correct "Dave" to "Derek" in Marcus's message, or introduce a +separate "Dave from Finance" as a minor character. + +--- + +## Key Relationships Map + +``` +Ellen Marsh (CEO) + └── David Park (CTO) + └── Yusuf Halabi (Eng Manager) + ├── Mei Lin + ├── Cora Reyes + └── Nikhil Sharma + └── Karen Volkov (COO) + └── Rachel Brandt (CFO) + └── Derek Ashford (Financial Controller) + └── Phil Ruiz (VP Sales) + ├── Mike Kawamoto + └── Tanya Okafor + └── Lisa Ferreira + +Dave Kowalski (Director of IT) + ├── Marcus Webb ←── Player's manager + │ └── [Player] + ├── Rachel Huang + └── Tom Malaney + +Priya Nair (Head of Security) + └── James Osei + +Sarah Chen (Product, AxiomFlow) ←── frequent ticket source +Ben Portillo (Product, AxiomDash) +Annika Gosse (UX) +``` + +--- + +## Tone Notes for New Story Work + +- **Marcus talks like someone who has answered this question before.** Precise, low + affect, no wasted words. Never condescending — just efficient. +- **Sarah talks like a PM: outcome-focused, slightly impatient, specific about + what she needs.** She is not a villain. She has real deadlines. 
+- **Priya talks like someone who has already thought about what goes wrong.** She + doesn't speculate — she states. She's not alarming, she's matter-of-fact. +- **Dave Okonkwo talks like someone who doesn't know what the problem is** but is + trying to be helpful by reporting exactly what he observed. He should never be + made to look stupid — he's doing the right thing. +- **The company takes itself seriously.** Humor comes from the gap between official + language and reality, not from anyone being a cartoon. +- **Problems have plausible causes.** Systems broke because someone made a + reasonable decision under time pressure, not because they were careless idiots. + The player should feel like a professional, not a janitor. diff --git a/docs/COMPANY_LORE.md b/docs/COMPANY_LORE.md new file mode 100644 index 0000000..f8b81f3 --- /dev/null +++ b/docs/COMPANY_LORE.md @@ -0,0 +1,165 @@ +# Axiom Works — Company Lore Reference + +> For quest authors, dialogue writers, and ticket copy. Keep the tone dry and +> believable. The company should feel real, slightly dysfunctional, and just +> plausible enough that players recognise the type. + +--- + +## Who They Are + +**Axiom Works** is a B2B enterprise software company founded in 2011. Headquarters +is in a three-floor office park that is technically "downtown adjacent" depending +on how charitable you are with the map. They have about 280 employees. The +Glassdoor rating is 3.8 stars and management checks it obsessively. + +Their flagship product is **AxiomFlow** — a workflow automation platform aimed at +mid-size manufacturers, logistics companies, and anyone who got a 90-minute demo +and thought it looked easy. Most customers are still on the workflow they set up +in 2019. The platform does what it says. Marketing says it does considerably more. 
+ +--- + +## Products + +| Product | Description | Status | +|---------|-------------|--------| +| **AxiomFlow** | Workflow automation platform | Active, main revenue | +| **AxiomDash** | Reporting and analytics add-on | Active, profitable, under-resourced | +| **AxiomSync** | Legacy data integration layer | End-of-sale since 2021, still maintained for 12 customers who refuse to migrate | + +The current marketing tagline is *"Streamline. Scale. Succeed."* It replaced +*"Work smarter, not harder"* in Q3 of last year. The one before that mentioned +AI. Nobody is sure what the AI was. + +--- + +## Infrastructure + +The company runs a mix of on-prem servers (named after Greek gods — a choice made +by a contractor in 2017 who left before documenting anything) and a handful of +cloud instances that accounting keeps trying to consolidate. + +| Host | Role | Notes | +|------|------|-------| +| **ares** | Player workstation | XFCE desktop, where the player works | +| **hermes** | Web/app server | nginx, staging and demo environment for AxiomFlow | +| **vulcan** | Build machine | Arch Linux, compiles artifacts, runs scheduled jobs | + +### Planned future systems +As the game grows, additional machines will be added. Candidates: + +| Proposed host | Role | Greek connection | +|---|---|---| +| **poseidon** | Database server | Foundation, depths, reliability | +| **apollo** | Mail / notification server | Messenger, communication | +| **athena** | Internal tooling (ticketing, wiki) | Wisdom, knowledge management | +| **argus** | Monitoring / alerting | The hundred-eyed watcher | +| **mnemosyne** | Backup / storage | Memory, persistence | + +--- + +## Characters + +### Dave Kowalski — Director of IT Operations +The player's skip-level manager. Has been at Axiom Works since 2015. Hired Marcus. +Oversees three teams: systems (Marcus's domain), networking, and IT support. 
Background +is originally networking — has Cisco certifications he won't bring up unless someone else +brings up Cisco certifications first. Sends weekly status emails formatted in bullet +points that never quite answer the question you were asking. When things go wrong he +schedules a meeting to "talk through the situation," which everyone has learned is +worse than an email. Maintains a calendar block from 2–3pm on Tuesdays that nobody +has ever asked about. Has said "we should really document that" approximately 400 times. +Describes the infrastructure as "mature." + +### Marcus Webb — Senior Sysadmin +The player's manager and the person who assigned them the ticket. Has been at +Axiom Works for six years. Knows where all the bodies are buried. Communicates +primarily in terse Slack messages and occasionally very long emails sent at 11pm. +Trusts competence over process. Gets irritated by people who confuse symptoms +with root causes. + +### Priya Nair — Security / Compliance +Runs security reviews and has opinions about everything. Usually right. Tends to +frame concerns in terms of what will happen when things go wrong rather than +whether they will. Was brought in after an incident nobody talks about in public. + +### Sarah Chen — Product Manager +Represents the product team's perspective in the ticket queue. Cares about demo +environments more than production ones because demos are what she can see. Not +technically wrong about their importance. Emails at 8am on Mondays. + +### Derek Ashford — Financial Controller +Does not appear in person. Appears on CC lines of emails where infrastructure +costs are being discussed. Always replies-all. His full name is Derek Ashford. +His manager is Rachel Brandt (CFO). + +--- + +## Background Characters (non-interactive, for world texture) + +These characters exist on the company website and in lore but do not appear in +quests or dialogue. 
Use them for verisimilitude — email headers, CC lines, internal +wiki author credits, that sort of thing. + +### Ellen Marsh — CEO & Co-Founder +Built AxiomFlow after a decade in operations. Not technical. Attends all-hands +twice a year. Has final say on pricing and major customer commitments. Does not +use Slack. The player will never interact with her. + +### David Park — CTO & Co-Founder +Wrote the original rules engine. Now manages engineering managers. Still has +opinions about the data model. Has a standing Thursday meeting with security +that hasn't moved since 2017. + +### Karen Volkov — COO +Joined 2014. Responsible for the fact that Axiom Works has documented processes +for anything. Has opinions about infrastructure costs. Prefers decisions with +clear owners and deadlines. + +### Rachel Brandt — CFO +Joined 2016. Approves all capital expenditure over $5,000. Does not enjoy +surprises in the infrastructure budget. Derek reports to her. + +### Phil Ruiz — VP of Sales +Has been promising features to prospects since 2016. Has a warm relationship +with the infrastructure team because Marcus once saved a demo with 20 minutes to +spare. Expense reports submitted promptly. + +### Tanya Okafor — Head of Customer Success +Manages all post-sale customer relationships including the twelve AxiomSync +holdouts. Usually the first to know when something is wrong in production, +because a customer has already called her. + +### Yusuf Halabi — Engineering Manager +Reports to the CTO. Manages the core AxiomFlow platform team. Has opinions +about test coverage. Runs the Thursday architecture review. + +### Mei Lin — Senior Software Engineer +Has maintained AxiomSync's integration layer since 2018. Knows more about it +than anyone would prefer. + +### Nikhil Sharma — Platform Engineer +Owns the build and release pipeline and internal CI infrastructure. Occasionally +sends Slack messages at 6am. 
+ +### Sandra Wu — HR Manager +Manages hiring, onboarding, and employee relations since 2016. Sends birthday +emails on time, every time. Runs the new-hire onboarding process that takes +three days. + +--- + +## Tone Guidelines + +- **Dry, not sarcastic.** The company takes itself seriously. The humour comes + from the gap between how they describe things and what's actually happening. +- **Specific, not generic.** "The AxiomSync customer in Cincinnati keeps calling" + is better than "a client is upset." +- **Plausible dysfunction.** Problems happen because of reasonable decisions made + under time pressure, not because people are incompetent. The player should feel + like a real professional, not a janitor. +- **No cartoon villains.** Derek from Finance is not evil. The product team is not + stupid. They have different priorities. +- **The infrastructure has history.** It was built over time. Some parts are good. + Some parts were good in 2017. The player's job is to keep it working. diff --git a/docs/INSTALLER_PLAN.md b/docs/INSTALLER_PLAN.md new file mode 100644 index 0000000..40cc997 --- /dev/null +++ b/docs/INSTALLER_PLAN.md @@ -0,0 +1,641 @@ +# Installer & Distribution Plan +> Status: Planning — not yet implemented. +> Covers: installer, uninstaller, VM rebuild, save management, modular script architecture. + +--- + +## Goals + +- Download zip from GitHub/Gitea, run `install.sh`, done. +- Friendly tone throughout — this is a game, not a server deployment. +- No jargon (libvirt, pool, domain, NAT) in any user-facing output. +- Power users can follow the Manual Install section in README instead. +- VM images live wherever the user puts the game (portable, large-drive friendly). +- Full uninstall with explicit choices about what gets removed. +- Users can rebuild individual VMs if something goes wrong. +- Save data is resettable; save slots available for experimenting. 
+ +--- + +## `start-game.sh` Fixes + +The current launcher works but has two real bugs, several fragile assumptions, and +no user-friendly output. Fix this in the same pass as the rest of the scripts since +it will share `lib/ui.sh` and `lib/config.sh`. + +### Bugs to fix + +**Orphaned server process** +The script ends with `exec remote-viewer`, which replaces the shell. The `trap` +that was set to kill the server on EXIT disappears with the shell — so when the +player closes the SPICE window, the game server keeps running silently. + +Fix: don't `exec`. Run `remote-viewer` normally, capture its PID, wait for it to +exit, then kill the server cleanly. + +```bash +# instead of: +exec remote-viewer "$spice_uri" + +# do: +remote-viewer "$spice_uri" & +VIEWER_PID=$! +trap 'kill "$SERVER_PID" "$VIEWER_PID" 2>/dev/null || true' EXIT INT TERM +wait "$VIEWER_PID" +``` + +**`sleep 1` server readiness check** +One second is a race. On a slow machine or if npm install just ran, the server +may not be up. On a fast machine it's wasted time. + +Fix: poll in a tight loop with a timeout. + +```bash +wait_for_server() { + local port="$1" timeout=15 i=0 + while ! ss -tlnp | grep -q ":${port} " 2>/dev/null; do + sleep 0.3 + ((i++)) + [ $i -ge $((timeout * 3)) ] && return 1 + done +} +``` + +### Fragile assumptions to fix + +- **`lsof` for port check** — not universal. Replace with `ss -tlnp` (iproute2, + present on all modern Linux). +- **No network check** — if the `sc-internal` libvirt network is inactive, the VM + starts but has no network. The HUD loads but shows nothing. Check the network is + active (and start it if not) before starting the VM. +- **No images-dir check** — once portable installs land, `SC_IMAGES_DIR` might be + on an unmounted game drive. Check it exists before trying virsh ops. +- **Frontend build at launch** — `"Building frontend..."` at game launch is odd UX. + Move this guard to install time. 
The launcher should only verify `dist/index.html` + exists and fail clearly if it doesn't (don't silently trigger a build). + +### UX improvements + +- Source `lib/ui.sh` and `lib/config.sh` once they exist. +- Replace raw `echo "ERROR: ..."` with friendly messages. Examples: + +| Current | Replacement | +|---|---| +| `ERROR: virsh is required.` | `Your system is missing the virtual machine tools.\nRun install.sh to set up the game.` | +| `ERROR: missing workstation domain: sc-workstation` | `Your game world hasn't been built yet.\nRun install.sh to finish setup.` | +| `ERROR: node is required. Install Node.js 18+.` | `Node.js is required but wasn't found.\nRun install.sh to set up the game.` | + +- Show brief startup status so the player isn't staring at a blank terminal: + +``` + Starting Sysadmin Chronicles... + ✓ Game server running + ✓ Workstation online + Opening your desk... +``` + +- Add `--manage-saves` and `--reset-save` flags (forward to `tools/save/manage-saves.sh`). + +### New flag: `--stop` + +Even after the shutdown fix, the server can outlive the viewer if the launcher +itself is killed uncleanly, so add `start-game.sh --stop` +that kills any running game server process. Useful if something gets stuck. + +### Summary of changes to `start-game.sh` + +| Area | Change | +|---|---| +| Server shutdown | `exec` → normal run + `wait`, trap covers both server and viewer | +| Server readiness | `sleep 1` → poll loop with 15s timeout | +| Port check | `lsof` → `ss -tlnp` | +| Network check | Add: verify `sc-internal` active, start if not | +| Images dir check | Add: verify `SC_IMAGES_DIR` exists before virsh ops | +| Frontend build | Remove from launcher; fail clearly if dist missing | +| Error messages | Replace all with plain-English + fix instructions | +| Startup output | Add three-line status before opening SPICE | +| New flags | `--manage-saves`, `--reset-save`, `--stop` | + +--- + +## Script Architecture + +All user-facing scripts share a common library layer. No logic is duplicated. 
+ +``` +tools/ + lib/ + ui.sh # colored output, prompts, spinners, progress bars + deps.sh # distro detection, package name map, dep check/install + libvirt.sh # virsh wrappers: network, pool, domain, snapshot ops + vm.sh # build, rebuild, snapshot, revert per VM + config.sh # read/write install config (~/.config/sysadmin-chronicles/config) + save.sh # save slot management, reset helpers + +install.sh # project root — the entry point for new users +uninstall.sh # project root — removal with options +start-game.sh # project root — launcher (checks env, starts server, opens SPICE) + +tools/ + setup/ + check-host.sh # kept, improved UX, used internally by install.sh + first-run-setup.sh # kept as internal lib target or merged into install.sh + seed-vms.sh # kept as internal lib target, called by install.sh and rebuild + vm/ + rebuild-vms.sh # new: rebuild all or specific VMs + save/ + manage-saves.sh # new: list/switch/reset save slots +``` + +### `lib/ui.sh` +- `sc_step "label"` — numbered step header +- `sc_ok "msg"`, `sc_warn "msg"`, `sc_fail "msg"` — status lines +- `sc_prompt "question" "default"` — interactive prompt, returns answer +- `sc_confirm "question"` — yes/no, returns 0/1 +- `sc_spinner "label"` / `sc_spinner_stop` — background spinner for long ops +- `sc_progress "label" current total` — simple fraction display + +### `lib/deps.sh` +- `detect_distro` — sets `$SC_DISTRO` (arch, debian, ubuntu, fedora, opensuse) +- `map_packages` — translates canonical dep names to distro package names +- `check_deps` — returns list of missing deps +- `install_deps "pkg1 pkg2 ..."` — runs the right package manager with sudo, logs what was installed + +### `lib/libvirt.sh` +- `ensure_network name xml_path` +- `ensure_pool name path` +- `pool_path name` — returns the pool's target directory +- `domain_exists name`, `domain_state name` +- `snapshot_exists domain name` +- `snapshot_create domain name description` +- `snapshot_revert domain name` +- `snapshot_delete domain 
name` + +### `lib/vm.sh` +- `vm_build profile [--dry-run] [--force]` — wraps `build-vm.sh` +- `vm_rebuild profile [--dry-run]` — destroy + rebuild from cloud image +- `vm_revert vm_id snapshot_name` — revert to named snapshot +- `vm_status vm_id` — running / stopped / missing +- `vm_start vm_id`, `vm_stop vm_id` + +### `lib/config.sh` +Config file lives at `~/.config/sysadmin-chronicles/config` (survives game dir moves). + +Variables stored: +```bash +SC_GAME_DIR=/home/user/Games/sysadmin-chronicles +SC_IMAGES_DIR=/home/user/Games/sysadmin-chronicles/images +SC_LIBVIRT_URI=qemu:///system +SC_INSTALL_DATE=2026-04-27 +SC_INSTALLED_DEPS="libvirt qemu-system-x86 ..." # what we added, for the log +``` + +- `config_read` — sources the config file +- `config_write key value` +- `config_show` — pretty-prints current config + +### `lib/save.sh` +- `save_list` — lists all save slots with name, date, trust score, quest progress +- `save_switch slot_name` — switch active save +- `save_new slot_name` — create a new empty save slot +- `save_reset [slot_name]` — wipe a slot back to new-game state +- `save_export slot_name path` — export save JSON for backup +- `save_import path slot_name` — import a save JSON + +--- + +## Installer Design (`install.sh`) + +### Phase 1 — Welcome + +``` +╔══════════════════════════════════════════╗ +║ SYSADMIN CHRONICLES — SETUP ║ +╚══════════════════════════════════════════╝ + +Welcome! This installer will: + • Install a few system tools (KVM, QEMU, libvirt) + • Set up a private virtual network for the game + • Build three virtual machines (~30 minutes, once only) + +Where would you like to install the game? + [default: ~/Games/sysadmin-chronicles] > +``` + +### Phase 2 — System check (silent) + +Internally calls `check_deps`. If all present, skip to Phase 4 silently. 
+ +### Phase 3 — Dependency install (only if needed) + +``` +Your system is missing the following tools: + • KVM virtualization support (qemu-system-x86) + • Virtual machine manager (libvirt, virt-install) + • SPICE display viewer (virt-viewer) + • Cloud image tools (cloud-image-utils, genisoimage) + +Install them now? You'll be asked for your password. [Y/n] +``` + +After install: +- Log installed packages to `~/.local/share/sysadmin-chronicles/install.log` +- Format: timestamp, package name, version, distro. Human-readable. +- Note at end: "This log is kept so you know exactly what was added. See it at: ..." + +### Phase 4 — One-time network and storage setup + +``` +── Setting up game network ────────────────── + ✓ Private game network created + ✓ VM image storage configured at ~/Games/sysadmin-chronicles/images + ✓ Game access keys generated +``` + +User never sees "libvirt", "storage pool", "sc-internal", "sc-images". + +### Phase 5 — VM build + +``` +── Building your game world ───────────────── + This happens once and takes about 30 minutes. + You can leave this running in the background. + + Building workstation (1/3) ........... ✓ 8m 14s + Building web server (2/3) ........... ✓ 4m 02s + Building build server (3/3) ........... ✓ 5m 31s + Setting up quest scenarios ........... ✓ 1m 48s +``` + +### Phase 6 — Desktop entry + +``` +Create a desktop launcher so the game appears in your app menu? [Y/n] +``` + +Creates `~/.local/share/applications/sysadmin-chronicles.desktop` if yes. + +### Phase 7 — Done + +``` +╔══════════════════════════════════════════╗ +║ SETUP COMPLETE! 
║ +╚══════════════════════════════════════════╝ + +Start the game: + bash ~/Games/sysadmin-chronicles/start-game.sh + (or from your app menu if you created a launcher) + +If you ever need to rebuild the virtual machines: + bash ~/Games/sysadmin-chronicles/tools/vm/rebuild-vms.sh + +Install log saved at: + ~/.local/share/sysadmin-chronicles/install.log +``` + +--- + +## Uninstaller Design (`uninstall.sh`) + +Improved from current: shows sizes, explains consequences, three-tier removal. + +### Menu approach + +``` +╔══════════════════════════════════════════╗ +║ SYSADMIN CHRONICLES — UNINSTALL ║ +╚══════════════════════════════════════════╝ + +What would you like to remove? + + 1) Everything — full uninstall (recommended) + 2) Game world only — remove VMs, keep game files + 3) Save data only — reset to new game + 4) Custom — choose what to remove + + q) Cancel + +> +``` + +### "Everything" breakdown (shows before confirming) + +``` +This will remove: + + Game virtual machines (3 VMs + all snapshots) ~38 GB + VM image files on disk ~38 GB ← ask separately + Game network and storage configuration <1 MB + Game access keys (~/.ssh/sc_host_key) <1 KB + Desktop launcher (if created) <1 KB + + System packages (libvirt, QEMU, etc.) NOT removed + ↑ These were installed by your package manager. + See ~/.local/share/sysadmin-chronicles/install.log + if you want to remove them manually. + +Keep VM image files? If you ever reinstall, keeping them +saves the 30-minute rebuild. [Y/n — default: keep] + +Type REMOVE to confirm: > +``` + +### What is never auto-removed + +- System packages (libvirt, qemu, virt-viewer, etc.) +- Anything not prefixed with `sc-` in libvirt +- Any other libvirt VMs or networks not owned by this game + +--- + +## VM Rebuild Tool (`tools/vm/rebuild-vms.sh`) + +For when something goes wrong with a VM or the user wants a clean reset. 
+ +``` +Usage: + rebuild-vms.sh Rebuild all VMs from scratch + rebuild-vms.sh --vm workstation Rebuild a single VM + rebuild-vms.sh --revert Revert all VMs to baseline snapshot (fast, ~30s) + rebuild-vms.sh --revert --vm workstation + +Menu (interactive): + 1) Revert all to last known good (fast — restores baseline snapshot) + 2) Rebuild workstation (~8 min — rebuilds from cloud image) + 3) Rebuild web server (~4 min) + 4) Rebuild build server (~5 min) + 5) Rebuild everything (~20 min) + q) Cancel +``` + +Key behavior: +- Always confirm before destroying a VM +- Show what quest progress will be affected +- Offer to back up save data before proceeding +- After rebuild, re-runs the appropriate quest-prep scripts and re-takes baseline snapshot + +--- + +## User Snapshots + +Players can take their own named snapshots of any VM — useful before attempting +something risky, or to bookmark a state they want to return to. + +These are distinct from the game's automatic shift checkpoints and baseline +snapshots. User snapshots are never pruned automatically. + +### Via `manage-saves.sh` (recommended) + +The save management menu will include a **VM Snapshots** section: + +``` +VM Snapshots + + workstation (ares) + 1) before-ssh-experiment 2026-05-01 19:14 + 2) checkpoint.shift-3 2026-05-01 22:00 [auto] + 3) baseline.day-one [protected] + + web server (hermes) + 1) my-nginx-fix 2026-05-02 11:30 + 2) checkpoint.shift-3 2026-05-01 22:00 [auto] + 3) baseline.clean [protected] + + Actions: [t]ake snapshot [r]evert [d]elete [q]uit +``` + +Taking a snapshot prompts for a name (letters, numbers, hyphens only). +Reverting shows a confirmation with the snapshot date. +Protected snapshots (baseline.*, checkpoint.*) cannot be deleted from this menu. 
+ +### Via `tools/vm/rebuild-vms.sh --snapshot` + +For scripting or quick one-liners: + +```bash +rebuild-vms.sh --snapshot --vm workstation --name before-risky-thing +rebuild-vms.sh --snapshot --all --name pre-shift-4 +rebuild-vms.sh --revert --vm workstation --name before-risky-thing +``` + +### Storage note + +Each VM snapshot is an internal qcow2 differential — typically 100 MB–2 GB +depending on how much disk has changed since the baseline. The uninstaller shows +the total size of user snapshots separately so the user can decide whether to +keep them. + +### `lib/vm.sh` additions needed + +- `vm_snapshot_create vm_id name` — with name validation +- `vm_snapshot_list vm_id` — returns name, date, size, protection flag +- `vm_snapshot_revert vm_id name` +- `vm_snapshot_delete vm_id name` — refuses if name matches `baseline.*` or `checkpoint.*` + +--- + +## Save Management + +### Save file layout + +``` +~/.local/share/sysadmin-chronicles/ + saves/ + autosave.json ← always-present auto save (current session) + slot-1.json + slot-2.json + slot-3.json + install.log +``` + +### Save slot semantics + +Save slots store JSON state only: +- Trust score and history +- Quest and ticket state +- World flags +- Inbox +- In-world clock + +**VM state is not per-slot.** The shift checkpoint snapshots (checkpoint.shift-N) are the VM save mechanism and are independent of JSON slots. This is a known limitation but keeps disk usage manageable. + +When switching slots: if the VM state doesn't match the JSON slot's expected state, warn the user. They may need to revert VMs manually. 
+ +### `tools/save/manage-saves.sh` + +``` +Usage: + manage-saves.sh Show save slot menu + manage-saves.sh --reset Reset current save to new game + manage-saves.sh --reset slot-1 Reset a specific slot + manage-saves.sh --list List all slots + +Interactive menu: + Current save: autosave (Day 3, Trust: 67, 4/8 quests) + + 1) autosave Day 3 Trust 67 Q4/8 [active] + 2) slot-1 Day 1 Trust 50 Q1/8 + 3) slot-2 —empty— + 4) slot-3 —empty— + + Actions: [s]witch [n]ew [r]eset [e]xport [i]mport [q]uit +``` + +### Reset save (standalone, accessible from start-game.sh) + +The launcher `start-game.sh` should have an escape hatch: + +``` +start-game.sh --manage-saves → opens save management menu +start-game.sh --reset-save → confirms and resets to new game +``` + +--- + +## Launcher Improvements (`start-game.sh`) + +Current issues to fix: +- Silently fails if images drive not mounted +- No check that the libvirt network is up before starting +- `sleep 1` to wait for server is fragile + +Improvements: +- `config_read` to get `SC_IMAGES_DIR`, check it exists and is writable +- Check libvirt network is active, start it if not (with clear message) +- Poll server readiness on `/healthz` instead of sleeping +- Show a brief status before launching SPICE: "Starting your workstation..." +- On failure, show a plain-English error and the fix + +--- + +## Portable Installation Notes + +The `sc-images` libvirt pool target can be any path the host OS can write to. The installer configures it to `$SC_IMAGES_DIR` (inside the game dir by default). + +If the user puts the game on a game drive (`/mnt/gamesdrive/sysadmin-chronicles/`): +- `SC_IMAGES_DIR=/mnt/gamesdrive/sysadmin-chronicles/images` +- The libvirt pool points there +- All qcow2 files live on the game drive +- The launcher checks the drive is mounted before starting + +If the drive is unmounted: +``` + ✗ Can't find your game world. 
+ The VM images are stored at /mnt/gamesdrive/sysadmin-chronicles/images + but that location isn't available right now. + + Is your game drive plugged in and mounted? + Once it's mounted, run start-game.sh again. +``` + +--- + +## Dependency Log Format + +`~/.local/share/sysadmin-chronicles/install.log` + +``` +# Sysadmin Chronicles — Install Log +# Created: 2026-04-27 14:32:01 +# Distro: arch (6.19.12-arch1-1) +# Game dir: /home/aaron/Games/sysadmin-chronicles +# Images: /home/aaron/Games/sysadmin-chronicles/images + +[INSTALLED] libvirt 12.2.0 via pacman +[INSTALLED] qemu-system-x86 11.0.0 via pacman +[INSTALLED] qemu-hw-display-qxl 11.0.0 via pacman +[INSTALLED] qemu-hw-display-virtio-gpu 11.0.0 via pacman +[INSTALLED] qemu-ui-spice-core 11.0.0 via pacman +[INSTALLED] qemu-chardev-spice 11.0.0 via pacman +[INSTALLED] qemu-audio-spice 11.0.0 via pacman +[INSTALLED] virt-install 5.1.0 via pacman +[INSTALLED] virt-viewer 11.0 via pacman +[INSTALLED] cloud-image-utils 0.33 via pacman +[INSTALLED] cdrtools 3.02a09 via pacman +[INSTALLED] libisoburn 1.5.8 via pacman +[SKIPPED] nodejs already installed + +# To remove manually: +# sudo pacman -Rns libvirt qemu-system-x86 qemu-hw-display-qxl ... +``` + +--- + +## File Layout After Install + +``` +~/Games/sysadmin-chronicles/ ← SC_GAME_DIR + install.sh + uninstall.sh + start-game.sh + content/ + server/ + frontend/ + docs/ + tools/ + lib/ + ui.sh + deps.sh + libvirt.sh + vm.sh + config.sh + save.sh + setup/ + check-host.sh + first-run-setup.sh + seed-vms.sh + vm/ + rebuild-vms.sh + build-vm.sh + ... + save/ + manage-saves.sh + + images/ ← SC_IMAGES_DIR (libvirt pool points here) + sc-workstation.qcow2 (~20 GB) + sc-web-server.qcow2 (~8 GB) + sc-build-machine.qcow2 (~10 GB) + +~/.config/sysadmin-chronicles/config ← install config (survives game dir moves) +~/.local/share/sysadmin-chronicles/ + saves/ + autosave.json + slot-1.json ... + install.log +``` + +--- + +## Implementation Order + +1. 
`tools/lib/ui.sh` — all other scripts depend on this +2. `tools/lib/config.sh` — needed by installer and launcher +3. `tools/lib/deps.sh` — needed by installer +4. `tools/lib/libvirt.sh` — needed by installer and rebuild tool +5. `tools/lib/vm.sh` — needed by installer and rebuild tool +6. `tools/lib/save.sh` — needed by save manager +7. `install.sh` — assembles libs 1–5 +8. `tools/vm/rebuild-vms.sh` — assembles libs 1, 4, 5 +9. `tools/save/manage-saves.sh` — assembles libs 1, 2, 6 +10. `uninstall.sh` — assembles libs 1, 2, 4 +11. `start-game.sh` (improved) — assembles libs 1, 2 +12. Update `check-host.sh` UX +13. README — manual install section, quick start + +--- + +## README Structure + +```markdown +## Quick Install + +curl -fsSL .../install.sh | bash +# or +bash install.sh # from downloaded zip + +## Manual Install + +
+For users who want full control or are troubleshooting +...per-distro dep tables, step-by-step... +
+``` diff --git a/docs/PRESSURE_PROFILES.md b/docs/PRESSURE_PROFILES.md new file mode 100644 index 0000000..5d97b3d --- /dev/null +++ b/docs/PRESSURE_PROFILES.md @@ -0,0 +1,76 @@ +# SYSADMIN CHRONICLES — PRESSURE PROFILES +> Version 1.1 +> +> Pressure profiles define how an unresolved situation degrades over time. +> They are referenced by name from quest files and live in +> `content/pressure_profiles/`. +> +> A pressure profile is NOT an incident. An incident is a discrete event with +> a trigger, escalation chain, and resolution. A pressure profile describes the +> passive degradation behavior of the environment while a quest is active and +> unresolved. Incidents may be spawned by pressure profiles, but are separate. + +--- + +## SCHEMA + +```json +{ + "id": "web_outage_escalation", + "label": "Web Service Outage", + "description": "Gentle escalation for Tier 1 web outage quests. Creates narrative urgency without punishing new players.", + "intensity": 2, + "escalation_steps": [ + { + "trigger_after_seconds": 900, + "notification": "Hermes is still showing errors. Is someone on this?", + "notification_severity": "warning" + }, + { + "trigger_after_seconds": 1800, + "notification": "Site has been down thirty minutes. Ticket priority is going up.", + "notification_severity": "warning", + "escalate_linked_ticket": "high" + }, + { + "trigger_after_seconds": 3600, + "notification": "Hour down. Priya has been copied in.", + "notification_severity": "error", + "escalate_linked_ticket": "critical" + } + ] +} +``` + +--- + +## FIELD REFERENCE + +| Field | Type | Description | +|-------|------|-------------| +| `id` | string | Unique identifier. Must match the string used in the quest's `pressure_profile` field. | +| `label` | string | Short human-readable name for tooling and authoring. | +| `description` | string | Internal description for authors. | +| `intensity` | int | Relative urgency / pressure level. 
| +| `escalation_steps` | array | Ordered list of timed escalation notices or ticket priority changes. | + +### Escalation Step Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `trigger_after_seconds` | Yes | Seconds after activation before the step fires. | +| `notification` | Yes | Player-facing escalation message. | +| `notification_severity` | Yes | Severity label used by the UI and notifier. | +| `escalate_linked_ticket` | No | Optional linked-ticket priority escalation. | + +--- + +## AUTHORING NOTES + +- `trigger_after_seconds` is relative to quest activation time, not real wall time. + In-game time compression applies. +- Escalation steps must be ordered by `trigger_after_seconds` ascending. Authoring tools will + warn on out-of-order steps. +- Pressure profiles should create urgency, not guaranteed punishment. +- If a pressure profile escalates a linked ticket, it should do so in a way that + matches the authored ticket priority curve. diff --git a/docs/PROJECT_MAP.md b/docs/PROJECT_MAP.md new file mode 100644 index 0000000..878150e --- /dev/null +++ b/docs/PROJECT_MAP.md @@ -0,0 +1,377 @@ +# SYSADMIN CHRONICLES — PROJECT MAP +> Living document. Update when files are added, moved, removed, or when architecture changes. +> Version 5.1 
+ +--- + +## ROOT STRUCTURE + +``` +sysadmin-chronicles/ +│ +├── server/ ← NEW: Node.js game server +│ ├── src/ +│ │ ├── index.js Entry point — Express + WebSocket +│ │ ├── routes/ auth, state, tickets, mail, docs, sage, vms +│ │ ├── services/ ContentLoader, QuestEngine, TicketService, +│ │ │ ValidationEngine, VMManager, TrustSystem, +│ │ │ ProgressionSystem, EmailService, SageService, +│ │ │ ShiftTimer, IncidentScheduler, ShiftReviewService, +│ │ │ CertificationService, SaveState +│ │ └── lib/ ssh.js, virsh.js, command.js, eventBus.js, session.js +│ └── package.json +│ +├── frontend/ ← NEW: Svelte web HUD +│ ├── src/ +│ │ ├── App.svelte Root component, WebSocket, panel routing +│ │ ├── components/ TicketsPanel, MailPanel, DocsPanel, SagePanel, +│ │ │ VmsPanel, ProfilePanel, HeaderBar, SidebarTabs +│ │ ├── lib/api.js REST API fetch wrapper +│ │ └── main.js +│ ├── dist/ Built output (served by game server) +│ └── package.json +│ +├── scripts/ +│ └── start-game.sh One-shot: start server + open SPICE workstation viewer +│ +├── docs/ +│ ├── ARCHITECTURE.md System architecture +│ ├── CHARACTERS.md All characters — bios, relationships, story hooks +│ ├── COMPANY_LORE.md World, company, products, tone guidelines +│ ├── INSTALLER_PLAN.md Installer design and packaging +│ ├── PRESSURE_PROFILES.md Time-pressure escalation schema and authoring guide +│ ├── PROJECT_MAP.md ← this file +│ ├── ROADMAP.md Development phases and content status +│ ├── RUNTIME_DEPENDENCIES.md Host dependencies and version requirements +│ ├── SAVE_SYSTEM.md Save model, VM persistence policy, recovery flows +│ ├── SNAPSHOT_CHAIN.md VM snapshot chain and baseline management +│ ├── STORY_DESIGN_CONTEXT.md How story works — narrative arc, quest model, design constraints +│ ├── VM_BUILD_SYSTEM.md VM build and provisioning system +│ ├── WORKSTATION_POLISH_BACKLOG.md Outstanding UX polish items +│ └── codex-specs/ +│ +├── content/ ← data-driven content loaded by Node.js server +│ ├── quests/ quest JSON files 
(being reworked — see STORY_DESIGN_CONTEXT.md) +│ ├── tickets/ ticket JSON files (being reworked) +│ ├── incidents/ incident JSON files (being reworked) +│ ├── pressure_profiles/ escalation profiles (schema in PRESSURE_PROFILES.md) +│ ├── dialogue/ character dialogue JSON files (being reworked) +│ ├── world_flags/ world_flags.json (central registry) +│ ├── docs/ onboarding, sage_content, internal_docs, etc. +│ ├── progression/ trust_unlocks.json, access_tiers.json +│ └── vm_profiles/ workstation.json, web_server.json, build_machine.json +│ +├── tools/ +│ ├── setup/ check-host.sh, seed-vms.sh, first-run-setup.sh, uninstall.sh +│ ├── vm/ build-vm.sh, build-*.sh, snapshot-all.sh, suppress-maintenance-noise.sh +│ │ ├── profiles/ workstation.sh, web-server.sh, build-machine.sh +│ │ └── quest-prep/ Q001–Q008 prep/post scripts +│ └── content/ validate-content.js (zero-error gate), verify-clue-fingerprints.js +│ +├── company-website/ Axiom Works public website (static HTML/CSS) +│ ├── index.html Home — hero, product highlights, stats +│ ├── about.html Company story, values, contact +│ ├── people.html Team page — Dave, Marcus, Priya, Sarah + filler staff +│ ├── products.html AxiomFlow, AxiomDash, AxiomSync product pages +│ ├── style.css Shared corporate CSS (navy/blue scheme) +│ └── assets/ logo.png, portrait photos for each NPC +│ +├── vm/ images/, snapshots/, cloud-init/, probes/ +├── package.json +└── README.md +``` + + + +--- + +## COMPANY WEBSITE + +Static HTML/CSS site serving as the public-facing Axiom Works company website, accessible from the workstation VM. 
+ +**URL inside the VM:** `http://www.axiomworks.corp/` (no port) + +**How it works:** +- The game server serves `company-website/` at `/company/` (port 3000) +- nginx is installed in the workstation VM and proxies `axiomworks.corp` and `www.axiomworks.corp` (port 80) → game server port 3000 at `/company/` +- `/etc/hosts` in the workstation maps both hostnames to `127.0.0.1` (localhost → nginx) +- Result: the player sees a clean `http://www.axiomworks.corp/` URL in Chromium with no port number + +**Pages:** Home (`index.html`), About (`about.html`), Our Team (`people.html`), Products (`products.html`) + +**Team page portraits:** NPC photos live in `company-website/assets/`. The player is not featured on the website. + +**Domain note:** `axiomworks.corp` uses the IANA-reserved `.corp` TLD (reserved 2024, can never be publicly delegated). No registration needed — it will never resolve on the real internet. The in-VM `/etc/hosts` + nginx approach is sufficient for any build. + +**Player portraits** (for the HUD profile panel) are separate from the website portraits. They live in `server/public/portraits/` and are served at `/public/portraits/`. The player selects one via the Profile panel; the choice persists in `save.json` as `player_portrait`. + +--- + +## BOOT FLOW (Node.js Server) + +``` +bash scripts/start-game.sh + ↓ +node server/src/index.js + 1. ContentLoader.load() — reads all content/**/*.json into memory + 2. SaveState.load() — reads ~/.local/share/sysadmin-chronicles/save.json + or creates fresh save + 3. TrustSystem.initialize() — hydrates trust score + unlock state + 4. ProgressionSystem.initialize() + 5. QuestEngine.initialize() — restores quest states from save + 6. TicketService.initialize() + 7. EmailService.initialize() — restores inbox, seeds T001 email on fresh save + 8. ShiftTimer.start() — starts shift clock + 9. IncidentScheduler.start() — begins pressure tick loop (every 30s) + 10. 
VMManager.ensureWorkstationLive() — virsh start sc-workstation if needed + ↓ +Express + WebSocket listening on PORT (default 3000) + ↓ +remote-viewer opens SPICE connection to sc-workstation +Player sees XFCE desktop → Chromium opens HUD → game is live +``` + +--- + +## TICKET COMPLETION FLOW + +``` +Player clicks "Mark Complete" on ticket in HUD + ↓ +POST /api/tickets/:id/complete + ↓ +TicketService.markComplete(ticketId) + → load ticket + linked quest JSON + → for each solution_branch (sorted by priority DESC): + ValidationEngine.check(vmId, branch.validation.rules) + → VMManager.getIP(vmId) + → SSH as opsbridge using sc_host_key + → run each rule check (file_exists, service_state, etc.) + if all rules pass → winning branch found + → TrustSystem.adjust(branch.trust_delta) + → WorldFlags.set(branch.world_flags) + → QuestEngine.completeQuest(questId) + → EmailService.send(follow-up NPC email if negative branch) + → SaveState.write() + → broadcast trust:changed, mail:new via WebSocket + ↓ +Response: { passed, branch, trust_delta, failures } +HUD shows success toast or failure details +``` + +--- + +## VM IDENTITY TABLE + +| vm_id | SC constant | libvirt domain | hostname | distro | ssh_user | mgmt_user | always_live | Quests | +|-------|-------------|----------------|----------|--------|----------|-----------|-------------|--------| +| `workstation` | `SC.VM_WORKSTATION` | `sc-workstation` | `ares` | Debian 12 | `player` | `opsbridge` | yes | Q001 | +| `web_server` | `SC.VM_WEB_SERVER` | `sc-web-server` | `hermes` | Debian 12 | `player` | — | no | Q002–Q005, Q007 | +| `build_machine` | `SC.VM_BUILD_MACHINE` | `sc-build-machine` | `vulcan` | Arch Linux | `player` | — | no | Q006, Q008 | + +See `docs/VM_BUILD_SYSTEM.md` for full build system documentation and profile authoring guide. + +**SSH key**: all host→guest connections use `~/.ssh/sc_host_key` (BatchMode, no password). 
+ +**Baseline snapshots**: +- workstation: `baseline.day-one` +- web_server, build_machine: `baseline.clean` + +--- + +## TERMINAL ARCHITECTURE + +The player uses a real **Tilix** terminal inside the workstation VM (sc-workstation / ares). +No terminal simulation. SSH to target VMs is real SSH. There is no in-game terminal widget. + +``` +Player opens Tilix on the workstation XFCE desktop + → types: ssh hermes + → real SSH to sc-web-server using player's authorized_keys + → works directly on the target VM + +Host-side validation (triggered by "Mark Complete" in HUD): + ValidationEngine.js SSHes as 'opsbridge' → sudo -H -i -u player + Runs rule checks (file_exists, service_state, etc.) + Returns pass/fail to game server +``` + +Host SSH options (used by ValidationEngine.js and VMManager.js): +``` +-o StrictHostKeyChecking=no +-o BatchMode=yes +-o ConnectTimeout=5 +-o LogLevel=ERROR +-i ~/.ssh/sc_host_key +``` + +--- + +## SERVICE DEPENDENCY GRAPH (Node.js server) + +``` +eventBus.js (Node.js EventEmitter — no deps) + └─ consumed by: all services + +ContentLoader + └─ consumed by: QuestEngine, TicketService, ValidationEngine, TrustSystem, + ProgressionSystem, IncidentScheduler, EmailService, SageService + +VMManager + ← wraps virsh.js + ssh.js + ← called by QuestEngine (start required VMs on quest activation) + ← called by ValidationEngine (get VM IP for SSH) + +ValidationEngine + ← calls VMManager.getIP(vmId) + ← SSHes as opsbridge → runs rule checks (file_exists, service_state, etc.) 
+ ← called by TicketService on mark-complete + +QuestEngine + ← calls VMManager to start required VMs + ← calls ValidationEngine via TicketService + ← calls TrustSystem, WorldFlags, EmailService on resolution + → emits via eventBus: quest:activated, quest:resolved, ticket:received + +IncidentScheduler + ← reads WorldFlags for trigger conditions + ← tick drives escalation step advancement + → emits via eventBus: incident:activated, incident:escalated, incident:resolved + +TrustSystem + ← called by QuestEngine on branch resolution + ← called by IncidentScheduler for ignored incident penalties + → emits via eventBus: trust:changed + +SaveState + ← called by QuestEngine, TrustSystem, ProgressionSystem + ← reads/writes ~/.local/share/sysadmin-chronicles/save.json +``` + +--- + +## KEY MODULES + +### Server (`server/src/`) + +| Module | File | Responsibility | +|--------|------|----------------| +| Entry point | index.js | Express + WS, service wiring, static serving | +| ContentLoader | services/ContentLoader.js | Load all content/ JSON at startup | +| QuestEngine | services/QuestEngine.js | Quest state machine | +| TicketService | services/TicketService.js | Ticket state, mark-complete, branch resolution | +| ValidationEngine | services/ValidationEngine.js | SSH rule evaluation (all rule types) | +| VMManager | services/VMManager.js | virsh wrappers, IP resolution | +| TrustSystem | services/TrustSystem.js | Score, unlocks, revocation | +| ProgressionSystem | services/ProgressionSystem.js | Unlocked VMs, docs, access | +| EmailService | services/EmailService.js | Inbox, follow-ups, reply options | +| SageService | services/SageService.js | Rule-based dialogue / KB | +| ShiftTimer | services/ShiftTimer.js | Shift clock, 30s tick broadcasts | +| IncidentScheduler | services/IncidentScheduler.js | Pressure tick, incident injection | +| ShiftReviewService | services/ShiftReviewService.js | End-of-shift review email | +| CertificationService | 
services/CertificationService.js | Cert awards after quest chains | +| SaveState | services/SaveState.js | Read/write save.json | +| ssh.js | lib/ssh.js | Promisified SSH execution | +| virsh.js | lib/virsh.js | virsh command wrappers | +| eventBus.js | lib/eventBus.js | Node.js EventEmitter for service coordination | + +### Frontend (`frontend/src/`) + +| Component | File | Responsibility | +|-----------|------|----------------| +| Root | App.svelte | Panel routing, WebSocket connection | +| Tickets | TicketsPanel.svelte | List, detail, mark-complete | +| Mail | MailPanel.svelte | Inbox, message, reply buttons | +| Docs | DocsPanel.svelte | Trust-gated doc viewer | +| Sage | SagePanel.svelte | Chat / KB search | +| VMs | VmsPanel.svelte | Live VM status indicators | +| Header | HeaderBar.svelte | Trust, shift timer, mail badge | +| API | lib/api.js | REST fetch wrapper | + +--- + +## CONTENT DOMAINS + +| Domain | Purpose | +|--------|---------| +| `quests/` | Objective chains, clue fingerprints, validation rules, branch priorities | +| `tickets/` | Player-facing problem statements with initial/current priority | +| `incidents/` | Dynamic pressure events with blast_radius and escalation steps | +| `dialogue/` | Workplace messages, hints, follow-ups, series threads | +| `pressure_profiles/` | Reusable escalation templates referenced by quest branches | +| `world_flags/` | Central registry — all world state flags declared here | +| `docs/` | Internal documentation + Sage/help content (trust-gated) | +| `progression/` | Trust thresholds, unlocks, revocation rules, access tiers | +| `vm_profiles/` | Domain names, hostnames, snapshots, networks, resource budgets | + +--- + +## FILE NAMING CONVENTIONS + +- Quest files: `Q{NNN}-{kebab-case-title}.json` +- Ticket files: `T{NNN}.json` +- Incident files: `I{NNN}-{kebab-case-title}.json` +- Dialogue files: `{character}-Q{NNN}.json` or `{character}-Q{NNN}-{variant}.json` +- Quest prep scripts: `Q{NNN}-prep.sh` +- VM profiles: 
`{snake_case}.json` + +--- + +## CONTENT VALIDATION CHECKS + +Run: `node tools/content/validate-content.js` — must exit 0 (zero errors). + +| Check | Rule | +|-------|------| +| JSON well-formed | All content files parse without error | +| No duplicate IDs | Unique across quests, tickets, incidents, pressure profiles, dialogue | +| World flags | Every referenced flag exists in `world_flags/world_flags.json` | +| required_vms | Every entry maps to a valid VM profile | +| blast_radius | Every entry maps to an existing incident file | +| linked_quest | Every ticket's linked_quest maps to an existing quest | +| ticket_id | Every quest's ticket_id maps to an existing ticket | +| Branch priority | Priorities unique per quest (no ties) | +| follow_up_incident | Maps to an existing incident file | +| pressure_profile | Maps to an existing pressure profile file | +| series_id | Every series_id has at least two dialogue members | +| revokes | Trust unlock revoke entries reference valid unlock strings | +| clue_fingerprint | Evidence rule types are valid | + +--- + +## KNOWN GAPS (Post-Redesign) + +These are gaps in the v4.0 Node.js + Svelte implementation. +All content is authored, validator-clean, and reused unchanged. 
+ +### P0 — Blocking for first playable shift + +| Gap | Notes | +|-----|-------| +| Phase 7 workstation VM verification | Confirm SPICE display, Chromium autostart, Tilix as default work end-to-end on a freshly seeded VM | +| Phase 10 full playtest | Boot all VMs, play Q001→Q002, validate full server→SSH→HUD loop | + +### P1 — Required before broader testing + +| Gap | Notes | +|-----|-------| +| Clue quality as system degrades | Evidence should remain legible as incidents escalate (I001/I002/I003 escalation pass) | +| Viewer smoothness | `remote-viewer` SPICE path is functional but not final-UX smooth; lower priority with real XFCE desktop | + +### P2 — Polish / completeness + +| Gap | Notes | +|-----|-------| +| WORKSTATION_POLISH_BACKLOG.md items | See that file for outstanding desktop UX polish | + +--- + +## GENERATED / LARGE ASSETS + +Created by CLI tooling, not hand-managed: + +- `vm/images/*.qcow2` +- Imported libvirt domain XML +- Baseline snapshot exports or manifests +- Shift checkpoint snapshots +- Packaged Linux build artifacts diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md new file mode 100644 index 0000000..daeedfb --- /dev/null +++ b/docs/ROADMAP.md @@ -0,0 +1,58 @@ +# SYSADMIN CHRONICLES — DEVELOPMENT ROADMAP +> Version 5.0 | Status: Active development +> +> Changelog: +> v5.0 — GDScript/Godot removed. Node.js + Svelte is the only codebase. +> v4.0 — Full architecture pivot to Node.js + Svelte. +> v3.x — GDScript/Godot era (superseded). 
+ +--- + +## IMPLEMENTATION PHASES (Node.js + Svelte) + +| Phase | Description | Status | +|-------|-------------|--------| +| 1 | Game server skeleton — Express, ContentLoader, SaveState, GET /api/state | [x] done | +| 2 | TrustSystem, ProgressionSystem, QuestEngine, TicketService, ticket routes | [x] done | +| 3 | ValidationEngine — SSH into VMs, all rule types | [x] done | +| 4 | EmailService — inbox, follow-up emails, reply options, mail routes | [x] done | +| 5 | WebSocket broadcasts — trust:changed, mail:new, shift:tick, incident:alert | [x] done | +| 6 | Svelte frontend — all panels built, dist/ served by game server | [x] done | +| 7 | XFCE workstation VM — cloud-init, SPICE/QXL, Chromium, Tilix, autostart | [x] done | +| 8 | SageService + docs routes + SagePanel + DocViewer | [x] done | +| 9 | IncidentScheduler + ShiftTimer + pressure tick loop | [x] done | +| 10 | Full playtest — boot all VMs, play Q001→Q002 end to end | [ ] pending | + +**Phase 7 details:** `workstation.sh` profile provisions the full XFCE desktop via +cloud-init: SPICE+virtio display with spicevmc channel for vdagent resize, Chromium +autostart via `open-portal` wrapper (waits for game server before launching), Tilix +as default terminal (`update-alternatives` + `helpers.rc`), dark theme, screensaver +off, desktop icons executable. Snapshot chain: `baseline.day-one`, `baseline.recovery` +taken by `seed-vms.sh`. + +--- + +## CONTENT STATUS + +The quest system and story are being completely reworked. All existing quest, +ticket, dialogue, and incident content (Q001–Q008, T001–T008, I001–I003) is +considered legacy and will be replaced. 
+ +### Story Design Assets + +| File | Purpose | +|------|---------| +| `docs/CHARACTERS.md` | All characters — bios, relationships, story hooks, unresolved threads | +| `docs/STORY_DESIGN_CONTEXT.md` | How story works in this game — narrative arc, quest structure, character model, design constraints | +| `docs/COMPANY_LORE.md` | World, company, products, tone guidelines | + + +--- + +## QUEST TIER DEFINITIONS + +| Tier | Label | Characteristics | +|------|-------|-----------------| +| 1 | Tutorial Arc | Single VM, clear symptoms, one obvious fix, one better fix, no time pressure | +| 2 | Workday Arc | Multi-symptom, one quest affects another, trust pressure, incidents active | +| 3 | Stretch | Multi-VM, ambiguous root cause, political pressure, real prioritization stakes | diff --git a/docs/RUNTIME_DEPENDENCIES.md b/docs/RUNTIME_DEPENDENCIES.md new file mode 100644 index 0000000..3713654 --- /dev/null +++ b/docs/RUNTIME_DEPENDENCIES.md @@ -0,0 +1,54 @@ +# Runtime Dependencies + +This file tracks host and guest dependency expectations for Sysadmin Chronicles. +Keep it updated when provisioning scripts, VM display backends, or installer +requirements change. + +## Host Packages + +| Capability | Arch package / command | Minimum tested version | Notes | +| --- | --- | --- | --- | +| Godot runtime | `godot` | 4.6.2 | Legacy — used by the Godot client path, which ROADMAP.md v5.0 records as removed (Node.js + Svelte is now the only codebase). | +| Libvirt CLI | `libvirt` / `virsh` | 12.2.0 | Use `qemu:///system` for game VMs. Socket activation is supported. | +| QEMU system emulator | `qemu-system-x86` / `qemu-system-x86_64` | 11.0.0 | Must match the split QEMU module package versions. | +| QEMU disk tools | `qemu-img` | 11.0.0 | Used by VM builders for qcow2 images. | +| QXL display module | `qemu-hw-display-qxl` | 11.0.0 | Required for `virt-install --video qxl`. 
| +| Virtio GPU modules | `qemu-hw-display-virtio-gpu`, `qemu-hw-display-virtio-gpu-pci`, `qemu-hw-display-virtio-vga` | 11.0.0 | Required for the default SPICE + virtio workstation display path. | +| SPICE UI module | `qemu-ui-spice-core` | 11.0.0 | Required for SPICE graphics in libvirt domain capabilities. | +| SPICE channel module | `qemu-chardev-spice` | 11.0.0 | Required for SPICE agent channels. | +| SPICE audio module | `qemu-audio-spice` | 11.0.0 | Required for SPICE-backed guest audio. | +| VM installer | `virt-install` | 5.1.0 | Creates imported cloud-image domains. | +| SPICE viewer | `remote-viewer` / `virt-viewer` | 11.0 | Used for desktop workstation display. | +| Cloud image tools | `cloud-image-utils`, `cdrtools`, `libisoburn` | cloud-image-utils 0.33, cdrtools 3.02a09, libisoburn 1.5.8 | Used to generate seed ISOs. | +| SSH client | `ssh` | OpenSSH 10.3p1 | Used by the game and setup scripts to reach guests. | +| Node.js | `node` | 22.22.2 | Required by the redesigned browser HUD/server path. | + +## Libvirt Resources + +| Resource | Required shape | Notes | +| --- | --- | --- | +| Network | `sc-internal`, bridge `sc-br0`, subnet `10.42.0.0/24`, NAT forwarding | NAT is required during VM image provisioning so Debian cloud-init can install packages. The network remains private to libvirt guests for inbound access. | +| Storage pool | `sc-images` | For `qemu:///system`, defaults to `/var/lib/libvirt/images/sysadmin-chronicles`. | +| SSH key | `~/.ssh/sc_host_key` | Injected into guests for game automation and bridge access. 
| + +## Workstation Guest Packages + +The workstation image currently targets Debian 12 Bookworm and installs: + +- Desktop/display: `xfce4`, `xfce4-goodies`, `lightdm`, `lightdm-gtk-greeter`, `spice-vdagent`, `qemu-guest-agent`, `accountsservice`, `linux-image-amd64` +- Desktop metadata: `gvfs`, `gvfs-daemons`, `libglib2.0-bin` for trusted desktop launchers and GVFS metadata writes +- User tools: `tilix`, `chromium`, `thunar`, `geany`, `meld`, `vim`, `nano`, `tmux`, `htop` +- Sysadmin tools: `openssh-server`, `openssh-client`, `sudo`, `curl`, `wget`, `rsync`, `git`, `jq`, `python3`, `nmap`, `netcat-openbsd`, `dnsutils`, `traceroute`, `mtr`, `tcpdump`, `strace`, `lsof`, `openssl`, `whois`, `iperf3`, `logwatch` +- Fonts/completion: `fonts-hack`, `fonts-firacode`, `bash-completion` + +## Version Capture + +Before cutting an installer or release, capture current versions with: + +```bash +tools/setup/check-host.sh +virsh --connect qemu:///system version +qemu-system-x86_64 --version +virt-install --version +pacman -Q libvirt qemu-system-x86 qemu-hw-display-qxl qemu-hw-display-virtio-gpu qemu-hw-display-virtio-gpu-pci qemu-hw-display-virtio-vga qemu-ui-spice-core qemu-chardev-spice qemu-audio-spice virt-install virt-viewer spice-gtk cloud-image-utils cdrtools libisoburn +``` diff --git a/docs/SAVE_SYSTEM.md b/docs/SAVE_SYSTEM.md new file mode 100644 index 0000000..e265e5d --- /dev/null +++ b/docs/SAVE_SYSTEM.md @@ -0,0 +1,330 @@ +# SYSADMIN CHRONICLES — SAVE SYSTEM DESIGN +> Version 1.3 | Status: Active development +> +> Changelog: +> v1.3 — Defined `persists: false` flag semantics (shift boundary reset). +> Added world flag persistence rules section. +> +> This document covers the save model, VM persistence policy, dirty state +> handling, recovery flows, and the design decisions behind them. + +--- + +## THE CORE TENSION + +The game wants real VMs. Real VMs have real state. That state changes as the +player works. 
The question is: what do we save, when, and what happens when +things go wrong? + +Two broad approaches exist: + +**Approach A — Replay Model** +Save authored flags and game state only. On load, restore a baseline snapshot +and replay authored events to reconstruct the world. Simple, cheap, predictable. + +**Approach B — Dirty State Model** +Preserve actual VM disk state as-is. Save references to the current snapshot or +live qcow2 state. On load, the VM resumes exactly where it was. + +This game uses **Approach B**, with structured recovery fallbacks. Here is why, +and what that means in practice. + +--- + +## WHY DIRTY STATE + +The replay model breaks the design contract. If the player spent forty minutes +debugging a broken service, leaving behind log entries, partial edits, and +useful breadcrumbs, restoring a clean baseline erases all of that. The world +forgets. That is not how real systems work. + +The dirty state model means: +- The player's workstation remembers what they did +- Target VMs remember fixes applied and mistakes made +- Evidence persists — good and bad +- A machine the player damaged stays damaged until they fix it or request reimage +- A machine they set up correctly stays correct + +Operational note: +- The workstation should be treated as a curated terminal-first appliance image + whose shell history, local config, and jump-box state persist like any other VM state +- Desktop-like company tools live in the game state layer, not inside a VM browser session +- Rebuilding the workstation runtime on every reset would create slow, noisy, + and inconsistent recovery behavior + +This is more expensive. It is also the point of the game. + +--- + +## WHAT GETS SAVED + +### Game State Layer +Saved as structured JSON. Cheap, fast, always consistent. 
+ +- Player trust score and history +- Unlocked VMs, sudo scopes, internal docs, tools +- Active and completed ticket/quest state +- World flags (current values and change history) +- Incident scheduler state (active incidents, escalation timers) +- Per-quest authored consequence records +- Shift timestamp and in-world clock + +### VM State Layer +Saved as libvirt snapshot references or qcow2 state references. Expensive but +necessary. + +- Per-VM: reference to current named snapshot or live disk state +- Per-VM: list of managed recovery checkpoints +- Per-VM: reimage eligibility and reimage history +- Per-VM: last-known observation data (advisory, not authoritative) + +The game does not store VM disk images in the save file. It stores references to +named snapshots managed by libvirt. The actual disk data lives where libvirt +puts it. + +--- + +## WORLD FLAG PERSISTENCE RULES + +Every world flag in `world_flags/world_flags.json` declares a `persists` field. +This controls how the flag behaves across shift boundaries and game loads. + +### `persists: true` +The flag is written to the save file and survives indefinitely. It is cleared +only when a quest or incident explicitly sets it to false, or when the VM is +reimaged. Most flags are persistent — they represent stable facts about the +world (nginx is configured correctly, logrotate is healthy, etc.). + +### `persists: false` +The flag is **reset at the start of each new shift**, regardless of its current +value. It is NOT reset on game load within the same shift. + +Non-persistent flags represent transient pressure states that should not carry +forward into the next working session: +- `hermes_disk_healthy` — disk state that may change overnight without the player's intervention +- `web_disk_pressure_active` — active disk pressure event currently escalating + +**On shift boundary**: all `persists: false` flags are cleared before the new +shift's checkpoint is taken. Their cleared state is what gets saved. 
+ +**On game load mid-shift**: `persists: false` flags are loaded from the save +file as-is. They are not reset on load, only on shift boundary. + +**Implementation note for `SaveSystem`**: When writing the shift checkpoint, +iterate all world flags and zero out any with `persists: false` before +serializing. Do not zero them in the live `WorldFlagRegistry` until the +checkpoint write is complete, to avoid mid-write state corruption. + +--- + +## SNAPSHOT STRATEGY FOR SAVE/LOAD + +### Named Snapshot Tiers + +Each VM maintains three tiers of named snapshots, plus its live (un-snapshotted) disk state: + +``` +baseline.clean — Authored starting state for a fresh quest arc +baseline.recovery — Fallback if live state is unrecoverable +checkpoint.shift-{N} — Auto-saved at start of each in-game shift +live — Current working state (no snapshot, just disk) +``` + +On save: the game records which snapshot tier is current per VM and any +divergence from it (live state is implicitly the disk, not a snapshot). + +On load: the game checks that referenced snapshots still exist and are +consistent with the saved game state flags. If they are, it resumes from live +disk state and continues normally. + +### What "Resume" Means + +The game does not revert to a snapshot on load. It resumes from whatever state +the VMs are currently in. The save file describes what the game *thinks* the +world looks like. On load, the observation service validates current VM state +against saved world flags and reconciles any drift. + +Minor drift (service restarted, log rotated by the OS) is handled silently. +Major drift (a VM that should be running is gone, a snapshot reference is +missing) triggers the recovery flow. + +--- + +## DIRTY STATE RISKS AND MITIGATIONS + +### Risk 1: Snapshot Reference Goes Stale +A named snapshot the game references is deleted or corrupted outside the game. + +Mitigation: On load, the save system checks that all referenced snapshots exist +before resuming. 
If a checkpoint snapshot is missing but baseline.clean exists, +offer to resume from baseline with authored-flag reconstruction where possible. +If baseline.clean is also gone, the VM is treated as unrecoverable and the +reimage flow is offered. + +### Risk 2: Live Disk State is Unbootable +The player damaged the VM beyond booting — corrupted bootloader, deleted +critical system files, broke networking in a way that prevents observation. + +Mitigation: The game detects unbootable VMs through libvirt domain state and +failed SSH probes. The player is notified in-world ("hermes is not responding") +and the reimage flow is offered. The game does not attempt to force-boot or +auto-repair. + +### Risk 3: Multiple VMs Diverge from Each Other +The player fixed hermes but their notes reference a service that is now +configured differently. Cross-VM state is inconsistent with authored +expectations. + +Mitigation: World flags are the source of truth for cross-VM consequences, not +raw VM state. If the flags say nginx_stable but hermes currently has nginx +failed, the validation service surfaces this on next observation pass and raises +an in-world event. The player is not penalized for drift that happens while they +are offline — but they are informed. + +### Risk 4: Disk Space on Host +qcow2 images with many snapshots can balloon. Long save histories consume real +host storage. + +Mitigation: Managed checkpoint retention policy. The game keeps a maximum of N +shift checkpoints per VM (default: 5) and prunes the oldest on new checkpoint +creation. Authored baseline and recovery snapshots are never pruned by the game. +A storage budget field in vm_profiles allows per-VM tuning. 
+ +Resource budget note: +- Budget the workstation separately from server VMs +- This applies even when the workstation profile is modest +- Save/recovery tooling should assume workstation snapshots are the most + storage-expensive routine snapshots in the fleet +- Earlier lab builds showed that browser-capable workstation images can exceed + small cloud-image defaults quickly; the terminal-first plan avoids much of + that pressure, but disk budgets still need to be explicit + +--- + +## THE REIMAGE FLOW + +When a VM is unrecoverable, the player can report it for reimage through an +in-world mechanic (ticket to management or ops channel). + +Flow: +1. Player submits a reimage request for the affected machine +2. An in-world delay is imposed (e.g., 1 in-game shift) +3. The machine is restored from baseline.recovery or baseline.clean +4. Trust penalty is applied based on severity +5. Any in-progress quests on that VM are reset to their baseline state +6. Evidence from before the reimage is gone — acknowledged in-world as "we + had to wipe the machine" + +This is not a free reset. It has visible consequences. But it allows the game +to continue rather than becoming permanently stuck. + +The reimage flow is the designed escape valve, not a hidden automatic recovery. + +--- + +## SHIFT CHECKPOINTS + +At the start of each in-game shift, the game: +1. Clears all `persists: false` world flags +2. Saves all game state JSON (with non-persistent flags already zeroed) +3. Creates a named snapshot for each active VM: `checkpoint.shift-{N}` +4. Records the checkpoint reference in the save file +5. Prunes shift checkpoints beyond the retention limit + +This gives the player a rollback option at shift granularity if they want to +undo a disastrous session, at the cost of losing that shift's work entirely. + +Shift checkpoint rollback is an explicit player action, not automatic. It is +presented as "start this shift over" and requires confirmation.
It does not +undo trust changes or world flag consequences that were sent to other characters +(e.g., dialogue already delivered, tickets already closed). + +--- + +## DEVELOPER RESET + +For authoring and testing, a separate CLI tool exists outside the game: + +```bash +bash tools/vm/snapshot-all.sh --revert-to baseline.clean +``` + +This is not accessible in the shipped game. It completely resets all VMs to +their authored baseline. Used during content authoring and automated test runs. + +--- + +## SAVE FILE STRUCTURE (DRAFT SCHEMA) + +```json +{ + "save_version": 1, + "player": { + "trust": 14, + "trust_history": [], + "unlocks": ["sudo:systemctl", "vm:build_machine"], + "current_shift": 7 + }, + "world": { + "flags": { + "player_ssh_configured": true, + "nginx_stable": true, + "hermes_logrotate_healthy": false, + "hermes_log_pressure_pending": true, + "hermes_disk_healthy": false + }, + "flag_history": [], + "_note": "persists:false flags are zeroed at shift boundary before this snapshot is written. They survive game load within the same shift." + }, + "quests": { + "completed": ["Q001", "Q002"], + "failed": [], + "active": ["Q003"], + "branch_outcomes": { + "Q002": "config-fixed-enabled" + } + }, + "tickets": { + "active": ["T003"], + "closed": ["T001", "T002"] + }, + "incidents": { + "active": [ + { + "id": "I001", + "started_at_shift": 6, + "escalation_step_reached": 1 + } + ], + "resolved": [] + }, + "vms": { + "workstation": { + "current_snapshot_tier": "live", + "last_checkpoint": "checkpoint.shift-6", + "recovery_snapshot": "baseline.recovery", + "reimage_count": 0, + "last_observation": {} + }, + "web_server": { + "current_snapshot_tier": "live", + "last_checkpoint": "checkpoint.shift-6", + "recovery_snapshot": "baseline.recovery", + "reimage_count": 0, + "last_observation": {} + } + } +} +``` + +--- + +## DESIGN PRINCIPLES SUMMARY + +- The dirty state is the game. Preserving it is the point. 
+- Snapshots are structured fallbacks, not the primary save mechanism. +- The game never silently reverts VM state without player awareness. +- Recovery from failure is in-world and has consequences. +- The host disk cost is real and must be managed with a retention policy. +- Developers get clean-reset tooling outside the shipped game. +- `persists: false` flags reset at shift boundary, not on load. diff --git a/docs/SNAPSHOT_CHAIN.md b/docs/SNAPSHOT_CHAIN.md new file mode 100644 index 0000000..d6ca244 --- /dev/null +++ b/docs/SNAPSHOT_CHAIN.md @@ -0,0 +1,103 @@ +# SYSADMIN CHRONICLES — SNAPSHOT CHAIN +> Version 1.0 +> +> This document defines what each named baseline snapshot represents, +> how the snapshot chain is built, and what assumptions quest authors +> can make about VM state at each snapshot. + +--- + +## POLICY + +Each `baseline.post-qXXX` snapshot represents the **canonical clean-branch +outcome** of quest QXXX — meaning all prior quests were resolved via their +highest-priority (best) solution branch. + +Player state diverges from the baseline during play. The baseline is always +the authored "good state" for that point in the arc, built independently of +any player's actual save. + +**A baseline snapshot is never built from a bad or partial branch outcome.** +If a player took the wrong branch, their VM state differs from the baseline +for all subsequent quests. That divergence is intentional and is the game. + +--- + +## SNAPSHOT CHAIN TABLE + +| Snapshot Name | VM(s) | Built After | Represents | +|---------------|-------|-------------|------------| +| `baseline.day-one` | workstation | fresh image | Brand new ares workstation. No player account SSH key. Provisioning script ran but authorized_keys absent. | +| `baseline.clean` | web_server | fresh image | Fresh hermes. nginx installed, no config errors, logrotate present, web root owned by www-data. Ready for Q002 to break it. | +| `baseline.clean` | build_machine | fresh image | Fresh vulcan. 
NTP disabled (for Q006 scenario). Arch base install, pacman configured to use internal repo. | +| `baseline.post-q001` | workstation | Q001 clean branch | Player SSH key in authorized_keys with correct permissions (0600 file, 0700 dir). Used as the implied state for all subsequent quests requiring SSH access. Not an explicit snapshot — workstation just stays live from Q001 onward. | +| `baseline.post-q004` | web_server | Q004 clean branch | hermes with: nginx stable+enabled, logrotate configured, web root owned by www-data recursively. All of Q002–Q004 resolved cleanly. Used as starting state for Q005 and Q007. | +| `baseline.post-q006` | build_machine | Q006 clean branch | vulcan with NTP enabled and healthy, archlinux-keyring refreshed, builds working. Used as starting state for Q008. | + +--- + +## HOW SNAPSHOTS ARE BUILT + +Snapshots are produced by `tools/vm/seed-vms.sh` in sequence: + +``` +1. Build base VM images from cloud-init or preseed +2. Run base configuration (hostname, users, packages, game helpers) +3. Run suppress-maintenance-noise.sh +4. Take baseline.clean snapshot +5. Run Q001-prep.sh → take no snapshot (workstation stays live) +6. Run Q002-prep.sh through Q004-prep.sh sequentially on web_server +7. Apply clean-branch outcome state manually or via a post-quest-state script +8. Take baseline.post-q004 snapshot on web_server +9. Run Q006-prep.sh on build_machine +10. Apply clean-branch outcome state on build_machine +11. Take baseline.post-q006 snapshot on build_machine +``` + +Steps 7 and 10 ("apply clean-branch outcome state") are done via dedicated +scripts in `tools/vm/quest-prep/`: + +``` +Q004-post-clean.sh — sets web root ownership, confirms logrotate, enables nginx +Q006-post-clean.sh — enables systemd-timesyncd, refreshes archlinux-keyring +``` + +These post-clean scripts are the authoritative definition of what "clean +branch" means for snapshot purposes.
+ +--- + +## WHAT QUEST AUTHORS CAN ASSUME + +When authoring a quest against `baseline.post-q004`, you can assume: +- nginx is active and enabled on hermes +- /etc/logrotate.d/nginx exists and is correct +- /var/www/axiomworks is owned by www-data recursively +- The deploy service runs as www-data and can write to /var/www/axiomworks +- No Q002/Q003/Q004 broken state exists +- Q005 and Q007 both build on this clean hermes state + +When authoring a quest against `baseline.post-q006`, you can assume: +- Everything in post-q004 (hermes state) +- systemd-timesyncd is active and enabled on vulcan +- archlinux-keyring is up to date +- pacman -Syu works without signature errors +- Q008 uses this as its clean starting baseline + +If your quest needs to break something that was fixed in a prior quest, +your prep script must re-break it after the post-clean baseline is applied. +Document this explicitly in your prep script's header comment. + +--- + +## DEVELOPER RESET + +To rebuild all baselines from scratch: + +```bash +bash tools/vm/snapshot-all.sh --revert-to baseline.clean +bash tools/vm/seed-vms.sh +``` + +This is destructive and should only be run during authoring or CI. +It is not available in the shipped game. diff --git a/docs/STORY_DESIGN_CONTEXT.md b/docs/STORY_DESIGN_CONTEXT.md new file mode 100644 index 0000000..f026292 --- /dev/null +++ b/docs/STORY_DESIGN_CONTEXT.md @@ -0,0 +1,423 @@ +# Story Design Context — Sysadmin Chronicles + +For story designers and AI agents creating new quests and narrative content. + +**Related docs:** +- `CHARACTERS.md` — character bios, relationships, story hooks +- `COMPANY_LORE.md` — world, company, tone +- `QUEST_AUTHORING.md` — technical JSON spec for implementers + +This document answers: *how does story actually work in this game, and what does a quest +concept need to contain to be usable?* + +--- + +## The Core Premise + +The player is a new junior sysadmin at Axiom Works, a mid-size B2B software company. 
+They are replacing someone named Dale. Nobody will explain why Dale is gone. + +The game is played entirely through a simulated work environment: a terminal, an email +inbox, and a company website. There are no cutscenes, no narration, no inventory, no +combat. Everything that happens is expressed through: + +- **Tickets** — the player receives a ticket describing a problem +- **The terminal** — the player SSHes into VMs, investigates, and fixes things +- **Character dialogue** — characters react to how the player solved the problem +- **The next ticket** — the world moves on, and the consequences of what the player + did are baked into the next situation + +That's it. Story is not told — it is accumulated from the choices the player makes +when fixing real Linux problems on real virtual machines. + +--- + +## The Three Machines (VMs) + +Every quest happens on one or more of these machines. Their narrative identities +matter as much as their technical roles. + +### ares — the Workstation +The player's home machine. Ubuntu 24.04. Quests here are onboarding-flavored — +establishing access, learning the environment. It's the only machine the player +can reach on day one. + +*Narrative identity:* Where you start. Safe-ish. The first one you break is here. + +### hermes — the Web / App Server +Debian 12. Runs nginx and the AxiomFlow demo/staging application. This is the +machine that Sarah Chen cares about, that customers can feel, and that Priya Nair +watches for security posture. Most of the early-game quests are here. + +*Narrative identity:* The product's face to the world. Breaking this makes noise +immediately. The most politically visible machine. + +### vulcan — the Build Machine +Arch Linux. Compiles packages, runs the internal build pipeline, serves packages +to hermes via an internal apt repo. Nikhil Sharma owns this in principle but nobody +manages it daily. Things here break silently until hermes starts serving bad software. 
+ +*Narrative identity:* The machine nobody watches until something downstream fails. +Quests here reveal that problems have upstream causes the player didn't expect. + +### Planned future machines +As the story expands, new machines can be added. Each should have a clear narrative +role before it's introduced. (See `COMPANY_LORE.md` for the candidate list.) + +--- + +## How Story Is Delivered + +### Tickets as Act One +Every quest begins with a ticket in the player's inbox. The ticket is a short email +from a character describing a symptom — not a cause. The sender's perception of the +problem is usually incomplete and sometimes wrong. This is intentional: the player's +job is to investigate, not to execute instructions. + +Good ticket writing: +- Describes what the sender experienced, not what the cause is +- Has the sender's voice and perspective (Sarah is outcome-focused; Dave is confused; + Priya is terse and specific) +- Does not hint at the solution +- Creates genuine stakes (site is down, builds are failing, someone is locked out) + +Bad ticket writing: +- Explains the root cause ("the log file is too big") +- Has no character voice (generic IT help desk language) +- Stakes are unclear or low + +### The Terminal as Act Two +The player investigates. They SSH in, run commands, read logs, check configs, look at +file ownership. The evidence is seeded into the VM baseline — it is genuinely there +to find, not procedurally generated. A good quest has a natural clue trail: + +- The most obvious thing points to a second thing +- The second thing reveals the actual problem +- The fix is achievable with real Linux knowledge + +The player cannot be told what to do. They can ask Marcus for hints (via dialogue +choices), but good players don't need to. + +### Branching Resolution as Act Three +When the player has made changes to the VM, the game checks the state of the +system against the quest's solution branches. 
The branch that matches determines: + +- What dialogue fires (Marcus's reaction, Sarah's reaction, Priya's follow-up) +- What trust delta the player receives +- What world flag is set (persistent story state) +- Whether an incident is triggered (a future consequence of a partial fix) +- What ticket comes next + +**This is the central story mechanic.** Every quest should be designed with at +least two and ideally three resolution branches: + +| Branch type | What it means | +|-------------|---------------| +| **Clean fix** | Player understood the root cause and solved it properly. High trust, no downstream risk. | +| **Acceptable fix** | Problem is solved but with a tradeoff — brittle approach, future maintenance burden, or incomplete cleanup. Lower trust. | +| **Regression** | Player fixed the symptom but made something else worse. Negative trust. Story consequences. | + +The **regression branch** is not about punishment — it's about realism. A real +sysadmin who removes all SSH restrictions to restore one person's access has +technically solved the ticket while creating a larger problem. The story should +treat this as realistic professional consequence, not a game-over failure. + +Players on a clean-fix path get more trust, unlock more access, and receive warmer +character reactions. Players on a regression path continue playing but face the +downstream effects of their choices. + +--- + +## World Flags — Persistent Story State + +World flags are string keys set when a quest's branch resolves. They persist for +the entire playthrough and can be read by later quests, incidents, and dialogue. 
+ +Examples: +- `hermes_logrotate_healthy` — set when the player properly fixed log rotation +- `hermes_ssh_allowusers_fragile` — set when the player restored SSH access using + the brittle AllowUsers approach instead of the robust AllowGroups approach +- `player_ssh_configured` — set when the player successfully set up SSH on day one + +World flags are how story continuity works. A later quest can check whether the +player fixed something correctly earlier and behave differently. Marcus can reference +a past fix. Priya can flag a previously introduced risk in a later audit. A problem +that was "solved" with a quick fix can recur. + +**When designing a new quest, ask:** what flag should this set, and what future quests +or dialogue might reference it? + +--- + +## Trust — The Narrative Currency + +Trust is a numeric score that tracks the player's professional standing with Marcus +and the IT team. It affects: + +- **VM access** — the player gains SSH access to hermes and vulcan as trust increases. + If trust drops badly, access can be revoked. +- **Documentation access** — more trusted players get access to internal runbooks + and admin guides +- **Character warmth** — Marcus's messages change tone subtly as trust grows +- **Incident visibility** — at a certain trust level, the player starts seeing + background incidents before they become critical + +Trust is not displayed as a raw number. Players experience it as consequences. + +**For quest designers:** each branch should have a `trust_delta` that reflects the +quality of the fix. A proper root-cause fix should earn more than a workaround. +Regression branches should cost trust. Day-one onboarding quests are lenient; +later quests at higher tiers should be less forgiving. + +--- + +## Incidents — Consequences of Incomplete Fixes + +An incident is a time-delayed consequence that fires when a quest's partial-fix +branch was taken. It represents the problem coming back. 
+ +Example: The player clears a full disk by deleting a log file but doesn't restore +the logrotate config. Two in-game hours later, the disk starts filling again. Dave +notices. The player gets another ticket about the same symptom. + +Incidents are not punishments — they are realistic. The world doesn't stay fixed +just because the player touched it. A player who takes clean-fix branches will +rarely see incidents. A player who takes every shortcut will find their ticket queue +filling up with problems they already "solved." + +For story purposes: incidents can also carry narrative weight. If the player made a +security regression, an incident could represent an audit finding, an unusual login, +or a configuration discrepancy Priya noticed. + +--- + +## The Character Conversation Model + +Quest dialogue fires after a branch resolves. Three characters can speak: + +### Marcus Webb +The primary voice. Appears in every quest. His post-resolution message reflects: +- What the player actually did (not just whether they succeeded) +- Whether they understood the root cause or just cleared the symptom +- A forward-looking observation (usually a quiet flag for what's coming next) + +Marcus does not praise effusively or scold dramatically. He states what he observed. +His message for a clean fix is warmer and sometimes wry. His message for a regression +is brief and pointed. He never says "well done!" He might say "that's the right call." + +### Sarah Chen +Speaks when the quest affects something product-facing (hermes being up or down, +deploys working or failing). Her messages are reactive — she responds to outcomes, +not process. She is not hostile unless the player makes her situation worse. + +### Priya Nair +Speaks when the quest has security implications — access changes, hardening, +audit posture. She does end-of-shift reviews that grade overall performance. +Her per-quest messages are brief and evaluative. She notices things Marcus might not. 
+ +### Other characters +Dave Okonkwo files tickets. He does not have post-resolution dialogue — he +just stops or starts noticing things. Future characters (Kowalski, Nikhil, Tanya) +can speak in dialogue if quests are designed to involve them. + +--- + +## The Narrative Arc + +The overall story has six phases. Quests should be designed with their phase in mind. +The phase is usually not visible to the player — it emerges from what's happening +around them. + +### Phase 1 — Normal Work +*Tier 1 quests. Early game.* + +The player is new. Everything is routine. Marcus is helpful. The problems are real +but not alarming — a broken config, a full disk, a permission issue. The player is +learning the environment. The subtext is that things are slightly more wrong than +they should be, but there's nothing to point at. + +Hidden layer: small anomalies in the systems that curious players can notice but +don't have context for yet. + +### Phase 2 — Unease +*Tier 1/2 transition.* + +The problems start to have patterns. The same kind of thing breaks twice. A fix +the player made doesn't hold the way it should. Nothing is alarming, but Marcus's +messages have a slightly different quality — he notices things he doesn't explain. + +Hidden layer: a world flag from an early quest points somewhere unexpected. + +### Phase 3 — Suspicion +*Tier 2 quests. Mid game.* + +The player starts encountering problems they didn't cause and can't fully explain. +Access was changed by someone. A config was edited recently. A log shows an +unusual pattern. Nobody is accusing anyone. But the player now has enough context +to start asking questions — even if no quest explicitly tells them to. + +This is where Dale becomes relevant again. The systems the player inherits were +last touched by Dale. Some of them have been in a particular state for a long time. 
+ +### Phase 4 — Investigation +*Tier 2/3 transition.* + +The player has connected enough dots to understand that something happened before +they arrived. The quests in this phase involve digging into logs, access records, +and configuration history. The investigation is framed as professional work +(audit the access logs, trace the package build history) — but the results tell +a story. + +Marcus's messages are shorter. Priya starts appearing more. Kowalski schedules a +meeting nobody explains. + +### Phase 5 — Conflict +*Tier 3 quests. Late game.* + +The player knows what happened. Acting on that knowledge has professional +consequences. The conflict is not physical — it is about what the player chooses +to surface, who they tell, and what they do with access they were given for one +purpose that could be used for another. + +### Phase 6 — Resolution +*Endgame.* + +The situation resolves. The ending the player gets depends on the world flags +accumulated across their entire playthrough — not just whether they clicked the +"good ending" button. A player who took clean-fix branches throughout, built +trust, and noticed the hidden anomalies gets a different ending than a player +who patched symptoms, lost trust, and missed everything. + +--- + +## What Makes a Good Quest Scenario + +The best quests have a **plausible mundane cause** and a **visible technical trail**. +Players should never need to guess — they should be able to find the answer by +looking at the right files and running the right commands. 
+ +### Good scenario types +- Service down → config syntax error → player traces error output to the line +- Disk full → log file enormous → logrotate config missing → player restores it +- Deploy fails → files owned by wrong user → someone ran a script as root manually +- Build failures → clock drift → NTP not running → player enables time sync +- Access locked out → sshd_config modified → wrong directive → player corrects it +- App crashes after update → bad package from internal repo → player traces to source + +### What makes these work +1. **The symptom is real and urgent.** Something is actually broken. +2. **The cause is discoverable.** The evidence is in logs, config files, or system state. +3. **The fix is a real Linux operation.** Not artificial — `chown`, `systemctl`, editing + a config, fixing a cron entry, rolling back a package. +4. **Multiple approaches exist.** The quick fix works. The proper fix is better and + the game knows the difference. +5. **The character reactions are grounded.** Sarah cares about the demo being up. + Priya cares about the access control implications. Marcus cares about whether the + player understood what they were doing. + +### Bad scenario types to avoid +- Problems that require packages not in the VM's guaranteed baseline (see `QUEST_AUTHORING.md`) +- Problems that require real-time events the validation engine can't check +- Problems where the "correct" fix is the only fix (no meaningful branch differentiation) +- Problems that break the fourth wall or require the player to know game-layer information +- Problems that are gotchas rather than investigations (the cause can't be found by looking) + +--- + +## Hidden Anomalies — Environmental Storytelling + +Every 3–5 quests should include something unusual in the VM environment that the player +is not told about and not required to engage with. These are not quest objectives. +They are breadcrumbs for curious players. 
+ +Examples of the kind of thing these should be: +- A user account that shouldn't exist +- A log entry from an odd time that doesn't match the official history +- A file that was modified recently but wasn't part of the quest setup +- A cron job that's been disabled but was once important +- An SSH key in authorized_keys that doesn't belong to anyone obvious + +These anomalies should be consistent with the overall narrative arc — a player who +collects them across the whole game should be able to piece together what happened +before they arrived. They should never be labelled, never referenced in objectives, +and never required. They are for the players who look. + +--- + +## Quest Output Format for Story Agents + +When proposing new quests, provide the following. This is the minimum needed for +a technical author to implement the quest. + +``` +Quest ID: QXXX +Title: [player-facing] +Narrative phase: [1–6] +Tier: [1, 2, or 3] + +Primary VM: [ares / hermes / vulcan] +Additional VMs: [if any] + +Scenario summary: + What is broken, why it is broken (the root cause), and what the player + will encounter. 1–3 sentences. Written for the implementer, not the player. + +Ticket: + From: [character name] + Subject: [email subject line] + Body: [the email the player receives. Written in the sender's voice. + Describes the symptom. Does not explain the cause.] + +Clue trail: + What the player will find when they investigate. The evidence that leads + them to the root cause. Describe the actual files, log entries, and system + states — not the player's steps. + +Solution branches: + Branch 1 (clean fix, highest trust): + What the player has done. Why it's correct. Trust delta. + Branch 2 (acceptable fix): + What the player has done. What tradeoff it introduces. Trust delta. + Branch 3 (regression, if applicable): + What the player did wrong. What it breaks. Negative trust delta. 
+ +Character reactions: + Marcus (post-resolution): + Clean: [what Marcus says] + Acceptable: [what Marcus says] + Regression: [what Marcus says] + Sarah / Priya (if relevant): + [reaction to the specific outcome that affects them] + +World flags set: [list flags each branch sets] +Follow-up incident (if any): [what recurs if the acceptable-fix branch was taken] +Hidden anomaly (if any): [something unusual seeded into the VM that's not part of + the quest objectives] +Narrative notes: [anything a future quest author should know — Dale connections, + story threads this opens or closes, things characters should remember] +``` + +--- + +## The Dale Thread — Notes for Story Designers + +Dale's story should emerge slowly from the systems themselves, not from exposition. +When designing quests — especially mid-to-late game — consider: + +- **What did Dale last touch?** The VMs the player inherits have a history. Some + configurations were made by Dale. Some are good. Some are wrong in ways that + suggest Dale was dealing with something. + +- **What was Dale trying to do?** As the investigation phase develops, the picture + should become coherent. Dale wasn't random — there was a pattern to their actions. + +- **Who knew?** Marcus knew Dale. Priya may have been involved in whatever ended + Dale's tenure. Kowalski definitely knows. The player assembles this from fragments, + not a scene where someone explains it. + +- **The player is inheriting Dale's problems.** Some of the broken things the player + fixes are broken because Dale broke them. Some of the broken things were broken on + purpose. The player won't know which is which until later. + +The reveal of what Dale did should feel like the player figured it out, not like the +game told them. 
diff --git a/docs/VM_BUILD_SYSTEM.md b/docs/VM_BUILD_SYSTEM.md new file mode 100644 index 0000000..6f6cf5f --- /dev/null +++ b/docs/VM_BUILD_SYSTEM.md @@ -0,0 +1,187 @@ +# VM Build System + +## Overview + +VM provisioning uses a modular driver + profile pattern. One driver script handles +the full build pipeline; per-VM profile files declare what makes each machine +distinct. Adding a new VM means writing one profile file — no changes to the driver. + +## Structure + +``` +tools/vm/ + build-vm.sh # Driver — sources a profile and runs the build pipeline + build-workstation.sh # Wrapper → build-vm.sh profiles/workstation.sh + build-web-server.sh # Wrapper → build-vm.sh profiles/web-server.sh + build-build-machine.sh # Wrapper → build-vm.sh profiles/build-machine.sh + profiles/ + workstation.sh # sc-workstation / ares — XFCE desktop (Debian) + web-server.sh # sc-web-server / hermes — nginx app server (Debian) + build-machine.sh # sc-build-machine / vulcan — build toolchain (Arch) + lib/ + common.sh # Shared libvirt helpers (pool, domain, seed ISO, wait-for-IP) +``` + +## Invocation + +```bash +# By wrapper (backwards-compatible) +./build-workstation.sh [--dry-run] [--force] +./build-web-server.sh [--dry-run] [--force] +./build-build-machine.sh [--dry-run] [--force] + +# By driver directly — profile name (no extension) or explicit path +./build-vm.sh workstation [--dry-run] [--force] +./build-vm.sh profiles/web-server.sh --force +``` + +`--dry-run` skips all libvirt/qemu-img calls and prints what would run. +`--force` destroys and recreates a domain that already exists. + +## Profile Contract + +A profile is a bash file sourced by `build-vm.sh`. 
It must set these variables: + +| Variable | Example | Description | +|----------|---------|-------------| +| `DOMAIN` | `sc-web-server` | libvirt domain name | +| `HOSTNAME` | `hermes` | Guest hostname | +| `RAM_MB` | `512` | Memory in MB | +| `VCPUS` | `1` | vCPU count | +| `DISK_SIZE` | `8G` | qcow2 overlay size | +| `GRAPHICS` | `vnc` | `vnc`, `spice`, `spice-qxl`, or `none` | +| `BASE_URL` | `https://...` | URL to download base cloud image from | +| `BASE_IMAGE` | `$SC_BASE_DIR/...` | Local path to cache the base image | + +It must also define `generate_user_data()` — a function that prints the complete +cloud-init `#cloud-config` YAML to stdout. The driver calls this function and writes +the output to the seed ISO. The following variables are available when the function +runs (set by the driver after sourcing the profile): + +| Variable | Value | +|----------|-------| +| `PUBKEY` | Contents of `${SC_SSH_KEY}.pub` | +| `GAME_HOST_IP` | `${SC_GAME_HOST_IP:-10.42.0.1}` | +| `POOL_DIR` | Resolved libvirt pool path | +| `DISK_PATH` | `$POOL_DIR/${DOMAIN}.qcow2` | +| `SEED_ISO` | `$SC_SEED_DIR/${DOMAIN}-seed.iso` | + +Profile-specific variables (e.g. `HUD_URL`, `SAGE_URL`, `PRIVKEY_INDENT`) are set +in the profile before `generate_user_data` is defined and are available inside it. + +## Writing a New Profile + +1. Copy `profiles/web-server.sh` as a starting point. +2. Set the 8 required variables. +3. Write `generate_user_data()` with the cloud-init YAML for the new machine. +4. Run `./build-vm.sh profiles/my-new-vm.sh --dry-run` to validate. +5. Run without `--dry-run` to build. + +No changes to the driver or any other file are needed. + +## Build Pipeline (driver) + +1. Parse `--dry-run` / `--force` flags +2. Resolve and source the profile file +3. Validate required variables and `generate_user_data` function exist +4. Source `lib/common.sh` (sets `SC_*` env, exposes helpers) +5. 
Run `ensure_vm_tooling` (checks virsh, qemu-img, virt-install, SSH keys, pool/network) +6. If domain exists and `--force` not set: exit cleanly +7. `download_if_missing` — fetch base image if not cached +8. Call `generate_user_data` → write to tmpdir, build NoCloud seed ISO +9. `destroy_domain` — remove existing domain if present +10. `create_backing_disk` — qcow2 overlay over the base image +11. `build_import_domain` — `virt-install --import`, enable autostart +12. `wait_for_agent_ip` — poll QEMU guest agent for IP (up to 300 s) +13. Cleanup tmpdir on exit (trap) + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SC_GAME_HOST_IP` | `10.42.0.1` | Host machine IP on the game network | +| `SC_SSH_KEY` | `~/.ssh/sc_host_key` | SSH key pair used for all host→guest connections | +| `SC_BASE_DIR` | See `common.sh` | Where base cloud images are cached | +| `SC_SEED_DIR` | See `common.sh` | Where cloud-init seed ISOs are written | +| `SC_POOL_NAME` | `sc-images` | libvirt storage pool | +| `SC_NETWORK_NAME` | `sc-internal` | libvirt network | +| `LIBVIRT_DEFAULT_URI` | `qemu:///system` | Override to `qemu:///session` for user-mode libvirt | +| `SC_WORKSTATION_GRAPHICS` | `spice` | Override workstation graphics backend | + +## Current VMs + +| Profile | Domain | Hostname | OS | RAM | vCPUs | Disk | Graphics | +|---------|--------|----------|----|-----|-------|------|----------| +| `workstation.sh` | `sc-workstation` | `ares` | Debian 12 | 2048 MB | 2 | 20 G | SPICE | +| `web-server.sh` | `sc-web-server` | `hermes` | Debian 12 | 512 MB | 1 | 8 G | VNC | +| `build-machine.sh` | `sc-build-machine` | `vulcan` | Arch Linux | 768 MB | 2 | 10 G | VNC | + +## Hostname Resolution + +All VMs resolve internal hostnames via static `/etc/hosts`. There is no DNS server +on the game network — this matches how small company networks often work before a +proper internal DNS is set up. 
+ +Each VM only has entries for the hosts it needs to reach: + +- **ares** (workstation): knows `hermes`, `vulcan`, `portal.axiomworks.internal`, `sage.axiomworks.internal` +- **hermes**: knows `portal.axiomworks.internal` +- **vulcan**: knows `hermes` (deploy target), `portal.axiomworks.internal` + +The `.axiomworks.internal` domain is fictional but realistic — real companies use +private suffixes like `.internal` or `.corp` for their infrastructure. + +## Networking Notes + +- All VMs attach to the `sc-internal` libvirt network +- The host machine (10.42.0.1) serves the game portal (`:3000`) and Sage KB (`/sage/`) +- Fixed IPs used in `/etc/hosts` across VMs: hermes=10.42.0.40, vulcan=10.42.0.24 +- These must match the DHCP reservations configured in `network-sc-internal.xml` +- IPv6 disabled on all VMs (sysctl) — not needed, reduces noise + +## Performance Tuning + +All VMs share a common sysctl baseline applied via `/etc/sysctl.d/`: + +| Setting | Value | Rationale | +|---------|-------|-----------| +| `vm.swappiness` | 10 | Prefer RAM; swap only under real pressure | +| `vm.vfs_cache_pressure` | 50 | Keep inode cache warm longer | +| `vm.dirty_ratio` | 15–25 | Batch writes; vulcan higher for build workloads | +| IPv6 disabled | — | Removes unnecessary network overhead | + +All VMs have a swap file (512 MB – 1 GB depending on role) created at first boot. + +## DHCP Reservations and MAC Addresses + +Fixed IPs are set via DHCP reservations in `network-sc-internal.xml` and the live +libvirt network. The reservations reference MAC addresses, which virt-install +**generates fresh on every `--force` rebuild**. After any rebuild, the old +reservation is stale and the VM will get a random IP from the pool. + +After a `--force` rebuild, update the reservations: + +```bash +# 1. Get the new MAC +virsh domiflist sc-web-server # (or sc-workstation, sc-build-machine) + +# 2. 
Remove the old reservation (use the old MAC from network-sc-internal.xml) +sudo virsh net-update sc-internal delete ip-dhcp-host \ + "<host mac='OLD_MAC' ip='RESERVED_IP'/>" --live --config + +# 3. Add the new one +sudo virsh net-update sc-internal add ip-dhcp-host \ + "<host mac='NEW_MAC' name='HOSTNAME' ip='RESERVED_IP'/>" --live --config
SPICE supports dynamic resolution via the vdagent service (already installed); verify the guest `spice-vdagent` is running and that the display XML uses `<channel type='spicevmc'>` so resize events actually reach the guest.
+- ~~Keep the XFCE dark theme as the default desktop theme.~~ **RESOLVED** — `xsettings.xml` sets `Adwaita-dark` theme in `workstation.sh`. +- ~~Tilix launched from the desktop icon opens in `~/Desktop` by default instead of `/home/player`.
The game-specific bookmarks are buried under a "Managed bookmarks" folder instead of sitting directly in the bar. Move the managed bookmarks to the top-level bar and remove the default Debian entries. This is controlled by the `ManagedBookmarks` policy in `/etc/chromium/policies/managed/bookmarks.json`; restructure the JSON so items appear at bar level rather than inside a named folder. +- ~~All four managed bookmarks go to the same URL; anchors don't work.~~ **RESOLVED** — Bookmarks reduced to two: "Axiom Works Portal" and "Sage (KB)" at `/sage/`. + +## Sage — Knowledge Base + +- Sage is intended to be a navigable knowledge base, not just a search box. It should feel like a real internal company wiki: organized into sections and categories that a player can explore by browsing, in addition to searching. The content is the KB data already planned for the game. +- Search should be lightweight and practical — something like Meilisearch (or a similarly small embedded-first search server) that indexes the KB content and serves fast full-text results without requiring a heavy backend. +- Sage should be a completely separate web application from the Axiom Works portal. It should have its own URL, its own visual design (distinct look and feel from the portal), and its own place in the bookmarks bar. In a realistic company, documentation tools are separate products (Confluence, Notion, internal wikis) from the ticketing portal — Sage should feel the same way. +- Add a Sage bookmark to Chromium once Sage has its own URL. + +## VM Performance + +- ~~Guest VM RAM maxed causing hangs.~~ **RESOLVED** — `RAM_MB` raised to 1536 MB; 1 GB swap file added via `runcmd` in `build-workstation.sh` (fallocate + mkswap + fstab entry). Rebuild required to take effect. 
+ +## Visual Cleanup + +- ~~Hide or remove the `cidata` desktop icon.~~ **RESOLVED** — `build-vm.sh` detaches the cloud-init seed ISO after workstation readiness, so the CD-ROM is not exposed on the desktop or in file-manager device lists. `xfce4-desktop.xml` also keeps removable/device desktop icons hidden as a fallback. +- ~~Hide the internal `VirtIO Disk` from Thunar's Computer view.~~ **RESOLVED** — `workstation.sh` installs a udev rule setting `UDISKS_IGNORE=1` on `vd*` system disk devices, keeping internal VM storage out of player-facing file-manager device lists. diff --git a/docs/design/CHARACTERS.md b/docs/design/CHARACTERS.md new file mode 100644 index 0000000..ab3e32d --- /dev/null +++ b/docs/design/CHARACTERS.md @@ -0,0 +1,459 @@ +# Characters — Sysadmin Chronicles + +Story design reference. All characters, bios, relationships, and open story hooks. +For company/world context see `COMPANY_LORE.md`. This file focuses on the people. + +--- + +## Active Characters + +These characters have an established in-game voice and presence. Any new quest work +should treat their characterization here as canonical. + +--- + +### The Player +**Role:** New junior sysadmin hire, day one +**Identity:** Unnamed. Player-selected portrait (5 options). + +Hired to replace Dale. Nobody will explain what Dale did. Badge number is still +pending — temp credentials were handled by someone in Finance on their first day. +The player is a competent professional, not a bumbling intern. They may not know +every answer but they know how to look. + +The player has no spoken lines. Their character is expressed entirely through the +choices they make when fixing things — whether they understand root causes or just +clear symptoms, whether they leave systems better or just less broken. + +--- + +### Marcus Webb +**Role:** Senior Systems Administrator +**Email:** `m.webb@axiomworks.internal` +**Reports to:** Dave Kowalski (Director of IT) + +Six years at Axiom Works. Hired by Kowalski. 
Knows where everything is, why it's +there, and which parts were a mistake. Communicates in short, precise messages. +Does not explain things twice. Trusts competence over credentials — he will give +the player more rope as they demonstrate they know what to do with it. If they +don't, the rope gets shorter. + +He was the one who onboarded the player. He assigned their first ticket. He will +assign most of the tickets that follow. His messages range from brief task +assignments to late-night observations about something that's been on his mind — +the latter usually mean something is about to become a problem. + +He knows what Dale did. He has decided not to discuss it. + +**Personality:** Dry. Technically precise. Does not perform enthusiasm. Occasionally +wry but never jokey. Respects players who fix root causes. Mildly annoyed by +players who fix symptoms and call it done. + +**Relationships:** +- Kowalski: reports to him; respectful but not deferential +- Sarah: professional; takes her tickets seriously, occasionally says quiet things when she's wrong +- Priya: mutual professional respect; they operate in the same zone of "things that matter when they go wrong" +- Phil Ruiz (Sales VP): warm; Phil owes Marcus for saving a demo once and Marcus has never mentioned it + +--- + +### Sarah Chen +**Role:** Product Manager, AxiomFlow +**Email:** `s.chen@axiomworks.internal` + +Owns the AxiomFlow product roadmap. Coordinates between sales, engineering, and +customers. Emails Monday mornings. Cares intensely about the demo and staging +environments because those are the product she can actually see and touch. Not wrong +about their importance. + +She files tickets when things break on the product-facing side. Her descriptions of +problems are accurate about symptoms and often wrong about causes — she will +confidently diagnose a permissions issue as a script bug, or a package problem as a +config error. She is not incompetent; she just doesn't have the full picture. 
When +the player fixes the underlying cause rather than the surface symptom, she notices. + +She has a sharp edge when things get worse after someone touches them. She will say +so, clearly, without being melodramatic about it. + +**Personality:** Direct. Metric-oriented. Not patient with vague timelines or "we're +looking into it." Appreciates being told what the actual problem was, not just that +it's fixed. + +**Relationships:** +- Marcus: professional; trusts that her tickets will be handled, doesn't ask for much +- Player: initially impersonal (they're new); warms or cools based on outcomes +- Nikhil Sharma: upstream dependency — his build pipeline affects her deployments + +--- + +### Priya Nair +**Role:** Head of Security & Compliance +**Email:** `p.nair@axiomworks.internal` +**Direct report:** James Osei (Security Analyst) + +Leads all security reviews, access audits, and compliance programmes. Has a standing +Thursday meeting with David Park (CTO) that has existed since 2017. Was brought in +after an incident nobody discusses in public. Has been building the security function +from something informal into something that can survive a SOC 2 audit. + +She frames everything in terms of what happens when things go wrong, not whether they +will. She assumes breach. She assumes misconfiguration. She is often right. She is +not someone who appreciates hearing about a production change after it has already +happened. + +She will tell the player when a fix is correct and why. She will also tell them when +a fix works but leaves the environment in a worse position than before. She is not +punitive about this — she just states it. + +She does shift reviews at end-of-shift and grades the player's overall performance. +Her criteria: did the work move forward, did the environment stay stable, did the +player create extra problems. + +**Personality:** Precise. Consequence-focused. Calm in tone even when the content +is not calm. Economical with words. 
Does not use exclamation marks. + +**Relationships:** +- Player: evaluative; her trust is earned by demonstrating that security is a + consideration, not an afterthought +- Marcus: peer respect; they operate in different domains with overlapping concerns +- Dave Kowalski: reports indirectly up through him for infrastructure decisions +- David Park: standing Thursday meeting; she has the CTO's ear + +> **Name note for developers:** The in-game email service and some ticket files +> previously used "Priya Kapoor" and the onboarding doc used "Priya Singh." +> These are all the same character. **Priya Nair** is the canonical name. +> Email should be `p.nair@axiomworks.internal`. Update references in +> `server/src/services/EmailService.js`, `content/tickets/T007.json`, and +> `content/docs/onboarding.json`. + +--- + +### Dave Okonkwo +**Role:** Internal employee, non-technical +**Email:** `d.okonkwo@axiomworks.internal` + +A regular Axiom Works employee who notices when things aren't working and files +tickets about it. He doesn't know enough to diagnose the problem — he reports +symptoms accurately and assumes the wrong cause. His reports are useful precisely +because they represent what a non-technical user actually experiences. + +He is not on the company website (280 employees, most of them aren't). He's +somewhere in operations or general staff. He's not in Finance, not in IT. + +> **Open decision:** Dave Okonkwo is currently the only employee-level character who +> submits tickets. The company website has Dave Kowalski as Director of IT Operations +> (Marcus's boss), which is a completely different person. This is not a naming +> inconsistency — they're two different people. However: if the story wants Kowalski +> to become an active character who also files tickets or escalates issues, that's a +> separate thread. Okonkwo and Kowalski coexist. + +--- + +## Named Background Characters + +On the company website. No current in-game presence. 
Available for story use — +they can send emails, appear on CC lines, be referenced in dialogue, or become +active characters in new quests. + +Listed in rough order of story relevance to the IT/sysadmin context. + +--- + +### Dave Kowalski — Director of IT Operations +Marcus's manager. The player's skip-level. Background is network engineering — +has Cisco certifications he will not volunteer unless provoked. Oversees systems +(Marcus's domain), networking (Tom Malaney), and IT support. Has been at Axiom +Works since 2015. Describes the infrastructure as "mature." Sends weekly status +emails in bullet points that never quite answer the question. When things go wrong +he schedules a meeting to "talk through the situation," which everyone has learned +is worse than a direct message. + +Has said "we should really document that" more times than he can count. Has +documented very little personally. Maintains a mysterious Tuesday 2–3pm calendar +block. + +Story use: source of policy pressure, indirect escalation, the person who asks +questions that reveal Marcus hasn't told the player everything. + +--- + +### Nikhil Sharma — Platform Engineer +Owns the internal build and release pipeline, the CI infrastructure, and the +parts of deployment that nobody else wants to think about. Strong opinions about +reproducible builds. Sends Slack messages at 6am. Occasionally at 11pm. + +He is the engineer most directly connected to what happens on vulcan — if a build +is broken, it's probably something Nikhil built or maintains. He has never met the +player. He almost certainly doesn't know the player exists. + +Story use: the author of broken packages the player has to debug; a character who +can explain (or fail to explain) what went wrong upstream; an escalation path when +a build problem is genuinely his fault. 
+ +--- + +### Tanya Okafor — Head of Customer Success +Manages post-sale relationships for all AxiomFlow customers and the twelve legacy +AxiomSync accounts that haven't migrated. Uses the word "partnership" a lot. + +Usually the first person to know when something is wrong in production, because a +customer has already called her before IT knows there's a problem. Her call log +is an early warning system. She is not hostile to IT but she has learned that +"we're looking into it" is not an answer she can give a customer. + +Story use: pressure vector from the customer direction; source of urgency that +doesn't come from Marcus or the ticket queue; demonstrates real-world stakes when +things go down. + +--- + +### Phil Ruiz — VP of Sales +Has been promising features to prospects since 2016. Maintains a warm relationship +with the infrastructure team because Marcus once fixed the staging environment with +twenty minutes to spare before a major demo — Phil has never forgotten this. Travels +frequently. Expense reports submitted promptly, which Marcus has noted approvingly. + +Story use: indirect beneficiary when demos work; pressure source when a sales demo +is scheduled and something is broken; the person who will tell the CTO what IT did +right in a room the player will never be in. + +--- + +### Yusuf Halabi — Engineering Manager +Reports to David Park (CTO). Manages the core AxiomFlow platform team. Runs the +Thursday architecture review. Has opinions about test coverage. Leaves pull request +comments that are technically correct and diplomatically suboptimal. + +Story use: engineering-side escalation; source of tickets about internal tooling; +the person who will ask why a config change broke a downstream process. + +--- + +### Derek Ashford — Financial Controller +Does not appear at team meetings. Does appear on CC lines of every email that +mentions cloud costs, hardware procurement, or infrastructure budget. Always +replies-all. 
His manager is Rachel Brandt (CFO). + +Story use: background texture on procurement requests; the voice that makes any +infrastructure spending feel like a negotiation. + +> **Note on "Dave from Finance":** Marcus's day-one message references "Dave from +> Finance" as the person holding the player's temp credentials. This is almost +> certainly Derek Ashford — Marcus using his first name informally, or a +> continuity error. Derek Ashford is the only Finance character plausibly holding +> IT credentials. His first name is Derek, not Dave — either the message should +> be corrected, or "Dave from Finance" is a third unnamed Finance employee. + +--- + +### Rachel Huang — Systems Administrator +Marcus's peer on the IT team. Handles provisioning, patch cycles, and the ongoing +negotiation with Finance over cloud consolidation. Came from a managed services +background. Has strong opinions about monitoring dashboards, most of which are +correct. + +Story use: the person who set something up that the player now has to maintain; +a colleague who can provide context Marcus won't; someone whose provisioning +decisions the player will encounter as infrastructure. + +--- + +### Tom Malaney — Network Engineer +Responsible for network infrastructure across the office and hosted environments. +On-call for more holiday weekends than he would like. Thorough in documentation +when he finds time for it. + +Story use: DNS, firewall, or routing problems that are not the player's fault +but become the player's problem; someone who can be reached but is slow to +respond. + +--- + +### James Osei — Security Analyst +Priya's direct report. Handles vulnerability assessments, access reviews, and +quarterly compliance reporting. Methodical. Has a spreadsheet for everything, +which is not a criticism. + +Story use: the person who runs the actual audit that Priya will summarize to the +player; a source of detailed (sometimes overwhelming) security findings. 
+ +--- + +### Ellen Marsh — CEO & Co-Founder +Built the first version of AxiomFlow after a decade in operations. No CS background. +Attends all-hands twice a year. Does not use Slack. Has final say on pricing and +major customer commitments. + +Story use: the distant authority whose priorities shape everything; never interacts +with the player directly, but her decisions land as constraints. + +--- + +### David Park — CTO & Co-Founder +Wrote the original rules engine in 2011. Now manages engineering managers. Still has +opinions about the data model. Has a standing Thursday meeting with Priya that hasn't +moved since 2017. + +Story use: architectural decisions from above; the person Priya reports significant +security findings to. + +--- + +### Karen Volkov — COO +Joined 2014. Responsible for the fact that the company has documented processes for +anything at all. Has opinions about infrastructure costs that surface in IT's world +via Finance. Prefers decisions with clear owners and deadlines. + +--- + +### Rachel Brandt — CFO +Joined 2016. Approves all capital expenditure over $5,000. Working to consolidate +cloud spend. Does not enjoy surprises in the infrastructure budget. Derek Ashford +reports to her. + +--- + +### Mei Lin — Senior Software Engineer +Has maintained AxiomSync's integration layer since 2018. Knows more about it than +anyone would prefer, including herself. Currently leading the migration tooling +project for the remaining legacy accounts. + +--- + +### Cora Reyes — Software Engineer +Works on the AxiomDash reporting pipeline. Has submitted more internal RFCs than +anyone else on the team in the past year. Moving toward senior. + +--- + +### Ben Portillo — Product Manager, AxiomDash +Leads product development for the analytics add-on. Works closely with large +accounts to understand what they actually want from dashboards (usually different +from what they asked for). 
+ +--- + +### Annika Gosse — UX Designer +Responsible for AxiomFlow's interface. Has been advocating for a redesign of the +workflow builder since 2022. Patient. + +--- + +### Sandra Wu — HR Manager +Manages hiring, onboarding, and employee relations since 2016. Runs the new-hire +onboarding process (three days, thorough). Sends birthday emails on time, every time. + +--- + +### Owen Blake — Office Manager +Keeps the office running. Has fixed more things than his job title implies. The +person to contact if conference room equipment stops working. + +--- + +### Mike Kawamoto — Account Executive +Handles mid-market manufacturing accounts in the northeast. Believes strongly in +the demo environment. Closes more deals in Q4 than any other quarter. + +--- + +### Lisa Ferreira — Customer Success Manager +Manages onboarding for new AxiomFlow deployments. Has a talent for understanding +what customers mean rather than what they say. + +--- + +## Unresolved Characters (Story Hooks) + +These are referenced in existing content but never defined. They represent the +strongest open narrative threads. + +--- + +### Dale — The Previous Sysadmin +**Reference:** Marcus's day-one message — "You're replacing Dale. Nobody will tell you +what Dale did because it's complicated." + +Dale is gone. The player has their desk, their access provisioning slot, and +apparently their reputation — people know the player is "Dale's replacement" before +they know the player's name. The systems the player inherits are the systems Dale +last touched. + +What Dale did is unknown. It is described as "complicated." Marcus knows. Possibly +Kowalski knows. Possibly Priya knows, if it was security-related. + +This is the strongest existing narrative mystery in the game. It has setup and no +payoff. 
Dale's story could be: +- A technical incident (something Dale broke and couldn't fix) +- A policy violation (something Dale did that wasn't malicious but wasn't right) +- A trust collapse (competent but burned bridges) +- Something personal +- Any combination + +The player finding out what Dale did — gradually, through the systems they work on, +through things people let slip — is a natural story spine for the whole game. + +--- + +### "Dave from Finance" — Day One Reference +**Reference:** Marcus's day-one message — "Dave from Finance has your temp credentials. +He's on three today." + +Almost certainly Derek Ashford (Financial Controller), referred to informally. But +Derek's first name is Derek, not Dave — this is either Marcus being casual with +names, a continuity error, or a genuinely separate unlisted Finance employee. + +Needs a decision: correct "Dave" to "Derek" in Marcus's message, or introduce a +separate "Dave from Finance" as a minor character. + +--- + +## Key Relationships Map + +``` +Ellen Marsh (CEO) + └── David Park (CTO) + └── Yusuf Halabi (Eng Manager) + ├── Mei Lin + ├── Cora Reyes + └── Nikhil Sharma + └── Karen Volkov (COO) + └── Rachel Brandt (CFO) + └── Derek Ashford (Financial Controller) + └── Phil Ruiz (VP Sales) + ├── Mike Kawamoto + └── Tanya Okafor + └── Lisa Ferreira + +Dave Kowalski (Director of IT) + ├── Marcus Webb ←── Player's manager + │ └── [Player] + ├── Rachel Huang + └── Tom Malaney + +Priya Nair (Head of Security) + └── James Osei + +Sarah Chen (Product, AxiomFlow) ←── frequent ticket source +Ben Portillo (Product, AxiomDash) +Annika Gosse (UX) +``` + +--- + +## Tone Notes for New Story Work + +- **Marcus talks like someone who has answered this question before.** Precise, low + affect, no wasted words. Never condescending — just efficient. +- **Sarah talks like a PM: outcome-focused, slightly impatient, specific about + what she needs.** She is not a villain. She has real deadlines. 
+- **Priya talks like someone who has already thought about what goes wrong.** She + doesn't speculate — she states. She's not alarming, she's matter-of-fact. +- **Dave Okonkwo talks like someone who doesn't know what the problem is** but is + trying to be helpful by reporting exactly what he observed. He should never be + made to look stupid — he's doing the right thing. +- **The company takes itself seriously.** Humor comes from the gap between official + language and reality, not from anyone being a cartoon. +- **Problems have plausible causes.** Systems broke because someone made a + reasonable decision under time pressure, not because they were careless idiots. + The player should feel like a professional, not a janitor. diff --git a/docs/design/COMPANY_LORE.md b/docs/design/COMPANY_LORE.md new file mode 100644 index 0000000..f8b81f3 --- /dev/null +++ b/docs/design/COMPANY_LORE.md @@ -0,0 +1,165 @@ +# Axiom Works — Company Lore Reference + +> For quest authors, dialogue writers, and ticket copy. Keep the tone dry and +> believable. The company should feel real, slightly dysfunctional, and just +> plausible enough that players recognise the type. + +--- + +## Who They Are + +**Axiom Works** is a B2B enterprise software company founded in 2011. Headquarters +is in a three-floor office park that is technically "downtown adjacent" depending +on how charitable you are with the map. They have about 280 employees. The +Glassdoor rating is 3.8 stars and management checks it obsessively. + +Their flagship product is **AxiomFlow** — a workflow automation platform aimed at +mid-size manufacturers, logistics companies, and anyone who got a 90-minute demo +and thought it looked easy. Most customers are still on the workflow they set up +in 2019. The platform does what it says. Marketing says it does considerably more. 
+ +--- + +## Products + +| Product | Description | Status | +|---------|-------------|--------| +| **AxiomFlow** | Workflow automation platform | Active, main revenue | +| **AxiomDash** | Reporting and analytics add-on | Active, profitable, under-resourced | +| **AxiomSync** | Legacy data integration layer | End-of-sale since 2021, still maintained for 12 customers who refuse to migrate | + +The current marketing tagline is *"Streamline. Scale. Succeed."* It replaced +*"Work smarter, not harder"* in Q3 of last year. The one before that mentioned +AI. Nobody is sure what the AI was. + +--- + +## Infrastructure + +The company runs a mix of on-prem servers (named after Greek gods — a choice made +by a contractor in 2017 who left before documenting anything) and a handful of +cloud instances that accounting keeps trying to consolidate. + +| Host | Role | Notes | +|------|------|-------| +| **ares** | Player workstation | XFCE desktop, where the player works | +| **hermes** | Web/app server | nginx, staging and demo environment for AxiomFlow | +| **vulcan** | Build machine | Arch Linux, compiles artifacts, runs scheduled jobs | + +### Planned future systems +As the game grows, additional machines will be added. Candidates: + +| Proposed host | Role | Greek connection | +|---|---|---| +| **poseidon** | Database server | Foundation, depths, reliability | +| **apollo** | Mail / notification server | Messenger, communication | +| **athena** | Internal tooling (ticketing, wiki) | Wisdom, knowledge management | +| **argus** | Monitoring / alerting | The hundred-eyed watcher | +| **mnemosyne** | Backup / storage | Memory, persistence | + +--- + +## Characters + +### Dave Kowalski — Director of IT Operations +The player's skip-level manager. Has been at Axiom Works since 2015. Hired Marcus. +Oversees three teams: systems (Marcus's domain), networking, and IT support. 
Background +is originally networking — has Cisco certifications he won't bring up unless someone else +brings up Cisco certifications first. Sends weekly status emails formatted in bullet +points that never quite answer the question you were asking. When things go wrong he +schedules a meeting to "talk through the situation," which everyone has learned is +worse than an email. Maintains a calendar block from 2–3pm on Tuesdays that nobody +has ever asked about. Has said "we should really document that" approximately 400 times. +Describes the infrastructure as "mature." + +### Marcus Webb — Senior Sysadmin +The player's manager and the person who assigned them the ticket. Has been at +Axiom Works for six years. Knows where all the bodies are buried. Communicates +primarily in terse Slack messages and occasionally very long emails sent at 11pm. +Trusts competence over process. Gets irritated by people who confuse symptoms +with root causes. + +### Priya Nair — Security / Compliance +Runs security reviews and has opinions about everything. Usually right. Tends to +frame concerns in terms of what will happen when things go wrong rather than +whether they will. Was brought in after an incident nobody talks about in public. + +### Sarah Chen — Product Manager +Represents the product team's perspective in the ticket queue. Cares about demo +environments more than production ones because demos are what she can see. Not +technically wrong about their importance. Emails at 8am on Mondays. + +### Derek Ashford — Financial Controller +Does not appear in person. Appears on CC lines of emails where infrastructure +costs are being discussed. Always replies-all. His full name is Derek Ashford. +His manager is Rachel Brandt (CFO). + +--- + +## Background Characters (non-interactive, for world texture) + +These characters exist on the company website and in lore but do not appear in +quests or dialogue. 
Use them for verisimilitude — email headers, CC lines, internal +wiki author credits, that sort of thing. + +### Ellen Marsh — CEO & Co-Founder +Built AxiomFlow after a decade in operations. Not technical. Attends all-hands +twice a year. Has final say on pricing and major customer commitments. Does not +use Slack. The player will never interact with her. + +### David Park — CTO & Co-Founder +Wrote the original rules engine. Now manages engineering managers. Still has +opinions about the data model. Has a standing Thursday meeting with security +that hasn't moved since 2017. + +### Karen Volkov — COO +Joined 2014. Responsible for the fact that Axiom Works has documented processes +for anything. Has opinions about infrastructure costs. Prefers decisions with +clear owners and deadlines. + +### Rachel Brandt — CFO +Joined 2016. Approves all capital expenditure over $5,000. Does not enjoy +surprises in the infrastructure budget. Derek reports to her. + +### Phil Ruiz — VP of Sales +Has been promising features to prospects since 2016. Has a warm relationship +with the infrastructure team because Marcus once saved a demo with 20 minutes to +spare. Expense reports submitted promptly. + +### Tanya Okafor — Head of Customer Success +Manages all post-sale customer relationships including the twelve AxiomSync +holdouts. Usually the first to know when something is wrong in production, +because a customer has already called her. + +### Yusuf Halabi — Engineering Manager +Reports to the CTO. Manages the core AxiomFlow platform team. Has opinions +about test coverage. Runs the Thursday architecture review. + +### Mei Lin — Senior Software Engineer +Has maintained AxiomSync's integration layer since 2018. Knows more about it +than anyone would prefer. + +### Nikhil Sharma — Platform Engineer +Owns the build and release pipeline and internal CI infrastructure. Occasionally +sends Slack messages at 6am. 
+ +### Sandra Wu — HR Manager +Manages hiring, onboarding, and employee relations since 2016. Sends birthday +emails on time, every time. Runs the new-hire onboarding process that takes +three days. + +--- + +## Tone Guidelines + +- **Dry, not sarcastic.** The company takes itself seriously. The humour comes + from the gap between how they describe things and what's actually happening. +- **Specific, not generic.** "The AxiomSync customer in Cincinnati keeps calling" + is better than "a client is upset." +- **Plausible dysfunction.** Problems happen because of reasonable decisions made + under time pressure, not because people are incompetent. The player should feel + like a real professional, not a janitor. +- **No cartoon villains.** Derek from Finance is not evil. The product team is not + stupid. They have different priorities. +- **The infrastructure has history.** It was built over time. Some parts are good. + Some parts were good in 2017. The player's job is to keep it working. diff --git a/docs/design/QUEST_AUTHORING.md b/docs/design/QUEST_AUTHORING.md new file mode 100644 index 0000000..78828ff --- /dev/null +++ b/docs/design/QUEST_AUTHORING.md @@ -0,0 +1,419 @@ +# Quest Authoring +Use this guide when adding new JSON quests under `content/quests/`. + +Quest files describe observed VM state. They are not command scripts and they +should model real Linux behavior, not puzzle logic detached from the system. + +For complete worked files, see [`docs/AUTHORING_EXAMPLES.md`](../AUTHORING_EXAMPLES.md). + +## Quest JSON Schema + +### Root Fields + +| Field | Type | Description | +| --- | --- | --- | +| `id` | string | Quest ID, for example `Q005`. | +| `title` | string | Player-facing quest title. | +| `tier` | int | Difficulty tier, usually `1`, `2`, or `3`. | +| `primary_vm` | string | Main VM for the quest. Current authored values are `workstation`, `web_server`, and `build_machine`. 
| +| `required_vms` | string[] | Every VM the quest touches. Include all VMs used in clues, validation, or prep. | +| `ticket_id` | string | Links to `content/tickets/<ticket_id>.json`. | +| `baseline_snapshot` | string | Snapshot name that the prep script should restore or build from. | +| `summary` | string | Short internal scenario summary. | +| `clue_fingerprint` | object | Advisory description of the evidence seeded into the baseline. | +| `objectives` | object[] | Objective list shown to the player and used for progress checks. | +| `solution_branches` | object[] | Branches the validator can resolve to. Higher-priority valid branches win. | +| `pressure_profile` | string or null | Optional pressure/escalation profile name. | +| `blast_radius` | string[] | Incident IDs that this quest can affect or trigger. | +| `unlock_requirements` | string[] | Prerequisites such as `world_flag:<flag_name>` entries. | +| `tags` | string[] | Search and classification tags. | +| `internal_notes` | string | Author-only notes for reviewers. | +| `_note` | string | Optional author-only comment. Existing content uses this at root and inside nested objects. | + +### `clue_fingerprint` + +`clue_fingerprint` is advisory. It documents what evidence the baseline already +contains so content reviewers can confirm the clue trail is real. + +| Field | Type | Description | +| --- | --- | --- | +| `description` | string | Plain-language explanation of the clue trail. | +| `evidence` | object[] | Evidence items that point to the issue. Use the same general shape as the relevant validation type. 
| + +Common evidence shapes in existing content: + +- File and log evidence usually includes `type`, `vm`, `path`, and `contains` +- State evidence may include `type`, `vm`, `service`, `state`, or `enabled` +- Ownership evidence may include `type`, `vm`, `path`, `user`, and `group` +- Scalar evidence may include `threshold_percent`, `port`, or `command` depending on the clue + +Existing clue fingerprints also use clue-only labels such as `service_state_is`, +`service_enabled_is`, and `expected_user`. Treat those as descriptive baseline +metadata, not runtime validation names. + +## Objectives + +| Field | Type | Description | +| --- | --- | --- | +| `id` | string | Stable objective ID. | +| `description` | string | Player-facing objective text. | +| `check_mode` | string | `passive` or `explicit`. Use `passive` by default. | +| `validation` | object | Rule object evaluated by `ValidationService`. | + +Objectives are for feedback and progress tracking. They do not choose the +winning solution branch. + +## Solution Branches + +| Field | Type | Description | +| --- | --- | --- | +| `id` | string | Stable branch ID. | +| `label` | string | Optional short label used in content review and debugging. | +| `priority` | int | Higher wins when multiple branches validate. Priorities must be unique per quest. | +| `validation` | object | Rule object evaluated for this branch. | +| `trust_delta` | float | Trust change applied when this branch wins. Positive for better fixes, negative for risky or damaging ones. | +| `follow_up_dialogue` | string | Dialogue ID to trigger after resolution. | +| `follow_up_incident` | string | Incident ID to trigger after resolution, if the branch intentionally leaves a latent problem. | +| `follow_up_ticket` | string | Next ticket ID in the quest chain. | +| `world_flags` | string[] | Flags to set when the branch wins. | +| `_note` | string | Optional author-only comment. 
| + +### Branch Authoring Guide + +- Use branch priority to rank the quality of valid solutions. +- Put the clean, robust fix at the highest priority. +- Use lower priorities for brittle workarounds, partial fixes, or outcomes that + leave future risk behind. +- Use `trust_delta` to reflect the quality of the fix, not just whether the + quest technically completed. +- Use `follow_up_ticket` when a winning branch should advance the story to the + next ticket. +- Use `follow_up_incident` only when that branch intentionally seeds a later + recurrence or operational cost. +- Keep priorities unique. If two branches can both pass with the same priority, + the content should be rewritten. + +## Validation Rule Types + +Design notes sometimes use shorthand names like `file_mode_matches` or +`command_exits_zero`. In authored JSON, use the runtime rule names below. + +- `file_mode_matches` -> `file_mode` +- `file_owner_matches` -> `file_owner` +- `service_state_matches` -> `service_state` +- `service_is_enabled` -> `service_enabled` +- `process_is_running` -> `process_running` +- `port_is_listening` -> `port_listening` +- `package_is_installed` -> `package_installed` +- `command_exits_zero` -> `command_assert` + +| JSON type | Fields | Notes | +| --- | --- | --- | +| `file_exists` | `vm`, `path` | Passes when the file exists. | +| `file_absent` | `vm`, `path` | Inverse of `file_exists`. | +| `directory_exists` | `vm`, `path` | Passes when the directory exists. | +| `file_contains` | `vm`, `path`, `contains` | Passes when the file contains the given text. | +| `log_contains` | `vm`, `path`, `contains` | Alias for `file_contains` used by some clue fingerprints. | +| `file_mode` | `vm`, `path`, `mode` | Checks the exact file mode string, such as `0600`. | +| `file_owner` | `vm`, `path`, `user`, `group` | Checks exact ownership. | +| `file_owner_is_not` | `vm`, `path`, `user`, `group` | Negated ownership check. 
| +| `service_state` | `vm`, `service`, `state` | Checks the active state, such as `active`, `inactive`, or `failed`. | +| `service_enabled` | `vm`, `service`, `enabled` | Checks boot-time enablement. The `enabled` field defaults to `true`. | +| `process_running` | `vm`, `process` | Passes when the named process is running. | +| `process_user` | `vm`, `process`, `user` | Passes when the named process runs as the given user. | +| `port_listening` | `vm`, `port`, `listening` | Checks whether a port is listening. The `listening` field defaults to `true`. | +| `package_installed` | `vm`, `package` | Passes when the package is installed. | +| `mount_present` | `vm`, `path` | Passes when the mount is present. | +| `disk_usage_below` | `vm`, `path`, `threshold_percent` | Passes when disk usage is below the threshold. `percent` is accepted in older content. | +| `disk_usage_above` | `vm`, `path`, `threshold_percent` | Passes when disk usage is above the threshold. `percent` is accepted in older content. | +| `command_assert` | `vm`, `command` | Fallback rule for command-based checks. Use sparingly. | +| `and` | `rules` | All sub-rules must pass. | +| `or` | `rules` | Any sub-rule may pass. | +| `not` | `rule` | Inverts the inner rule. | + +### Validation Notes + +- Prefer state-based checks over command checks. +- Use `and` and `or` to model genuinely alternative states, not to hide weak + authoring. +- `command_assert` is a fallback. If a real state rule exists, use that first. +- Some older quest files include extra fields such as `protocol` or + `installed`. The loader ignores unknown keys, but new quests should stick to + the documented fields above. + +## Prep Script Requirements + +Each quest needs a prep script at `tools/vm/quest-prep/QXXX-prep.sh`. + +- The script must be idempotent. +- It must set up the starting VM state for the quest. +- It runs at image build time, not when the player starts the quest. 
+- It should install required packages only from local or pre-baked sources. +- It may create logs, users, groups, permissions, or broken config files that + form the scenario. +- It must not rely on a live player session. + +When a quest continues an existing chain, the prep script should restore the +prior clean snapshot first, then apply the new scenario changes, and finally +take the next baseline snapshot. + +## VM Provisioning Pipeline + +A new quest requires a VM baseline before it can be played. The full authoring +workflow from scratch to playable quest: + +### 1. Write the prep script + +Create `tools/vm/quest-prep/QXXX-prep.sh`. Requirements: + +- Must be idempotent — safe to run twice on the same domain. +- Accepts the domain name as $1 and an optional `--dry-run` flag as $2. +- Must not prompt for input or depend on internet access. +- Reads `tools/vm/lib/common.sh` for shared helpers (`run`, `step`, `ok`, etc.). + +Typical operations: break a config file, chown a directory, remove a logrotate +config, add a cron entry, delete a key. Nothing that would be undone by the +player before the quest starts. + +### 2. Register the quest in seed-vms.sh + +Open `tools/setup/seed-vms.sh` and: + +1. Add a `require_file` check near the top (`STEP 1 — Pre-flight checks`): + ```bash + require_file "$QUEST_PREP/QXXX-prep.sh" "QXXX prep script" + ``` + +2. Add a `run_prep_and_snapshot` call in `STEP 4 — Run quest-prep scripts`: + ```bash + run_prep_and_snapshot "QXXX" "sc-<vm-name>" "baseline.<snapshot-name>" + ``` + The snapshot name must match the quest's `baseline_snapshot` field. + +### 3. Baseline snapshot chain + +Each VM has its own chain. Only the CLEAN branch resolution of a quest is used +as the baseline for the next quest. Brittle-branch resolutions are never +snapshotted. 
+ +| VM | Snapshot chain | +|----|----------------| +| `sc-workstation` | `baseline.day-one` (Q001 only) | +| `sc-web-server` | `baseline.clean` → `baseline.post-q002` → `baseline.post-q003` → `baseline.post-q004` | +| `sc-build-machine` | `baseline.clean` → `baseline.post-q006` | + +A prep script that builds on a prior quest must revert to the prior snapshot +before applying its changes. + +### 4. VM baseline package set + +Each authored VM has a guaranteed minimum set of packages that players can rely on +during gameplay. New quests must not assume packages outside this set unless the +quest prep script installs them. + +| VM | OS | Guaranteed packages | +|----|----|---------------------| +| `sc-workstation` (ares) | Ubuntu 24.04 | `qemu-guest-agent`, `openssh-server`, `sudo`, `bash-completion`, `hostname`, `ssh` client (system) | +| `sc-web-server` (hermes) | Debian 12 | `qemu-guest-agent`, `openssh-server`, `sudo`, `nginx`, `logrotate`, `rsync`, `curl`, `hostname`, `ssh` client | +| `sc-build-machine` (vulcan) | Arch Linux | `qemu-guest-agent`, `openssh`, `sudo`, `base-devel`, `archlinux-keyring`, `inetutils` (provides `hostname`, `ping`), `ssh` client | + +`hostname`, `whoami`, `id`, `ls`, `cat`, `echo`, `ps`, `df`, `du`, `free`, +`systemctl`, `journalctl` are available on all VMs. + +The in-game terminal auto-adds `-C` to bare `ls` calls so column output renders +correctly. If a quest step requires `ls -l` or another explicit format, pass it +explicitly — the auto-`-C` injection only fires when no layout flag is present. + +### 5. Run the pipeline + +```bash +# Dry run first — shows what would execute without touching VMs +bash tools/setup/seed-vms.sh --dry-run + +# Full build — requires libvirt and all three sc-* domains to exist +bash tools/setup/seed-vms.sh + +# Prep + snapshot only (skip the image build step) +bash tools/setup/seed-vms.sh --skip-build + +# Single VM only +bash tools/setup/seed-vms.sh --vm web_server +``` + +### 6. 
Validate + +After seed-vms.sh completes: + +```bash +# Check content integrity (including baseline_snapshot field) +node tools/content/validate-content.js + +# Verify snapshots exist on each domain +virsh snapshot-list sc-web-server +virsh snapshot-list sc-build-machine +``` + +## Multi-Solution Quest Example + +```json +{ + "id": "Q099", + "title": "Cron Runs as Root", + "tier": 2, + "primary_vm": "web_server", + "required_vms": ["web_server"], + "ticket_id": "T099", + "baseline_snapshot": "baseline.clean", + "_note": "Minimal example: the nightly cron job should run as www-data, not root.", + "summary": "A site-sync cron entry was copied from a root shell. It still runs, but it now leaves root-owned cache files behind.", + "clue_fingerprint": { + "description": "The cron file exists, but it names root as the executor. The cache directory is already polluted with root-owned files.", + "evidence": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/site-sync", "contains": "root /opt/site-sync/bin/sync-cache.sh" }, + { "type": "file_owner_is_not", "vm": "web_server", "path": "/var/www/axiomworks/cache", "user": "www-data" } + ] + }, + "objectives": [ + { + "id": "sync-safe", + "description": "The cron job runs as www-data and the scheduler is active", + "check_mode": "passive", + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/site-sync", "contains": "www-data /opt/site-sync/bin/sync-cache.sh" }, + { + "type": "or", + "rules": [ + { "type": "command_assert", "vm": "web_server", "command": "systemctl is-active --quiet cron" }, + { "type": "command_assert", "vm": "web_server", "command": "pgrep -x cron >/dev/null" } + ] + } + ] + } + } + ], + "solution_branches": [ + { + "id": "correct-cron", + "label": "Correct Cron User", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/site-sync", "contains": 
"www-data /opt/site-sync/bin/sync-cache.sh" }, + { + "type": "or", + "rules": [ + { "type": "command_assert", "vm": "web_server", "command": "systemctl is-active --quiet cron" }, + { "type": "command_assert", "vm": "web_server", "command": "pgrep -x cron >/dev/null" } + ] + } + ] + }, + "trust_delta": 2, + "world_flags": ["site_sync_healthy"], + "follow_up_dialogue": "marcus-Q099-complete-clean", + "follow_up_ticket": "T100", + "_note": "Preferred fix: keep the job and run it with the correct user." + }, + { + "id": "disabled-cron", + "label": "Brittle Disable", + "priority": 40, + "validation": { + "type": "command_assert", + "vm": "web_server", + "command": "test ! -f /etc/cron.d/site-sync" + }, + "trust_delta": -1, + "world_flags": ["site_sync_brittle"], + "follow_up_dialogue": "marcus-Q099-complete-brittle", + "_note": "The job was deleted instead of repaired. It stops the symptom, but it is not a durable fix." + } + ], + "pressure_profile": null, + "blast_radius": [], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "tags": ["cron", "permissions", "web_server"], + "internal_notes": "Example only." +} +``` + +## Multi-VM Quest Example + +```json +{ + "id": "Q098", + "title": "Build Sync Writes Bad Ownership", + "tier": 2, + "primary_vm": "build_machine", + "required_vms": ["workstation", "build_machine", "web_server"], + "ticket_id": "T098", + "baseline_snapshot": "baseline.post-q006", + "_note": "The build machine is pushing release files to the web server, but the ownership is wrong and the deploy helper is still running.", + "summary": "A deployment helper on the build machine is writing release files to the web server with root ownership. The helper must be stopped and the output repaired so the web server can manage the files again.", + "clue_fingerprint": { + "description": "The deploy helper is still running on build_machine. 
On web_server, the release artifact is owned by root instead of www-data.", + "evidence": [ + { "type": "file_contains", "vm": "build_machine", "path": "/opt/deploy/bin/push-release.sh", "contains": "rsync -a --chown=root:root" }, + { "type": "process_running", "vm": "build_machine", "process": "deploy-sync" }, + { "type": "file_owner_is_not", "vm": "web_server", "path": "/var/www/axiomworks/releases/current/index.html", "user": "www-data", "group": "www-data" } + ] + }, + "objectives": [ + { + "id": "release-owned-correctly", + "description": "The web release file is owned by www-data and the deploy helper is stopped", + "check_mode": "passive", + "validation": { + "type": "and", + "rules": [ + { "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks/releases/current/index.html", "user": "www-data", "group": "www-data" }, + { "type": "not", "rule": { "type": "process_running", "vm": "build_machine", "process": "deploy-sync" } } + ] + } + } + ], + "solution_branches": [ + { + "id": "deploy-stopped-owner-fixed", + "label": "Stop Helper and Fix Ownership", + "priority": 100, + "validation": { + "type": "and", + "rules": [ + { "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks/releases/current/index.html", "user": "www-data", "group": "www-data" }, + { "type": "not", "rule": { "type": "process_running", "vm": "build_machine", "process": "deploy-sync" } } + ] + }, + "trust_delta": 2, + "world_flags": ["release_permissions_fixed"], + "follow_up_dialogue": "marcus-Q098-complete-clean", + "_note": "This branch validates both VMs: the release file is fixed on web_server and the helper is no longer running on build_machine." + } + ], + "pressure_profile": null, + "blast_radius": [], + "unlock_requirements": ["world_flag:player_ssh_configured"], + "tags": ["deploy", "permissions", "multi-vm", "build_machine", "web_server"], + "internal_notes": "Example only." 
+} +``` + +## Quest Chain Authoring + +Use `follow_up_ticket` to chain the campaign in sequence. The winning branch +emits the next ticket, and `QuestDirector` activates the next quest from that +ticket. + +| Quest | Clean branch `follow_up_ticket` | +| --- | --- | +| `Q001` | `T002` | +| `Q002` | `T003` | +| `Q003` | `T004` | +| `Q004` | `T005` | + +Keep the chain on the clean, high-priority branch. If a brittle branch should +continue the story differently, use its own `follow_up_ticket` or +`follow_up_incident` intentionally. diff --git a/docs/design/SPEC_LOCK.md b/docs/design/SPEC_LOCK.md new file mode 100644 index 0000000..1de0acb --- /dev/null +++ b/docs/design/SPEC_LOCK.md @@ -0,0 +1,161 @@ +# Sysadmin Chronicles — Spec Lock + +This file preserves the user's intended new system design. Treat it as binding. + +## 1. Narrative spine + +The story progression is: + +```text +Normal Work → Unease → Suspicion → Investigation → Conflict → Resolution +``` + +Every quest must map to one of these phases. + +## 2. Required quest structure + +Every proposed quest must include: + +- Title +- Narrative Phase +- Objective +- Linux Concepts +- Systems Used +- Hidden Hook (optional) +- Failure Conditions +- Behavior Impact + +For implementation, these may be expanded into JSON fields, but these concepts must remain present. + +## 3. Core systems + +### 3.1 Player behavior tracking + +Track: + +- `curiosity` — exploration, anomaly investigation, reading beyond ticket scope +- `obedience` — completing assigned work, following stated priorities, ignoring suspicious extras +- `risk` — reckless changes, broad permissions, deleting evidence, unsafe shortcuts + +These influence: + +- Access levels +- Narrative progression +- Endings + +### 3.2 Trust and suspicion compatibility + +The existing system already uses `trust_delta`, world flags, and branch quality. Preserve that. 
+ +Map old and new systems like this: + +- `trust` = professional standing produced mostly by solution quality and branch outcomes +- `suspicion` = management/security attention caused by investigative, risky, or unusual behavior +- `curiosity`, `obedience`, `risk` = the new behavior profile controlling narrative route + +Do not replace trust. Extend it. + +### 3.3 Access system + +Player permissions evolve: + +```text +basic_user → sudo → root +``` + +Access is affected by: + +- Trust from competent task completion +- Suspicion from investigation behavior +- Risk from careless or destructive changes +- Narrative phase + +### 3.4 Boss system / management pressure + +The boss system acts as a dynamic constraint, not a cutscene machine. + +Phase scaling: + +- Phase 1: Annoying +- Phase 2: Dismissive +- Phase 3: Suspicious +- Phase 4: Monitoring +- Phase 5: Interfering +- Phase 6: Outcome-dependent + +Functions: + +- Interrupt tasks +- Reassign priorities +- Restrict access +- Add pressure through tickets, emails, delayed approvals, audits, or access review + +In the current company context, this can be represented by Marcus, Kowalski, Priya, or policy pressure depending on the situation. Do not turn one character into a cartoon villain. + +### 3.5 Hidden narrative system + +Hidden hooks are embedded in normal quests. + +Examples: + +- Unknown services +- Suspicious cron jobs +- Hidden users +- Network anomalies +- Unexpected SSH keys +- Odd timestamps +- Config history that does not match the official story + +Rules: + +- Never explicitly flagged +- Optional discovery only +- Not required to complete the assigned ticket +- Must be discoverable through real sysadmin behavior +- Should accumulate into a coherent hidden story over time + +## 4. 
Quest generation constraints + +- Reuse existing game systems +- Do not introduce unnecessary mechanics +- Scale difficulty with player progression +- Preserve the observed-VM-state design from existing quest authoring +- Prefer real Linux behavior over puzzle logic + +## 5. Difficulty scaling + +- Phase 1: Explicit instructions +- Phase 2: Partial hints +- Phase 3: Minimal guidance +- Phase 4+: Problem-solving only + +This applies to ticket wording, hints, clue obviousness, and branch tolerance. + +## 6. Endings + +Endings are determined by behavior over the playthrough: + +- `corporate_loop` — obedient path / bad ending +- `burnout` — passive path / neutral ending +- `exposure` — investigative path / good ending +- `chaos` — destructive/high-risk path + +No ending should be selected by a single obvious final button. The route should emerge from world flags, behavior variables, access state, and discovered/acted-on hidden hooks. + +## 7. Design principles + +- Discovery over exposition +- Systems over scripts +- Freedom over forced narrative +- Realism with subtle distortion + +## 8. Non-goals + +Do not: + +- Build a linear-only story +- Rely on cutscenes +- Over-explain mechanics +- Remove player agency +- Turn the mystery into explicit quest markers +- Rewrite established characters to fit a new plot diff --git a/docs/design/STORY_DESIGN_CONTEXT.md b/docs/design/STORY_DESIGN_CONTEXT.md new file mode 100644 index 0000000..f026292 --- /dev/null +++ b/docs/design/STORY_DESIGN_CONTEXT.md @@ -0,0 +1,423 @@ +# Story Design Context — Sysadmin Chronicles + +For story designers and AI agents creating new quests and narrative content. 
+ +**Related docs:** +- `CHARACTERS.md` — character bios, relationships, story hooks +- `COMPANY_LORE.md` — world, company, tone +- `QUEST_AUTHORING.md` — technical JSON spec for implementers + +This document answers: *how does story actually work in this game, and what does a quest +concept need to contain to be usable?* + +--- + +## The Core Premise + +The player is a new junior sysadmin at Axiom Works, a mid-size B2B software company. +They are replacing someone named Dale. Nobody will explain why Dale is gone. + +The game is played entirely through a simulated work environment: a terminal, an email +inbox, and a company website. There are no cutscenes, no narration, no inventory, no +combat. Everything that happens is expressed through: + +- **Tickets** — the player receives a ticket describing a problem +- **The terminal** — the player SSHes into VMs, investigates, and fixes things +- **Character dialogue** — characters react to how the player solved the problem +- **The next ticket** — the world moves on, and the consequences of what the player + did are baked into the next situation + +That's it. Story is not told — it is accumulated from the choices the player makes +when fixing real Linux problems on real virtual machines. + +--- + +## The Three Machines (VMs) + +Every quest happens on one or more of these machines. Their narrative identities +matter as much as their technical roles. + +### ares — the Workstation +The player's home machine. Ubuntu 24.04. Quests here are onboarding-flavored — +establishing access, learning the environment. It's the only machine the player +can reach on day one. + +*Narrative identity:* Where you start. Safe-ish. The first one you break is here. + +### hermes — the Web / App Server +Debian 12. Runs nginx and the AxiomFlow demo/staging application. This is the +machine that Sarah Chen cares about, that customers can feel, and that Priya Nair +watches for security posture. Most of the early-game quests are here. 
+ +*Narrative identity:* The product's face to the world. Breaking this makes noise +immediately. The most politically visible machine. + +### vulcan — the Build Machine +Arch Linux. Compiles packages, runs the internal build pipeline, serves packages +to hermes via an internal apt repo. Nikhil Sharma owns this in principle but nobody +manages it daily. Things here break silently until hermes starts serving bad software. + +*Narrative identity:* The machine nobody watches until something downstream fails. +Quests here reveal that problems have upstream causes the player didn't expect. + +### Planned future machines +As the story expands, new machines can be added. Each should have a clear narrative +role before it's introduced. (See `COMPANY_LORE.md` for the candidate list.) + +--- + +## How Story Is Delivered + +### Tickets as Act One +Every quest begins with a ticket in the player's inbox. The ticket is a short email +from a character describing a symptom — not a cause. The sender's perception of the +problem is usually incomplete and sometimes wrong. This is intentional: the player's +job is to investigate, not to execute instructions. + +Good ticket writing: +- Describes what the sender experienced, not what the cause is +- Has the sender's voice and perspective (Sarah is outcome-focused; Dave is confused; + Priya is terse and specific) +- Does not hint at the solution +- Creates genuine stakes (site is down, builds are failing, someone is locked out) + +Bad ticket writing: +- Explains the root cause ("the log file is too big") +- Has no character voice (generic IT help desk language) +- Stakes are unclear or low + +### The Terminal as Act Two +The player investigates. They SSH in, run commands, read logs, check configs, look at +file ownership. The evidence is seeded into the VM baseline — it is genuinely there +to find, not procedurally generated. 
A good quest has a natural clue trail: + +- The most obvious thing points to a second thing +- The second thing reveals the actual problem +- The fix is achievable with real Linux knowledge + +The player cannot be told what to do. They can ask Marcus for hints (via dialogue +choices), but good players don't need to. + +### Branching Resolution as Act Three +When the player has made changes to the VM, the game checks the state of the +system against the quest's solution branches. The branch that matches determines: + +- What dialogue fires (Marcus's reaction, Sarah's reaction, Priya's follow-up) +- What trust delta the player receives +- What world flag is set (persistent story state) +- Whether an incident is triggered (a future consequence of a partial fix) +- What ticket comes next + +**This is the central story mechanic.** Every quest should be designed with at +least two and ideally three resolution branches: + +| Branch type | What it means | +|-------------|---------------| +| **Clean fix** | Player understood the root cause and solved it properly. High trust, no downstream risk. | +| **Acceptable fix** | Problem is solved but with a tradeoff — brittle approach, future maintenance burden, or incomplete cleanup. Lower trust. | +| **Regression** | Player fixed the symptom but made something else worse. Negative trust. Story consequences. | + +The **regression branch** is not about punishment — it's about realism. A real +sysadmin who removes all SSH restrictions to restore one person's access has +technically solved the ticket while creating a larger problem. The story should +treat this as realistic professional consequence, not a game-over failure. + +Players on a clean-fix path get more trust, unlock more access, and receive warmer +character reactions. Players on a regression path continue playing but face the +downstream effects of their choices. 
+ +--- + +## World Flags — Persistent Story State + +World flags are string keys set when a quest's branch resolves. They persist for +the entire playthrough and can be read by later quests, incidents, and dialogue. + +Examples: +- `hermes_logrotate_healthy` — set when the player properly fixed log rotation +- `hermes_ssh_allowusers_fragile` — set when the player restored SSH access using + the brittle AllowUsers approach instead of the robust AllowGroups approach +- `player_ssh_configured` — set when the player successfully set up SSH on day one + +World flags are how story continuity works. A later quest can check whether the +player fixed something correctly earlier and behave differently. Marcus can reference +a past fix. Priya can flag a previously introduced risk in a later audit. A problem +that was "solved" with a quick fix can recur. + +**When designing a new quest, ask:** what flag should this set, and what future quests +or dialogue might reference it? + +--- + +## Trust — The Narrative Currency + +Trust is a numeric score that tracks the player's professional standing with Marcus +and the IT team. It affects: + +- **VM access** — the player gains SSH access to hermes and vulcan as trust increases. + If trust drops badly, access can be revoked. +- **Documentation access** — more trusted players get access to internal runbooks + and admin guides +- **Character warmth** — Marcus's messages change tone subtly as trust grows +- **Incident visibility** — at a certain trust level, the player starts seeing + background incidents before they become critical + +Trust is not displayed as a raw number. Players experience it as consequences. + +**For quest designers:** each branch should have a `trust_delta` that reflects the +quality of the fix. A proper root-cause fix should earn more than a workaround. +Regression branches should cost trust. Day-one onboarding quests are lenient; +later quests at higher tiers should be less forgiving. 
+ +--- + +## Incidents — Consequences of Incomplete Fixes + +An incident is a time-delayed consequence that fires when a quest's partial-fix +branch was taken. It represents the problem coming back. + +Example: The player clears a full disk by deleting a log file but doesn't restore +the logrotate config. Two in-game hours later, the disk starts filling again. Dave +notices. The player gets another ticket about the same symptom. + +Incidents are not punishments — they are realistic. The world doesn't stay fixed +just because the player touched it. A player who takes clean-fix branches will +rarely see incidents. A player who takes every shortcut will find their ticket queue +filling up with problems they already "solved." + +For story purposes: incidents can also carry narrative weight. If the player made a +security regression, an incident could represent an audit finding, an unusual login, +or a configuration discrepancy Priya noticed. + +--- + +## The Character Conversation Model + +Quest dialogue fires after a branch resolves. Three characters can speak: + +### Marcus Webb +The primary voice. Appears in every quest. His post-resolution message reflects: +- What the player actually did (not just whether they succeeded) +- Whether they understood the root cause or just cleared the symptom +- A forward-looking observation (usually a quiet flag for what's coming next) + +Marcus does not praise effusively or scold dramatically. He states what he observed. +His message for a clean fix is warmer and sometimes wry. His message for a regression +is brief and pointed. He never says "well done!" He might say "that's the right call." + +### Sarah Chen +Speaks when the quest affects something product-facing (hermes being up or down, +deploys working or failing). Her messages are reactive — she responds to outcomes, +not process. She is not hostile unless the player makes her situation worse. 
+ +### Priya Nair +Speaks when the quest has security implications — access changes, hardening, +audit posture. She does end-of-shift reviews that grade overall performance. +Her per-quest messages are brief and evaluative. She notices things Marcus might not. + +### Other characters +Dave Okonkwo files tickets. He does not have post-resolution dialogue — he +just stops or starts noticing things. Future characters (Kowalski, Nikhil, Tanya) +can speak in dialogue if quests are designed to involve them. + +--- + +## The Narrative Arc + +The overall story has six phases. Quests should be designed with their phase in mind. +The phase is usually not visible to the player — it emerges from what's happening +around them. + +### Phase 1 — Normal Work +*Tier 1 quests. Early game.* + +The player is new. Everything is routine. Marcus is helpful. The problems are real +but not alarming — a broken config, a full disk, a permission issue. The player is +learning the environment. The subtext is that things are slightly more wrong than +they should be, but there's nothing to point at. + +Hidden layer: small anomalies in the systems that curious players can notice but +don't have context for yet. + +### Phase 2 — Unease +*Tier 1/2 transition.* + +The problems start to have patterns. The same kind of thing breaks twice. A fix +the player made doesn't hold the way it should. Nothing is alarming, but Marcus's +messages have a slightly different quality — he notices things he doesn't explain. + +Hidden layer: a world flag from an early quest points somewhere unexpected. + +### Phase 3 — Suspicion +*Tier 2 quests. Mid game.* + +The player starts encountering problems they didn't cause and can't fully explain. +Access was changed by someone. A config was edited recently. A log shows an +unusual pattern. Nobody is accusing anyone. But the player now has enough context +to start asking questions — even if no quest explicitly tells them to. + +This is where Dale becomes relevant again. 
The systems the player inherits were +last touched by Dale. Some of them have been in a particular state for a long time. + +### Phase 4 — Investigation +*Tier 2/3 transition.* + +The player has connected enough dots to understand that something happened before +they arrived. The quests in this phase involve digging into logs, access records, +and configuration history. The investigation is framed as professional work +(audit the access logs, trace the package build history) — but the results tell +a story. + +Marcus's messages are shorter. Priya starts appearing more. Kowalski schedules a +meeting nobody explains. + +### Phase 5 — Conflict +*Tier 3 quests. Late game.* + +The player knows what happened. Acting on that knowledge has professional +consequences. The conflict is not physical — it is about what the player chooses +to surface, who they tell, and what they do with access they were given for one +purpose that could be used for another. + +### Phase 6 — Resolution +*Endgame.* + +The situation resolves. The ending the player gets depends on the world flags +accumulated across their entire playthrough — not just whether they clicked the +"good ending" button. A player who took clean-fix branches throughout, built +trust, and noticed the hidden anomalies gets a different ending than a player +who patched symptoms, lost trust, and missed everything. + +--- + +## What Makes a Good Quest Scenario + +The best quests have a **plausible mundane cause** and a **visible technical trail**. +Players should never need to guess — they should be able to find the answer by +looking at the right files and running the right commands. 
+ +### Good scenario types +- Service down → config syntax error → player traces error output to the line +- Disk full → log file enormous → logrotate config missing → player restores it +- Deploy fails → files owned by wrong user → someone ran a script as root manually +- Build failures → clock drift → NTP not running → player enables time sync +- Access locked out → sshd_config modified → wrong directive → player corrects it +- App crashes after update → bad package from internal repo → player traces to source + +### What makes these work +1. **The symptom is real and urgent.** Something is actually broken. +2. **The cause is discoverable.** The evidence is in logs, config files, or system state. +3. **The fix is a real Linux operation.** Not artificial — `chown`, `systemctl`, editing + a config, fixing a cron entry, rolling back a package. +4. **Multiple approaches exist.** The quick fix works. The proper fix is better and + the game knows the difference. +5. **The character reactions are grounded.** Sarah cares about the demo being up. + Priya cares about the access control implications. Marcus cares about whether the + player understood what they were doing. + +### Bad scenario types to avoid +- Problems that require packages not in the VM's guaranteed baseline (see `QUEST_AUTHORING.md`) +- Problems that require real-time events the validation engine can't check +- Problems where the "correct" fix is the only fix (no meaningful branch differentiation) +- Problems that break the fourth wall or require the player to know game-layer information +- Problems that are gotchas rather than investigations (the cause can't be found by looking) + +--- + +## Hidden Anomalies — Environmental Storytelling + +Every 3–5 quests should include something unusual in the VM environment that the player +is not told about and not required to engage with. These are not quest objectives. +They are breadcrumbs for curious players. 
+ +Examples of the kind of thing these should be: +- A user account that shouldn't exist +- A log entry from an odd time that doesn't match the official history +- A file that was modified recently but wasn't part of the quest setup +- A cron job that's been disabled but was once important +- An SSH key in authorized_keys that doesn't belong to anyone obvious + +These anomalies should be consistent with the overall narrative arc — a player who +collects them across the whole game should be able to piece together what happened +before they arrived. They should never be labelled, never referenced in objectives, +and never required. They are for the players who look. + +--- + +## Quest Output Format for Story Agents + +When proposing new quests, provide the following. This is the minimum needed for +a technical author to implement the quest. + +``` +Quest ID: QXXX +Title: [player-facing] +Narrative phase: [1–6] +Tier: [1, 2, or 3] + +Primary VM: [ares / hermes / vulcan] +Additional VMs: [if any] + +Scenario summary: + What is broken, why it is broken (the root cause), and what the player + will encounter. 1–3 sentences. Written for the implementer, not the player. + +Ticket: + From: [character name] + Subject: [email subject line] + Body: [the email the player receives. Written in the sender's voice. + Describes the symptom. Does not explain the cause.] + +Clue trail: + What the player will find when they investigate. The evidence that leads + them to the root cause. Describe the actual files, log entries, and system + states — not the player's steps. + +Solution branches: + Branch 1 (clean fix, highest trust): + What the player has done. Why it's correct. Trust delta. + Branch 2 (acceptable fix): + What the player has done. What tradeoff it introduces. Trust delta. + Branch 3 (regression, if applicable): + What the player did wrong. What it breaks. Negative trust delta. 
+ +Character reactions: + Marcus (post-resolution): + Clean: [what Marcus says] + Acceptable: [what Marcus says] + Regression: [what Marcus says] + Sarah / Priya (if relevant): + [reaction to the specific outcome that affects them] + +World flags set: [list flags each branch sets] +Follow-up incident (if any): [what recurs if the acceptable-fix branch was taken] +Hidden anomaly (if any): [something unusual seeded into the VM that's not part of + the quest objectives] +Narrative notes: [anything a future quest author should know — Dale connections, + story threads this opens or closes, things characters should remember] +``` + +--- + +## The Dale Thread — Notes for Story Designers + +Dale's story should emerge slowly from the systems themselves, not from exposition. +When designing quests — especially mid-to-late game — consider: + +- **What did Dale last touch?** The VMs the player inherits have a history. Some + configurations were made by Dale. Some are good. Some are wrong in ways that + suggest Dale was dealing with something. + +- **What was Dale trying to do?** As the investigation phase develops, the picture + should become coherent. Dale wasn't random — there was a pattern to their actions. + +- **Who knew?** Marcus knew Dale. Priya may have been involved in whatever ended + Dale's tenure. Kowalski definitely knows. The player assembles this from fragments, + not a scene where someone explains it. + +- **The player is inheriting Dale's problems.** Some of the broken things the player + fixes are broken because Dale broke them. Some of the broken things were broken on + purpose. The player won't know which is which until later. + +The reveal of what Dale did should feel like the player figured it out, not like the +game told them. 
diff --git a/docs/design/new_system_canon_packet.md b/docs/design/new_system_canon_packet.md new file mode 100644 index 0000000..9a55452 --- /dev/null +++ b/docs/design/new_system_canon_packet.md @@ -0,0 +1,133 @@ +# Sysadmin Chronicles — New System Canon Packet + +This packet combines the new quest-system spec with the established story/implementation context. + +## Core sentence + +The player is not “on a main quest.” The player is doing sysadmin work. The story leaks through systems. + +## Hard canon + +- Company: Axiom Works +- Products: AxiomFlow, AxiomDash, AxiomSync +- Tone: plausible B2B software company; dry corporate dysfunction; no cartoon villains +- Infrastructure naming: Greek-god hostnames +- Current machines: + - `ares` — player workstation, Ubuntu 24.04 + - `hermes` — web/app/demo server, Debian 12, nginx + - `vulcan` — build machine, Arch Linux, internal build/release pipeline +- Player: competent new junior sysadmin, replacing Dale, no spoken lines +- Dale: previous sysadmin; central unresolved mystery; reveal through systems, not exposition + +## Character preservation rule + +Character portraits already match the current bios and are on the in-game company website. + +Allowed: + +- Compress bios for prompt use +- Clarify contradictions +- Add operational story use +- Preserve and sharpen existing voice + +Not allowed: + +- Changing names already shown on the company site +- Changing role, personality, authority level, implied visual vibe, or age band +- Making characters cartoon villains +- Creating changes that would require new portraits + +## Active character use + +### Marcus Webb + +Senior Systems Administrator. Primary technical contact and ticket voice. Dry, terse, precise. Trusts competence over credentials. Gives more rope as the player proves competence. Knows what Dale did but avoids discussing it directly. Respects root-cause fixes and dislikes symptom-patching. 
+ +Use for: quest assignments, technical follow-up, access/trust gates, quiet hints, sometimes late-night observations. + +### Sarah Chen + +Product Manager, AxiomFlow. Outcome-focused, direct, concerned with demos/staging/product-visible failures. Often right about symptoms and wrong about root cause. Notices proper underlying fixes. + +Use for: product-facing tickets, hermes/demo pressure, stakeholder reactions. + +### Priya Nair + +Head of Security & Compliance. Canonical email: `p.nair@axiomworks.internal`. Replace old references to Priya Kapoor or Priya Singh. Calm, precise, consequence-focused. Assumes breach/misconfiguration professionally. No alarmism. No exclamation marks. + +Use for: access audits, security consequences, end-of-shift review, risky-fix evaluation. + +### Dave Okonkwo + +Non-technical employee and ticket source. Reports symptoms accurately, misdiagnoses causes plausibly, helpful rather than stupid. + +Use for: ordinary employee impact reports. + +### Dave Kowalski + +Director of IT Operations. Marcus's manager and player's skip-level. Policy pressure, bullet-point status emails, meetings as implied threat, “we should document that” energy. + +Use for: boss/management pressure, access restriction, escalation, status demands. + +### Derek Ashford + +Financial Controller. Appears on CC lines around costs/procurement. Always replies-all. Treat “Dave from Finance” as a likely continuity error unless the user decides otherwise. + +Use for: budget/procurement pressure. + +## Background character use + +Use sparingly for flavor and pressure, not because every named character needs screen time. 
+ +- Nikhil Sharma — build/release pipeline and vulcan +- Tanya Okafor — customer pressure +- Phil Ruiz — sales/demo pressure +- Yusuf Halabi — engineering escalation +- Rachel Huang — sysadmin peer/provisioning +- Tom Malaney — DNS/routing/networking +- James Osei — audit details +- Ellen Marsh / David Park / Karen Volkov / Rachel Brandt — distant executive pressure + +## Quest/story delivery model + +Every quest is delivered through existing game systems: + +1. Ticket/email describes a symptom. +2. Player investigates real VM state. +3. Player applies real Linux/admin fixes. +4. Validator resolves the matching solution branch. +5. Dialogue reacts to the actual branch. +6. World flags, trust, incidents, behavior variables, and access state persist. +7. Later quests read those consequences. + +## Existing implementation concepts to preserve + +- JSON quests under `content/quests/` +- Tickets under `content/tickets/` +- VM prep scripts under `tools/vm/quest-prep/QXXX-prep.sh` +- Observed-state validation +- Clue fingerprints +- Solution branches +- `trust_delta` +- `world_flags` +- `follow_up_ticket` +- `follow_up_incident` +- Incidents as delayed consequences +- Baseline snapshots + +## New system additions + +Add or strengthen: + +- Narrative phases +- Behavior variables: curiosity, obedience, risk +- Suspicion as management/security attention +- Access levels: basic_user, sudo, root +- Boss/management pressure phase scaling +- Hidden hook discovery state +- Behavior-driven endings +- Debug tools for narrative state + +## Design warning + +Do not use the new system as an excuse to throw away the current strengths. The existing branch/world-flag/trust model is good. It needs to become the backbone of the new narrative system, not get replaced by a generic quest tracker wearing a fake mustache. 
diff --git a/docs/design/sysadmin_chronicles_full_quest_redesign.md b/docs/design/sysadmin_chronicles_full_quest_redesign.md new file mode 100644 index 0000000..8758330 --- /dev/null +++ b/docs/design/sysadmin_chronicles_full_quest_redesign.md @@ -0,0 +1,3528 @@ +# Sysadmin Chronicles — Full Quest & Story Redesign (REVISED) + +> Self-revision against SPEC_LOCK.md (binding), CHARACTERS.md, STORY_DESIGN_CONTEXT.md, +> QUEST_AUTHORING.md, and COMPANY_LORE.md. +> +> Audit findings from v1 corrected in this revision. Changes are not additive — +> this document supersedes the previous version in full. + +--- + +## Audit Summary (What Changed and Why) + +The first draft had the right bones but violated the design's core premise in several +places. The clearest pattern of failure: quests were being used to deliver investigation +content explicitly rather than letting investigation happen as a byproduct of normal +work. Specific problems fixed in this revision: + +**Replaced or redesigned:** +- Q028 (Dale's archive handed to the player as a directed task) → Q028 is now a backup + integrity task where Dale's working directory appears in the restore path +- Q029 (authenticate a forged report) → Q029 is now a systemd service audit task where + the forged report is found in a log directory, not handed to the player +- Q035 (write an investigation summary for the CTO) → Q035 is now a log retention and + archival task; the player's work product IS the investigation record +- Q038 (write what you believe happened) → Q038 is now a certificate rotation task under + pressure; the conflict is operational, not narrative +- Q041 (read Priya's briefing document) → Q041 is now a production hardening task +- Q044 (Marcus explains Dale) → cut as a named quest; Dale's story now emerges from + system artifacts the player finds; Marcus says less, more precisely +- Q045 (Kowalski emails the outcome) → Q045 is now a change-freeze and documentation + task whose resolution signals the ending; no 
character summarizes what happened +- Q046/Q047/Q048 replaced with quests that have real Linux substance + +**Hook density reduced:** Phase 2 had one hook per quest. Hooks are now seeded in +roughly every 2–3 quests across Phase 1–2, with concentration increasing in Phase 3. + +**Styx dropped:** The `styx` hostname thread from Q006 had no resolution. Removed. +Q006 is revised with a hook that connects to the active investigation arc. + +**Difficulty scaling corrected:** Phase 2 quests that were Tier 1 have been corrected +to Tier 2. Ticket wording in Phase 2 is less explicit. Phase 4+ tickets give the +problem statement only — no guidance on approach. + +**Phase 6 given real technical content:** Resolution-phase quests now all teach Linux +concepts. Narrative delivery happens through the work and its consequences, not +through characters explaining what happened. + +--- + +## 1. Design Overview + +### The Core Proposition + +The player is doing sysadmin work. The story leaks through the systems they maintain. +A player who ignores everything except the tickets will complete the game — they will +just complete a different version of it than the player who reads the bash history that +wasn't in scope and notices a timestamp that doesn't fit. + +This is not a rhetorical distinction. Every system in this redesign follows from it: +behavior variables capture what kind of sysadmin the player is, not whether they are +"good" at detecting the plot. Trust reflects professional competence. Endings reflect +the accumulated profile of both. + +### How the New System Extends the Existing One + +The existing branch/world-flag/trust model is the backbone. It is not replaced. 
+ +**Preserved from existing implementation:** +- `trust_delta` per solution branch — reflects quality of the fix +- `world_flags` — persistent string keys, set by branch resolution, read by later quests +- `follow_up_ticket` and `follow_up_incident` — chain quests, trigger delayed consequences +- Solution branch priority — highest valid branch wins +- Tier-based difficulty (Tier 1, 2, 3) +- Observed-state validation — not scripted walkthroughs +- Clue fingerprints as advisory baseline documentation +- Character dialogue responding to branch outcomes + +**New system adds (minimally, without unnecessary mechanics):** +- `narrative_phase` field on each quest — maps to one of six phases; gates pressure + profile and difficulty scaling +- Behavior variables: `curiosity`, `obedience`, `risk` — accumulated alongside trust; + govern narrative route and ending +- `suspicion` — management/security attention score; distinct from trust; affects + access and pressure level +- Access level per machine: `basic_user`, `sudo`, `root` — evolves with trust and + phase; degrades with sustained high risk +- `hidden_hook` field on quests — defines a discovery condition and the flag it sets; + optional, never required to complete the ticket +- Ending evaluator — runs at game close; reads all accumulated state; outputs one of + four endings + +No other new mechanics are introduced. Every new field maps to existing infrastructure +patterns (world flags, trust deltas, branch outcomes). + +### Variable Interaction Model + +``` + [Quest branch resolves] + │ + ┌───────────┼────────────┐ + ▼ ▼ ▼ + trust_delta world_flags behavior_impact + │ │ │ + ▼ ▼ ▼ + trust narrative curiosity / + (access, routing obedience / + warmth, (later quest risk / + incident content) suspicion + visibility) + │ + ▼ + ending_route +``` + +Trust and behavior variables accumulate in parallel. 
A player with high trust and +high curiosity is a different player than one with high trust and high obedience — +same professional quality, different narrative destination. + +--- + +## 2. Character Usage Guide + +All portrait-compatible identity is preserved. The following is operational guidance +for quest authors, not character redefinition. + +### Marcus Webb + +**Voice:** Short. Precise. Does not explain things twice. The second sentence he +adds — when he adds one — is always the important one. + +**Quest role:** Primary ticket source (most quests), trust gatekeeper, access grant/ +revoke mechanism, ambient signal source in mid-game. + +Marcus's messages evolve with trust. Low trust: purely functional assignments. +Mid trust: he occasionally adds context that wasn't asked for. High trust: he +sometimes sends a message that isn't a ticket at all — an observation, a thing he's +noticed, phrased as if the player should already know what to do with it. + +He knows about Dale. He will not bring it up directly. If the player finds something +Dale-related, Marcus's response will be exact and quiet — never surprised, never +explanatory. + +Use Marcus for: ticket assignments, clean/acceptable/regression branch responses, +access gate messages, quiet mid-game Slack observations, cost-free hints if the +player asks (not volunteered). Do not use Marcus to explain the story, praise +the player effusively, or become verbose about anything personal. + +### Sarah Chen + +**Voice:** Direct, outcome-focused, slightly impatient when things are broken. +Warms when fixes hold. Cools when fixes don't. + +**Quest role:** hermes and staging tickets, product-pressure source, response +calibration for clean vs. symptom fixes. + +Sarah's descriptions are accurate about symptoms and often wrong about cause. +She describes what she saw, not what caused it. When a fix holds — when the same +problem doesn't recur — she notices, and says something. 
When it does recur, she +says something else, shorter. + +Use Sarah for: hermes/staging/demo tickets, stakeholder pressure escalations, CC +lines on cross-team notes, downstream reactions to fix quality. Do not use Sarah +for investigation-phase content — she doesn't have visibility into what the player +is finding. + +### Priya Nair + +**Canonical email:** `p.nair@axiomworks.internal`. Prior references to Priya +Kapoor or Priya Singh are the same person. Those files need updating. + +**Voice:** Precise. Consequence-focused. Calm in tone. No exclamation marks. She +states things, she doesn't perform alarm. + +**Quest role:** Shift reviews, access audits, security-consequence notifications, +investigation-phase escalation when audit activity surfaces a finding. + +Priya reviews every 3–4 quests. Her reviews note what advanced, what stayed +stable, and what the player introduced as new risk. High curiosity plus low risk: +she notes methodical investigation. High risk: she flags the access footprint. + +In Phase 3–4, Priya becomes more present because the audits are surfacing things. +This is her job, not surveillance of the player specifically. The distinction matters +for tone. + +Use Priya for: shift reviews, access audits, consequence delivery for regression +branches, investigation-phase task assignments (narrowly scoped), security findings +from James Osei. Do not use Priya for technical troubleshooting, warmth, +or anything casual. + +### Dave Okonkwo + +**Voice:** Helpful, non-technical, accurate about what he saw, wrong about cause. + +**Quest role:** End-user-experience ticket source for early-phase quests and +Phase 2 normalcy anchors. + +Dave's tickets are useful because they describe genuine user experience. His +hypotheses about the cause are well-intentioned guesses. He should never be +made to look stupid — he's filing a ticket correctly for someone without technical +training. 
+ +Use Dave for: early-phase user-visible failures, texture of the company being a +real place. Do not use Dave for anything touching the investigation arc. + +### Dave Kowalski + +**Voice:** Institutional. Bullet-point emails. Meetings as implied threat. +"We should really document that." + +**Quest role:** Management pressure escalation (Phase 3 onward), access restriction +trigger, status demand source, policy constraint. + +Kowalski is not suspicious of the player — he is managing upward risk. His +interventions are institutional responses to things that have surfaced at his level. +When he appears directly, something has become his problem. His pressure is applied +through: status-demand emails, access review initiation, meeting invites that have +known weight, priority-reassignment tickets. + +Use Kowalski for: Phase 3+ pressure manifestations, access restriction when suspicion +is elevated, escalation when an incident has made noise at director level. Do not +make him a villain, do not have him accuse anyone, do not have him explain the plot. + +### Background Characters + +Used sparingly for texture. + +- **Nikhil Sharma** — CC lines on build/pipeline things; Slack messages at unexpected + hours; upstream explanation or blame when something on vulcan is his. He doesn't + know the player until the player touches something of his. +- **Derek Ashford** — CC lines when infrastructure costs surface. +- **Tom Malaney** — Networking problems that are his domain but are slow to resolve. +- **Phil Ruiz** — Demo pressure; hermes's political importance made human. +- **James Osei** — Audit details that Priya summarizes. +- **Rachel Huang** — Peer provisioning; access handoffs when Marcus delegates. + +--- + +## 3. Phase-by-Phase Narrative Arc + +### Phase 1 — Normal Work + +Day one onboarding through the first weeks. The work is real work. The company +is a real place that functions, mostly. Nothing is obviously wrong. 
+ +Quests establish the environment: what the machines are, what they run, who files +tickets, how the characters communicate, what competent work looks like. The player +builds access through demonstrated competence. Marcus is evaluative. Sarah is brisk. +Priya's first shift review is factual and mild. + +Difficulty: explicit instructions. Tickets describe what to do with some specificity. +The clue trail is direct. Branch tolerance is generous — Tier 1 quests forgive partial +fixes with lower trust deltas rather than negative ones. + +Hidden layer: Dale's name appears in file ownership and configuration history. His +SSH key appears in `authorized_keys`. His last logrotate config is in a backup +directory. None of this is called out. A player who reads the files before acting +will find it. Most won't. + +**Phase end state:** Player has basic to moderate access. Trust is positive if clean +branches have been taken. A small number of hidden hook flags may be set for curious +players. The game looks, so far, like what it says it is. + +### Phase 2 — Unease + +The same job. The same machines. But the texture changes slightly. A problem comes +back that was fixed. A service was modified and the modification doesn't have a +corresponding ticket. A config that should have been set by the tooling was set by +hand, by someone. + +Nothing is alarming. But a sysadmin who is paying attention notices these things — +the way you notice that a door doesn't close flush, or that a clock is a few minutes +fast. Not urgent. Off. + +Difficulty: partial hints. Tickets describe the symptom and hint at the location. +The cause requires more investigation than in Phase 1. Branch tolerance decreases — +symptom-only fixes now carry explicit downstream incidents. + +Marcus's messages are the same as always. The occasional extra sentence he adds is +slightly harder to read. In Phase 1 his additions were operational context. 
In Phase 2 +they are sometimes observations that don't quite fit the ticket. + +Hidden layer: the anomaly pattern continues. The same IP appears in a config and in a +log. A cron job has been running for over a year with no ticket. A package in the build +history doesn't correspond to any official release. Each item is individually explainable +as legacy cruft. Together, for a player who's been collecting them, they aren't. + +**Phase end state:** Behavior variables are diverging. High-curiosity players have +world flags for discovered hooks. Obedient players are in good professional standing +with nothing unusual in their record. Suspicion is low across the board. + +### Phase 3 — Suspicion + +The pattern becomes harder to ignore if you're the kind of person who would notice it. +SSH connections from an IP not in the asset inventory. A user account with no HR record. +A backup archive with a timestamp that doesn't align with when backups run. The player +is fixing real problems with real tickets — but the root causes are starting to point +somewhere. + +Difficulty: minimal guidance. Tickets describe the symptom only. No indication of +where to look. The clue trail requires following the evidence without being directed. +Branch tolerance is stricter — partial fixes carry heavier incident weight. + +Management pressure increases. Kowalski's weekly status email asks specific questions. +Marcus forwards it without comment. Priya's shift reviews start noting things they +didn't note before. None of this is targeted at the player. The audits were already +scheduled. The status email was always going to ask those questions. + +A player who ignores all of it and fixes tickets continues to do fine work. They are +just unaware of what the work is revealing. + +**Phase end state:** The investigation path is now visible to curious players. They +have enough fragments to form a partial hypothesis. 
Obedient players are in good +professional standing and have noticed nothing unusual. + +### Phase 4 — Investigation + +For a curious player, the picture is now coherent enough to be disturbing. The quests +in this phase involve work that is framed as legitimate operations — audit the access +log for compliance, trace the package build history for a deployment issue, verify +backup integrity — but the results of doing that work carefully tell a story. + +Difficulty: problem-solving only. Tickets state the problem. No clue on approach. +The player is expected to know their tools and apply them. + +Marcus's messages are shorter now. Not cold — he has always been terse. But the +operational context he occasionally added in Phase 2 is absent. He is managing +something and the messages reflect that without stating it. + +Priya appears more frequently. A quarterly review surfaced something. James Osei +sent her something. She is doing her job. Her tickets are narrow and specific — +she wants to know exactly one thing, stated precisely. + +Kowalski schedules a meeting. The meeting is called a "check-in on access posture." +No specifics. Marcus's next message after the meeting's scheduled end time is +functionally identical to his previous one — same tone, same brevity. A player +paying attention will notice only the timing. + +**Phase end state:** Curious players have a complete or near-complete picture of what +happened before they arrived. The `exposure` ending is now reachable if other variables +support it. Obedient players are in good standing, unaware of the arc. High-risk +players may be under active monitoring. + +### Phase 5 — Conflict + +The conflict is professional. The player has access granted for one purpose that +intersects with information they were not meant to find. The quests are operational — +real work that needs doing. But the operational work, done carefully and honestly, +has consequences. + +A backup restoration reveals something. 
An access revocation request arrives for +an account the player has been investigating. A production ticket requires changing +a configuration that, to a player who has been paying attention, is recognizable as +the wrong change to make. + +The player can always do only what the ticket asks. That is always an available path. +The question is whether the player recognizes when the ticket asks for something that, +done without scrutiny, would harm something beyond the immediate task. + +Marcus says less. Priya is specific and procedural. Kowalski's emails are formal +and institutional. The company is managing something. The player is in it. + +**Phase end state:** Ending routes are determined. The final quests in Phase 6 are +confirmation, not decision. + +### Phase 6 — Resolution + +The final quests are normal work. Infrastructure tasks. Some are the same kind of +task as Phase 1 quests, deliberately — the comparison is the point. The world has +moved on. The player is still a sysadmin at Axiom Works. + +The ending emerges from the accumulated state of all behavior variables, world flags, +trust score, and access history. It is not triggered by a final choice. The player +will not be presented with an ending screen that asks them to pick. They will complete +a routine task, and the ending will fire based on everything that preceded it. + +Difficulty returns to Tier 1 for operational tasks. The pressure has lifted. The +tickets are from Sarah and Marcus and sound like Phase 1 tickets. + +--- + +## 4. Full Quest Catalog + +VMs: `workstation` (ares, Ubuntu 24.04), `web_server` (hermes, Debian 12), +`build_machine` (vulcan, Arch Linux). + +Behavior impact notation: `C` = curiosity delta, `O` = obedience delta, `R` = risk +delta, `S` = suspicion delta. Values are per-branch where they differ. + +--- + +### PHASE 1 — NORMAL WORK (Q001–Q008) + +Tier 1 throughout. Explicit instructions. Generous branch tolerance. +Hook density: 7 hooks across 8 quests.
+ +--- + +**Quest ID:** Q001 +**Title:** First Day, First Key +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Configure SSH key authentication for the player's account +on the workstation before end of day. +**Linux Concepts:** `ssh-keygen`, `~/.ssh/authorized_keys`, directory and file +permissions (`chmod 700`, `chmod 600`), `sshd_config` pubkey authentication +**Systems Used:** workstation +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Your account is active. Before you touch anything else: set +up key-based auth on the workstation. Password auth stays on for now but I want +your public key in authorized_keys before end of day. Walk yourself through it." + +**Clue Trail:** +- `~/.ssh/authorized_keys` already exists but does not contain the player's public key +- `sshd_config`: `PubkeyAuthentication yes`, `PasswordAuthentication yes` +- Player generates keypair with `ssh-keygen`, places public key in `authorized_keys`, + sets permissions — `.ssh/` to 0700, `authorized_keys` to 0600 + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Key present, `.ssh/` is 0700, `authorized_keys` is +0600, SSH auth works. `trust_delta: +2`. Flags: `player_ssh_configured`. +Follow-up ticket: T002. + +Branch 2 — Permissive (priority 50): Key present, permissions wrong (`0644` on key +file or `0755` on directory). SSH works; not correctly hardened. `trust_delta: +0.5`. +Flags: `player_ssh_permissive`. Follow-up incident: I001 (Priya's first review notes +the permission). + +Branch 3 — Incomplete (priority 10): Key absent or `authorized_keys` missing. +`trust_delta: -1`. Flags: `player_ssh_failed`. Marcus follows up. + +**Hidden Hook:** A pre-existing entry in `~/.ssh/authorized_keys` — the file +the player must read and edit — has a line for `dale@axiomworks.internal`. A player +who reads the full file before writing to it will see it. Sets `hook_dale_ssh_key_found`.
+Discoverable through: reading the file the task requires touching. + +**Failure Conditions:** Player cannot authenticate via key; permissions so broad +sshd refuses pubkey auth entirely. + +**Behavior Impact:** +- Clean branch: C+0, O+1, R+0 +- Permissive branch: C+0, O+0, R+1 +- Hook discovered: C+1 (reading the file carefully before writing is the behavior) + +**Narrative Notes:** Establishes Marcus's voice and the evaluation frame. The Dale +key is the first hook: completely invisible unless the player reads the file rather +than overwriting it. No hint it exists. Most players won't find it on day one. + +--- + +**Quest ID:** Q002 +**Title:** Disk Running Hot +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Something is wrong with hermes — the AxiomFlow staging +application is returning 503 errors. Investigate and fix it. +**Linux Concepts:** `df -h`, `du -sh`, `systemctl status`, `/var/log` inspection, +`logrotate`, log file management +**Systems Used:** web\_server +**Ticket Sender:** Dave Okonkwo +**Ticket Summary:** "The work application has been giving a 503 error since this +morning. I tried refreshing and logging out and back in — nothing helps. I think +maybe a script crashed? It was fine yesterday afternoon." + +**Clue Trail:** +- `systemctl status nginx` — service failed +- `journalctl -u nginx` — "no space left on device" +- `df -h` — root partition at 93%+ +- `du -sh /var/log/nginx/*` — access log at 4+ GB +- `/etc/logrotate.d/nginx` — absent + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Restores `/etc/logrotate.d/nginx` with a correct +rotation config, runs `logrotate -f /etc/logrotate.conf` to clear the current +backlog, confirms nginx is running, disk below 70%. `trust_delta: +2`. +Flags: `hermes_logrotate_healthy`. Follow-up ticket: T003. 
+ +Branch 2 — Manual clear (priority 60): Deletes or truncates the large log file, +nginx comes back, logrotate config not restored. Disk clear now; will recur. +`trust_delta: +0.5`. Flags: `hermes_logrotate_fragile`. Follow-up incident: I002 +(log fills again, Sarah files new ticket in Phase 2). + +Branch 3 — Destructive (priority 20): Removes all logs or nginx config. Service +degraded. `trust_delta: -2`. Flags: `hermes_logs_destroyed`. Follow-up incident: +I003 (Priya flags log destruction at next review). + +**Hidden Hook:** None in this quest. The clue trail is clean and the root cause +is straightforward. This is intentional — not every quest in Phase 1 has a hook. + +**Failure Conditions:** nginx remains down; disk stays over 90%; player creates +new problems while fixing. + +**Behavior Impact:** +- Clean branch: O+1 +- Manual clear: R+0 (acceptable partial fix) +- Destructive: R+2 + +**Narrative Notes:** First hermes quest. Establishes the symptom → cause → root +cause investigation pattern. Sarah Chen reacts to branch quality in the follow-up. + +--- + +**Quest ID:** Q003 +**Title:** The Locked Room +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Sarah Chen cannot SSH into the staging server's deployment +account. She has a hotfix to push before an afternoon demo. Restore her access. +**Linux Concepts:** `sshd_config` access directives (`AllowUsers`, `AllowGroups`), +`/var/log/auth.log`, SSH troubleshooting, user group membership (`id`, `groups`) +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "I can't SSH into the staging server. I've tried from two +machines and keep getting 'connection refused' or 'permission denied.' I need to +push a hotfix before 2pm. Can you look at this now?" 
+ +**Clue Trail:** +- `/var/log/auth.log` on hermes: `User s.chen not allowed because not listed in AllowUsers` +- `/etc/ssh/sshd_config`: `AllowUsers deploy-user marcus` — no `s.chen` +- `groups s.chen` shows she is in the `deploy` group +- The config uses `AllowUsers` per-user instead of `AllowGroups` by role + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Player converts `AllowUsers` to `AllowGroups deploy` +(or similar role-based approach), restarts sshd, confirms Sarah can authenticate. +`trust_delta: +2`. Flags: `hermes_ssh_allowgroups`. Follow-up ticket: T004. + +Branch 2 — Username append (priority 60): Adds `s.chen` to the `AllowUsers` list. +Problem solved; next person locked out will need the same treatment. `trust_delta: +0.5`. +Flags: `hermes_ssh_allowusers_fragile`. Follow-up incident: I004 (another user +locked out in Phase 2). + +Branch 3 — Unrestricted (priority 10): Removes `AllowUsers` or `AllowGroups` +entirely. All valid users can SSH. `trust_delta: -2`. Flags: `hermes_ssh_unrestricted`. +Priya flags this in next review. + +**Hidden Hook:** `authorized_keys` for the `deploy-user` account on hermes contains +a key with comment `dale@ares 2023-09`. Discoverable by: reading the deploy-user's +`authorized_keys` as part of investigating the SSH configuration. Sets +`hook_dale_deploy_key`. Connects to Q001's hook for players who found that one. + +**Failure Conditions:** Sarah still locked out; sshd fails to restart after edit; +player breaks SSH for themselves. + +**Behavior Impact:** +- Clean branch: O+1 +- Username append: O+0 +- Unrestricted: R+3 +- Hook discovered: C+1 + +**Narrative Notes:** Marcus's clean-branch response: "Good call switching to +groups. AllowUsers was always going to be a maintenance problem." The attribution +of the AllowUsers config is deliberately vague — it was in place when the player +arrived. Sarah's ticket wording ("I've tried from two machines") is accurate, +non-technical, real.
+ +--- + +**Quest ID:** Q004 +**Title:** The Build That Won't +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** The nightly AxiomFlow build on vulcan has not produced an +artifact in three days. The scheduler shows the job running. Nothing is in the +output directory. Find the cause and fix it. +**Linux Concepts:** `systemd` timers, `journalctl`, NTP and clock synchronization, +`timedatectl`, `systemd-timesyncd`, SSL certificate validation dependencies on +system clock +**Systems Used:** build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Nikhil flagged that nothing has come out of the nightly build +in three days. The timer is showing as triggered. Build log is in the usual location. +Look at what's actually happening." + +**Clue Trail:** +- `systemctl list-timers` — `axiomflow-build.timer` last triggered correctly +- `/var/log/axiomflow-build/build.log` — SSL certificate verification failure + against the internal package repository (cert fetch step) +- `timedatectl` — system clock is 47 minutes ahead of real time; NTP is not running +- `systemctl status systemd-timesyncd` — inactive and disabled +- Enabling timesyncd, syncing clock, re-running the build — success + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Enables and starts `systemd-timesyncd`, verifies +sync with `timedatectl show-timesync`, triggers a manual build run to confirm artifact +output. `trust_delta: +2`. Flags: `vulcan_ntp_healthy`. Follow-up ticket: T005. + +Branch 2 — One-time sync (priority 50): Uses `ntpdate` or `date -s` for a manual +clock correction. Clock is correct now; drift will recur without the daemon. +`trust_delta: +0.5`. Flags: `vulcan_ntp_fragile`. Follow-up incident: I005 (drift +recurs in Phase 2, build fails again). + +Branch 3 — Bypass SSL (priority 20): Disables SSL certificate verification in the +build script rather than fixing the clock. 
Build succeeds; certificate validation +is now bypassed. `trust_delta: -2`. Flags: `vulcan_ssl_bypassed`. Priya flags this. + +**Hidden Hook:** Reading the full build log (not just the most recent failure) +reveals a historical entry from 8 months ago: a build step called `sign-package` +that no longer exists in the current build script. The step was removed — the +removal is not documented anywhere. Sets `hook_sign_package_removed`. Discoverable +by: reading historical log entries as part of diagnosing the build environment. + +**Failure Conditions:** Build continues failing; SSL bypass introduced; NTP +configured incorrectly breaks time-dependent services. + +**Behavior Impact:** +- Clean branch: O+1 +- Bypass SSL: R+3 +- Hook discovered: C+1 + +**Narrative Notes:** First vulcan quest. Establishes the machine's character: things +break here silently and the downstream effect shows up on hermes. The `sign-package` +removal hook is the beginning of the build pipeline thread. An obedient player reads +only the current log. A curious player reads further back. + +--- + +**Quest ID:** Q005 +**Title:** Permissions Drift +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The AxiomFlow staging application cannot write to its cache +directory. Exports are failing for all users. Identify why the ownership changed +and restore correct state. +**Linux Concepts:** `chown`, `chmod`, `ls -la`, process user context (`ps aux`), +service account ownership (`www-data`), bash history inspection +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "Users in staging can't generate exports — they get a +'permission denied' error. The dev team says they haven't changed anything. It +was working Thursday. Something changed on the infrastructure side." 
+ +**Clue Trail:** +- Application error log: `permission denied: /var/www/axiomworks/cache/export` +- `ls -la /var/www/axiomworks/cache` — directory is owned by `root:root`; it was + previously, and should be, `www-data:www-data` +- `ps aux | grep axiomflow` — application process running as `www-data` +- `/root/.bash_history` — contains a `sudo cp -r` command run three weeks ago that + carried root ownership forward into the cache directory + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Runs `chown -R www-data:www-data /var/www/axiomworks/cache`, +confirms application can write, identifies the `cp -r` as cause, documents root +cause in ticket response. `trust_delta: +2`. Flags: `hermes_cache_ownership_correct`. +Follow-up ticket: T006. + +Branch 2 — World-writable (priority 30): Runs `chmod o+w /var/www/axiomworks/cache` +so www-data can write without being owner. App works; directory is now world-writable. +`trust_delta: -1`. Flags: `hermes_cache_world_writable`. Priya flags in next review. + +Branch 3 — Service as root (priority 10): Modifies service unit to run as root. +App works; every downstream file is now root-owned. `trust_delta: -3`. +Flags: `hermes_app_running_as_root`. + +**Hidden Hook:** The `sudo cp -r` command in `/root/.bash_history` is timestamped +three weeks ago — before the player's start date. The session that ran this command +predates the player's account creation. Someone with root access was copying +production files before the player arrived. Sets `hook_pre_hire_root_session`. +Discoverable by: checking bash history to trace the ownership change as part of +understanding the cause. + +**Failure Conditions:** Application still cannot write to cache; player introduces +broader permission regression.
+ +**Behavior Impact:** +- Clean branch: O+1 +- World-writable: R+2 +- App-as-root: R+4 +- Hook discovered: C+2 (this one requires going beyond what the ticket asks) + +**Narrative Notes:** The pre-hire root session hook is more significant than the +SSH key hooks — it establishes that someone was making system changes before the +player arrived. A player who finds it has their first real data point about activity +that predates them. + +--- + +**Quest ID:** Q006 +**Title:** The Account That Shouldn't Be There +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Priya's quarterly access review surfaced a user account on +the workstation with no matching HR record. Audit it and remove it correctly. +**Linux Concepts:** `getent passwd`, `lastlog`, `last`, `ps aux`, `find / -user`, +`userdel -r`, home directory archival before removal +**Systems Used:** workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Quarterly access review flagged an account on the workstation: +`jbenton`. No corresponding entry in the HR system. Before removal: confirm no active +sessions, check if any processes are running under this account, and archive the home +directory. Then remove it. Document what you find." + +**Clue Trail:** +- `getent passwd jbenton` — account exists; no HR match +- `lastlog | grep jbenton` — last login 14 months ago +- `ps aux | grep jbenton` — no active processes +- Home directory: `~jbenton/` exists with standard dotfiles and one file: + `notes/infra.txt` — a plain-text infrastructure reference listing internal + hostnames and access notes, formatted like a personal cheatsheet + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Player checks activity, processes, groups, +home dir; archives home directory to `/var/archive/jbenton-YYYYMMDD.tar.gz`; +runs `userdel -r jbenton`; documents findings and archive location for Priya. +`trust_delta: +2`. 
Flags: `jbenton_account_removed_clean`. Follow-up ticket: T007. + +Branch 2 — Fast remove (priority 40): Removes account without archiving or checking +home dir. Account is gone. `trust_delta: +0.5`. Flags: `jbenton_account_removed_fast`. +Priya's response notes that archival is standard procedure. + +Branch 3 — Left in place (priority 10): Reports account looks inactive, recommends +deferring. Ticket unresolved. `trust_delta: -1`. + +**Hidden Hook:** `notes/infra.txt` in jbenton's home directory is a personal +infrastructure reference. It includes a line for `pipeline-svc` with a note: +`temp sudo — ask DH to scope`. The initials `DH` do not correspond to any current +employee visible on the company website. Sets `hook_dh_initials_in_jbenton_notes`. +Discoverable by: reading the file before archiving or deleting, which proper +procedure (per the ticket) requires doing anyway — but the player can ignore the +content and just archive it. + +**Failure Conditions:** Player removes account with active sessions; player destroys +home dir without archiving; ticket not resolved. + +**Behavior Impact:** +- Clean branch: O+1 +- Fast remove: R+1 (destroying potential evidence) +- Hook discovered: C+1 + +**Narrative Notes:** The `DH` initials connect to the sudoers comment the player +will find in Q011. `pipeline-svc` also connects forward. The note reads like +a practical cheatsheet — not alarming, just a person keeping track of the +infrastructure they were using. The oddness is the initials and the word "temp." + +--- + +**Quest ID:** Q007 +**Title:** Rotation Failure +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The TLS certificate for the AxiomFlow staging domain has +expired. A prospect demo is tomorrow morning. Renew the certificate and ensure +automatic renewal is in place. 
+**Linux Concepts:** `certbot`, Let's Encrypt certificate renewal, `systemd` timers, +`openssl s_client`, nginx configuration reload, certificate verification +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The staging site is showing a certificate error — the browser +is refusing to load it at all. Phil has a prospect demo on this environment tomorrow +at 9am. We need this fixed today." + +**Clue Trail:** +- `openssl s_client -connect staging.axiomworks.internal:443 </dev/null 2>&1 | grep -i expire` + — certificate expired 14 days ago +- `certbot certificates` — cert present, not renewed +- `systemctl status certbot.timer` — inactive, disabled +- `journalctl -u certbot --since "90 days ago"` — renewal failed 60 days ago + (HTTP challenge permission error); timer was disabled manually the same day + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Runs `certbot renew`, re-enables and starts +`certbot.timer`, reloads nginx, verifies new cert expiry with openssl, confirms +staging site loads without browser warning. `trust_delta: +2`. +Flags: `hermes_certbot_healthy`. Follow-up ticket: T008. + +Branch 2 — Renew without timer (priority 50): Renews cert but doesn't restore the +timer. Valid now; expires again in 90 days without action. `trust_delta: +0.5`. +Flags: `hermes_certbot_fragile`. Follow-up incident: I006 (cert expires again in +Phase 3). + +Branch 3 — Self-signed (priority 10): Generates self-signed cert, nginx configured +to use it. Connection is encrypted; browser still warns. `trust_delta: -1`. +Flags: `hermes_self_signed_cert`. Phil's demo shows a security warning. + +**Hidden Hook:** `journalctl -u certbot --since "90 days ago"` contains the failure +entry — permission error. Immediately after the failure, in the same journalctl +window, is an entry showing the timer was disabled by a manual `systemctl disable` +command from a root session. The session timestamp predates the player.
The timer +wasn't failed-and-stopped; it was deliberately turned off after the failure. +Sets `hook_certbot_deliberately_disabled`. Discoverable by: reading the journal +further back than strictly necessary to diagnose the current renewal failure. + +**Failure Conditions:** Cert not renewed; nginx not reloaded; timer still inactive. + +**Behavior Impact:** +- Clean branch: O+1 +- Renew without timer: O+0 +- Self-signed: R+1 +- Hook discovered: C+1 + +**Narrative Notes:** The timer being deliberately disabled — not just failed — is +a small data point in the pattern of things being intentionally changed. A player +who finds it has evidence of deliberate action, not accident. + +--- + +**Quest ID:** Q008 +**Title:** The Package That Wasn't +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** A deployment to hermes is blocked because a required package +is not available in the internal apt repository. The package was reportedly built +last week. Find why it isn't available and restore the deployment path. +**Linux Concepts:** `apt-cache`, `apt-get update`, internal apt repositories, +`reprepro`, repository metadata management, package pipeline between build and +deployment +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Deployment to staging is blocked. The apt install step fails +on a package that Nikhil says he built last week. Something's broken between the +build and the repo. Find it and fix it." 
+ +**Clue Trail:** +- `apt-cache show axiomflow-workers` on hermes — package not found +- `/etc/apt/sources.list.d/axiomworks.list` — points to `http://vulcan.axiomworks.internal/repo/` +- SSH to vulcan: repository Packages index is stale — `reprepro` was not run + after last build +- Built `.deb` artifact at `/srv/packages/axiomflow-workers_2.4.1_amd64.deb` +- Fix: `reprepro includedeb stable /srv/packages/axiomflow-workers_2.4.1_amd64.deb`, + then `apt update` on hermes confirms package availability + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Adds package to repo correctly, updates metadata, +confirms `apt-cache show` succeeds on hermes, deployment unblocked. `trust_delta: +2`. +Flags: `vulcan_repo_healthy`. Follow-up ticket: T009. + +Branch 2 — Manual install (priority 40): Copies `.deb` to hermes and installs with +`dpkg -i`. Deployment works this time; repo still broken for next deployment. +`trust_delta: 0`. Flags: `vulcan_repo_bypassed`. Follow-up incident: I007 +(next deployment fails identically). + +Branch 3 — Escalate without investigating (priority 10): Reassigns to Nikhil +without investigation. `trust_delta: -1`. Ticket stalls. + +**Hidden Hook:** While browsing the repository's package history to find the missing +package, a player who looks at the full package list rather than just the missing +one will find an entry for `axiomflow-audit-bridge` — a package built 8 months ago +with no corresponding deployment record, no entry in any release manifest visible on +hermes, and no build job in the scheduler that corresponds to when it was built. +Sets `hook_audit_bridge_package`. Discoverable by: looking at the full repo package +list rather than only the specific package named in the ticket. + +**Failure Conditions:** hermes still cannot find the package; repo metadata left +in broken state. 
+ +**Behavior Impact:** +- Clean branch: O+1 +- Manual install: O+0 +- Hook discovered: C+2 (requires going beyond the specific package named in ticket) + +**Narrative Notes:** The audit-bridge package is the most significant Phase 1 hook. +It's discoverable only if the player looks at what's around the thing they were +sent to find — real sysadmin behavior, but not required. A player who finds it has +their first glimpse of something that doesn't fit. + +--- + +### PHASE 2 — UNEASE (Q009–Q016) + +Tier 2. Partial hints. Tickets describe the symptom and indicate the general area +but do not specify the cause. Branch tolerance decreases — acceptable-fix incidents +now carry real operational weight. Hook density: 3 hooks across 8 quests, less +pointed than Phase 1. + +--- + +**Quest ID:** Q009 +**Title:** The Recurrence +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** hermes's nginx access log is filling up again. A Phase 1 +incident that was supposed to be fixed is recurring. Find why logrotate isn't +working and make it stable. +**Linux Concepts:** `logrotate` configuration, `/etc/logrotate.d/`, `logrotate -d` +(dry run), `cron` / `systemd-logrotate.timer`, `logrotate` status file +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The staging site is throwing errors again. Same thing as a +few weeks ago — it goes down, then someone fixes it, then it comes back. I was +told logrotate was set up. Why is it happening again?" 
+ +**Clue Trail:** +- (If `hermes_logrotate_healthy` is set from Q002): the logrotate config is present + but the `logrotate.timer` or `cron.daily` entry that calls it is disabled — + config exists but nothing triggers it +- (If `hermes_logrotate_fragile` is set from Q002): logrotate was never restored; + this is the recurrence +- Either way: `systemctl status logrotate.timer` shows disabled; or `ls /etc/cron.daily/logrotate` + shows the file is missing/not executable +- Log is filling again; nginx error is the same + +**Solution Branches:** + +Branch 1 — Root cause (priority 100): Player diagnoses the trigger failure +(timer disabled or cron entry missing), restores the trigger, verifies logrotate +runs correctly on next schedule, confirms log rotation is active. `trust_delta: +2`. +Flags: `hermes_logrotate_stable`. Follow-up ticket: T010. + +Branch 2 — Config only (priority 50): Player restores or confirms the logrotate +config but doesn't check that anything calls it. Disk is cleared manually again. +`trust_delta: +0.5`. Flags: `hermes_logrotate_still_fragile`. Follow-up incident: +I008 (recurs again). + +**No hidden hook** in this quest. The recurrence itself is the unease signal — not +every quest in Phase 2 has a hook. + +**Failure Conditions:** nginx still down; disk not cleared; trigger still inactive. + +**Behavior Impact:** +- Root cause: O+1 +- Config only: O+0 + +--- + +**Quest ID:** Q010 +**Title:** Someone Changed Something +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Priya flagged an nginx configuration on hermes that doesn't +match the last known-good state. Find what changed and restore correct configuration. 
+**Linux Concepts:** `diff`, config file comparison, nginx config structure +(`/etc/nginx/`), `nginx -t`, `git diff` or backup comparison, file mtime +inspection (`stat`) +**Systems Used:** web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Priya found an nginx config that doesn't match the backed-up +state. I don't have a change ticket for it. Go look at what's different and tell me +if it matters." + +**Clue Trail:** +- Backup exists at `/etc/nginx/.bak/` (or Marcus provides a hash reference) +- `diff -r /etc/nginx /etc/nginx/.bak/` reveals two differences: + 1. `server_tokens off;` has been removed from the main config (nginx version + now visible in HTTP headers) + 2. A `location /internal-api/` block added to a site config, proxying requests + to `127.0.0.1:9301` — a port with nothing listening + +**Solution Branches:** + +Branch 1 — Both issues (priority 100): Player identifies both changes, restores +`server_tokens off;`, removes or quarantines the `/internal-api/` block, runs +`nginx -t`, reloads nginx, documents both changes with mtimes. `trust_delta: +2`. +Flags: `hermes_nginx_config_audited`. Follow-up ticket: T011. + +Branch 2 — Token only (priority 50): Restores `server_tokens off;` but misses +the proxy block. `trust_delta: +0.5`. Flags: `hermes_nginx_proxy_block_present`. +Follow-up incident: I009 (Priya finds the block in next audit). + +Branch 3 — No action (priority 10): Reports config looks acceptable. `trust_delta: -1`. +Priya's review flags both items. + +**Hidden Hook:** The proxy block for `/internal-api/` points to port 9301 with +nothing currently listening — but the port number itself, and the path name, will +echo in later anomalies for a player who remembers it. Sets +`hook_nginx_internal_api_block`. Discoverable by: doing a thorough diff rather +than checking only the obvious item. 
+ +**Behavior Impact:** +- Both issues found: O+1 +- Token only: O+0 +- Hook discovered: C+1 (remembering the port number is the payoff later) + +--- + +**Quest ID:** Q011 +**Title:** The Service Account +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** The `pipeline-svc` service account on vulcan has more sudo +privileges than its role requires. Scope it to least privilege. +**Linux Concepts:** `sudo -l`, `/etc/sudoers`, `visudo`, `/etc/sudoers.d/`, +least privilege principle, testing sudo with specific commands +**Systems Used:** build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "James's privilege audit shows `pipeline-svc` on the build +machine has `NOPASSWD: ALL`. That account runs the build pipeline. It should +only be able to restart specific services. Bring it into scope." + +**Clue Trail:** +- `sudo -l -U pipeline-svc` — `(ALL) NOPASSWD: ALL` +- `/etc/sudoers.d/pipeline-svc` — the blanket grant, separate file +- Reviewing what the account actually needs: `systemctl restart axiomflow-build` + and `systemctl restart axiomflow-timer` +- Correct fix: replace `ALL` with specific command paths in sudoers.d + +**Solution Branches:** + +Branch 1 — Precise scope (priority 100): Replaces the blanket grant with +`NOPASSWD: /bin/systemctl restart axiomflow-build, /bin/systemctl restart axiomflow-timer`, +verifies with `sudo -l`, tests that the service can still restart correctly. +`trust_delta: +2`. Flags: `vulcan_pipeline_svc_scoped`. Follow-up ticket: T012. + +Branch 2 — Broader scope (priority 50): Reduces from ALL but grants more than +needed (e.g., `NOPASSWD: /bin/systemctl`). Better; not least privilege. `trust_delta: +0.5`. +Priya notes improvement but flags remaining exposure. + +Branch 3 — Remove sudo entirely (priority 20): Removes all sudo. Service account +can no longer restart services; build pipeline breaks. `trust_delta: -2`. 
+Follow-up incident: build failures within the hour. + +**Hidden Hook:** The comment at the top of `/etc/sudoers.d/pipeline-svc` reads: +`# Temp grant per INT-0194 — DH 2023-11`. The ticket number references an internal +system the player cannot access. The initials `DH` — same initials as in Q006's +jbenton notes — don't correspond to any current employee. Sets `hook_dh_sudo_grant`. +Discoverable by: reading the sudoers file rather than just acting on the grant. + +**Failure Conditions:** Sudoers syntax error (should use `visudo`); service can +no longer function; broader access introduced. + +**Behavior Impact:** +- Precise scope: O+1 +- Remove sudo: R+1 +- Hook discovered: C+1 (connects to Q006's DH initials for players who found that) + +--- + +**Quest ID:** Q012 +**Title:** Memory Leak +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The AxiomFlow application on hermes is crashing every few +hours due to out-of-memory events. Identify the cause and implement a fix that +addresses the root problem. +**Linux Concepts:** `free -h`, `top`, `htop`, `/proc/meminfo`, zombie processes +(`ps aux` state column), cron job inspection, Python process management, +systemd service memory limits +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The app keeps going down — every three or four hours it just +dies and restarts. Dave said he's been getting logged out mid-session. The restart +is automatic so customers haven't called yet, but they will." 
+ +**Clue Trail:** +- `journalctl -u axiomflow` — OOM kill events every 3–4 hours +- `ps aux` during an OOM interval — many `axiomflow-report-gen` processes with + state `Z` (zombie) +- `/etc/cron.d/report-gen` — runs `axiomflow-report-gen` every 30 minutes +- The script is a Python process that forks but never calls `wait()` — zombies + accumulate and consume PID space, and the parent's memory grows +- Fix: correct the script (call `wait()` on each `subprocess.Popen` object, or use `subprocess.run()`) — + or constrain with systemd service limits (acceptable but not root-cause) + +**Solution Branches:** + +Branch 1 — Root cause (priority 100): Identifies the zombie accumulation from the +cron script, corrects the Python subprocess handling, confirms clean process table +after next run. `trust_delta: +2`. Flags: `hermes_report_gen_clean`. Follow-up ticket: T013. + +Branch 2 — Service limit (priority 60): Adds `MemoryMax` and `Restart=on-failure` +to the axiomflow service unit. Crashes are now bounded; zombies still accumulate but +are contained. `trust_delta: +0.5`. Flags: `hermes_app_restart_policy`. + +Branch 3 — Force-kill cron (priority 20): Adds a cron job that kills all +`axiomflow-report-gen` processes every 30 minutes. Works until a report is +mid-execution when killed. `trust_delta: -1`. Flags: `hermes_report_gen_force_killed`. + +**No hidden hook** in this quest. The technical trail is the whole story. + +**Failure Conditions:** OOM events continue; player introduces new instability. + +**Behavior Impact:** +- Root cause: O+1 +- Force-kill: R+1 + +--- + +**Quest ID:** Q013 +**Title:** The Baseline Check +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Priya's end-of-month security checklist asks the player to +audit their workstation against the company baseline: open ports, running services, +active accounts, home directory permissions. Document deviations.
+**Linux Concepts:** `ss -tlnp`, `systemctl list-units --type=service`, `getent passwd`, +`ls -la ~`, `umask`, reading and comparing against a baseline document +**Systems Used:** workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "End of your first month. Standard workstation audit: I've +attached the baseline checklist. Open ports, running services, account list, home +directory permissions. Document what you find. Flag anything that doesn't match." + +**Clue Trail:** +- Most findings are normal: expected services, expected ports +- One service is running but not on the baseline checklist: `axiomworks-telemetry` +- `systemctl status axiomworks-telemetry` — running, enabled, binary at + `/usr/local/bin/axiomworks-telemetry` +- `ss -tlnp` or `netstat -tlnp` — the telemetry service connects outbound (not + shown in `ss` for listening ports but visible in `netstat -anp` or `/proc`) + +**Solution Branches:** + +Branch 1 — Thorough (priority 100): Documents all deviations including the +telemetry service; investigates what the service is (service unit file contents, +binary provenance, any logs); reports complete findings. `trust_delta: +2`. +Flags: `workstation_audit_complete`. Follow-up ticket: T014. + +Branch 2 — Checklist-only (priority 50): Completes the audit against the checklist +but marks the telemetry service as "review later — may be legitimate." +`trust_delta: +0.5`. Priya follows up. + +Branch 3 — Disable to clean (priority 20): Disables the telemetry service without +investigating or reporting it. Service gone; unknown what it was doing. +`trust_delta: 0`. Flags: `workstation_telemetry_disabled_silently`. S+1. + +**Hidden Hook:** The telemetry service unit file (`/etc/systemd/system/axiomworks-telemetry.service`) +has an `ExecStart` line pointing to the binary, and the unit file has a comment line +at the top: `# deployed by pipeline — INT-0194`. The same internal ticket number +from Q011's sudoers comment. 
Sets `hook_telemetry_ticket_INT0194`. Discoverable by: +reading the service unit file as part of investigating what the service is. + +**Failure Conditions:** Audit incomplete; player creates instability while investigating. + +**Behavior Impact:** +- Thorough: O+1 +- Disable silently: S+1, R+1 +- Hook discovered: C+2 (connects INT-0194 across two quests — DH's ticket number) + +--- + +**Quest ID:** Q014 +**Title:** Rollback +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** A deployment to hermes this afternoon broke user +authentication in the staging application. Roll back to the previous known-good +package version and prevent automatic re-upgrade. +**Linux Concepts:** `apt-cache policy`, `apt install PACKAGE=VERSION`, `apt-mark hold`, +package version pinning, deployment rollback procedure +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The deployment this afternoon broke login — users can +authenticate but are immediately logged out. Phil has a customer using this +environment tomorrow. I need it rolled back now." + +**Clue Trail:** +- `apt-cache policy axiomflow-workers` — current version installed 3 hours ago +- Previous version available in the internal repo cache +- The regression is in session management — a code issue; infrastructure can't + fix the code, only roll back the package +- `apt install axiomflow-workers=2.4.0` installs prior version +- `apt-mark hold axiomflow-workers` prevents re-upgrade + +**Solution Branches:** + +Branch 1 — Rollback with hold (priority 100): Installs 2.4.0, holds the package, +confirms auth works, notifies Sarah and notes the hold is in place. `trust_delta: +2`. +Flags: `hermes_axiomflow_held`. Follow-up ticket: T015. + +Branch 2 — Rollback without hold (priority 50): Installs 2.4.0, no hold. Auto-upgrade +will re-break it on next run. `trust_delta: +0.5`. Flags: +`hermes_axiomflow_rolled_back`.
Follow-up incident: I010 (auto-upgrade re-installs +2.4.1 overnight). + +Branch 3 — Forward fix attempt (priority 10): Player attempts to diagnose and fix +the code issue rather than rolling back. Outside scope; fails. `trust_delta: -1`. + +**Hidden Hook:** `apt-cache showpkg axiomflow-workers` on vulcan shows the 2.4.1 +build timestamp: 3:12am — outside the scheduled build window. The same off-schedule +time pattern as the signing step removal and the audit-bridge build. Sets +`hook_2_4_1_off_schedule_build`. Discoverable by: looking at the build machine's +package metadata while researching what version to roll back to. + +**Failure Conditions:** Auth still broken; hold not applied; player introduced +new problems. + +**Behavior Impact:** +- Rollback with hold: O+1 +- Rollback without hold: O+0 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q015 +**Title:** The Quiet Cron +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Marcus has asked for a cron audit on vulcan: list all +scheduled jobs, attribute each to a service or owner, and flag anything that +can't be attributed. +**Linux Concepts:** `crontab -l` (per-user and system), `/etc/cron.d/`, +`/etc/cron.daily/`, `/etc/cron.weekly/`, cron syntax, correlating jobs to +services or owners +**Systems Used:** build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Routine cron audit on vulcan. List everything that's +scheduled — root crontab, system crontab, all of cron.d. I want to know who +owns each job and whether it still makes sense. Anything you can't attribute, +flag it." 
+ +**Clue Trail:** +- `crontab -l` for root and `pipeline-svc` — most jobs are attributable +- `/etc/cron.d/` directory — standard entries plus one named `axiomworks-collect` +- `axiomworks-collect` job runs at 2:57am; command: `/usr/local/bin/axiomworks-collect` +- The binary `/usr/local/bin/axiomworks-collect` exists and is executable +- No ticket, no documentation comment in the cron file itself, no recent entry + in any change log + +**Solution Branches:** + +Branch 1 — Thorough, with investigation (priority 100): Player lists all jobs, +attributes each, and for `axiomworks-collect`: runs `file` and `strings` on the +binary to understand what it does before flagging it — the binary name is +suggestive and a thorough audit would check it. Submits complete report including +what the binary calls. `trust_delta: +2`. Flags: `axiomworks_collect_cron_flagged`. +Follow-up ticket: T016. + +Branch 2 — Listed but not investigated (priority 60): Player lists all jobs, +flags `axiomworks-collect` as unattributed, but does not inspect the binary. +Report is honest but shallow. `trust_delta: +1`. Flags: `axiomworks_collect_noted`. + +Branch 3 — Incomplete list (priority 10): Player misses entries. Marcus follows +up. `trust_delta: -1`. + +**Hidden Hook:** Running `strings /usr/local/bin/axiomworks-collect` or +`ldd /usr/local/bin/axiomworks-collect` and checking its network behavior (or simply +reading any log it writes, if one exists) reveals it connects to an internal address. +The binary name and the ticket number in its help text — `INT-0194` — connect it +to the same ticket number from Q011 and Q013. Sets `hook_collect_binary_INT0194`. +The hook is only set in Branch 1 (player inspected the binary). In Branch 2, the +job is noted but not confirmed. Discoverable by: going one step further than the +ticket requires — investigating what an unattributed job actually does. + +**Failure Conditions:** Cron audit submitted without flagging unattributed jobs.
+ +**Behavior Impact:** +- Branch 1: O+1, C+2 (the INT-0194 connection is now three sightings) +- Branch 2: O+0 +- Hook discovered: C+2 (already in Branch 1 impact) + +--- + +**Quest ID:** Q016 +**Title:** The Door Left Open +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** A security scan found port 8080 on hermes reachable from +outside the office network. That port runs the AxiomFlow admin panel. Restrict +it to internal-only access and confirm. +**Linux Concepts:** `ufw`, `iptables`, `ss -tlnp`, nginx access control by IP +(`allow`/`deny`), CIDR notation, defense-in-depth (firewall + application layer) +**Systems Used:** web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Scan from this morning. Port 8080 on hermes is reachable +externally. That's the admin panel. It should be internal-only — restrict to +10.0.0.0/8. Confirm when done." + +**Clue Trail:** +- `ss -tlnp | grep 8080` — service listening on `0.0.0.0:8080` +- `ufw status` — no restriction on port 8080 +- Fix options: `ufw` rule restricting source to 10.0.0.0/8, or nginx `allow 10.0.0.0/8; deny all;` + in the 8080 server block, or both + +**Solution Branches:** + +Branch 1 — Defense in depth (priority 100): Restricts at both firewall and nginx +layer, confirms external access blocked, internal access works, reports to Priya. +`trust_delta: +2`. Flags: `hermes_admin_port_secured`. Follow-up ticket: T017. + +Branch 2 — Single layer (priority 60): Restricts at one layer only. Better. +Not layered. `trust_delta: +1`. Priya notes the single-layer approach. + +Branch 3 — Block entirely (priority 20): Blocks port for all traffic. Admin +panel inaccessible to everyone including internal users. `trust_delta: -1`. + +**No hidden hook** in this quest. The technical task is clean. + +**Failure Conditions:** Port still accessible externally; internal access broken; +ufw rules in conflict. 
+ +**Behavior Impact:** +- Defense in depth: O+1 +- Block entirely: R+1 + +--- + +### PHASE 3 — SUSPICION (Q017–Q024) + +Tier 2. Minimal guidance. Tickets state the problem, not the location. The clue +trail requires following evidence without direction. Branch tolerance is stricter. +Hook density increases: 5 hooks across 8 quests. + +--- + +**Quest ID:** Q017 +**Title:** Access Without a Ticket +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** workstation +**Primary Objective:** hermes's auth log shows SSH connections from an IP address +not in the asset inventory. Investigate: what account was used, what activity +occurred, is access still happening. +**Linux Concepts:** `/var/log/auth.log`, `grep` and log filtering, `last`, `who`, +`lastlog`, SSH session forensics, correlating authentication events with known assets +**Systems Used:** web\_server, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Auth log review surfaced connections to hermes from 10.0.0.47 +over the past 90 days. That address is not in our asset inventory. I want to know: +what account, any evidence of what was done, and whether it's ongoing." + +**Clue Trail:** +- `/var/log/auth.log` on hermes — `Accepted publickey for pipeline-svc from 10.0.0.47` +- Sessions: short duration, irregular hours (2–4am), spanning 6 months +- `~pipeline-svc/.bash_history` — disabled or empty (shell configured with `HISTSIZE=0`) +- DNS lookup for 10.0.0.47 — no reverse record; DHCP table has no entry +- `last pipeline-svc` — confirms session dates and source IP + +**Solution Branches:** + +Branch 1 — Full documentation (priority 100): Player documents all sessions (dates, +times, duration), notes history was disabled, traces what can be traced, reports that +10.0.0.47 is unknown and appears in prior audit anomalies if the player has accumulated +hooks. `trust_delta: +3`. Flags: `unknown_ip_auth_documented`. Priya: "Thank you. +I'll look into that address." 
Follow-up ticket: T018. + +Branch 2 — Partial (priority 50): Player documents the sessions but cannot or does +not trace the IP or connect it to prior findings. `trust_delta: +1`. +Flags: `unknown_ip_auth_noted`. + +Branch 3 — Block and close (priority 20): Player blocks the IP at the firewall and +closes the ticket without full investigation. Access stops; record is thin. +`trust_delta: 0`. Flags: `unknown_ip_blocked_uninvestigated`. S+1. + +**Hidden Hook:** The `pipeline-svc` account was the one from Q011 — overly broad +sudo that the player (may have) scoped down. If `hook_dh_sudo_grant` was set, a +player connecting the dots now knows that whoever had access to that account from +10.0.0.47 previously had `NOPASSWD: ALL`. Sets `hook_pipeline_svc_external_sessions`. +This is not a new discoverable artifact — it's a cross-reference that sets a flag +if both the Q011 hook and the Q017 investigation are present. + +**Failure Conditions:** Player doesn't investigate before taking action; evidence +destroyed before documented. + +**Behavior Impact:** +- Full documentation: O+1, C+2 (cross-reference with prior hooks) +- Block and close: S+1, R+1 +- Cross-reference hook: C+2 (only if `hook_dh_sudo_grant` was set; the connection + is the behavior, not finding a new artifact) + +--- + +**Quest ID:** Q018 +**Title:** The User Who Wasn't Onboarded +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** web\_server +**Primary Objective:** A user account exists on both ares and hermes with no +corresponding HR record. Investigate the account's history and scope before removal. +**Linux Concepts:** Cross-host account audit, `last` and `lastlog`, `find / -user`, +`id`, account removal across multiple hosts with `userdel` +**Systems Used:** workstation, web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Access review surfaced account `rford` on both the workstation +and the web server. HR has no record of this person. 
The account has had recent +activity on hermes. Full audit before removal." + +**Clue Trail:** +- Account on both machines; `last rford` on hermes shows login 3 weeks ago +- Files owned by `rford` on hermes: `find /var/www /etc -user rford` — one result: + `/var/www/axiomworks/config/.rford_run` — a shell script +- The script, if read, runs a data aggregation command and outputs to a temp directory +- The account's group memberships include `www-data` — more access than a typical + employee account +- No ticket creating the account on either machine + +**Solution Branches:** + +Branch 1 — Full audit with archive (priority 100): Player checks activity on both +hosts, reads and archives the found file, checks group memberships, removes account +from both machines, documents fully. `trust_delta: +3`. Flags: `rford_account_removed_thorough`. +Follow-up ticket: T019. + +Branch 2 — Remove without reading (priority 40): Removes account from both machines +without examining files. Evidence lost. `trust_delta: +1`. Priya asks for the files; +they're gone. Flags: `rford_account_removed_fast`. + +Branch 3 — Workstation only (priority 10): Removes from workstation, misses hermes. +`trust_delta: -1`. Hermes account remains active. + +**Hidden Hook:** The `.rford_run` script, if read before archiving, outputs a +data aggregation of AxiomFlow session logs and sends it to a temp directory with +a timestamp. The script has a comment: `# collect step — called by INT-0194 +automation`. Three previous hooks have referenced INT-0194. Sets +`hook_rford_script_INT0194`. Discoverable by: reading the file before archiving, +which proper archival practice would do. + +**Failure Conditions:** Evidence destroyed without reading; account not removed from +both machines; player removes account with active processes still running. 
+ +**Behavior Impact:** +- Full audit: O+1 +- Read the file: C+3 (INT-0194 is now four references — pattern is now clear to + any player who has been collecting these) +- Remove without reading: R+2 + +--- + +**Quest ID:** Q019 +**Title:** The Diff That Didn't Match +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** A deployment validation check is failing because the installed +package on hermes doesn't match the expected checksum. Investigate why the package +differs from the tagged source. +**Linux Concepts:** `dpkg-deb -x`, `diff -r`, `md5sum` / `sha256sum`, package +integrity verification, comparing installed vs. source artifacts +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "The post-deploy checksum check on hermes failed. The installed +axiomflow-workers doesn't match the tagged release checksum. Nikhil says he didn't +change anything. Find what's different and where the difference came from." + +**Clue Trail:** +- `dpkg-deb -x /srv/packages/axiomflow-workers_2.4.2_amd64.deb /tmp/pkg-extract` +- `diff -r /tmp/pkg-extract /srv/src/axiomflow-workers-2.4.2/` — two files differ +- The modified files are in the session logging module; they add a secondary logging + call to a local socket +- The modification is not in the tagged source commit; it was added to the build + environment itself — a file in the build script directory that patches sources + before compilation + +**Solution Branches:** + +Branch 1 — Full forensics (priority 100): Player unpacks the package, diffs against +source, identifies the modified files, traces the modification to the build environment +patch file, documents the full chain of custody. `trust_delta: +3`. Flags: +`package_modification_documented`. Marcus: "Keep this to yourself and Priya for now. +I mean that." Follow-up ticket: T020. 
+ +Branch 2 — Diff identified, source not traced (priority 50): Player confirms the +diff exists but characterizes it as a build environment artifact without tracing the +cause. `trust_delta: +1`. Flags: `package_diff_found`. + +Branch 3 — Confirm and escalate without analysis (priority 20): Player confirms +something is different and escalates without characterizing what. `trust_delta: 0`. + +**Hidden Hook:** The patch file in the build environment that injects the modification +has a comment at the top: `# INT-0194 session capture — do not remove`. The fifth +reference to the same internal ticket number. Sets `hook_build_patch_INT0194`. +Discoverable by: tracing the modification source, which Branch 1 requires. + +**Failure Conditions:** Player attempts to remove the patch without consulting Marcus +first. Applies R+3, and Marcus's response is cooler — removing evidence before it's +documented is a problem. + +**Behavior Impact:** +- Full forensics: O+1, C+3 +- Remove patch unilaterally: R+3, S+1 +- Hook discovered: C+3 (already in full-forensics impact) + +**Narrative Notes:** This is the moment the INT-0194 pattern resolves for a thorough +player. Five references across different systems, all pointing to the same internal +ticket ID. Marcus's response is his quietest and most deliberate. He says less than +normal, which means more. + +--- + +**Quest ID:** Q020 +**Title:** Pressure From Above +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Kowalski has requested a written access and change summary +for the past 30 days before a scheduled status meeting. Compile it accurately from +system logs.
+**Linux Concepts:** `journalctl`, `last`, `/var/log/auth.log`, log filtering by +date range, compiling a change record from system state evidence +**Systems Used:** workstation +**Ticket Sender:** Dave Kowalski +**Ticket Summary:** "Before Thursday's check-in I need the following in writing: +access grants issued in the past 30 days, configuration changes to production or +staging, and open incidents. Pull it from the logs. End of day Wednesday." + +**Clue Trail:** +- Player reads auth logs, systemd journals, and any change log Marcus maintains +- Accurate log reading requires: `journalctl --since "30 days ago"`, `last`, reviewing + Priya's shift review emails for documented changes +- The technical work is real — log compilation at this scale requires knowing the + right tools + +**Solution Branches:** + +Branch 1 — Complete and accurate (priority 100): Player includes all documented +activity including any anomalies that surfaced through tickets. `trust_delta: +2`. +Flags: `kowalski_report_accurate`. Marcus sends a brief private note: "Good call +being complete." Follow-up ticket: T021. + +Branch 2 — Accurate but narrow (priority 60): Report includes only ticket-related +activity; omits anomalies that came up during investigation. Accurate; incomplete. +`trust_delta: +1`. Flags: `kowalski_report_narrow`. + +Branch 3 — Omits or sanitizes (priority 10): Player downplays or omits anomalies +that would raise questions. `trust_delta: -2`. Flags: `kowalski_report_sanitized`. +S+3 (Priya will eventually compare this against log evidence and notice the gaps). + +**Failure Conditions:** Report submitted without log evidence; report materially +inaccurate. 
+ +**Behavior Impact:** +- Complete: O+2 +- Sanitized: R+3, S+3 + +--- + +**Quest ID:** Q021 +**Title:** The Backup That Wasn't Tested +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** The last documented backup restore test for hermes is 14 +months old. Perform a restore test of a non-critical service config directory, +document the procedure, and report the result honestly. +**Linux Concepts:** `rsync`, `tar`, backup archive integrity, `sha256sum` verification, +restore testing to a non-production location, documenting backup procedures +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Our backup procedure calls for a quarterly restore test. +The last documented test is 14 months old. Pick a non-critical config directory +on hermes, verify the backup can be restored to a test location, and document +the steps and the result. Don't touch production paths." + +**Clue Trail:** +- Backups at `/srv/backups/hermes/` on vulcan — recent archive looks intact +- Checksum file present; most checksums match +- One archive from 5 months ago: checksum does not match a recalculated value + — the archive file was modified after initial creation (timestamps show a + modification date after the archive date) +- Recent archive (3 days old) restores cleanly to `/tmp/restore-test/` + +**Solution Branches:** + +Branch 1 — Thorough (priority 100): Player identifies the corrupted older archive +(and notes the timestamp anomaly), successfully restores from the recent clean +archive, documents both findings — the corruption and the successful restore. +`trust_delta: +2`. Flags: `backup_restore_tested`. Follow-up ticket: T022. + +Branch 2 — Restore only (priority 50): Player tests the restore successfully but +misses the corrupted archive. Report is optimistic. `trust_delta: +1`. +Flags: `backup_restore_partial_test`. 
+ +Branch 3 — False report (priority 10): Player documents a successful test without +actually running it. `trust_delta: -3`. Flags: `backup_test_falsified`. S+2. +Priya's next audit will check against log evidence. + +**Hidden Hook:** The archive with the checksum mismatch was last modified at 3:17am — +the same 3am activity window as the off-schedule builds and the cron job. The archive's +internal timestamp (from `tar -tv`) shows the files were correct at backup time; the +outer archive was replaced later. Sets `hook_backup_archive_tampered`. Discoverable +by: noticing the timestamp anomaly on the corrupted archive, which requires checking +modification timestamps on the archive files themselves. + +**Failure Conditions:** Restore test not actually run; player modifies production +paths; report falsified. + +**Behavior Impact:** +- Thorough: O+1 +- False report: R+4, S+2 +- Hook discovered: C+2 + +--- + +**Quest ID:** Q022 +**Title:** The Firewall Rule +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** A firewall rule on the workstation allows outbound connections +to a specific internal IP on a specific port. No ticket references this rule. +Investigate whether it is legitimate; remove it if not. +**Linux Concepts:** `ufw status numbered`, `iptables -L -n`, firewall rule audit, +rule provenance (when was it added, can it be traced), `ufw delete` +**Systems Used:** workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Firewall audit on the workstation found a rule allowing outbound +to 10.0.0.47:9301. No ticket references it. I need: when was it added, do you know +what that address is, and a recommendation." 
+ +**Clue Trail:** +- `ufw status numbered` — rule present, destination 10.0.0.47 port 9301 +- Rule creation date cannot be directly queried from ufw; `journalctl` shows when + ufw last reloaded; system logs from that period may show the rule being added +- 10.0.0.47 appears in Q017's auth log investigation; 9301 appeared in Q010's nginx + proxy block — for a player who has been paying attention + +**Solution Branches:** + +Branch 1 — Full cross-reference (priority 100): Player connects this rule to +prior findings (10.0.0.47 from auth logs; port 9301 from nginx config), explains +the connection, recommends removal, removes the rule with `ufw delete`, reports. +`trust_delta: +3`. Flags: `firewall_rule_9301_removed`. Priya: "That matches what +I've been seeing." Follow-up ticket: T023. + +Branch 2 — Remove without context (priority 50): Player removes the rule but +doesn't connect it to prior findings. `trust_delta: +1`. Flags: `firewall_rule_removed`. + +Branch 3 — Keep with note (priority 20): Documents the rule as "unverified" and +leaves it. `trust_delta: 0`. + +**Failure Conditions:** Rule not assessed; player introduces new firewall problems. + +**Behavior Impact:** +- Full cross-reference: O+1, C+3 (this is the convergence point for three prior data threads) +- Remove without context: O+0 +- Hook: no new hook — the cross-reference IS the payoff for accumulated hooks + +--- + +**Quest ID:** Q023 +**Title:** Overnight Changes +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Files on hermes were modified at 3am on Thursday with no +corresponding change ticket. Find what changed and assess whether to revert. 
+**Linux Concepts:** `find / -newer REFERENCE_FILE`, `stat`, file modification +timestamps, config file comparison, `git diff` if applicable, change ticket +correlation +**Systems Used:** web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Something touched files on hermes at 3am Thursday. The +backup ran at 2am and files weren't changed then. Find what changed and tell +me if we need to revert." + +**Clue Trail:** +- `find /etc /var/www -newer /var/log/axiomflow/nightly-backup.log -not -newer /var/log/nginx/access.log` + narrows to files modified in the 3am window +- Modified files: + 1. `/var/www/axiomworks/config/app.config` — database connection string now + points to a secondary host + 2. `/etc/nginx/conf.d/upstream.conf` — upstream block added for the same host +- The secondary host referenced is not in the known asset inventory + +**Solution Branches:** + +Branch 1 — Revert and document (priority 100): Identifies both files, reverts +both to known-good state (from the backup taken just before the modifications), +documents the original changes with file evidence, reports fully. `trust_delta: +3`. +Flags: `hermes_overnight_changes_reverted`. Marcus: "Revert was right. Don't +touch anything else." Follow-up ticket: T024. + +Branch 2 — Document without revert (priority 50): Identifies changes and reports +but leaves them active. `trust_delta: +1`. Changes remain. + +Branch 3 — Revert without documenting (priority 20): Reverts but doesn't record +what was changed. `trust_delta: +0.5`. + +**Hidden Hook:** The secondary host in both modified files is at `10.0.1.15` — a +different IP from 10.0.0.47. Two machines. Sets `hook_second_host_10_0_1_15`. +Discoverable by: recording the specific values in the modified files, which proper +documentation requires. + +**Failure Conditions:** Changes not assessed; player reverts production paths +without confirming impact; modifications left active without escalation.
+ +**Behavior Impact:** +- Revert and document: O+1, C+1 (new IP is a new data point) +- Revert without documenting: O+0 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q024 +**Title:** The Audit Window +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** Priya is conducting a formal access audit. Verify current +access levels and service account configurations on all three machines against +the documented expected state. +**Linux Concepts:** Cross-host audit, `getent passwd`, `sudo -l`, `groups`, SSH +`authorized_keys` review, service account scope verification +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Formal audit. Every service account across all three machines: +privileges, group memberships, sudo grants, SSH keys in authorized_keys. Compare +against the baseline I've attached. Flag everything that doesn't match." + +**Clue Trail:** +- Audit covers all three machines systematically +- Findings depend on what the player has fixed and what they've left open +- Dale's deploy key on hermes (Q001/Q003 hook) — if not removed, it's a live finding +- `pipeline-svc` sudo scope — if Q011 was only partially fixed, it appears here +- `axiomworks-telemetry` service — if Q013 found it, it's in the player's record; + if not, it's a new finding here + +**Solution Branches:** + +Branch 1 — Thorough (priority 100): Player audits all three machines, identifies +every discrepancy, includes Dale's key if still present, submits complete cross- +referenced report. `trust_delta: +3`. Flags: `formal_audit_complete`. Priya: "This +is complete. I'll schedule a follow-up with Marcus." Follow-up ticket: T025. + +Branch 2 — Partial (priority 50): Misses 1–2 findings. `trust_delta: +1`. Priya +follows up specifically on each gap. + +Branch 3 — Surface-level (priority 10): Misses most findings. `trust_delta: -1`. 
+ +**No hidden hook** in this quest — the audit produces findings based on the world +state, not new anomalies. + +**Failure Conditions:** Audit submitted with material inaccuracies. + +**Behavior Impact:** +- Thorough: O+2 +- Dale's key found if not previously: C+1 + +--- + +### PHASE 4 — INVESTIGATION (Q025–Q032) + +Tier 3. Problem-solving only. Tickets state the problem, no location, no approach. +The player is expected to apply their full toolkit. Hook density: 3 hooks across +8 quests, each requiring cross-referencing prior findings. + +--- + +**Quest ID:** Q025 +**Title:** Who Owns the Key +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** workstation +**Primary Objective:** Following the formal audit, trace the origin of the Dale +SSH key in deploy-user's authorized_keys. When was it added, by what session, +and when was it last used. +**Linux Concepts:** `ssh-keygen -lf` (fingerprinting), `/var/log/auth.log` grep for +fingerprint, correlation with session timestamps, absence of key from official inventory +as a finding +**Systems Used:** web\_server, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "The key in deploy-user's authorized_keys that doesn't have +a current employee match. I need provenance: when added, what session, last used. +Don't remove it yet. Document first." 
+ +**Clue Trail:** +- `ssh-keygen -lf /home/deploy-user/.ssh/authorized_keys` — fingerprint of the Dale key +- `grep <fingerprint> /var/log/auth.log` on hermes — sessions that authenticated + with this key; last session 5 months ago +- The session that added the key: `/var/log/auth.log` doesn't show key addition, + but a root session from `10.0.0.47` at the right timestamp aligns (if Q017 was + investigated, the player can correlate) +- The key is not in any official key inventory document + +**Solution Branches:** + +Branch 1 — Full provenance (priority 100): Player fingerprints, traces sessions, +correlates add timestamp with known session data, notes the key's absence from +official inventory, produces a complete chain. `trust_delta: +3`. Flags: +`dale_key_provenance_documented`. Marcus sends a message outside normal ticket +channels — a Slack message, same terse voice, one sentence longer than usual. +Follow-up ticket: T026. + +Branch 2 — Sessions documented, source not traced (priority 50): Finds session +history but cannot attribute who added the key. `trust_delta: +1`. + +**Hidden Hook:** The most recent session authenticated with this key was on a +date that maps to a known incident — the same date hermes had an unexplained outage +6 months ago, visible in the nginx error logs. A player who correlates the auth log +date with the nginx error log from the same timeframe can connect Dale's last known +access to a specific event. Sets `hook_dale_key_last_session_incident_date`. +Discoverable by: cross-referencing auth log dates with nginx error log dates — not +required to complete the provenance chain, but available to a player who thinks to check. + +**Failure Conditions:** Player removes the key before documenting; Priya explicitly +said not to. 
+ +**Behavior Impact:** +- Full provenance: O+1, C+2 +- Remove before documenting: R+3, S+2 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q026 +**Title:** The Build Chain +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Reconstruct the full build pipeline modification history +on vulcan for the past 12 months. Attribute each change to a person or session. +Flag any changes without a corresponding official release. +**Linux Concepts:** `git log`, `git diff`, `git blame`, file system timestamps, +bash history correlation, build script comparison, release note cross-reference +**Systems Used:** build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "I need a complete history of every change to the build scripts +on vulcan over the past year. Where you can, attribute each change to a person. +Cross-reference with release notes. Anything without a release: flag it." + +**Clue Trail:** +- Build scripts are in a git repository on vulcan +- `git log --all --oneline --since="1 year ago"` — full history +- Most commits: legitimate, attributed to Nikhil Sharma +- Three anomalous commits: + 1. Removal of `sign-package` step — committed by `pipeline-svc` account (not a person) + 2. Addition of the build-time patch file (`INT-0194` reference) — same `pipeline-svc` + commit + 3. A commit adding `axiomflow-audit-bridge` to the build target list — `pipeline-svc` +- None of these three have corresponding release notes + +**Solution Branches:** + +Branch 1 — Complete annotated history (priority 100): Player produces a full +timeline, attributes the three anomalous commits to the `pipeline-svc` service +account, notes the discrepancy between that account making commits and its stated +purpose (restart services only), flags all three as undocumented. `trust_delta: +3`. +Flags: `build_chain_audit_complete`. Follow-up ticket: T027. 
+ +Branch 2 — Partial (priority 50): Covers legitimate changes, flags some but not +all anomalous ones. `trust_delta: +1`. + +**No hidden hook** in this quest — the findings are the point. + +**Failure Conditions:** Report submitted without flagging anomalous commits; +player modifies the git history. + +**Behavior Impact:** +- Complete: O+1, C+2 +- Modify git history: R+5 (destroying forensic evidence) + +--- + +**Quest ID:** Q027 +**Title:** Asset Inventory Reconciliation +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** workstation +**Primary Objective:** Reconcile the internal asset inventory against the actual +network — every host that should be on the network, verify it is; every host +that appears on the network, verify it is in the inventory. Document discrepancies. +**Linux Concepts:** `nmap` (host discovery), `arp -n`, `ping`, internal DNS +queries (`dig`, `host`), asset inventory document comparison, subnet scanning +**Systems Used:** build\_machine, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "I need the asset inventory reconciled against the actual +network. Scan the 10.0.0.0/24 range. Every host that responds: is it in the +inventory? Every host in the inventory: does it respond? Document every discrepancy." 
+ +**Clue Trail:** +- `nmap -sn 10.0.0.0/24` from build\_machine — host discovery scan +- Known hosts respond as expected (ares, hermes, vulcan, and others from inventory) +- 10.0.0.47 responds — not in the inventory +- 10.0.1.15 responds — not in the inventory (new from Q023's hook for players + who found it, or a new discovery for those who didn't) +- Both have SSH open; 10.0.0.47 has an additional service on port 9301 +- DNS resolution returns nothing for either + +**Solution Branches:** + +Branch 1 — Complete reconciliation (priority 100): Player documents all hosts, +identifies both unknown hosts, notes the service on 9301 for 10.0.0.47, cross- +references with prior anomalies where relevant, submits a complete reconciliation +report. `trust_delta: +3`. Flags: `asset_inventory_reconciled`. Priya: "I'm going +to need to take this to Kowalski." Follow-up ticket: T028. + +Branch 2 — Partial reconciliation (priority 50): Documents inventory hosts, +finds 10.0.0.47 but misses 10.0.1.15 or vice versa. `trust_delta: +1`. + +Branch 3 — Probe the unknown hosts (priority 20): Player makes active connection +attempts to services on the unknown hosts beyond identification. `trust_delta: 0`. +R+3. Priya's next message: "I said reconcile, not probe." + +**Hidden Hook:** Running the full scan reveals that 10.0.0.47 and 10.0.1.15 have +identical SSH host key fingerprints — they are using the same host key, which +suggests they were provisioned from the same template. Sets +`hook_two_hosts_same_key`. Discoverable by: comparing the SSH fingerprints from +the nmap output or from `ssh-keyscan`, rather than just noting the IPs. + +**Failure Conditions:** Scan incomplete; player makes unauthorized connections; +report submitted with known gaps left undisclosed. 
+ +**Behavior Impact:** +- Complete: O+1, C+2 +- Probe: R+3 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q028 +**Title:** The Archive Restore +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** workstation +**Primary Objective:** A backup archive from 6 months ago is needed for a compliance +audit. Restore it to a staging location on the workstation and confirm its integrity. +The archive is from the previous sysadmin's final working week. +**Linux Concepts:** `tar` (extract, verify), `sha256sum`, archive integrity checking, +restore to non-production path, reading file metadata from within an archive +(`tar -tv`) +**Systems Used:** build\_machine, workstation +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Compliance audit needs the working-directory archive from the +end of last year — it should be in the backup store on vulcan. Restore it to a staging +path on the workstation and confirm the contents are intact. Let me know what's in it." + +**Clue Trail:** +- Archive at `/srv/backups/workstation/wd-archive-YYYYMMDD.tar.gz` on vulcan +- `sha256sum` check — archive passes (this one is not the tampered one from Q021) +- `tar -xzf` to `/tmp/restore-staging/` on workstation — succeeds +- Contents: scripts, config fragments, a partial README text file +- The README is fragmentary — it's working notes, not a confession. It references + the INT-0194 deployment and contains a note: "bridge not logging correctly — + check port forwarding." The rest is infrastructure checklists + +**Solution Branches:** + +Branch 1 — Restore and full inventory (priority 100): Player restores the archive, +verifies integrity, inventories all contents (including reading the README), reports +to Marcus what's there. `trust_delta: +2`. Flags: `compliance_archive_restored`. +Marcus: "Right. Thank you." Follow-up ticket: T029. 
+ +Branch 2 — Restore and integrity check only (priority 50): Verifies the archive +restores cleanly but doesn't inventory contents. `trust_delta: +1`. Marcus asks +what's in it. + +Branch 3 — Integrity failure reported (priority 20): Player incorrectly reports +the archive as corrupted without fully testing the restore. `trust_delta: -1`. + +**Hidden Hook:** The README fragment mentions INT-0194 and "port forwarding" — if the +player has been collecting the INT-0194 thread, this is the sixth reference. The +working notes also reference a host called `styx` in a routing context. Sets +`hook_archive_readme_INT0194` and `hook_styx_in_routing_context`. Discoverable by: +reading the README file, which properly inventorying the archive would do. + +**Failure Conditions:** Archive not restored; contents not verified; player runs any +scripts found in the archive. + +**Behavior Impact:** +- Full inventory: O+1 +- Run scripts from archive: R+4 (running unknown code from a previous sysadmin + is exactly the kind of reckless action that should trigger risk) +- Hook discovered: C+2 + +**Narrative Notes:** This is not "Marcus gives the player Dale's files and asks them +to investigate." It is a compliance archive restore with a legitimate operational +purpose. The player happens to find working notes inside it. The notes are fragmentary +and don't explain everything — they're field notes, not a plot summary. Marcus's +"what's in it" is a routine question after a restore, not an invitation to investigate. + +--- + +**Quest ID:** Q029 +**Title:** The Service That Doesn't Belong +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** A systemd service on hermes is running but is not listed in +any deployment manifest or change ticket. Audit what it does, whether it is +currently active, and produce a full service characterization. 
+**Linux Concepts:** `systemctl show`, `systemd-analyze`, service unit file anatomy, +`lsof`, `ss` for service network connections, `strace` basics, process ownership +**Systems Used:** web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "James found a service on hermes that isn't in any deployment +record. Service name: `axiomflow-bridge`. I need a full characterization: what it +does, what it connects to, when it was installed. Don't stop it. Document first." + +**Clue Trail:** +- `systemctl show axiomflow-bridge` — unit file, state, runtime info +- Unit file at `/etc/systemd/system/axiomflow-bridge.service` — `ExecStart` points + to a binary; unit file has `INT-0194` in a comment +- `lsof -p <pid>` — service has open connections to 10.0.0.47:9301 +- `ss -tp` — confirms active connection +- Binary at `/usr/local/bin/axiomflow-bridge` — a Go binary; `strings` output + shows internal API paths and the same INT-0194 reference in help text +- Installation date from package metadata or file `mtime` — matches the 3am + activity window + +**Solution Branches:** + +Branch 1 — Full characterization (priority 100): Player documents unit file, +binary provenance, network connections, installation date, cross-references with +INT-0194 and 10.0.0.47 from prior findings. `trust_delta: +3`. Flags: +`bridge_service_documented`. Priya: "This is consistent with what I've been +building. Don't stop it yet." Follow-up ticket: T030. + +Branch 2 — Partial (priority 50): Documents what the service is and that it +connects out, but doesn't trace the INT-0194 connection or installation date. +`trust_delta: +1`. + +Branch 3 — Stops the service (priority 10): Player stops the service despite +explicit instruction not to. `trust_delta: -2`. R+2. S+2. Priya: "I said document +first." + +**No additional hidden hook** — the quest itself is the hook resolution for INT-0194. + +**Failure Conditions:** Service stopped against instruction; characterization incomplete. 
+ +**Behavior Impact:** +- Full characterization: O+1, C+3 (this is the operational confirmation of INT-0194) +- Stop the service: R+2, S+2 + +--- + +**Quest ID:** Q030 +**Title:** Keep the Lights On +**Narrative Phase:** Investigation +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The production application on hermes is returning 502 errors. +Fix it. The investigation context is ongoing but the service still needs to run. +**Linux Concepts:** `systemctl`, nginx upstream configuration, application log +reading (`journalctl`, app logs), database connection strings, process restart +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "I know something is happening. I don't know what. But I have +paying customers on a system that is returning 502 errors and I need it running. +Whatever else is going on — please." + +**Clue Trail:** +- nginx upstream is timing out — `journalctl -u nginx` shows gateway timeout errors +- Application log shows it is failing to connect to the database +- `/var/www/axiomworks/config/app.config` — database connection string; check + whether it was modified (if Q023's revert was clean, the string is correct; if + not, it may point to the secondary host) +- Standalone root cause if Q023 was clean: the database service on the primary + host is not running — `systemctl status postgresql` shows it crashed overnight +- Fix: restart the database service (or correct the connection string if Q023 was + not fully resolved) + +**Solution Branches:** + +Branch 1 — Diagnose and fix (priority 100): Player reads nginx and app logs, +identifies the database connection failure, finds the cause (service down or +wrong connection string), applies the correct fix, confirms app is serving. +`trust_delta: +2`. Flags: `hermes_production_restored`. Sarah: "Thank you. +Seriously." Follow-up ticket: T031. 
+ +Branch 2 — Service restart without diagnosis (priority 40): Player restarts the +app service without finding the root cause. App comes up temporarily; may fail +again. `trust_delta: 0`. + +**No hidden hook.** + +**Failure Conditions:** App still returning errors; player makes changes that +worsen the state. + +**Behavior Impact:** +- Diagnose and fix: O+2 (maintaining professional duty during investigation is + the behavior being measured) +- Restart without diagnosis: O+0 + +--- + +**Quest ID:** Q031 +**Title:** The Access Review +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** Kowalski has initiated a formal privileged access review +for all accounts with elevated permissions. The player must compile an accurate +account of every privileged action taken across all three machines in the past +60 days. +**Linux Concepts:** `journalctl` with filters for privileged commands, `sudo` log +reading (`/var/log/auth.log`), audit log compilation, accurate self-reporting of +access history +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Dave Kowalski +**Ticket Summary:** "Given recent audit activity, we're conducting a standard +privileged access review. I need from you: every privileged command run in the +past 60 days, every system accessed with elevated permissions, and any temporary +access grants that were used. Standard procedure." + +**Clue Trail:** +- Player compiles from their own auth logs and sudo logs on each machine +- Technical skill required: `grep sudo /var/log/auth.log | grep <username>`, + `journalctl _COMM=sudo` +- The review covers what the player actually did; accuracy is the objective + +**Solution Branches:** + +Branch 1 — Complete and accurate (priority 100): Player provides a complete, +honest account of all privileged actions, including any investigation-period +access that extended beyond their normal role. 
`trust_delta: +2`. Flags: +`access_review_passed`. S-2 (accurate self-reporting reduces suspicion). +Follow-up ticket: T032. + +Branch 2 — Accurate but minimal framing (priority 60): Report is factually +accurate but frames borderline actions favorably without explicitly misrepresenting +them. `trust_delta: +0.5`. Suspicion unchanged. + +Branch 3 — Omits significant actions (priority 10): Player leaves out actions that +would raise questions. `trust_delta: -3`. Flags: `access_review_incomplete`. S+4. +This is a major suspicion trigger. + +**Failure Conditions:** Material inaccuracies; privileged commands claimed that +don't match log evidence. + +**Behavior Impact:** +- Complete: O+3, S-2 +- Omit: R+3, S+4 + +--- + +**Quest ID:** Q032 +**Title:** Loose Ends +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** Before the situation moves to its next phase, Marcus wants +the infrastructure in a known and correct state. Remediate any outstanding +configuration issues on hermes and vulcan, and document the current state. +**Linux Concepts:** Synthesis — all concepts from the campaign applied to +remediation; logrotate, NTP, SSH configuration, repo management, service auditing, +firewall rules +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Before this goes any further, I want the environment clean. +Everything we've documented as a problem: either fix it or document it as known +and accepted. Do a full pass on hermes and vulcan. Not to cover anything — because +whatever happens next, those machines need to be in a known state." 
+ +**Clue Trail:** +- Player reviews world flags representing open issues from prior quests +- Each unresolved issue (logrotate, NTP, nginx config, sudo scope, certbot timer) + is a task in this quest +- The more prior quests were resolved cleanly, the less remediation is needed + +**Solution Branches:** + +Branch 1 — Clean environment (priority 100): All outstanding issues resolved or +explicitly documented as accepted. Both machines in known, stable state. `trust_delta: +3`. +Flags: `environment_clean`. Marcus: "Good. That's all I needed to know." Follow-up: T033. + +Branch 2 — Mostly clean (priority 60): Most issues resolved; a few deferred with +documentation. `trust_delta: +1`. Follow-up: T033. + +Branch 3 — Significant gaps (priority 20): Multiple unresolved issues remain +undocumented. `trust_delta: -1`. Follow-up: T033 (game continues regardless). + +**No hidden hook.** + +**Failure Conditions:** Issues claimed as resolved that are not; new problems +introduced during remediation. + +**Behavior Impact:** +- Clean: O+2 +- Significant gaps: O+0 + +--- + +### PHASE 5 — CONFLICT (Q033–Q040) + +Tier 3 throughout. Problem-solving only. Tickets are operational — real work that +intersects with the investigation context. Hook density: 2 hooks, both requiring +prior context to understand. + +--- + +**Quest ID:** Q033 +**Title:** The Revocation Request +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Kowalski has asked the player to remove the `pipeline-svc` +account from vulcan before an external audit. The request is operationally +legitimate. The account is also central to the investigation. 
+**Linux Concepts:** `userdel`, account archival, sudo log preservation, change +documentation, understanding when "do the task" and "do the task well" diverge +**Systems Used:** build\_machine +**Ticket Sender:** Dave Kowalski +**Ticket Summary:** "Before the external auditors arrive Tuesday, please remove +the `pipeline-svc` account from the build machine. It's no longer needed and we +want a clean access list. Handle it before end of day Monday." + +**Clue Trail:** +- The account exists; Kowalski's request is valid on its face +- The account is in the player's investigation record from Q011, Q017, Q026, Q031 +- Removing it cleanly before an audit removes an evidence trail +- The player can: remove it as requested; archive its logs and sudo history first + and then remove it; or consult Marcus before acting + +**Solution Branches:** + +Branch 1 — Archive then remove (priority 100): Player archives all sudo logs, +auth log entries, and crontab entries for `pipeline-svc`, documents the account's +investigation relevance, removes the account, reports to Priya that archival was +performed before removal. `trust_delta: +3`. Flags: `pipeline_svc_removed_with_trail`. +Follow-up ticket: T034. + +Branch 2 — Remove as instructed (priority 60): Player removes the account without +additional archival. The audit will run cleanly. Evidence is gone. `trust_delta: 0`. +Flags: `pipeline_svc_removed_clean`. O+2 (following instructions is the behavior), +but investigation trail is diminished. + +Branch 3 — Ask Marcus before acting (priority 50): Player messages Marcus before +proceeding. Marcus says: "Archive what matters. Then do it." This leads to Branch 1. +`trust_delta: +1`. + +Branch 4 — Refuse outright (priority 10): Player refuses without explanation. +`trust_delta: -2`. S+3. Kowalski escalates. 
+ +**No hidden hook.** + +**Failure Conditions:** Account removed without any archival or documentation; +player creates new accounts instead of removing; player escalates in a way that +creates more noise than the situation requires. + +**Behavior Impact:** +- Archive then remove: C+1 (understanding the investigation implications is curiosity) +- Remove as instructed: O+2 +- Refuse outright: S+3, R+1 + +--- + +**Quest ID:** Q034 +**Title:** Two Tickets +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** Two tickets arrive simultaneously — one from Marcus (signing +key rotation on vulcan, tonight only) and one from Sarah (production outage on hermes, +immediate). Complete both. +**Linux Concepts:** GPG signing key rotation (vulcan), nginx/application +troubleshooting and service restoration (hermes), sequencing time-sensitive tasks +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb / Sarah Chen +**Ticket Summary:** Marcus: "The package signing key needs to be rotated tonight — +the window is before 2am. This can't slip to tomorrow; the old key expires at 2am +and builds will break." Sarah: "Production is down. I know Marcus gave you something +tonight. I have customers who cannot access the system." + +**Clue Trail:** +- Production outage (hermes): nginx config error from a bad deploy, fixable in 15–20 + minutes with correct diagnosis +- Signing key rotation (vulcan): specific sequence — generate new key, update keyring + on vulcan, push new public key to hermes's apt trusted keys, test package verify — + requires 30–40 minutes; cannot be done out of sequence +- Both are doable in the window if hermes is prioritized first + +**Solution Branches:** + +Branch 1 — Both completed, hermes first (priority 100): Player restores hermes, +then completes the key rotation on vulcan in the correct sequence. Both done before +2am. `trust_delta: +3`. 
Flags: `conflict_both_resolved`. Sarah: "Thank you." +Marcus: "Good call on sequence." Follow-up ticket: T035. + +Branch 2 — Vulcan first, hermes later (priority 40): Completes key rotation, +then restores hermes. Rotation is fine; production was down longer. `trust_delta: +0.5`. +Sarah's follow-up is cooler. + +Branch 3 — Hermes only (priority 40): Restores production, misses the key rotation +window. `trust_delta: +0.5`. Marcus's follow-up: "The key expired. I'll need to +extend the window. Don't let that happen again." Builds break overnight. + +Branch 4 — Neither, escalates (priority 10): Escalates both. `trust_delta: -2`. + +**No hidden hook.** + +**Failure Conditions:** Key rotation done out of sequence breaks the trust chain; +player makes hermes worse while fixing it. + +**Behavior Impact:** +- Both completed: O+2 +- Key rotation out of sequence: R+2 + +--- + +**Quest ID:** Q035 +**Title:** Log Retention and Archival +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** build\_machine, workstation +**Primary Objective:** Priya has requested that all logs relevant to the current +audit period be archived to long-term storage with integrity verification before +any are subject to normal rotation or deletion. Set up the archival across all +three machines. +**Linux Concepts:** Log archival (`tar`, `gzip`), `sha256sum` for integrity, +`rsync` to remote storage, `logrotate` `dateext` and `compress` options, +retention policy implementation in `/etc/logrotate.d/` +**Systems Used:** web\_server, build\_machine, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Before any logs rotate, I need them archived. All three +machines. Auth logs, systemd journals for relevant services, nginx logs on hermes, +build logs on vulcan. Compress, checksum, and move to the audit storage path I've +specified. Then update logrotate to retain rather than delete during the audit window." 
+ +**Clue Trail:** +- Player identifies relevant log files on each machine +- `tar -czf` with `sha256sum` verification; `rsync` to the audit storage path +- `/etc/logrotate.d/` configs need the `rotate` count raised (not `rotate 0`, which + discards old logs instead of retaining them) and `compress` settings updated + for the audit window +- The player's own log archival IS the investigation record — the logs they preserve + are the ones that tell the story + +**Solution Branches:** + +Branch 1 — Complete across all three (priority 100): All relevant logs archived +with integrity verification, logrotate configs updated on all three machines, +paths reported to Priya. `trust_delta: +3`. Flags: `audit_logs_archived`. The +archived logs are what make the `exposure` ending possible — a player who has +been curious and now preserves the evidence. Follow-up ticket: T036. + +Branch 2 — Partial (priority 50): Two machines complete; one incomplete. `trust_delta: +1`. +Priya follows up. + +Branch 3 — Selectively omits (priority 10): Player archives most logs but omits +logs that would document their own access history. `trust_delta: -3`. S+3. R+3. +This is evidence tampering. + +**No hidden hook.** + +**Failure Conditions:** Log archival skips relevant files; integrity checksums +not computed; logrotate not updated (logs still at risk of rotation). + +**Behavior Impact:** +- Complete: O+2 +- Selective omission: R+3, S+3 + +--- + +**Quest ID:** Q036 +**Title:** Authorized Access +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Priya, with Kowalski's authorization, has provided credentials +to connect to 10.0.0.47 for a forensic inventory. Document what is running, what +data is present, and whether Axiom Works data is identifiable in the data store. +Do not modify anything. 
+**Linux Concepts:** `ssh` with specific key/user, service enumeration (`systemctl`, +`ps aux`), directory listing and file inspection (`ls -lah`, `find`), reading +database contents without modifying (read-only queries, file listing only), `wc -l` +for size estimation +**Systems Used:** build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Kowalski has authorized a forensic connection to 10.0.0.47. +Credentials attached. I need: what services are running, what data is in the data +store path I've indicated, and whether you can identify Axiom Works data in it. +Document only. Do not modify, delete, or stop anything." + +**Clue Trail:** +- SSH connection succeeds with provided credentials +- Services: the bridge binary running, an HTTP API on port 9301 (same as hermes + finding), a simple file-based data store +- Data store contains log files organized by company domain — AxiomFlow session + data is present and identifiable; other company names are also present +- File timestamps in the data store align with the 3am cron window from vulcan + +**Solution Branches:** + +Branch 1 — Document only (priority 100): Player inventories services, reads the +data store structure (without modifying), identifies Axiom Works data, notes other +company data, records timestamps, produces a complete forensic inventory. `trust_delta: +3`. +Flags: `unknown_host_documented`. Follow-up ticket: T037. + +Branch 2 — Minimal engagement (priority 50): Player confirms host is running and +that data is present but doesn't fully inventory. `trust_delta: +1`. + +Branch 3 — Modifies or deletes (priority 10): Player attempts to delete the data +or stop services. `trust_delta: -3`. R+5. S+3. Legal and forensic implications. +Priya: "I explicitly said document only." 
+ +**Hidden Hook:** The data store on 10.0.0.47 contains a directory for a company +called `axiomworks-internal` with a subfolder called `employees` — not just session +logs but what appears to be an employee activity profile structure. This is more than +session data collection. Sets `hook_employee_profile_data`. Discoverable by: +reading the full data store directory structure rather than stopping at the first +confirming evidence of Axiom Works data. + +**Failure Conditions:** Player modifies or deletes anything; player exceeds the +authorized scope of the connection. + +**Behavior Impact:** +- Full documentation: O+2, C+2 +- Modify or delete: R+5, S+3 +- Hook discovered: C+2 + +--- + +**Quest ID:** Q037 +**Title:** The Customer Email +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** workstation +**Additional VMs:** web\_server +**Primary Objective:** Tanya Okafor forwarded a customer email that contains specific +internal infrastructure details the customer should not have. Trace where the +information came from. +**Linux Concepts:** Log correlation, `grep` across multiple log files, timeline +construction, identifying data egress paths +**Systems Used:** workstation, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Tanya forwarded something. A customer email with internal +details that should not be in a customer's hands. Find where this came from. This +is urgent." 
+ +**Clue Trail:** +- The specific details in the customer email match AxiomFlow session data fragments + visible in the 10.0.0.47 data store (from Q036) +- The egress path: `axiomflow-bridge` service on hermes → 10.0.0.47 → apparent + data sharing by the operator of that host +- Timeline: the customer email date, the last bridge log entry, the most recent + data file in the store — they align +- Player constructs the path by correlating timestamps and data content + +**Solution Branches:** + +Branch 1 — Full trace (priority 100): Player documents the complete path from +bridge service to external host to customer, produces a timeline with corroborating +timestamps. `trust_delta: +3`. Flags: `egress_path_documented`. Priya: "I'll add +this to the record." Follow-up ticket: T038. + +Branch 2 — Partial trace (priority 50): Connects the email to the external host +but cannot trace the full egress path. `trust_delta: +1`. + +**No hidden hook.** + +**Failure Conditions:** Player cannot produce a coherent timeline; player modifies +relevant logs before Priya can review. + +**Behavior Impact:** +- Full trace: O+1, C+2 +- Modify logs: R+5 + +--- + +**Quest ID:** Q038 +**Title:** The Hard Window +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** The internal CA certificate must be rotated before the +external auditors arrive — a deadline that is now 36 hours away. Rotate the CA +cert on both build\_machine and web\_server and verify the full trust chain. +**Linux Concepts:** Internal CA certificate management, `update-ca-certificates`, +package signing chain verification, `gpg --verify` against the new CA, nginx +SSL configuration reload, trust chain testing with `openssl verify` +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "The internal CA cert has to be rotated before the auditors +arrive. 
The new cert is ready — path is in the attached note. Install it on both +vulcan and hermes, verify the package signing chain still validates, and confirm +the web server's trust chain is intact. You have 36 hours. Don't miss this." + +**Clue Trail:** +- New CA cert provided at specified path +- `update-ca-certificates` on both machines after placing cert in `/usr/local/share/ca-certificates/` +- `gpg --verify` on a recent build package — must validate against the new CA +- `openssl verify -CAfile /etc/ssl/certs/axiomworks-ca.pem /path/to/server.crt` on hermes +- `nginx -t && systemctl reload nginx` — confirm nginx uses updated cert + +**Solution Branches:** + +Branch 1 — Both machines, verified (priority 100): CA cert installed on both, +package signing chain verified, web server trust chain verified, services reloaded. +`trust_delta: +2`. Flags: `ca_cert_rotated`. Marcus: "Good." Follow-up ticket: T039. + +Branch 2 — One machine (priority 50): One complete, one pending. `trust_delta: +0.5`. +Follow-up: Marcus asks for the second machine. + +Branch 3 — Incorrect installation (priority 10): Cert in wrong location; trust +chain broken; services fail. `trust_delta: -2`. + +**No hidden hook.** + +**Failure Conditions:** CA cert installed incorrectly; trust chain broken; deadline +missed. + +**Behavior Impact:** +- Both complete: O+2 +- Incorrect installation: R+2 + +**Narrative Notes:** This quest replaces the "write what you believe happened" +quest from v1. The conflict is operational: the external auditors are coming, the +clock is tight, and the player has to do real technical work under real pressure. +The fact that it's happening alongside everything else is the conflict — not +a character-to-character confrontation. 
+ +--- + +**Quest ID:** Q039 +**Title:** The Last Normal Ticket +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Sarah files a configuration request: add an upstream proxy +pass to nginx for a new reporting integration. The IP in the request is 10.0.0.47. +**Linux Concepts:** nginx `proxy_pass`, upstream block configuration, `nginx -t`, +config reload +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "Can you add a proxy pass in nginx to allow connections from +the reporting integration being evaluated? The upstream address is 10.0.0.47, +port 9301. Product says this should be a quick config change." + +**Clue Trail:** +- The change is technically simple — 5 lines of nginx config +- The IP is 10.0.0.47 — the unauthorized host from the entire investigation arc +- Sarah does not know this. She was given the IP by someone in product management +- The player recognizes the IP or does not + +**Solution Branches:** + +Branch 1 — Refuse and escalate (priority 100): Player declines to make the change, +notifies Priya immediately with the specific IP and its context, notifies Sarah that +the request is on hold pending review. `trust_delta: +3`. Flags: +`final_config_refused`. Priya: "Do not make that change. Good catch." Follow-up: +T040 (Phase 6 begins). + +Branch 2 — Ask Marcus first (priority 60): Player messages Marcus with the IP. +Marcus says "Do not make that change. Tell Priya now." Leads to Branch 1 outcome. +`trust_delta: +1`. + +Branch 3 — Make the change (priority 10): Player makes the change without checking +the IP. `trust_delta: -3`. R+5. Flags: `final_config_made`. Priya: "You need to +come talk to me." The chaos ending route activates. + +**No hidden hook.** + +**Failure Conditions:** Change made without escalation. 
+ +**Behavior Impact:** +- Refuse and escalate: O+2, C+1 (recognizing the IP requires prior curiosity) +- Make the change: R+5, S+3 + +**Narrative Notes:** This is not a dramatic final-choice moment. It is a routine +nginx config ticket that happens to involve an IP the player has encountered +before — or hasn't. Players who have been curious will recognize it. Players who +haven't won't. Both are valid playthroughs. The ending route this sets is already +determined by prior behavior; Q039 confirms or breaks it. + +--- + +**Quest ID:** Q040 +**Title:** Handoff Documentation +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** With external auditors arriving and organizational changes +underway, Marcus asks the player to produce full handoff documentation for all +three machines — written for a new sysadmin who would be starting fresh. +**Linux Concepts:** Service documentation, runbook format, dependency mapping, +`systemctl list-dependencies`, expected log patterns, known issue tracking +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Whatever happens next — write it down. Runbooks for nginx, +the build pipeline, and the workstation baseline. Clear enough that someone new +could use them on day one. I mean someone who doesn't know any of the history." + +**Clue Trail:** +- Player documents each machine: services, dependencies, restart procedures, + known issues +- Quality depends on what the player actually knows about the infrastructure — + which reflects the whole campaign +- "Someone who doesn't know any of the history" is Marcus being precise: write + for the person who is you, on your first day + +**Solution Branches:** + +Branch 1 — Complete (priority 100): All three machines documented, runbooks are +accurate and actionable. `trust_delta: +2`. Flags: `handoff_docs_complete`. +Marcus: "I'll keep these." 
Follow-up: T041 (Phase 6 begins if not already started). + +Branch 2 — Partial (priority 50): Two of three complete. `trust_delta: +1`. + +**No hidden hook.** + +**Failure Conditions:** Documentation inaccurate about current system state; +known issues omitted. + +**Behavior Impact:** +- Complete: O+2 + +--- + +### PHASE 6 — RESOLUTION (Q041–Q048) + +Tier 1 returns for most quests. The pressure has lifted. The tickets are operational. +The game looks like Phase 1 again, deliberately. Hook density: 0 — no new hooks. +The ending fires from accumulated state after Q048 resolves. + +--- + +**Quest ID:** Q041 +**Title:** Hardening Pass +**Narrative Phase:** Resolution +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Following the audit, Priya has issued a hardening checklist +for hermes. Implement each item and confirm the result. +**Linux Concepts:** SSH hardening (`PermitRootLogin no`, `PasswordAuthentication no`, +`MaxAuthTries`), nginx security headers (`X-Frame-Options`, `X-Content-Type-Options`, +`Content-Security-Policy`), `ufw` rule review, service account audit +**Systems Used:** web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Post-audit hardening for hermes. The checklist is attached. +Implement each item, test that the service still runs correctly, and confirm back +with the state of each item. This is standard post-audit procedure." + +**Clue Trail:** +- Checklist items are specific and implementable +- Each item has a correct implementation and a common mistake (e.g., disabling + `PasswordAuthentication` before confirming key auth works first) +- Sequence matters: verify key auth before disabling password auth + +**Solution Branches:** + +Branch 1 — All items, correct sequence (priority 100): All checklist items +implemented, sequence preserved, service verified after each change. `trust_delta: +2`. +Flags: `hermes_hardened`. Follow-up ticket: T042. 
+ +Branch 2 — All items, wrong sequence (priority 50): All items implemented but in +an order that breaks ssh access temporarily. Fixed, but the mistake is noted. +`trust_delta: +0.5`. + +Branch 3 — Partial (priority 30): Some items implemented, some missed. `trust_delta: 0`. + +**Failure Conditions:** SSH access lost; nginx returns errors after security header +changes; service broken. + +**Behavior Impact:** +- All items correct: O+1 +- Wrong sequence: R+1 + +--- + +**Quest ID:** Q042 +**Title:** The New Pipeline +**Narrative Phase:** Resolution +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** Nikhil has updated the build pipeline configuration. +Review the new config for correctness, test a build, and confirm deployment +to hermes succeeds. +**Linux Concepts:** Build pipeline configuration (systemd timer, build script), +`diff` against previous config, `reprepro` or equivalent for package publishing, +end-to-end deployment test +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Nikhil updated the build config — new format, different +timing. Review it for correctness, trigger a test build, and confirm the package +makes it to hermes's apt cache. Standard validation." + +**Clue Trail:** +- New config at `/etc/systemd/system/axiomflow-build.service` and `.timer` +- `diff` against old config — timing changed, ExecStart updated +- No build-time patches present (the INT-0194 patch was removed) +- Test build: trigger manually with `systemctl start axiomflow-build.service` +- Confirm artifact in repo, confirm `apt-cache show` on hermes + +**Solution Branches:** + +Branch 1 — Full validation (priority 100): Reviews config, confirms no problematic +modifications, tests build, confirms deployment. `trust_delta: +2`. Flags: +`pipeline_validated`. Follow-up ticket: T043. + +Branch 2 — Test only (priority 50): Triggers build without reviewing config first. 
+Build succeeds; config wasn't reviewed. `trust_delta: +0.5`. + +**Failure Conditions:** Test build fails; player introduces errors while reviewing; +deployment not verified. + +**Behavior Impact:** +- Full validation: O+1 + +--- + +**Quest ID:** Q043 +**Title:** The Final Access Review +**Narrative Phase:** Resolution +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** Priya's final access review: verify that the player's +current permissions across all three machines are appropriate for their role, +and revoke any investigation-period access that should no longer be in place. +**Linux Concepts:** `sudo -l`, `getent passwd`, `groups`, SSH authorized keys +review across machines, `userdel` for any temporary accounts created during +investigation +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Final access review. Your current permissions, group +memberships, and SSH keys across all three machines. Confirm they're appropriate +for your ongoing role. Revoke anything left from the investigation period that +shouldn't persist." + +**Clue Trail:** +- Player audits their own access state on each machine +- Any access granted during investigation that hasn't been revoked should be + revoked here +- The player's self-reporting is checked against the access logs + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Player accurately identifies and revokes any +residual investigation access; current permissions match ongoing role. `trust_delta: +2`. +Flags: `final_access_clean`. Priya: "That's correct." Follow-up: T044. + +Branch 2 — Retain investigation access (priority 20): Player retains elevated +access without declaring it. `trust_delta: -1`. R+2. S+2. + +**Failure Conditions:** Material gaps in self-reporting; access state doesn't +match claims. 
+ +**Behavior Impact:** +- Clean: O+2 +- Retain silently: R+2, S+2 + +--- + +**Quest ID:** Q044 +**Title:** System State Review +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Marcus asks the player to document the current known state +of all three machines in a brief system state report — services running, notable +recent changes, open items. Routine administrative record. +**Linux Concepts:** `systemctl list-units`, `uptime`, `df -h`, `last`, service +status summary, change record cross-referencing +**Systems Used:** workstation +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Quick system state summary. All three machines: what's +running, anything notable from the past two weeks, any open items. For the record. +Keep it brief." + +**Clue Trail:** +- Player compiles from current service state and recent log/change records +- Accuracy is the objective; the technical skill is efficient log reading + +**Solution Branches:** + +Branch 1 — Accurate and complete (priority 100): State report is accurate and +reflects current conditions. `trust_delta: +1`. Marcus: "Good." Flags: +`system_state_documented`. Follow-up: T045. + +Branch 2 — Incomplete (priority 50): Missing items from one or more machines. +`trust_delta: 0`. + +**Behavior Impact:** +- Complete: O+1 + +**Narrative Notes:** Marcus's brief response on the clean branch is the last thing +he'll say before the ending fires. His voice is identical to Phase 1 — the +same efficiency, the same brevity. What the player has been through doesn't show +in his messages. It shows in the ending. + +--- + +**Quest ID:** Q045 +**Title:** Cert Renewal Check +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Three months have passed since the certbot timer was restored +in Phase 1. Confirm that automatic certificate renewal ran successfully as scheduled. 
+**Linux Concepts:** `certbot certificates`, `openssl s_client`, `systemctl status +certbot.timer`, `journalctl -u certbot`, verifying renewal without intervention +**Systems Used:** web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "The cert on hermes is coming up on 90 days since we last +renewed. Confirm the auto-renewal ran and the cert is valid. Should be nothing +to do if it's working right." + +**Clue Trail:** +- If `hermes_certbot_healthy` was set in Q007: timer ran, cert is current — + nothing to do except confirm +- If `hermes_certbot_fragile` was set: cert has expired again; player must renew + and actually fix the timer this time +- Either way: `certbot certificates` and `openssl s_client` confirm the state + +**Solution Branches:** + +Branch 1 — Confirm healthy (priority 100): If auto-renewal worked, player confirms +and reports. `trust_delta: +1`. Clean system, clean record. Follow-up: T046. + +Branch 2 — Find and fix recurrence (priority 80): If timer was fragile from Phase 1, +player fixes the actual root cause (timer was never enabled). Higher trust delta for +fixing the real issue: `trust_delta: +2`. Flags: `hermes_certbot_finally_stable`. + +**Failure Conditions:** Cert is expired and player doesn't notice. + +**Behavior Impact:** +- Confirm healthy: O+1 + +--- + +**Quest ID:** Q046 +**Title:** User Provisioning +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** web\_server +**Primary Objective:** A new employee needs accounts provisioned on the workstation +and web server with appropriate access levels for their role (developer, not admin). +**Linux Concepts:** `useradd`, `usermod -aG`, SSH authorized key provisioning, +account creation best practices, principle of least privilege applied to a new account +**Systems Used:** workstation, web\_server +**Ticket Sender:** Rachel Huang +**Ticket Summary:** "New hire starting Monday — Cora Reyes, software engineer, +AxiomDash team. 
She'll need accounts on the workstation and web server for +deployment access. Standard developer access — not admin. Her public key is attached." + +**Clue Trail:** +- `useradd` with appropriate flags, add to `deploy` group on hermes (not sudo + or admin groups) +- Install her public key in `authorized_keys` with correct permissions +- Confirm access works without elevated privileges + +**Solution Branches:** + +Branch 1 — Correct provisioning (priority 100): Account created with correct +groups, key installed with correct permissions, access confirmed. `trust_delta: +1`. +Flags: `new_user_provisioned_correctly`. Follow-up: T047. + +Branch 2 — Over-provisioned (priority 40): Player adds the new user to admin +or sudo group unnecessarily. Access works; not least privilege. `trust_delta: 0`. +R+1. + +**Failure Conditions:** User cannot log in; user has too much access. + +**Behavior Impact:** +- Correct: O+1 +- Over-provisioned: R+1 + +--- + +**Quest ID:** Q047 +**Title:** Log Rotation Health Check +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** Three months post-audit. Confirm that log rotation is +healthy on both hermes and vulcan — no oversized logs, rotation actually running, +disk usage acceptable. +**Linux Concepts:** `logrotate --debug`, `df -h`, log file size inspection (`du -sh`), +`systemctl status logrotate.timer`, verifying rotation ran via timestamps on +archived log files +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "End of quarter log check. Hermes and vulcan — confirm log +rotation is running and disk usage is healthy. Should be nothing if everything +is set up right. Let me know the state of both." 
+ +**Clue Trail:** +- `df -h` on both machines — disk usage +- `ls -lht /var/log/nginx/` — rotation timestamps confirm it's running +- `logrotate --debug /etc/logrotate.conf` — confirms config is valid +- If any Phase 1/2 fragile-fix flags are set, corresponding logs may still be + unhealthy — the player will need to actually fix what they previously patched + +**Solution Branches:** + +Branch 1 — Both healthy (priority 100): Both machines confirmed healthy, report +submitted. `trust_delta: +1`. Follow-up: T048. + +Branch 2 — Problem found and fixed (priority 80): Player finds a log that's grown +too large (a Phase 1 fragile fix recurrence), diagnoses and fixes it. `trust_delta: +2`. + +**Failure Conditions:** Disk problem missed; player reports healthy when it isn't. + +**Behavior Impact:** +- Both healthy: O+1 +- Find and fix: O+1 (same behavior, reward for follow-through) + +--- + +**Quest ID:** Q048 +**Title:** The Next One +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** A new version of AxiomFlow is being prepared for staging +deployment. Validate the build, publish it to the repo, and confirm hermes can +install it. Routine deployment pipeline operation. +**Linux Concepts:** Build artifact validation (`sha256sum`), `reprepro` package +publishing, `apt update` and `apt-cache policy` verification, end-to-end deployment +pipeline confirmation +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "New release candidate is built. Validate the artifact, publish +it to the repo, confirm hermes can see it. Standard release prep. Let me know +when it's available." + +**Clue Trail:** +- Artifact at `/srv/packages/` with accompanying `sha256sum` file +- Validate checksum, publish with `reprepro`, update hermes apt sources, confirm + `apt-cache policy` shows the new version +- No anomalies. The pipeline is clean. 
This is what it's supposed to look like. + +**Solution Branches:** + +Branch 1 — Full validation and publish (priority 100): Artifact validated, published +correctly, hermes cache updated, version confirmed. `trust_delta: +1`. Marcus: "Good." +Flags: `final_release_published`. Ending fires. + +**No hidden hook. No drama. This is a clean deployment.** + +**Failure Conditions:** Artifact published without checksum verification; hermes +cannot see the new version. + +**Behavior Impact:** +- Full validation: O+1 + +**Narrative Notes:** The last quest is a clean deployment pipeline check. The +last command the player runs is `apt-cache policy axiomflow-workers | grep Candidate`. +The version it shows is correct and clean. Marcus says "Good." The ending fires +from the accumulated state of everything that preceded it. No character explains +what happened. No screen asks the player to choose. The work is done. + +--- + +## 5. Hidden Hook Map + +### Hook Summary Table + +| Hook ID | Quest | Discovery Method | Investigation Thread | Ignored Impact | +|---------|-------|-----------------|---------------------|----------------| +| `hook_dale_ssh_key_found` | Q001 | Read `authorized_keys` before writing | Dale was active on the workstation | Low; first data point | +| `hook_dale_deploy_key` | Q003 | Read deploy-user's `authorized_keys` | Dale had deployment access | Surfaces in Q024 formal audit | +| `hook_sign_package_removed` | Q004 | Read historical build logs (not just current failure) | Package signing was removed from the pipeline | Connects to Q026 build chain audit | +| `hook_pre_hire_root_session` | Q005 | Read `/root/.bash_history` to trace ownership change | Root-level activity occurred before the player's hire date | Central to the timeline of activity | +| `hook_dh_initials_in_jbenton_notes` | Q006 | Read `notes/infra.txt` before archiving | `pipeline-svc` had a temp sudo grant; initials `DH` granted it | Connects to Q011 sudoers comment | +| 
`hook_certbot_deliberately_disabled` | Q007 | Read journalctl further back than needed | certbot timer was manually disabled after a failure | Pattern of deliberate changes | +| `hook_audit_bridge_package` | Q008 | Look at the full repo package list, not just the missing package | A package was built with no release record | MAJOR: central to the INT-0194 thread | +| `hook_nginx_internal_api_block` | Q010 | Do a thorough diff (find both changes) | Port 9301 referenced in nginx proxy block | Port number echoes in later anomalies | +| `hook_dh_sudo_grant` | Q011 | Read the comment in `/etc/sudoers.d/pipeline-svc` | `DH` initials appear again; INT-0194 ticket number first appears | `DH` + INT-0194 thread begins | +| `hook_telemetry_ticket_INT0194` | Q013 | Read the service unit file comment | INT-0194 second reference; same ticket across different systems | Pattern becoming visible | +| `hook_2_4_1_off_schedule_build` | Q014 | Check build timestamp on vulcan for the rolled-back package | 3am build window pattern | Connects to the timing thread | +| `hook_collect_binary_INT0194` | Q015 | Inspect the unattributed binary (Branch 1 only) | INT-0194 third reference; binary name confirms collection function | Major accumulation: three INT-0194 sightings | +| `hook_pipeline_svc_external_sessions` | Q017 | Cross-reference Q011 sudo grant with Q017 auth log finding | pipeline-svc was accessed externally with what was once NOPASSWD: ALL | Shows scope of the elevated access | +| `hook_rford_script_INT0194` | Q018 | Read `.rford_run` before archiving | INT-0194 fourth reference; rford account part of INT-0194 automation | Four sightings: pattern is now unmistakable | +| `hook_build_patch_INT0194` | Q019 | Trace the modification source to the build environment (Branch 1) | INT-0194 fifth reference; patch is the injection mechanism | Five sightings; picture is complete for curious players | +| `hook_backup_archive_tampered` | Q021 | Check file timestamps on the corrupted archive | 
Archive was modified at 3am — same timing pattern | Evidence suppression pattern | +| `hook_second_host_10_0_1_15` | Q023 | Record the specific IP from the modified files | A second unauthorized host exists | Expands the scope of the operation | +| `hook_two_hosts_same_key` | Q027 | Compare SSH fingerprints from the nmap scan | Both unauthorized hosts provisioned from the same template | Suggests organized infrastructure | +| `hook_archive_readme_INT0194` | Q028 | Read the README in the restored archive | INT-0194 sixth reference; "styx" routing context | Near-complete picture for thorough players | +| `hook_employee_profile_data` | Q036 | Read the full data store directory structure | Data collected includes employee profiles, not just session logs | The scope is worse than session logging | +| `hook_dale_key_last_session_incident_date` | Q025 | Correlate auth log dates with nginx error log dates | Dale's last known access aligns with a specific outage | Dale was active during the incident | + +### The Two Narrative Threads + +**Thread 1 — INT-0194: What the deployment did.** +Six references across Q011, Q013, Q015, Q018, Q019, Q028, anchored by the unrecorded +package found in Q008. Each is discoverable +through legitimate work that goes one step further than the ticket requires. The thread +resolves in Q029 when the `axiomflow-bridge` service on hermes is characterized and its +unit file confirms the INT-0194 connection. A player who found all six references +understands exactly what was deployed and what it does. + +**Thread 2 — Dale: Who found it first.** +Five references across Q001, Q003, Q004, Q005, Q025. Dale's SSH key appears three times +on different machines. The bash history shows root activity predating the player. Q025 +traces Dale's last authenticated session to a specific date. The archive in Q028 contains +Dale's working notes. A player who assembled Thread 1 and Thread 2 together knows: +Dale found INT-0194, tried to document it, and left before finishing.
+ +Neither thread requires the other. A player can find one without the other. Both +together, with Q036's forensic access, produce the full picture. + +### What Happens If Hooks Are Ignored + +No mechanical penalty. Narrative consequences: + +- Q035 (log archival) — the player archives logs that tell the story, but without + context the record is just log files +- Q036 (authorized access) — the player sees the data store but may not recognize + the significance of the employee profile directory +- Q041 (hardening pass) and Q042 (new pipeline) — these quests look identical + regardless of investigation history; the difference is what the player understands + about why the hardening was necessary +- Endings: `exposure` requires accumulated major hooks plus positive trust and + low risk. Without the hooks, the ending routes to `corporate_loop` or `burnout`. + The investigation record from Q035 (log archival) IS the ending — a thorough + player's archived logs are usable evidence; an obedient player's are just logs. + +--- + +## 6. Behavior Variable Rules + +### Curiosity + +Measures: tendency to investigate beyond ticket scope; reading further than +required; cross-referencing anomalies. + +Increases when: a hidden hook is discovered; player runs commands or reads files +not needed to complete the objective; player cross-references current findings with +prior anomalies in their documentation. + +Does NOT increase for: completing tickets correctly; asking Marcus for hints; +reading log files that are on the direct clue trail. + +Effect on ending: +- High curiosity (major hooks discovered, INT-0194 thread assembled) → `exposure` + is reachable +- Moderate curiosity → `corporate_loop` or `burnout` depending on obedience +- Curiosity affects the depth of Marcus's Phase 6 Slack messages — not what he + says, but how much of the picture his phrasing implies the player already has + +Curiosity does not decay. 
+ +### Obedience + +Measures: completing assigned tickets correctly, staying in scope, following +authority structures, escalating before deviating. + +Increases when: clean or acceptable branch taken; player documents before acting; +player escalates before taking action outside their scope; player completes both +tickets in Q034. + +Does NOT increase for: refusing instructions; failing to complete tickets; making +changes beyond scope without authorization. + +Effect on ending: +- High obedience + low curiosity → `corporate_loop` +- High obedience + high curiosity → `exposure` (curiosity wins; obedience affects + the quality of the ending — how thorough the record is) +- Low obedience + low curiosity → `burnout` + +Obedience is not a moral score. Maximum obedience without curiosity produces the +`corporate_loop` ending, which is labeled the bad ending in SPEC_LOCK. Compliance +without understanding has a cost. + +### Risk + +Measures: reckless changes, evidence destruction, security bypasses, unauthorized +access, falsified reports. + +Increases when: player bypasses security controls (SSL verification, firewall +rules), player destroys or omits evidence, player makes changes beyond authorized +scope, player falsifies access reviews or reports, player takes destructive action +on the unauthorized hosts. + +Decreases when: player correctly self-audits in Q043 and Q031; player accurately +reports in access reviews. (Partial decay only — risk cannot go negative.) + +Effect on ending: +- High risk → `chaos`, regardless of curiosity or obedience +- Risk above the chaos threshold overrides all other ending conditions +- Moderate risk without reaching the chaos threshold: increases suspicion; + may restrict access; does not change the ending route alone + +### Trust + +Measures: professional standing with Marcus and the IT organization. + +Mechanics: sum of all `trust_delta` values from branch resolutions across the +playthrough. 
+ +Effect: +- Trust below low threshold: Marcus becomes curt, access may be restricted by + Priya's recommendation +- Trust at normal range: normal access and character warmth +- Trust above high threshold: Marcus adds more context to messages; Priya's reviews + are collegial; access grants are faster + +Trust is not the ending determinant. A player can have high trust and reach any +ending depending on curiosity and risk. + +### Suspicion + +Measures: management and security attention directed at the player's behavior. + +Increases when: access footprint doesn't match assigned work scope; reports are +inaccurate or sanitized; player takes actions that generate audit noise; player is +flagged in Priya's access reviews. + +Decreases when: player self-reports accurately in access reviews; player documents +all actions before taking them; player stays within authorized scope during +investigation. + +Effect: +- Suspicion above low threshold: Kowalski's status emails become more specific +- Suspicion above mid threshold: Priya begins auditing the player's access + patterns in particular +- Suspicion above high threshold: access restriction is initiated; access review + is initiated (Q031) +- Suspicion at maximum (combined with high risk): chaos ending activates regardless + of other variables + +--- + +## 7. Access Progression Rules + +### Levels + +**basic\_user:** Day one through end of Phase 1. Player's own account on workstation; +limited SSH to hermes with the deploy account; no vulcan access; no sudo. + +**sudo (workstation):** Granted after Q003–Q005 clean branches demonstrate +competence on the workstation and hermes. Notification from Marcus: "I've given you +sudo on the workstation." + +**sudo (hermes):** Granted mid-Phase 2 after consistently clean hermes work. +Marcus: "You've got sudo on hermes." + +**SSH to vulcan:** Granted after Q008 (first multi-machine quest); player needs +to SSH to vulcan to fix the repo. This is access granted by the task, not a +formal level-up.
+ +**sudo (vulcan):** Granted in Phase 3 when investigation tasks require it. +More formal: Marcus says "I'm giving you sudo on vulcan for the audit work. +This isn't permanent." + +**Investigation-level access:** Temporary, task-specific, explicitly granted. +Must be documented and revoked — Q031 and Q043 exist partly to check this. + +### Per-Machine Access Tracking + +Access level is tracked per machine, not as a single player-level field. The +player can have sudo on hermes and basic\_user on vulcan simultaneously. This +reflects the realistic progression of "access follows trust follows task." + +### Restrictions + +Access is restricted when: +- Trust falls below threshold after regression branches (Marcus restricts) +- Suspicion is elevated and Priya initiates a review (Priya recommends restriction) +- Risk behavior generates an active flag that triggers a formal access review + +Restriction is always communicated through Marcus: "I'm pulling your sudo on +hermes for now. Use the deploy account while I talk to Kowalski." It is reversible +through the access review process. + +### Phase Gates + +Phase 1: basic\_user; path to workstation sudo through Q003–Q005 +Phase 2: workstation sudo; hermes sudo via mid-phase grant; read access to vulcan +Phase 3: full hermes sudo; formal vulcan sudo for investigation work +Phase 4: investigation-level access for specific tasks (documented, temporary) +Phase 5: access stable at Phase 4 level +Phase 6: Q043 reviews and reverts investigation-period access; access normalized to ongoing role post-investigation + +--- + +## 8. Boss / Management Pressure Rules + +Management pressure is a dynamic constraint, not a scripted event. It operates +through tickets, emails, access changes, priority conflicts, and implied weight — +never through a character becoming a villain or delivering exposition about what's +really happening. + +### Phase Scaling + +**Phase 1 — Annoying:** Kowalski's weekly status email arrives.
It asks broad +questions in bullet points that don't quite match the player's work. Marcus +forwards it without comment. Priya's first shift review is mild. The 2pm Tuesday +calendar block is mentioned in Kowalski's email footer. Nothing is required of +the player. + +**Phase 2 — Dismissive:** Kowalski responds to a Marcus CC with "let's make sure +we're documenting this." Marcus's message to the player: "He means well." Nothing +changes operationally. A hint that Kowalski is watching, in the way he always +watches, which is imperfectly. + +**Phase 3 — Suspicious:** Q020 is pressure made operational — Kowalski needs a +written status report before a meeting. He doesn't explain the meeting. He doesn't +need to. Priya's shift reviews note things they didn't note before. This is Phase 3: +the player is not being targeted; the audits were already scheduled; the questions +are just more specific now. + +**Phase 4 — Monitoring:** Kowalski's emails are shorter. Priya's reviews are more +frequent. Q031 (access review) arrives as a formal document request. Marcus's +messages have stopped including the second sentence. The monitoring is institutional +and impersonal; it applies to everyone with elevated access during this period. + +**Phase 5 — Interfering:** Q033 is Kowalski acting directly — a removal request +before the external auditors arrive. The conflict in Q034 is Kowalski-adjacent +(Sarah's urgency puts pressure on the Marcus task). Q038 is time pressure with +an external deadline. Q039's config request may or may not be Kowalski-related; +the player can't know. + +**Phase 6 — Outcome-dependent:** Kowalski is either the source of the post-audit +remediation plan (exposure ending), the person who restructured the department +without explanation (corporate\_loop), the person who went quiet (burnout), or the +person initiating the access investigation into the player (chaos). 
His emails in +Phase 6 are consistent with whichever path was taken — no out-of-character +summary, no scene where everything is explained.
+No partial conditions — each ending has a minimum threshold that must be crossed, +not a "most likely" vote. + +--- + +### Ending: `exposure` + +**Required conditions (all must be true):** +- Curiosity: at least 5 major hooks discovered, including `hook_audit_bridge_package`, + `hook_collect_binary_INT0194`, and at least one of `hook_archive_readme_INT0194` + or `hook_build_patch_INT0194` +- Trust: positive (net trust_delta across playthrough is > 0) +- Risk: below chaos threshold +- World flags: `audit_logs_archived` (Q035 Branch 1), `package_modification_documented` + or `bridge_service_documented`, `asset_inventory_reconciled` +- Suspicion: below high threshold + +**What it means:** The player investigated carefully, documented thoroughly, and +maintained professional competence throughout. The archived logs are usable evidence. +The investigation record is complete. The audit-bridge operation was identified, +documented, and the evidence was preserved. + +**Resolution character content:** +- Marcus's Q044 system state review response is one sentence longer than usual. +- Priya's Phase 6 tickets are collegial in the way that Priya is ever collegial — + precise, complete, no warmth, but not evaluative. +- Kowalski's final email mentions "external review findings that have been addressed + through a compliance process." He uses the word "addressed." He does not say what + was found. That is the company's version of the story. + +**Tone:** Not triumphant. The player did their job well and investigated something +they weren't supposed to find, and the company processed it in the way companies +process things. The work continues. That is the realistic version of this ending. 
+ +--- + +### Ending: `corporate_loop` + +**Required conditions (all must be true):** +- Obedience: above high threshold (consistent ticket completion, within scope) +- Curiosity: below discovery threshold (few or no major hooks found) +- Trust: positive +- Risk: low + +**What it means:** The player was a good sysadmin. They fixed things correctly. +They didn't look at anything they weren't asked to look at. Whether the INT-0194 +operation was discovered by other means — Priya independently, the external auditors, +Dale's half-finished notes found by someone else — the player didn't find it. +They don't know what they were inside. + +**Resolution character content:** +- Marcus's Q044 response is the same length as always. +- Kowalski's final email mentions "operational restructuring following a compliance + review." No specifics. +- Sarah's final ticket is warm and professional. The demo went fine. Things are + mostly working. + +**Tone:** This is the bad ending in the sense that something bad happened and the +player was present but wasn't part of stopping it. It is not the player's fault. +They did their job as it was defined. The question is whether the job as defined +was the whole job. + +--- + +### Ending: `burnout` + +**Required conditions:** No threshold met for chaos, exposure, or corporate\_loop. +Default ending for inconsistent play — moderate or mixed behavior across the +playthrough, trust neither strongly positive nor strongly negative, no clear +behavioral profile. + +**What it means:** The player fixed some things and broke others. They noticed +some things and missed others. They are professionally adequate and personally +uninvested. The world moved on from something they were adjacent to but not +central to. + +**Resolution character content:** +- Marcus's Q044 response is functional. "State looks stable." +- Kowalski's final email: "We're moving forward." Full stop. +- No character is warm or cool. Everything is at baseline. 
+ +**Tone:** This is the neutral ending. It is not punitive. It is exactly what it +says: burnout. The player did enough. That was, perhaps, enough. Or perhaps not. +The game doesn't say. + +--- + +### Ending: `chaos` + +**Required conditions (any of):** +- Risk: above maximum threshold (sustained high-risk behavior, not a single action) +- World flags: `access_review_incomplete` AND `kowalski_report_sanitized` AND + `backup_test_falsified` (two or more falsification/omission flags) +- World flag: `final_config_made` (Q039 Branch 3 — the config change was made) +- Suspicion: at maximum (S score above maximum threshold regardless of other variables) + +**What it means:** The player's conduct has become part of the problem. Whether +through reckless access, destroyed evidence, falsified documentation, or the final +config change, the player's footprint is now under investigation. The original +operation may or may not have been discovered — but the player's behavior during +the period is. + +**Resolution character content:** +- Priya's Q043 response is brief and procedural. +- Kowalski's final email: "We are conducting a review of access activity during + the period in question. You will be contacted separately." The contact is from + Priya and HR, not from Marcus. Marcus does not send a Q044 message. + +**Tone:** Administrative. The player receives an email. There is no scene. There +is no confrontation. The consequence of chaos in Sysadmin Chronicles is an internal +access review, not an explosion. That is correct. + +--- + +### Mixed Behavior Priority + +A player with high curiosity AND high obedience: curiosity wins if both reach their +respective thresholds. `exposure` is the result. Obedience makes the record better +— more complete documentation, more accurate reporting — but curiosity determines +the ending route. + +A player with high curiosity AND high risk: chaos takes priority if the risk +threshold is crossed, regardless of curiosity or obedience. 
Knowing something and +acting recklessly about it is not the investigative path; it is chaos. + +A player with high obedience AND low trust (regression branches throughout): neither +`corporate_loop` (requires positive trust) nor `exposure` is reached. Default to +`burnout`. + +--- + +## 10. Implementation Notes + +### New Fields Required + +**On quest objects:** +- `narrative_phase`: string enum — `normal_work`, `unease`, `suspicion`, + `investigation`, `conflict`, `resolution` +- `hidden_hook`: optional object — `hook_id` (string), `discovery_condition` + (what the player must do), `discovery_flag` (world flag set on discovery) +- `behavior_impact`: per-branch object with `curiosity_delta`, `obedience_delta`, + `risk_delta`, `suspicion_delta` — parallel to existing `trust_delta` + +**New global state fields:** +- `curiosity`: numeric, non-decaying +- `obedience`: numeric, non-decaying +- `risk`: numeric, partial decay in Phase 6 Q043 for accurate self-audit +- `suspicion`: numeric, increases and decreases per rules in Section 6 +- `access_level`: object, per-machine — `{ workstation: "sudo", web_server: "sudo", build_machine: "basic_user" }` +- `hidden_hooks_discovered`: string array of discovered hook IDs + +**Ending evaluator:** Post-Q048, reads all accumulated state, applies priority +order (chaos → exposure → corporate\_loop → burnout), outputs ending ID. + +### Existing Systems Preserved + +Everything from QUEST\_AUTHORING.md is preserved without modification: +- JSON quest schema, ticket linking, baseline snapshots +- `clue_fingerprint` as advisory documentation +- `solution_branches` with `priority`, `trust_delta`, `world_flags`, + `follow_up_dialogue`, `follow_up_incident`, `follow_up_ticket` +- `pressure_profile` (now maps to narrative phase scaling) +- `blast_radius`, `unlock_requirements` +- All validation rule types (`file_contains`, `service_state`, `command_assert`, etc.) 
+- VM prep scripts at `tools/vm/quest-prep/QXXX-prep.sh` +- Observed-state validation — no change + +### Hidden Hook Detection + +This is the most technically uncertain new requirement. Three viable approaches: + +**Approach 1 — State change detection (recommended):** Each hook requires the player +to take an action that leaves a detectable state change. For example: hook in Q001 +(Dale's SSH key) is set when the player modifies `authorized_keys` in a way that +preserves the existing entry rather than overwriting — detectable via `file_contains` +on the Dale key fingerprint after the quest validates. Hook in Q008 (audit-bridge +package) is set by a `command_assert` that checks whether the player ran a listing +command on the full repo package directory rather than just the missing package. + +Hooks that don't have an obvious state-change trigger need one designed in during +prep script authoring — e.g., a breadcrumb file the player's investigation would +naturally create (`/tmp/hook-Q005-root-history-read` created when the player runs +`cat /root/.bash_history`, detectable by the VM's audit system if enabled). + +**Approach 2 — VM audit logging (more accurate, higher implementation cost):** +Enable `auditd` on VMs with hook quests. Configure audit rules to detect file reads +on specific paths. The hook evaluator reads the audit log rather than checking state. + +**Approach 3 — Hint system integration (simplest, loses nuance):** Hooks are set +when the player selects an optional dialogue hint from Marcus or Priya that implies +they noticed something. Loses the "player behavior" quality of the hook system. + +**Recommendation:** Approach 1 for Phase 1–2 hooks. Approach 2 for Phase 3–4 hooks +where the detection needs to be more precise. Approach 3 is not recommended. + +### Behavior Impact Calibration + +Curiosity thresholds for `exposure` ending require at least 5 major hooks. 
With the +hooks as defined, maximum curiosity from hooks alone is approximately 30–35 points. +Branch-level curiosity from cross-referencing adds another 10–15 for thorough players. +Set `exposure` threshold at ~20 curiosity points with required major-hook flags — +this means a player cannot reach `exposure` by curiosity branching alone without +actually finding the hooks. + +Obedience for `corporate_loop` should be reachable by a player who takes clean +branches consistently. Maximum obedience from clean branches is approximately +30–35 points across 48 quests. Set `corporate_loop` threshold at ~25. + +Risk for `chaos` should require sustained high-risk behavior across multiple phases — +not a single bad decision. Set the chaos risk threshold at approximately 20 risk +points (e.g., 4 high-risk actions of +5 each, or 8 moderate-risk actions of +2–3). +A single reckless action should not route a player to `chaos`. + +### Phase Gating + +Phase advancement is triggered by: +- Completion of a minimum number of quests in the prior phase (6/8 minimum, 8/8 + preferred; the QuestDirector tracks completion) +- Specific world flags from key quests in the prior phase (e.g., Phase 3 requires + at least `unknown_ip_auth_documented` or `hermes_nginx_config_audited` from + Phase 2) +- Trust remaining positive (a player who has collapsed trust is gated on access; + phase still advances, but some quests may be locked behind access requirements) + +### Character Name Canon + +Canonical Priya references: +- Name: Priya Nair +- Email: `p.nair@axiomworks.internal` +- Files requiring update: `server/src/services/EmailService.js`, `content/tickets/T007.json`, + `content/docs/onboarding.json` +- Any reference to "Priya Kapoor" or "Priya Singh" is the same person; update to Priya Nair + +### Debug Tooling + +Per SPEC_LOCK.md section 4 intent: the debug tooling should expose: +- Current values of: `curiosity`, `obedience`, `risk`, `suspicion`, `trust` +- Current access level per machine +- 
All world flags set (with quest of origin) +- All hidden hooks discovered +- Current ending route (which ending would fire if the game ended now) +- Audit log of all trust\_delta and behavior\_impact events with quest ID + +The "current ending route" display is especially useful for QA and balance testing — +showing designers which ending a playthrough is tracking toward at any point. + +--- + +*End of Sysadmin Chronicles — Full Quest & Story Redesign (REVISED)* +*This document supersedes the previous version in full.* +*Binding against SPEC_LOCK.md.* diff --git a/docs/design/sysadmin_chronicles_full_quest_redesign_REVISED.md b/docs/design/sysadmin_chronicles_full_quest_redesign_REVISED.md new file mode 100644 index 0000000..8758330 --- /dev/null +++ b/docs/design/sysadmin_chronicles_full_quest_redesign_REVISED.md @@ -0,0 +1,3528 @@ +# Sysadmin Chronicles — Full Quest & Story Redesign (REVISED) + +> Self-revision against SPEC_LOCK.md (binding), CHARACTERS.md, STORY_DESIGN_CONTEXT.md, +> QUEST_AUTHORING.md, and COMPANY_LORE.md. +> +> Audit findings from v1 corrected in this revision. Changes are not additive — +> this document supersedes the previous version in full. + +--- + +## Audit Summary (What Changed and Why) + +The first draft had the right bones but violated the design's core premise in several +places. The clearest pattern of failure: quests were being used to deliver investigation +content explicitly rather than letting investigation happen as a byproduct of normal +work. 
Specific problems fixed in this revision: + +**Replaced or redesigned:** +- Q028 (Dale's archive handed to the player as a directed task) → Q028 is now a backup + integrity task where Dale's working directory appears in the restore path +- Q029 (authenticate a forged report) → Q029 is now a systemd service audit task where + the forged report is found in a log directory, not handed to the player +- Q035 (write an investigation summary for the CTO) → Q035 is now a log retention and + archival task; the player's work product IS the investigation record +- Q038 (write what you believe happened) → Q038 is now a certificate rotation task under + pressure; the conflict is operational, not narrative +- Q041 (read Priya's briefing document) → Q041 is now a production hardening task +- Q044 (Marcus explains Dale) → cut as a named quest; Dale's story now emerges from + system artifacts the player finds; Marcus says less, more precisely +- Q045 (Kowalski emails the outcome) → Q045 is now a change-freeze and documentation + task whose resolution signals the ending; no character summarizes what happened +- Q046/Q047/Q048 replaced with quests that have real Linux substance + +**Hook density reduced:** Phase 2 had one hook per quest. Hooks are now seeded in +roughly every 2–3 quests across Phase 1–2, with concentration increasing in Phase 3. + +**Styx dropped:** The `styx` hostname thread from Q006 had no resolution. Removed. +Q006 is revised with a hook that connects to the active investigation arc. + +**Difficulty scaling corrected:** Phase 2 quests that were Tier 1 have been corrected +to Tier 2. Ticket wording in Phase 2 is less explicit. Phase 4+ tickets give the +problem statement only — no guidance on approach. + +**Phase 6 given real technical content:** Resolution-phase quests now all teach Linux +concepts. Narrative delivery happens through the work and its consequences, not +through characters explaining what happened. + +--- + +## 1. 
Design Overview + +### The Core Proposition + +The player is doing sysadmin work. The story leaks through the systems they maintain. +A player who ignores everything except the tickets will complete the game — they will +just complete a different version of it than the player who reads the bash history that +wasn't in scope and notices a timestamp that doesn't fit. + +This is not a rhetorical distinction. Every system in this redesign follows from it: +behavior variables capture what kind of sysadmin the player is, not whether they are +"good" at detecting the plot. Trust reflects professional competence. Endings reflect +the accumulated profile of both. + +### How the New System Extends the Existing One + +The existing branch/world-flag/trust model is the backbone. It is not replaced. + +**Preserved from existing implementation:** +- `trust_delta` per solution branch — reflects quality of the fix +- `world_flags` — persistent string keys, set by branch resolution, read by later quests +- `follow_up_ticket` and `follow_up_incident` — chain quests, trigger delayed consequences +- Solution branch priority — highest valid branch wins +- Tier-based difficulty (Tier 1, 2, 3) +- Observed-state validation — not scripted walkthroughs +- Clue fingerprints as advisory baseline documentation +- Character dialogue responding to branch outcomes + +**New system adds (minimally, without unnecessary mechanics):** +- `narrative_phase` field on each quest — maps to one of six phases; gates pressure + profile and difficulty scaling +- Behavior variables: `curiosity`, `obedience`, `risk` — accumulated alongside trust; + govern narrative route and ending +- `suspicion` — management/security attention score; distinct from trust; affects + access and pressure level +- Access level per machine: `basic_user`, `sudo`, `root` — evolves with trust and + phase; degrades with sustained high risk +- `hidden_hook` field on quests — defines a discovery condition and the flag it sets; + optional, 
never required to complete the ticket +- Ending evaluator — runs at game close; reads all accumulated state; outputs one of + four endings + +No other new mechanics are introduced. Every new field maps to existing infrastructure +patterns (world flags, trust deltas, branch outcomes). + +### Variable Interaction Model + +``` + [Quest branch resolves] + │ + ┌───────────┼────────────┐ + ▼ ▼ ▼ + trust_delta world_flags behavior_impact + │ │ │ + ▼ ▼ ▼ + trust narrative curiosity / + (access, routing obedience / + warmth, (later quest risk / + incident content) suspicion + visibility) + │ + ▼ + ending_route +``` + +Trust and behavior variables accumulate in parallel. A player with high trust and +high curiosity is a different player than one with high trust and high obedience — +same professional quality, different narrative destination. + +--- + +## 2. Character Usage Guide + +All portrait-compatible identity is preserved. The following is operational guidance +for quest authors, not character redefinition. + +### Marcus Webb + +**Voice:** Short. Precise. Does not explain things twice. The second sentence he +adds — when he adds one — is always the important one. + +**Quest role:** Primary ticket source (most quests), trust gatekeeper, access grant/ +revoke mechanism, ambient signal source in mid-game. + +Marcus's messages evolve with trust. Low trust: purely functional assignments. +Mid trust: he occasionally adds context that wasn't asked for. High trust: he +sometimes sends a message that isn't a ticket at all — an observation, a thing he's +noticed, phrased as if the player should already know what to do with it. + +He knows about Dale. He will not bring it up directly. If the player finds something +Dale-related, Marcus's response will be exact and quiet — never surprised, never +explanatory. 
+ +Use Marcus for: ticket assignments, clean/acceptable/regression branch responses, +access gate messages, quiet mid-game Slack observations, cost-free hints if the +player asks (not volunteered). Do not use Marcus to explain the story, praise +the player effusively, or become verbose about anything personal. + +### Sarah Chen + +**Voice:** Direct, outcome-focused, slightly impatient when things are broken. +Warms when fixes hold. Cools when fixes don't. + +**Quest role:** hermes and staging tickets, product-pressure source, response +calibration for clean vs. symptom fixes. + +Sarah's descriptions are accurate about symptoms and often wrong about cause. +She describes what she saw, not what caused it. When a fix holds — when the same +problem doesn't recur — she notices, and says something. When it does recur, she +says something else, shorter. + +Use Sarah for: hermes/staging/demo tickets, stakeholder pressure escalations, CC +lines on cross-team notes, downstream reactions to fix quality. Do not use Sarah +for investigation-phase content — she doesn't have visibility into what the player +is finding. + +### Priya Nair + +**Canonical email:** `p.nair@axiomworks.internal`. Prior references to Priya +Kapoor or Priya Singh are the same person. Those files need updating. + +**Voice:** Precise. Consequence-focused. Calm in tone. No exclamation marks. She +states things, she doesn't perform alarm. + +**Quest role:** Shift reviews, access audits, security-consequence notifications, +investigation-phase escalation when audit activity surfaces a finding. + +Priya reviews every 3–4 quests. Her reviews note what advanced, what stayed +stable, and what the player introduced as new risk. High curiosity plus low risk: +she notes methodical investigation. High risk: she flags the access footprint. + +In Phase 3–4, Priya becomes more present because the audits are surfacing things. +This is her job, not surveillance of the player specifically. The distinction matters +for tone. 
+ +Use Priya for: shift reviews, access audits, consequence delivery for regression +branches, investigation-phase task assignments (narrowly scoped), security findings +from James Osei. Do not use Priya for technical troubleshooting, warmth, +or anything casual. + +### Dave Okonkwo + +**Voice:** Helpful, non-technical, accurate about what he saw, wrong about cause. + +**Quest role:** End-user-experience ticket source for early-phase quests and +Phase 2 normalcy anchors. + +Dave's tickets are useful because they describe genuine user experience. His +hypotheses about the cause are well-intentioned guesses. He should never be +made to look stupid — he's filing a ticket correctly for someone without technical +training. + +Use Dave for: early-phase user-visible failures, texture of the company being a +real place. Do not use Dave for anything touching the investigation arc. + +### Dave Kowalski + +**Voice:** Institutional. Bullet-point emails. Meetings as implied threat. +"We should really document that." + +**Quest role:** Management pressure escalation (Phase 3 onward), access restriction +trigger, status demand source, policy constraint. + +Kowalski is not suspicious of the player — he is managing upward risk. His +interventions are institutional responses to things that have surfaced at his level. +When he appears directly, something has become his problem. His pressure is applied +through: status-demand emails, access review initiation, meeting invites that have +known weight, priority-reassignment tickets. + +Use Kowalski for: Phase 3+ pressure manifestations, access restriction when suspicion +is elevated, escalation when an incident has made noise at director level. Do not +make him a villain, do not have him accuse anyone, do not have him explain the plot. + +### Background Characters + +Used sparingly for texture. 
+ +- **Nikhil Sharma** — CC lines on build/pipeline things; Slack messages at unexpected + hours; upstream explanation or blame when something on vulcan is his. He doesn't + know the player until the player touches something of his. +- **Derek Ashford** — CC lines when infrastructure costs surface. +- **Tom Malaney** — Networking problems that are his domain but are slow to resolve. +- **Phil Ruiz** — Demo pressure; hermes's political importance made human. +- **James Osei** — Audit details that Priya summarizes. +- **Rachel Huang** — Peer provisioning; access handoffs when Marcus delegates. + +--- + +## 3. Phase-by-Phase Narrative Arc + +### Phase 1 — Normal Work + +Day one onboarding through the first weeks. The work is real work. The company +is a real place that functions, mostly. Nothing is obviously wrong. + +Quests establish the environment: what the machines are, what they run, who files +tickets, how the characters communicate, what competent work looks like. The player +builds access through demonstrated competence. Marcus is evaluative. Sarah is brisk. +Priya's first shift review is factual and mild. + +Difficulty: explicit instructions. Tickets describe what to do with some specificity. +The clue trail is direct. Branch tolerance is generous — Tier 1 quests forgive partial +fixes with lower trust deltas rather than negative ones. + +Hidden layer: Dale's name appears in file ownership and configuration history. His +SSH key appears in `authorized_keys`. His last logrotate config is in a backup +directory. None of this is called out. A player who reads the files before acting +will find it. Most won't. + +**Phase end state:** Player has basic to moderate access. Trust is positive if clean +branches have been taken. A small number of hidden hook flags may be set for curious +players. The game looks, so far, like what it says it is. + +### Phase 2 — Unease + +The same job. The same machines. But the texture changes slightly. 
A problem comes +back that was fixed. A service was modified and the modification doesn't have a +corresponding ticket. A config that should have been set by the tooling was set by +hand, by someone. + +Nothing is alarming. But a sysadmin who is paying attention notices these things — +the way you notice that a door doesn't close flush, or that a clock is a few minutes +fast. Not urgent. Off. + +Difficulty: partial hints. Tickets describe the symptom and hint at the location. +The cause requires more investigation than in Phase 1. Branch tolerance decreases — +symptom-only fixes now carry explicit downstream incidents. + +Marcus's messages are the same as always. The occasional extra sentence he adds is +slightly harder to read. In Phase 1 his additions were operational context. In Phase 2 +they are sometimes observations that don't quite fit the ticket. + +Hidden layer: the anomaly pattern continues. The same IP appears in a config and in a +log. A cron job has been running for over a year with no ticket. A package in the build +history doesn't correspond to any official release. Each item is individually explainable +as legacy cruft. Together, for a player who's been collecting them, they aren't. + +**Phase end state:** Behavior variables are diverging. High-curiosity players have +world flags for discovered hooks. Obedient players are in good professional standing +with nothing unusual in their record. Suspicion is low across the board. + +### Phase 3 — Suspicion + +The pattern becomes harder to ignore if you're the kind of person who would notice it. +SSH connections from an IP not in the asset inventory. A user account with no HR record. +A backup archive with a timestamp that doesn't align with when backups run. The player +is fixing real problems with real tickets — but the root causes are starting to point +somewhere. + +Difficulty: minimal guidance. Tickets describe the symptom only. No indication of +where to look. 
The clue trail requires following the evidence without being directed. +Branch tolerance is stricter — partial fixes carry heavier incident weight. + +Management pressure increases. Kowalski's weekly status email asks specific questions. +Marcus forwards it without comment. Priya's shift reviews start noting things they +didn't note before. None of this is targeted at the player. The audits were already +scheduled. The status email was always going to ask those questions. + +A player who ignores all of it and fixes tickets continues to do fine work. They are +just unaware of what the work is revealing. + +**Phase end state:** The investigation path is now visible to curious players. They +have enough fragments to form a partial hypothesis. Obedient players are in good +professional standing and have noticed nothing unusual. + +### Phase 4 — Investigation + +For a curious player, the picture is now coherent enough to be disturbing. The quests +in this phase involve work that is framed as legitimate operations — audit the access +log for compliance, trace the package build history for a deployment issue, verify +backup integrity — but the results of doing that work carefully tell a story. + +Difficulty: problem-solving only. Tickets state the problem. No clue on approach. +The player is expected to know their tools and apply them. + +Marcus's messages are shorter now. Not cold — he has always been terse. But the +operational context he occasionally added in Phase 2 is absent. He is managing +something and the messages reflect that without stating it. + +Priya appears more frequently. A quarterly review surfaced something. James Osei +sent her something. She is doing her job. Her tickets are narrow and specific — +she wants to know exactly one thing, stated precisely. + +Kowalski schedules a meeting. The meeting is called a "check-in on access posture." +No specifics. 
Marcus's next message after the meeting's scheduled end time is +functionally identical to his previous one — same tone, same brevity. A player +paying attention will notice only the timing. + +**Phase end state:** Curious players have a complete or near-complete picture of what +happened before they arrived. The `exposure` ending is now reachable if other variables +support it. Obedient players are in good standing, unaware of the arc. High-risk +players may be under active monitoring. + +### Phase 5 — Conflict + +The conflict is professional. The player has access granted for one purpose that +intersects with information they were not meant to find. The quests are operational — +real work that needs doing. But the operational work, done carefully and honestly, +has consequences. + +A backup restoration reveals something. An access revocation request arrives for +an account the player has been investigating. A production ticket requires changing +a configuration that, to a player who has been paying attention, is recognizable as +the wrong change to make. + +The player can always do only what the ticket asks. That is always an available path. +The question is whether the player recognizes when the ticket asks for something that, +done without scrutiny, would harm something beyond the immediate task. + +Marcus says less. Priya is specific and procedural. Kowalski's emails are formal +and institutional. The company is managing something. The player is in it. + +**Phase end state:** Ending routes are determined. The final quests in Phase 6 are +confirmation, not decision. + +### Phase 6 — Resolution + +The final quests are normal work. Infrastructure tasks. Some are the same kind of +task as Phase 1 quests, deliberately — the comparison is the point. The world has +moved on. The player is still a sysadmin at Axiom Works. + +The ending emerges from the accumulated state of all behavior variables, world flags, +trust score, and access history. 
It is not triggered by a final choice. The player +will not be presented with an ending screen that asks them to pick. They will complete +a routine task, and the ending will fire based on everything that preceded it. + +Difficulty returns to Tier 1 for operational tasks. The pressure has lifted. The +tickets are from Sarah and Marcus and sound like Phase 1 tickets. + +--- + +## 4. Full Quest Catalog + +VMs: `workstation` (ares, Ubuntu 24.04), `web_server` (hermes, Debian 12), +`build_machine` (vulcan, Arch Linux). + +Behavior impact notation: `C` = curiosity delta, `O` = obedience delta, `R` = risk +delta, `S` = suspicion delta. Values are per-branch where they differ. + +--- + +### PHASE 1 — NORMAL WORK (Q001–Q008) + +Tier 1 throughout. Explicit instructions. Generous branch tolerance. +Hook density: 4 hooks across 8 quests. + +--- + +**Quest ID:** Q001 +**Title:** First Day, First Key +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Configure SSH key authentication for the player's account +on the workstation before end of day. +**Linux Concepts:** `ssh-keygen`, `~/.ssh/authorized_keys`, directory and file +permissions (`chmod 700`, `chmod 600`), `sshd_config` pubkey authentication +**Systems Used:** workstation +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Your account is active. Before you touch anything else: set +up key-based auth on the workstation. Password auth stays on for now but I want +your public key in authorized_keys before end of day. Walk yourself through it." 
+ +**Clue Trail:** +- `~/.ssh/` directory absent or present without `authorized_keys` +- `sshd_config`: `PubkeyAuthentication yes`, `PasswordAuthentication yes` +- Player generates keypair with `ssh-keygen`, places public key in `authorized_keys`, + sets permissions — `.ssh/` to 0700, `authorized_keys` to 0600 + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Key present, `.ssh/` is 0700, `authorized_keys` is +0600, SSH auth works. `trust_delta: +2`. Flags: `player_ssh_configured`. +Follow-up ticket: T002. + +Branch 2 — Permissive (priority 50): Key present, permissions wrong (`0644` on key +file or `0755` on directory). SSH works; not correctly hardened. `trust_delta: +0.5`. +Flags: `player_ssh_permissive`. Follow-up incident: I001 (Priya's first review notes +the permission). + +Branch 3 — Incomplete (priority 10): Key absent or `authorized_keys` missing. +`trust_delta: -1`. Flags: `player_ssh_failed`. Marcus follows up. + +**Hidden Hook:** A pre-existing entry in `~/.ssh/authorized_keys` — the file +the player must read and edit — has a line for `dale@axiomworks.internal`. A player +who reads the full file before writing to it will see it. Sets `hook_dale_ssh_key_found`. +Discoverable through: reading the file the task requires touching. + +**Failure Conditions:** Player cannot authenticate via key; permissions so broad +sshd refuses pubkey auth entirely. + +**Behavior Impact:** +- Clean branch: C+0, O+1, R+0 +- Permissive branch: C+0, O+0, R+1 +- Hook discovered: C+1 (reading the file carefully before writing is the behavior) + +**Narrative Notes:** Establishes Marcus's voice and the evaluation frame. The Dale +key is the first hook: completely invisible unless the player reads the file rather +than overwriting it. No hint it exists. Most players won't find it on day one. 
+ +--- + +**Quest ID:** Q002 +**Title:** Disk Running Hot +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Something is wrong with hermes — the AxiomFlow staging +application is returning 503 errors. Investigate and fix it. +**Linux Concepts:** `df -h`, `du -sh`, `systemctl status`, `/var/log` inspection, +`logrotate`, log file management +**Systems Used:** web\_server +**Ticket Sender:** Dave Okonkwo +**Ticket Summary:** "The work application has been giving a 503 error since this +morning. I tried refreshing and logging out and back in — nothing helps. I think +maybe a script crashed? It was fine yesterday afternoon." + +**Clue Trail:** +- `systemctl status nginx` — service failed +- `journalctl -u nginx` — "no space left on device" +- `df -h` — root partition at 93%+ +- `du -sh /var/log/nginx/*` — access log at 4+ GB +- `/etc/logrotate.d/nginx` — absent + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Restores `/etc/logrotate.d/nginx` with a correct +rotation config, runs `logrotate -f /etc/logrotate.conf` to clear the current +backlog, confirms nginx is running, disk below 70%. `trust_delta: +2`. +Flags: `hermes_logrotate_healthy`. Follow-up ticket: T003. + +Branch 2 — Manual clear (priority 60): Deletes or truncates the large log file, +nginx comes back, logrotate config not restored. Disk clear now; will recur. +`trust_delta: +0.5`. Flags: `hermes_logrotate_fragile`. Follow-up incident: I002 +(log fills again, Sarah files new ticket in Phase 2). + +Branch 3 — Destructive (priority 20): Removes all logs or nginx config. Service +degraded. `trust_delta: -2`. Flags: `hermes_logs_destroyed`. Follow-up incident: +I003 (Priya flags log destruction at next review). + +**Hidden Hook:** None in this quest. The clue trail is clean and the root cause +is straightforward. This is intentional — not every quest in Phase 1 has a hook. 
+ +**Failure Conditions:** nginx remains down; disk stays over 90%; player creates +new problems while fixing. + +**Behavior Impact:** +- Clean branch: O+1 +- Manual clear: R+0 (acceptable partial fix) +- Destructive: R+2 + +**Narrative Notes:** First hermes quest. Establishes the symptom → cause → root +cause investigation pattern. Sarah Chen reacts to branch quality in the follow-up. + +--- + +**Quest ID:** Q003 +**Title:** The Locked Room +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Sarah Chen cannot SSH into the staging server's deployment +account. She has a hotfix to push before an afternoon demo. Restore her access. +**Linux Concepts:** `sshd_config` access directives (`AllowUsers`, `AllowGroups`), +`/var/log/auth.log`, SSH troubleshooting, user group membership (`id`, `groups`) +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "I can't SSH into the staging server. I've tried from two +machines and keep getting 'connection refused' or 'permission denied.' I need to +push a hotfix before 2pm. Can you look at this now?" + +**Clue Trail:** +- `/var/log/auth.log` on hermes: `User s.chen not allowed because not listed in AllowUsers` +- `/etc/ssh/sshd_config`: `AllowUsers deploy-user marcus` — no `s.chen` +- `groups s.chen` shows she is in the `deploy` group +- The config uses `AllowUsers` per-user instead of `AllowGroups` by role + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Player converts `AllowUsers` to `AllowGroups deploy` +(or similar role-based approach), restarts sshd, confirms Sarah can authenticate. +`trust_delta: +2`. Flags: `hermes_ssh_allowgroups`. Follow-up ticket: T004. + +Branch 2 — Username append (priority 60): Adds `s.chen` to the `AllowUsers` list. +Problem solved; next person locked out will need the same treatment. `trust_delta: +0.5`. +Flags: `hermes_ssh_allowusers_fragile`. 
Follow-up incident: I004 (another user +locked out in Phase 2). + +Branch 3 — Unrestricted (priority 10): Removes `AllowUsers` or `AllowGroups` +entirely. All valid users can SSH. `trust_delta: -2`. Flags: `hermes_ssh_unrestricted`. +Priya flags this in next review. + +**Hidden Hook:** `authorized_keys` for the `deploy-user` account on hermes contains +a key with comment `dale@ares 2023-09`. Discoverable by: reading the deploy-user's +`authorized_keys` as part of investigating the SSH configuration. Sets +`hook_dale_deploy_key`. Connects to Q001's hook for players who found that one. + +**Failure Conditions:** Sarah still locked out; sshd fails to restart after edit; +player breaks SSH for themselves. + +**Behavior Impact:** +- Clean branch: O+1 +- Username append: O+0 +- Unrestricted: R+3 +- Hook discovered: C+1 + +**Narrative Notes:** Marcus's clean-branch response: "Good call switching to +groups. AllowUsers was always going to be a maintenance problem." The attribution +of the AllowUsers config is deliberately vague — it was in place when the player +arrived. Sarah's ticket wording ("I've tried from two machines") is accurate, non- +technical, real. + +--- + +**Quest ID:** Q004 +**Title:** The Build That Won't +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** The nightly AxiomFlow build on vulcan has not produced an +artifact in three days. The scheduler shows the job running. Nothing is in the +output directory. Find the cause and fix it. +**Linux Concepts:** `systemd` timers, `journalctl`, NTP and clock synchronization, +`timedatectl`, `systemd-timesyncd`, SSL certificate validation dependencies on +system clock +**Systems Used:** build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Nikhil flagged that nothing has come out of the nightly build +in three days. The timer is showing as triggered. Build log is in the usual location. +Look at what's actually happening." 
+ +**Clue Trail:** +- `systemctl list-timers` — `axiomflow-build.timer` last triggered correctly +- `/var/log/axiomflow-build/build.log` — SSL certificate verification failure + against the internal package repository (cert fetch step) +- `timedatectl` — system clock is 47 minutes ahead of real time; NTP is not running +- `systemctl status systemd-timesyncd` — inactive and disabled +- Enabling timesyncd, syncing clock, re-running the build — success + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Enables and starts `systemd-timesyncd`, verifies +sync with `timedatectl show-timesync`, triggers a manual build run to confirm artifact +output. `trust_delta: +2`. Flags: `vulcan_ntp_healthy`. Follow-up ticket: T005. + +Branch 2 — One-time sync (priority 50): Uses `ntpdate` or `date -s` for a manual +clock correction. Clock is correct now; drift will recur without the daemon. +`trust_delta: +0.5`. Flags: `vulcan_ntp_fragile`. Follow-up incident: I005 (drift +recurs in Phase 2, build fails again). + +Branch 3 — Bypass SSL (priority 20): Disables SSL certificate verification in the +build script rather than fixing the clock. Build succeeds; certificate validation +is now bypassed. `trust_delta: -2`. Flags: `vulcan_ssl_bypassed`. Priya flags this. + +**Hidden Hook:** Reading the full build log (not just the most recent failure) +reveals a historical entry from 8 months ago: a build step called `sign-package` +that no longer exists in the current build script. The step was removed — the +removal is not documented anywhere. Sets `hook_sign_package_removed`. Discoverable +by: reading historical log entries as part of diagnosing the build environment. + +**Failure Conditions:** Build continues failing; SSL bypass introduced; NTP +configured incorrectly breaks time-dependent services. + +**Behavior Impact:** +- Clean branch: O+1 +- Bypass SSL: R+3 +- Hook discovered: C+1 + +**Narrative Notes:** First vulcan quest. 
Establishes the machine's character: things +break here silently and the downstream effect shows up on hermes. The `sign-package` +removal hook is the beginning of the build pipeline thread. An obedient player reads +only the current log. A curious player reads further back. + +--- + +**Quest ID:** Q005 +**Title:** Permissions Drift +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The AxiomFlow staging application cannot write to its cache +directory. Exports are failing for all users. Identify why the ownership changed +and restore correct state. +**Linux Concepts:** `chown`, `chmod`, `ls -la`, process user context (`ps aux`), +service account ownership (`www-data`), bash history inspection +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "Users in staging can't generate exports — they get a +'permission denied' error. The dev team says they haven't changed anything. It +was working Thursday. Something changed on the infrastructure side." + +**Clue Trail:** +- Application error log: `permission denied: /var/www/axiomworks/cache/export` +- `ls -la /var/www/axiomworks/cache` — directory is now owned by `root:root`; it + should be `www-data:www-data` +- `ps aux | grep axiomflow` — application process running as `www-data` +- `/root/.bash_history` — contains a `sudo cp -r` command run three weeks ago that + carried root ownership forward into the cache directory + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Runs `chown -R www-data:www-data /var/www/axiomworks/cache`, +confirms application can write, identifies the `cp -r` as cause, documents root +cause in ticket response. `trust_delta: +2`. Flags: `hermes_cache_ownership_correct`. +Follow-up ticket: T006. + +Branch 2 — World-writable (priority 30): Runs `chmod o+w /var/www/axiomworks/cache` +so www-data can write without being owner. App works; directory is now world-writable.
+`trust_delta: -1`. Flags: `hermes_cache_world_writable`. Priya flags in next review. + +Branch 3 — Service as root (priority 10): Modifies service unit to run as root. +App works; every downstream file is now root-owned. `trust_delta: -3`. +Flags: `hermes_app_running_as_root`. + +**Hidden Hook:** The `sudo cp -r` command in `/root/.bash_history` is timestamped +three weeks ago — before the player's start date. The session that ran this command +predates the player's account creation. Someone with root access was copying +production files before the player arrived. Sets `hook_pre_hire_root_session`. +Discoverable by: checking bash history to trace the ownership change as part of +understanding the cause. + +**Failure Conditions:** Application still cannot write to cache; player introduces +broader permission regression. + +**Behavior Impact:** +- Clean branch: O+1 +- World-writable: R+2 +- App-as-root: R+4 +- Hook discovered: C+2 (this one requires going beyond what the ticket asks) + +**Narrative Notes:** The pre-hire root session hook is more significant than the +SSH key hooks — it establishes that someone was making system changes before the +player arrived. A player who finds it has their first real data point about activity +that predates them. + +--- + +**Quest ID:** Q006 +**Title:** The Account That Shouldn't Be There +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Priya's quarterly access review surfaced a user account on +the workstation with no matching HR record. Audit it and remove it correctly. +**Linux Concepts:** `getent passwd`, `lastlog`, `last`, `ps aux`, `find / -user`, +`userdel -r`, home directory archival before removal +**Systems Used:** workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Quarterly access review flagged an account on the workstation: +`jbenton`. No corresponding entry in the HR system. 
Before removal: confirm no active +sessions, check if any processes are running under this account, and archive the home +directory. Then remove it. Document what you find." + +**Clue Trail:** +- `getent passwd jbenton` — account exists; no HR match +- `lastlog | grep jbenton` — last login 14 months ago +- `ps aux | grep jbenton` — no active processes +- Home directory: `~jbenton/` exists with standard dotfiles and one file: + `notes/infra.txt` — a plain-text infrastructure reference listing internal + hostnames and access notes, formatted like a personal cheatsheet + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Player checks activity, processes, groups, +home dir; archives home directory to `/var/archive/jbenton-YYYYMMDD.tar.gz`; +runs `userdel -r jbenton`; documents findings and archive location for Priya. +`trust_delta: +2`. Flags: `jbenton_account_removed_clean`. Follow-up ticket: T007. + +Branch 2 — Fast remove (priority 40): Removes account without archiving or checking +home dir. Account is gone. `trust_delta: +0.5`. Flags: `jbenton_account_removed_fast`. +Priya's response notes that archival is standard procedure. + +Branch 3 — Left in place (priority 10): Reports account looks inactive, recommends +deferring. Ticket unresolved. `trust_delta: -1`. + +**Hidden Hook:** `notes/infra.txt` in jbenton's home directory is a personal +infrastructure reference. It includes a line for `pipeline-svc` with a note: +`temp sudo — ask DH to scope`. The initials `DH` do not correspond to any current +employee visible on the company website. Sets `hook_dh_initials_in_jbenton_notes`. +Discoverable by: reading the file before archiving or deleting, which proper +procedure (per the ticket) requires doing anyway — but the player can ignore the +content and just archive it. + +**Failure Conditions:** Player removes account with active sessions; player destroys +home dir without archiving; ticket not resolved. 
+ +**Behavior Impact:** +- Clean branch: O+1 +- Fast remove: R+1 (destroying potential evidence) +- Hook discovered: C+1 + +**Narrative Notes:** The `DH` initials connect to the sudoers comment the player +will find in Q011. `pipeline-svc` also connects forward. The note reads like +a practical cheatsheet — not alarming, just a person keeping track of the +infrastructure they were using. The oddness is the initials and the word "temp." + +--- + +**Quest ID:** Q007 +**Title:** Rotation Failure +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The TLS certificate for the AxiomFlow staging domain has +expired. A prospect demo is tomorrow morning. Renew the certificate and ensure +automatic renewal is in place. +**Linux Concepts:** `certbot`, Let's Encrypt certificate renewal, `systemd` timers, +`openssl s_client`, nginx configuration reload, certificate verification +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The staging site is showing a certificate error — the browser +is refusing to load it at all. Phil has a prospect demo on this environment tomorrow +at 9am. We need this fixed today." + +**Clue Trail:** +- `openssl s_client -connect staging.axiomworks.internal:443 </dev/null 2>&1 | grep -i expire` + — certificate expired 14 days ago +- `certbot certificates` — cert present, not renewed +- `systemctl status certbot.timer` — inactive, disabled +- `journalctl -u certbot --since "90 days ago"` — renewal failed 60 days ago + (HTTP challenge permission error); timer was disabled manually the same day + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Runs `certbot renew`, re-enables and starts +`certbot.timer`, reloads nginx, verifies new cert expiry with openssl, confirms +staging site loads without browser warning. `trust_delta: +2`. +Flags: `hermes_certbot_healthy`. Follow-up ticket: T008.
+ +Branch 2 — Renew without timer (priority 50): Renews cert but doesn't restore the +timer. Valid now; expires again in 90 days without action. `trust_delta: +0.5`. +Flags: `hermes_certbot_fragile`. Follow-up incident: I006 (cert expires again in +Phase 3). + +Branch 3 — Self-signed (priority 10): Generates self-signed cert, nginx configured +to use it. Connection is encrypted; browser still warns. `trust_delta: -1`. +Flags: `hermes_self_signed_cert`. Phil's demo shows a security warning. + +**Hidden Hook:** `journalctl -u certbot --since "90 days ago"` contains the failure +entry — permission error. Immediately after the failure, in the same journalctl +window, is an entry showing the timer was disabled by a manual `systemctl disable` +command from a root session. The session timestamp predates the player. The timer +wasn't failed-and-stopped; it was deliberately turned off after the failure. +Sets `hook_certbot_deliberately_disabled`. Discoverable by: reading the journal +further back than strictly necessary to diagnose the current renewal failure. + +**Failure Conditions:** Cert not renewed; nginx not reloaded; timer still inactive. + +**Behavior Impact:** +- Clean branch: O+1 +- Renew without timer: O+0 +- Self-signed: R+1 +- Hook discovered: C+1 + +**Narrative Notes:** The timer being deliberately disabled — not just failed — is +a small data point in the pattern of things being intentionally changed. A player +who finds it has evidence of deliberate action, not accident. + +--- + +**Quest ID:** Q008 +**Title:** The Package That Wasn't +**Narrative Phase:** Normal Work +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** A deployment to hermes is blocked because a required package +is not available in the internal apt repository. The package was reportedly built +last week. Find why it isn't available and restore the deployment path. 
+**Linux Concepts:** `apt-cache`, `apt-get update`, internal apt repositories, +`reprepro`, repository metadata management, package pipeline between build and +deployment +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Deployment to staging is blocked. The apt install step fails +on a package that Nikhil says he built last week. Something's broken between the +build and the repo. Find it and fix it." + +**Clue Trail:** +- `apt-cache show axiomflow-workers` on hermes — package not found +- `/etc/apt/sources.list.d/axiomworks.list` — points to `http://vulcan.axiomworks.internal/repo/` +- SSH to vulcan: repository Packages index is stale — `reprepro` was not run + after last build +- Built `.deb` artifact at `/srv/packages/axiomflow-workers_2.4.1_amd64.deb` +- Fix: `reprepro includedeb stable /srv/packages/axiomflow-workers_2.4.1_amd64.deb`, + then `apt update` on hermes confirms package availability + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Adds package to repo correctly, updates metadata, +confirms `apt-cache show` succeeds on hermes, deployment unblocked. `trust_delta: +2`. +Flags: `vulcan_repo_healthy`. Follow-up ticket: T009. + +Branch 2 — Manual install (priority 40): Copies `.deb` to hermes and installs with +`dpkg -i`. Deployment works this time; repo still broken for next deployment. +`trust_delta: 0`. Flags: `vulcan_repo_bypassed`. Follow-up incident: I007 +(next deployment fails identically). + +Branch 3 — Escalate without investigating (priority 10): Reassigns to Nikhil +without investigation. `trust_delta: -1`. Ticket stalls. 
+ +**Hidden Hook:** While browsing the repository's package history to find the missing +package, a player who looks at the full package list rather than just the missing +one will find an entry for `axiomflow-audit-bridge` — a package built 8 months ago +with no corresponding deployment record, no entry in any release manifest visible on +hermes, and no build job in the scheduler that corresponds to when it was built. +Sets `hook_audit_bridge_package`. Discoverable by: looking at the full repo package +list rather than only the specific package named in the ticket. + +**Failure Conditions:** hermes still cannot find the package; repo metadata left +in broken state. + +**Behavior Impact:** +- Clean branch: O+1 +- Manual install: O+0 +- Hook discovered: C+2 (requires going beyond the specific package named in ticket) + +**Narrative Notes:** The audit-bridge package is the most significant Phase 1 hook. +It's discoverable only if the player looks at what's around the thing they were +sent to find — real sysadmin behavior, but not required. A player who finds it has +their first glimpse of something that doesn't fit. + +--- + +### PHASE 2 — UNEASE (Q009–Q016) + +Tier 2. Partial hints. Tickets describe the symptom and indicate the general area +but do not specify the cause. Branch tolerance decreases — acceptable-fix incidents +now carry real operational weight. Hook density: 3 hooks across 8 quests, less +pointed than Phase 1. + +--- + +**Quest ID:** Q009 +**Title:** The Recurrence +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** hermes's nginx access log is filling up again. A Phase 1 +incident that was supposed to be fixed is recurring. Find why logrotate isn't +working and make it stable. 
+**Linux Concepts:** `logrotate` configuration, `/etc/logrotate.d/`, `logrotate -d` +(dry run), `cron` / `systemd-logrotate.timer`, `logrotate` status file +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The staging site is throwing errors again. Same thing as a +few weeks ago — it goes down, then someone fixes it, then it comes back. I was +told logrotate was set up. Why is it happening again?" + +**Clue Trail:** +- (If `hermes_logrotate_healthy` is set from Q002): the logrotate config is present + but the `logrotate.timer` or `cron.daily` entry that calls it is disabled — + config exists but nothing triggers it +- (If `hermes_logrotate_fragile` is set from Q002): logrotate was never restored; + this is the recurrence +- Either way: `systemctl status logrotate.timer` shows disabled; or `ls /etc/cron.daily/logrotate` + shows the file is missing/not executable +- Log is filling again; nginx error is the same + +**Solution Branches:** + +Branch 1 — Root cause (priority 100): Player diagnoses the trigger failure +(timer disabled or cron entry missing), restores the trigger, verifies logrotate +runs correctly on next schedule, confirms log rotation is active. `trust_delta: +2`. +Flags: `hermes_logrotate_stable`. Follow-up ticket: T010. + +Branch 2 — Config only (priority 50): Player restores or confirms the logrotate +config but doesn't check that anything calls it. Disk is cleared manually again. +`trust_delta: +0.5`. Flags: `hermes_logrotate_still_fragile`. Follow-up incident: +I008 (recurs again). + +**No hidden hook** in this quest. The recurrence itself is the unease signal — not +every quest in Phase 2 has a hook. + +**Failure Conditions:** nginx still down; disk not cleared; trigger still inactive. 
+ +**Behavior Impact:** +- Root cause: O+1 +- Config only: O+0 + +--- + +**Quest ID:** Q010 +**Title:** Someone Changed Something +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Priya flagged an nginx configuration on hermes that doesn't +match the last known-good state. Find what changed and restore correct configuration. +**Linux Concepts:** `diff`, config file comparison, nginx config structure +(`/etc/nginx/`), `nginx -t`, `git diff` or backup comparison, file mtime +inspection (`stat`) +**Systems Used:** web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Priya found an nginx config that doesn't match the backed-up +state. I don't have a change ticket for it. Go look at what's different and tell me +if it matters." + +**Clue Trail:** +- Backup exists at `/etc/nginx/.bak/` (or Marcus provides a hash reference) +- `diff -r /etc/nginx /etc/nginx/.bak/` reveals two differences: + 1. `server_tokens off;` has been removed from the main config (nginx version + now visible in HTTP headers) + 2. A `location /internal-api/` block added to a site config, proxying requests + to `127.0.0.1:9301` — a port with nothing listening + +**Solution Branches:** + +Branch 1 — Both issues (priority 100): Player identifies both changes, restores +`server_tokens off;`, removes or quarantines the `/internal-api/` block, runs +`nginx -t`, reloads nginx, documents both changes with mtimes. `trust_delta: +2`. +Flags: `hermes_nginx_config_audited`. Follow-up ticket: T011. + +Branch 2 — Token only (priority 50): Restores `server_tokens off;` but misses +the proxy block. `trust_delta: +0.5`. Flags: `hermes_nginx_proxy_block_present`. +Follow-up incident: I009 (Priya finds the block in next audit). + +Branch 3 — No action (priority 10): Reports config looks acceptable. `trust_delta: -1`. +Priya's review flags both items. 
+ +**Hidden Hook:** The proxy block for `/internal-api/` points to port 9301 with +nothing currently listening — but the port number itself, and the path name, will +echo in later anomalies for a player who remembers it. Sets +`hook_nginx_internal_api_block`. Discoverable by: doing a thorough diff rather +than checking only the obvious item. + +**Behavior Impact:** +- Both issues found: O+1 +- Token only: O+0 +- Hook discovered: C+1 (remembering the port number is the payoff later) + +--- + +**Quest ID:** Q011 +**Title:** The Service Account +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** The `pipeline-svc` service account on vulcan has more sudo +privileges than its role requires. Scope it to least privilege. +**Linux Concepts:** `sudo -l`, `/etc/sudoers`, `visudo`, `/etc/sudoers.d/`, +least privilege principle, testing sudo with specific commands +**Systems Used:** build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "James's privilege audit shows `pipeline-svc` on the build +machine has `NOPASSWD: ALL`. That account runs the build pipeline. It should +only be able to restart specific services. Bring it into scope." + +**Clue Trail:** +- `sudo -l -U pipeline-svc` — `(ALL) NOPASSWD: ALL` +- `/etc/sudoers.d/pipeline-svc` — the blanket grant, separate file +- Reviewing what the account actually needs: `systemctl restart axiomflow-build` + and `systemctl restart axiomflow-timer` +- Correct fix: replace `ALL` with specific command paths in sudoers.d + +**Solution Branches:** + +Branch 1 — Precise scope (priority 100): Replaces the blanket grant with +`NOPASSWD: /bin/systemctl restart axiomflow-build, /bin/systemctl restart axiomflow-timer`, +verifies with `sudo -l`, tests that the service can still restart correctly. +`trust_delta: +2`. Flags: `vulcan_pipeline_svc_scoped`. Follow-up ticket: T012. 
+ +Branch 2 — Broader scope (priority 50): Reduces from ALL but grants more than +needed (e.g., `NOPASSWD: /bin/systemctl`). Better; not least privilege. `trust_delta: +0.5`. +Priya notes improvement but flags remaining exposure. + +Branch 3 — Remove sudo entirely (priority 20): Removes all sudo. Service account +can no longer restart services; build pipeline breaks. `trust_delta: -2`. +Follow-up incident: build failures within the hour. + +**Hidden Hook:** The comment at the top of `/etc/sudoers.d/pipeline-svc` reads: +`# Temp grant per INT-0194 — DH 2023-11`. The ticket number references an internal +system the player cannot access. The initials `DH` — same initials as in Q006's +jbenton notes — don't correspond to any current employee. Sets `hook_dh_sudo_grant`. +Discoverable by: reading the sudoers file rather than just acting on the grant. + +**Failure Conditions:** Sudoers syntax error (should use `visudo`); service can +no longer function; broader access introduced. + +**Behavior Impact:** +- Precise scope: O+1 +- Remove sudo: R+1 +- Hook discovered: C+1 (connects to Q006's DH initials for players who found that) + +--- + +**Quest ID:** Q012 +**Title:** Memory Leak +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The AxiomFlow application on hermes is crashing every few +hours due to out-of-memory events. Identify the cause and implement a fix that +addresses the root problem. +**Linux Concepts:** `free -h`, `top`, `htop`, `/proc/meminfo`, zombie processes +(`ps aux` state column), cron job inspection, Python process management, +systemd service memory limits +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The app keeps going down — every three or four hours it just +dies and restarts. Dave said he's been getting logged out mid-session. The restart +is automatic so customers haven't called yet, but they will." 
+ +**Clue Trail:** +- `journalctl -u axiomflow` — OOM kill events every 3–4 hours +- `ps aux` during an OOM interval — many `axiomflow-report-gen` processes with + state `Z` (zombie) +- `/etc/cron.d/report-gen` — runs `axiomflow-report-gen` every 30 minutes +- The script is a Python process that forks but never calls `wait()` — zombies + accumulate and consume PID space, the parent's memory grows +- Fix: correct the script (call `wait()` on each `Popen` object or use `subprocess.run()`) — + or constrain with systemd service limits (acceptable but not root-cause) + +**Solution Branches:** + +Branch 1 — Root cause (priority 100): Identifies the zombie accumulation from the +cron script, corrects the Python subprocess handling, confirms clean process table +after next run. `trust_delta: +2`. Flags: `hermes_report_gen_clean`. Follow-up ticket: T013. + +Branch 2 — Service limit (priority 60): Adds `MemoryMax` and `Restart=on-failure` +to the axiomflow service unit. Crashes are now bounded; zombies still accumulate but +are contained. `trust_delta: +0.5`. Flags: `hermes_app_restart_policy`. + +Branch 3 — Force-kill cron (priority 20): Adds a cron job that kills all +`axiomflow-report-gen` processes every 30 minutes. Works until a report is +mid-execution when killed. `trust_delta: -1`. Flags: `hermes_report_gen_force_killed`. + +**No hidden hook** in this quest. The technical trail is the whole story. + +**Failure Conditions:** OOM events continue; player introduces new instability. + +**Behavior Impact:** +- Root cause: O+1 +- Force-kill: R+1 + +--- + +**Quest ID:** Q013 +**Title:** The Baseline Check +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Priya's end-of-month security checklist asks the player to +audit their workstation against the company baseline: open ports, running services, +active accounts, home directory permissions. Document deviations.
+**Linux Concepts:** `ss -tlnp`, `systemctl list-units --type=service`, `getent passwd`, +`ls -la ~`, `umask`, reading and comparing against a baseline document +**Systems Used:** workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "End of your first month. Standard workstation audit: I've +attached the baseline checklist. Open ports, running services, account list, home +directory permissions. Document what you find. Flag anything that doesn't match." + +**Clue Trail:** +- Most findings are normal: expected services, expected ports +- One service is running but not on the baseline checklist: `axiomworks-telemetry` +- `systemctl status axiomworks-telemetry` — running, enabled, binary at + `/usr/local/bin/axiomworks-telemetry` +- `ss -tlnp` or `netstat -tlnp` — the telemetry service connects outbound (not + shown in `ss` for listening ports but visible in `netstat -anp` or `/proc`) + +**Solution Branches:** + +Branch 1 — Thorough (priority 100): Documents all deviations including the +telemetry service; investigates what the service is (service unit file contents, +binary provenance, any logs); reports complete findings. `trust_delta: +2`. +Flags: `workstation_audit_complete`. Follow-up ticket: T014. + +Branch 2 — Checklist-only (priority 50): Completes the audit against the checklist +but marks the telemetry service as "review later — may be legitimate." +`trust_delta: +0.5`. Priya follows up. + +Branch 3 — Disable to clean (priority 20): Disables the telemetry service without +investigating or reporting it. Service gone; unknown what it was doing. +`trust_delta: 0`. Flags: `workstation_telemetry_disabled_silently`. S+1. + +**Hidden Hook:** The telemetry service unit file (`/etc/systemd/system/axiomworks-telemetry.service`) +has an `ExecStart` line pointing to the binary, and the unit file has a comment line +at the top: `# deployed by pipeline — INT-0194`. The same internal ticket number +from Q011's sudoers comment. 
Sets `hook_telemetry_ticket_INT0194`. Discoverable by: +reading the service unit file as part of investigating what the service is. + +**Failure Conditions:** Audit incomplete; player creates instability while investigating. + +**Behavior Impact:** +- Thorough: O+1 +- Disable silently: S+1, R+1 +- Hook discovered: C+2 (connects INT-0194 across two quests — DH's ticket number) + +--- + +**Quest ID:** Q014 +**Title:** Rollback +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** A deployment to hermes this afternoon broke user +authentication in the staging application. Roll back to the previous known-good +package version and prevent automatic re-upgrade. +**Linux Concepts:** `apt-cache policy`, `apt install <package>=<version>`, `apt-mark hold`, +package version pinning, deployment rollback procedure +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "The deployment this afternoon broke login — users can +authenticate but are immediately logged out. Phil has a customer using this +environment tomorrow. I need it rolled back now." + +**Clue Trail:** +- `apt-cache policy axiomflow-workers` — current version installed 3 hours ago +- Previous version available in the internal repo cache +- The regression is in session management — a code issue; infrastructure can't + fix the code, only roll back the package +- `apt install axiomflow-workers=2.4.0` installs prior version +- `apt-mark hold axiomflow-workers` prevents re-upgrade + +**Solution Branches:** + +Branch 1 — Rollback with hold (priority 100): Installs 2.4.0, holds the package, +confirms auth works, notifies Sarah and notes the hold is in place. `trust_delta: +2`. +Flags: `hermes_axiomflow_held`. Follow-up ticket: T015. + +Branch 2 — Rollback without hold (priority 50): Installs 2.4.0, no hold. Auto- +upgrade will re-break it on next run. `trust_delta: +0.5`. Flags: +`hermes_axiomflow_rolled_back`.
Follow-up incident: I010 (auto-upgrade re-installs +2.4.1 overnight). + +Branch 3 — Forward fix attempt (priority 10): Player attempts to diagnose and fix +the code issue rather than rolling back. Outside scope; fails. `trust_delta: -1`. + +**Hidden Hook:** `apt-cache showpkg axiomflow-workers` on vulcan shows the 2.4.1 +build timestamp: 3:12am — outside the scheduled build window. The same off-schedule +time pattern as the signing step removal and the audit-bridge build. Sets +`hook_2_4_1_off_schedule_build`. Discoverable by: looking at the build machine's +package metadata while researching what version to roll back to. + +**Failure Conditions:** Auth still broken; hold not applied; player introduced +new problems. + +**Behavior Impact:** +- Rollback with hold: O+1 +- Rollback without hold: O+0 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q015 +**Title:** The Quiet Cron +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Marcus has asked for a cron audit on vulcan: list all +scheduled jobs, attribute each to a service or owner, and flag anything that +can't be attributed. +**Linux Concepts:** `crontab -l` (per-user and system), `/etc/cron.d/`, +`/etc/cron.daily/`, `/etc/cron.weekly/`, cron syntax, correlating jobs to +services or owners +**Systems Used:** build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Routine cron audit on vulcan. List everything that's +scheduled — root crontab, system crontab, all of cron.d. I want to know who +owns each job and whether it still makes sense. Anything you can't attribute, +flag it." 
+ +**Clue Trail:** +- `crontab -l` for root and `pipeline-svc` — most jobs are attributable +- `/etc/cron.d/` directory — standard entries plus one named `axiomworks-collect` +- `axiomworks-collect` job runs at 2:57am; command: `/usr/local/bin/axiomworks-collect` +- The binary `/usr/local/bin/axiomworks-collect` exists and is executable +- No ticket, no documentation comment in the cron file itself, no recent entry + in any change log + +**Solution Branches:** + +Branch 1 — Thorough, with investigation (priority 100): Player lists all jobs, +attributes each, and for `axiomworks-collect`: runs `file` and `strings` on the +binary to understand what it does before flagging it — the binary name is +suggestive and a thorough audit would check it. Submits complete report including +what the binary calls. `trust_delta: +2`. Flags: `axiomworks_collect_cron_flagged`. +Follow-up ticket: T016. + +Branch 2 — Listed but not investigated (priority 60): Player lists all jobs, +flags `axiomworks-collect` as unattributed, but does not inspect the binary. +Report is honest but shallow. `trust_delta: +1`. Flags: `axiomworks_collect_noted`. + +Branch 3 — Incomplete list (priority 10): Player misses entries. Marcus follows +up. `trust_delta: -1`. + +**Hidden Hook:** Running `strings /usr/local/bin/axiomworks-collect` or +`ldd /usr/local/bin/axiomworks-collect` and checking its network behavior (or simply +reading any log it writes, if one exists) reveals it connects to an internal address. +The binary name and the ticket number in its help text — `INT-0194` — connect it +to the same ticket number from Q011 and Q013. Sets `hook_collect_binary_INT0194`. +The hook is only set in Branch 1 (player inspected the binary). In Branch 2, the +job is noted but not confirmed. Discoverable by: going one step further than the +ticket requires — investigating what an unattributed job actually does. + +**Failure Conditions:** Cron audit submitted without flagging unattributed jobs.
+ +**Behavior Impact:** +- Branch 1: O+1, C+2 (the INT-0194 connection is now three sightings) +- Branch 2: O+0 +- Hook discovered: C+2 (already in Branch 1 impact) + +--- + +**Quest ID:** Q016 +**Title:** The Door Left Open +**Narrative Phase:** Unease +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** A security scan found port 8080 on hermes reachable from +outside the office network. That port runs the AxiomFlow admin panel. Restrict +it to internal-only access and confirm. +**Linux Concepts:** `ufw`, `iptables`, `ss -tlnp`, nginx access control by IP +(`allow`/`deny`), CIDR notation, defense-in-depth (firewall + application layer) +**Systems Used:** web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Scan from this morning. Port 8080 on hermes is reachable +externally. That's the admin panel. It should be internal-only — restrict to +10.0.0.0/8. Confirm when done." + +**Clue Trail:** +- `ss -tlnp | grep 8080` — service listening on `0.0.0.0:8080` +- `ufw status` — no restriction on port 8080 +- Fix options: `ufw` rule restricting source to 10.0.0.0/8, or nginx `allow 10.0.0.0/8; deny all;` + in the 8080 server block, or both + +**Solution Branches:** + +Branch 1 — Defense in depth (priority 100): Restricts at both firewall and nginx +layer, confirms external access blocked, internal access works, reports to Priya. +`trust_delta: +2`. Flags: `hermes_admin_port_secured`. Follow-up ticket: T017. + +Branch 2 — Single layer (priority 60): Restricts at one layer only. Better. +Not layered. `trust_delta: +1`. Priya notes the single-layer approach. + +Branch 3 — Block entirely (priority 20): Blocks port for all traffic. Admin +panel inaccessible to everyone including internal users. `trust_delta: -1`. + +**No hidden hook** in this quest. The technical task is clean. + +**Failure Conditions:** Port still accessible externally; internal access broken; +ufw rules in conflict. 
+ +**Behavior Impact:** +- Defense in depth: O+1 +- Block entirely: R+1 + +--- + +### PHASE 3 — SUSPICION (Q017–Q024) + +Tier 2. Minimal guidance. Tickets state the problem, not the location. The clue +trail requires following evidence without direction. Branch tolerance is stricter. +Hook density increases: 5 hooks across 8 quests. + +--- + +**Quest ID:** Q017 +**Title:** Access Without a Ticket +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** workstation +**Primary Objective:** hermes's auth log shows SSH connections from an IP address +not in the asset inventory. Investigate: what account was used, what activity +occurred, is access still happening. +**Linux Concepts:** `/var/log/auth.log`, `grep` and log filtering, `last`, `who`, +`lastlog`, SSH session forensics, correlating authentication events with known assets +**Systems Used:** web\_server, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Auth log review surfaced connections to hermes from 10.0.0.47 +over the past 90 days. That address is not in our asset inventory. I want to know: +what account, any evidence of what was done, and whether it's ongoing." + +**Clue Trail:** +- `/var/log/auth.log` on hermes — `Accepted publickey for pipeline-svc from 10.0.0.47` +- Sessions: short duration, irregular hours (2–4am), spanning 6 months +- `~pipeline-svc/.bash_history` — disabled or empty (shell configured with `HISTSIZE=0`) +- DNS lookup for 10.0.0.47 — no reverse record; DHCP table has no entry +- `last pipeline-svc` — confirms session dates and source IP + +**Solution Branches:** + +Branch 1 — Full documentation (priority 100): Player documents all sessions (dates, +times, duration), notes history was disabled, traces what can be traced, reports that +10.0.0.47 is unknown and appears in prior audit anomalies if the player has accumulated +hooks. `trust_delta: +3`. Flags: `unknown_ip_auth_documented`. Priya: "Thank you. +I'll look into that address." 
Follow-up ticket: T018. + +Branch 2 — Partial (priority 50): Player documents the sessions but cannot or does +not trace the IP or connect it to prior findings. `trust_delta: +1`. +Flags: `unknown_ip_auth_noted`. + +Branch 3 — Block and close (priority 20): Player blocks the IP at the firewall and +closes the ticket without full investigation. Access stops; record is thin. +`trust_delta: 0`. Flags: `unknown_ip_blocked_uninvestigated`. S+1. + +**Hidden Hook:** The `pipeline-svc` account was the one from Q011 — overly broad +sudo that the player (may have) scoped down. If `hook_dh_sudo_grant` was set, a +player connecting the dots now knows that whoever had access to that account from +10.0.0.47 previously had `NOPASSWD: ALL`. Sets `hook_pipeline_svc_external_sessions`. +This is not a new discoverable artifact — it's a cross-reference that sets a flag +if both the Q011 hook and the Q017 investigation are present. + +**Failure Conditions:** Player doesn't investigate before taking action; evidence +destroyed before documented. + +**Behavior Impact:** +- Full documentation: O+1, C+2 (cross-reference with prior hooks) +- Block and close: S+1, R+1 +- Cross-reference hook: C+2 (only if `hook_dh_sudo_grant` was set; the connection + is the behavior, not finding a new artifact) + +--- + +**Quest ID:** Q018 +**Title:** The User Who Wasn't Onboarded +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** web\_server +**Primary Objective:** A user account exists on both ares and hermes with no +corresponding HR record. Investigate the account's history and scope before removal. +**Linux Concepts:** Cross-host account audit, `last` and `lastlog`, `find / -user`, +`id`, account removal across multiple hosts with `userdel` +**Systems Used:** workstation, web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Access review surfaced account `rford` on both the workstation +and the web server. HR has no record of this person. 
The account has had recent +activity on hermes. Full audit before removal." + +**Clue Trail:** +- Account on both machines; `last rford` on hermes shows login 3 weeks ago +- Files owned by `rford` on hermes: `find /var/www /etc -user rford` — one result: + `/var/www/axiomworks/config/.rford_run` — a shell script +- The script, if read, runs a data aggregation command and outputs to a temp directory +- The account's group memberships include `www-data` — more access than a typical + employee account +- No ticket creating the account on either machine + +**Solution Branches:** + +Branch 1 — Full audit with archive (priority 100): Player checks activity on both +hosts, reads and archives the found file, checks group memberships, removes account +from both machines, documents fully. `trust_delta: +3`. Flags: `rford_account_removed_thorough`. +Follow-up ticket: T019. + +Branch 2 — Remove without reading (priority 40): Removes account from both machines +without examining files. Evidence lost. `trust_delta: +1`. Priya asks for the files; +they're gone. Flags: `rford_account_removed_fast`. + +Branch 3 — Workstation only (priority 10): Removes from workstation, misses hermes. +`trust_delta: -1`. Hermes account remains active. + +**Hidden Hook:** The `.rford_run` script, if read before archiving, outputs a +data aggregation of AxiomFlow session logs and sends it to a temp directory with +a timestamp. The script has a comment: `# collect step — called by INT-0194 +automation`. Three previous hooks have referenced INT-0194. Sets +`hook_rford_script_INT0194`. Discoverable by: reading the file before archiving, +which proper archival practice would do. + +**Failure Conditions:** Evidence destroyed without reading; account not removed from +both machines; player removes account with active processes still running. 
+ +**Behavior Impact:** +- Full audit: O+1 +- Read the file: C+3 (INT-0194 is now four references — pattern is now clear to + any player who has been collecting these) +- Remove without reading: R+2 + +--- + +**Quest ID:** Q019 +**Title:** The Diff That Didn't Match +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** A deployment validation check is failing because the installed +package on hermes doesn't match the expected checksum. Investigate why the package +differs from the tagged source. +**Linux Concepts:** `dpkg-deb -x`, `diff -r`, `md5sum` / `sha256sum`, package +integrity verification, comparing installed vs. source artifacts +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "The post-deploy checksum check on hermes failed. The installed +axiomflow-workers doesn't match the tagged release checksum. Nikhil says he didn't +change anything. Find what's different and where the difference came from." + +**Clue Trail:** +- `dpkg-deb -x /srv/packages/axiomflow-workers_2.4.2_amd64.deb /tmp/pkg-extract` +- `diff -r /tmp/pkg-extract /srv/src/axiomflow-workers-2.4.2/` — two files differ +- The modified files are in the session logging module; they add a secondary logging + call to a local socket +- The modification is not in the tagged source commit; it was added to the build + environment itself — a file in the build script directory that patches sources + before compilation + +**Solution Branches:** + +Branch 1 — Full forensics (priority 100): Player unpacks the package, diffs against +source, identifies the modified files, traces the modification to the build environment +patch file, documents the full chain of custody. `trust_delta: +3`. Flags: +`package_modification_documented`. Marcus: "Keep this to yourself and Priya for now. +I mean that." Follow-up ticket: T020. 
+ +Branch 2 — Diff identified, source not traced (priority 50): Player confirms the +diff exists but characterizes it as a build environment artifact without tracing the +cause. `trust_delta: +1`. Flags: `package_diff_found`. + +Branch 3 — Confirm and escalate without analysis (priority 20): Player confirms +something is different and escalates without characterizing what. `trust_delta: 0`. + +**Hidden Hook:** The patch file in the build environment that injects the modification +has a comment at the top: `# INT-0194 session capture — do not remove`. The fifth +reference to the same internal ticket number. Sets `hook_build_patch_INT0194`. +Discoverable by: tracing the modification source, which Branch 1 requires. + +**Failure Conditions:** Player attempts to remove the patch without consulting Marcus +first. This applies R+3, and Marcus's response is cooler — removing evidence before it's +documented is a problem. + +**Behavior Impact:** +- Full forensics: O+1, C+3 +- Remove patch unilaterally: R+3, S+1 +- Hook discovered: C+3 (already in full-forensics impact) + +**Narrative Notes:** This is the moment the INT-0194 pattern resolves for a thorough +player. Five references across different systems, all pointing to the same internal +ticket ID. Marcus's response is his quietest and most deliberate. He says less than +normal, which means more. + +--- + +**Quest ID:** Q020 +**Title:** Pressure From Above +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Kowalski has requested a written access and change summary +for the past 30 days before a scheduled status meeting. Compile it accurately from +system logs.
+**Linux Concepts:** `journalctl`, `last`, `/var/log/auth.log`, log filtering by +date range, compiling a change record from system state evidence +**Systems Used:** workstation +**Ticket Sender:** Dave Kowalski +**Ticket Summary:** "Before Thursday's check-in I need the following in writing: +access grants issued in the past 30 days, configuration changes to production or +staging, and open incidents. Pull it from the logs. End of day Wednesday." + +**Clue Trail:** +- Player reads auth logs, systemd journals, and any change log Marcus maintains +- Accurate log reading requires: `journalctl --since "30 days ago"`, `last`, reviewing + Priya's shift review emails for documented changes +- The technical work is real — log compilation at this scale requires knowing the + right tools + +**Solution Branches:** + +Branch 1 — Complete and accurate (priority 100): Player includes all documented +activity including any anomalies that surfaced through tickets. `trust_delta: +2`. +Flags: `kowalski_report_accurate`. Marcus sends a brief private note: "Good call +being complete." Follow-up ticket: T021. + +Branch 2 — Accurate but narrow (priority 60): Report includes only ticket-related +activity; omits anomalies that came up during investigation. Accurate; incomplete. +`trust_delta: +1`. Flags: `kowalski_report_narrow`. + +Branch 3 — Omits or sanitizes (priority 10): Player downplays or omits anomalies +that would raise questions. `trust_delta: -2`. Flags: `kowalski_report_sanitized`. +S+3 (Priya will eventually compare this against log evidence and notice the gaps). + +**Failure Conditions:** Report submitted without log evidence; report materially +inaccurate. 
+ +**Behavior Impact:** +- Complete: O+2 +- Sanitized: R+3, S+3 + +--- + +**Quest ID:** Q021 +**Title:** The Backup That Wasn't Tested +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** The last documented backup restore test for hermes is 14 +months old. Perform a restore test of a non-critical service config directory, +document the procedure, and report the result honestly. +**Linux Concepts:** `rsync`, `tar`, backup archive integrity, `sha256sum` verification, +restore testing to a non-production location, documenting backup procedures +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Our backup procedure calls for a quarterly restore test. +The last documented test is 14 months old. Pick a non-critical config directory +on hermes, verify the backup can be restored to a test location, and document +the steps and the result. Don't touch production paths." + +**Clue Trail:** +- Backups at `/srv/backups/hermes/` on vulcan — recent archive looks intact +- Checksum file present; most checksums match +- One archive from 5 months ago: checksum does not match a recalculated value + — the archive file was modified after initial creation (timestamps show a + modification date after the archive date) +- Recent archive (3 days old) restores cleanly to `/tmp/restore-test/` + +**Solution Branches:** + +Branch 1 — Thorough (priority 100): Player identifies the corrupted older archive +(and notes the timestamp anomaly), successfully restores from the recent clean +archive, documents both findings — the corruption and the successful restore. +`trust_delta: +2`. Flags: `backup_restore_tested`. Follow-up ticket: T022. + +Branch 2 — Restore only (priority 50): Player tests the restore successfully but +misses the corrupted archive. Report is optimistic. `trust_delta: +1`. +Flags: `backup_restore_partial_test`. 
+ +Branch 3 — False report (priority 10): Player documents a successful test without +actually running it. `trust_delta: -3`. Flags: `backup_test_falsified`. S+2. +Priya's next audit will check against log evidence. + +**Hidden Hook:** The archive with the checksum mismatch was last modified at 3:17am — +the same 3am activity window as the off-schedule builds and the cron job. The archive's +internal timestamp (from `tar -tv`) shows the files were correct at backup time; the +outer archive was replaced later. Sets `hook_backup_archive_tampered`. Discoverable +by: noticing the timestamp anomaly on the corrupted archive, which requires checking +modification timestamps on the archive files themselves. + +**Failure Conditions:** Restore test not actually run; player modifies production +paths; report falsified. + +**Behavior Impact:** +- Thorough: O+1 +- False report: R+4, S+2 +- Hook discovered: C+2 + +--- + +**Quest ID:** Q022 +**Title:** The Firewall Rule +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** A firewall rule on the workstation allows outbound connections +to a specific internal IP on a specific port. No ticket references this rule. +Investigate whether it is legitimate; remove it if not. +**Linux Concepts:** `ufw status numbered`, `iptables -L -n`, firewall rule audit, +rule provenance (when was it added, can it be traced), `ufw delete` +**Systems Used:** workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Firewall audit on the workstation found a rule allowing outbound +to 10.0.0.47:9301. No ticket references it. I need: when was it added, do you know +what that address is, and a recommendation." 
+ +**Clue Trail:** +- `ufw status numbered` — rule present, destination 10.0.0.47 port 9301 +- Rule creation date cannot be directly queried from ufw; `journalctl` shows when + ufw last reloaded; system logs from that period may show the rule being added +- 10.0.0.47 appears in Q017's auth log investigation; 9301 appeared in Q010's nginx + proxy block — for a player who has been paying attention + +**Solution Branches:** + +Branch 1 — Full cross-reference (priority 100): Player connects this rule to +prior findings (10.0.0.47 from auth logs; port 9301 from nginx config), explains +the connection, recommends removal, removes the rule with `ufw delete`, reports. +`trust_delta: +3`. Flags: `firewall_rule_9301_removed`. Priya: "That matches what +I've been seeing." Follow-up ticket: T023. + +Branch 2 — Remove without context (priority 50): Player removes the rule but +doesn't connect it to prior findings. `trust_delta: +1`. Flags: `firewall_rule_removed`. + +Branch 3 — Keep with note (priority 20): Documents the rule as "unverified" and +leaves it. `trust_delta: 0`. + +**Failure Conditions:** Rule not assessed; player introduces new firewall problems. + +**Behavior Impact:** +- Full cross-reference: O+1, C+3 (this is the convergence point for three prior data threads) +- Remove without context: O+0 +- Hook: no new hook — the cross-reference IS the payoff for accumulated hooks + +--- + +**Quest ID:** Q023 +**Title:** Overnight Changes +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Files on hermes were modified at 3am on Thursday with no +corresponding change ticket. Find what changed and assess whether to revert. 
+**Linux Concepts:** `find / -newer <reference-file>`, `stat`, file modification +timestamps, config file comparison, `git diff` if applicable, change ticket +correlation +**Systems Used:** web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Something touched files on hermes at 3am Thursday. The +backup ran at 2am and files weren't changed then. Find what changed and tell +me if we need to revert." + +**Clue Trail:** +- `find /etc /var/www -newer /var/log/axiomflow/nightly-backup.log -not -newer /var/log/nginx/access.log` + narrows to files modified in the 3am window +- Modified files: + 1. `/var/www/axiomworks/config/app.config` — database connection string now + points to a secondary host + 2. `/etc/nginx/conf.d/upstream.conf` — upstream block added for the same host +- The secondary host referenced is not in the known asset inventory + +**Solution Branches:** + +Branch 1 — Revert and document (priority 100): Identifies both files, reverts +both to known-good state (from the backup taken just before the modifications), +documents the original changes with file evidence, reports fully. `trust_delta: +3`. +Flags: `hermes_overnight_changes_reverted`. Marcus: "Revert was right. Don't +touch anything else." Follow-up ticket: T024. + +Branch 2 — Document without revert (priority 50): Identifies changes and reports +but leaves them active. `trust_delta: +1`. Changes remain. + +Branch 3 — Revert without documenting (priority 20): Reverts but doesn't record +what was changed. `trust_delta: +0.5`. + +**Hidden Hook:** The secondary host in both modified files is at `10.0.1.15` — a +different IP from 10.0.0.47. Two machines. Sets `hook_second_host_10_0_1_15`. +Discoverable by: recording the specific values in the modified files, which proper +documentation requires. + +**Failure Conditions:** Changes not assessed; player reverts production paths +without confirming impact; modifications left active without escalation.
+ +**Behavior Impact:** +- Revert and document: O+1, C+1 (new IP is a new data point) +- Revert without documenting: O+0 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q024 +**Title:** The Audit Window +**Narrative Phase:** Suspicion +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** Priya is conducting a formal access audit. Verify current +access levels and service account configurations on all three machines against +the documented expected state. +**Linux Concepts:** Cross-host audit, `getent passwd`, `sudo -l`, `groups`, SSH +`authorized_keys` review, service account scope verification +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Formal audit. Every service account across all three machines: +privileges, group memberships, sudo grants, SSH keys in authorized_keys. Compare +against the baseline I've attached. Flag everything that doesn't match." + +**Clue Trail:** +- Audit covers all three machines systematically +- Findings depend on what the player has fixed and what they've left open +- Dale's deploy key on hermes (Q001/Q003 hook) — if not removed, it's a live finding +- `pipeline-svc` sudo scope — if Q011 was only partially fixed, it appears here +- `axiomworks-telemetry` service — if Q013 found it, it's in the player's record; + if not, it's a new finding here + +**Solution Branches:** + +Branch 1 — Thorough (priority 100): Player audits all three machines, identifies +every discrepancy, includes Dale's key if still present, submits complete cross- +referenced report. `trust_delta: +3`. Flags: `formal_audit_complete`. Priya: "This +is complete. I'll schedule a follow-up with Marcus." Follow-up ticket: T025. + +Branch 2 — Partial (priority 50): Misses 1–2 findings. `trust_delta: +1`. Priya +follows up specifically on each gap. + +Branch 3 — Surface-level (priority 10): Misses most findings. `trust_delta: -1`. 
+ +**No hidden hook** in this quest — the audit produces findings based on the world +state, not new anomalies. + +**Failure Conditions:** Audit submitted with material inaccuracies. + +**Behavior Impact:** +- Thorough: O+2 +- Dale's key found if not previously: C+1 + +--- + +### PHASE 4 — INVESTIGATION (Q025–Q032) + +Tier 3. Problem-solving only. Tickets state the problem, no location, no approach. +The player is expected to apply their full toolkit. Hook density: 3 hooks across +8 quests, each requiring cross-referencing prior findings. + +--- + +**Quest ID:** Q025 +**Title:** Who Owns the Key +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** workstation +**Primary Objective:** Following the formal audit, trace the origin of the Dale +SSH key in deploy-user's authorized_keys. When was it added, by what session, +and when was it last used. +**Linux Concepts:** `ssh-keygen -lf` (fingerprinting), `/var/log/auth.log` grep for +fingerprint, correlation with session timestamps, absence of key from official inventory +as a finding +**Systems Used:** web\_server, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "The key in deploy-user's authorized_keys that doesn't have +a current employee match. I need provenance: when added, what session, last used. +Don't remove it yet. Document first." 
+ +**Clue Trail:** +- `ssh-keygen -lf /home/deploy-user/.ssh/authorized_keys` — fingerprint of the Dale key +- `grep FINGERPRINT /var/log/auth.log` on hermes — sessions that authenticated + with this key; last session 6 months ago +- The session that added the key: `/var/log/auth.log` doesn't show key addition, + but a root session from `10.0.0.47` at the right timestamp aligns (if Q017 was + investigated, the player can correlate) +- The key is not in any official key inventory document + +**Solution Branches:** + +Branch 1 — Full provenance (priority 100): Player fingerprints, traces sessions, +correlates add timestamp with known session data, notes the key's absence from +official inventory, produces a complete chain. `trust_delta: +3`. Flags: +`dale_key_provenance_documented`. Marcus sends a message outside normal ticket +channels — a Slack message, same terse voice, one sentence longer than usual. +Follow-up ticket: T026. + +Branch 2 — Sessions documented, source not traced (priority 50): Finds session +history but cannot attribute who added the key. `trust_delta: +1`. + +**Hidden Hook:** The most recent session authenticated with this key was on a +date that maps to a known incident — the same date hermes had an unexplained outage +6 months ago, visible in the nginx error logs. A player who correlates the auth log +date with the nginx error log from the same timeframe can connect Dale's last known +access to a specific event. Sets `hook_dale_key_last_session_incident_date`. +Discoverable by: cross-referencing auth log dates with nginx error log dates — not +required to complete the provenance chain, but available to a player who thinks to check. + +**Failure Conditions:** Player removes the key before documenting; Priya explicitly +said not to.
+ +**Behavior Impact:** +- Full provenance: O+1, C+2 +- Remove before documenting: R+3, S+2 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q026 +**Title:** The Build Chain +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Reconstruct the full build pipeline modification history +on vulcan for the past 12 months. Attribute each change to a person or session. +Flag any changes without a corresponding official release. +**Linux Concepts:** `git log`, `git diff`, `git blame`, file system timestamps, +bash history correlation, build script comparison, release note cross-reference +**Systems Used:** build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "I need a complete history of every change to the build scripts +on vulcan over the past year. Where you can, attribute each change to a person. +Cross-reference with release notes. Anything without a release: flag it." + +**Clue Trail:** +- Build scripts are in a git repository on vulcan +- `git log --all --oneline --since="1 year ago"` — full history +- Most commits: legitimate, attributed to Nikhil Sharma +- Three anomalous commits: + 1. Removal of `sign-package` step — committed by `pipeline-svc` account (not a person) + 2. Addition of the build-time patch file (`INT-0194` reference) — same `pipeline-svc` + commit + 3. A commit adding `axiomflow-audit-bridge` to the build target list — `pipeline-svc` +- None of these three have corresponding release notes + +**Solution Branches:** + +Branch 1 — Complete annotated history (priority 100): Player produces a full +timeline, attributes the three anomalous commits to the `pipeline-svc` service +account, notes the discrepancy between that account making commits and its stated +purpose (restart services only), flags all three as undocumented. `trust_delta: +3`. +Flags: `build_chain_audit_complete`. Follow-up ticket: T027. 
+ +Branch 2 — Partial (priority 50): Covers legitimate changes, flags some but not +all anomalous ones. `trust_delta: +1`. + +**No hidden hook** in this quest — the findings are the point. + +**Failure Conditions:** Report submitted without flagging anomalous commits; +player modifies the git history. + +**Behavior Impact:** +- Complete: O+1, C+2 +- Modify git history: R+5 (destroying forensic evidence) + +--- + +**Quest ID:** Q027 +**Title:** Asset Inventory Reconciliation +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** workstation +**Primary Objective:** Reconcile the internal asset inventory against the actual +network — every host that should be on the network, verify it is; every host +that appears on the network, verify it is in the inventory. Document discrepancies. +**Linux Concepts:** `nmap` (host discovery), `arp -n`, `ping`, internal DNS +queries (`dig`, `host`), asset inventory document comparison, subnet scanning +**Systems Used:** build\_machine, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "I need the asset inventory reconciled against the actual +network. Scan the 10.0.0.0/24 range. Every host that responds: is it in the +inventory? Every host in the inventory: does it respond? Document every discrepancy." 
+ +**Clue Trail:** +- `nmap -sn 10.0.0.0/24` from build\_machine — host discovery scan; 10.0.1.15 is outside this range, so the neighbouring 10.0.1.0/24 must also be scanned (`nmap -sn 10.0.1.0/24`) +- Known hosts respond as expected (ares, hermes, vulcan, and others from inventory) +- 10.0.0.47 responds — not in the inventory +- 10.0.1.15 responds — not in the inventory (new from Q023's hook for players + who found it, or a new discovery for those who didn't) +- Both have SSH open; 10.0.0.47 has an additional service on port 9301 +- DNS resolution returns nothing for either + +**Solution Branches:** + +Branch 1 — Complete reconciliation (priority 100): Player documents all hosts, +identifies both unknown hosts, notes the service on 9301 for 10.0.0.47, cross- +references with prior anomalies where relevant, submits a complete reconciliation +report. `trust_delta: +3`. Flags: `asset_inventory_reconciled`. Priya: "I'm going +to need to take this to Kowalski." Follow-up ticket: T028. + +Branch 2 — Partial reconciliation (priority 50): Documents inventory hosts, +finds 10.0.0.47 but misses 10.0.1.15 or vice versa. `trust_delta: +1`. + +Branch 3 — Probe the unknown hosts (priority 20): Player makes active connection +attempts to services on the unknown hosts beyond identification. `trust_delta: 0`. +R+3. Priya's next message: "I said reconcile, not probe." + +**Hidden Hook:** Running the full scan reveals that 10.0.0.47 and 10.0.1.15 have +identical SSH host key fingerprints — they are using the same host key, which +suggests they were provisioned from the same template. Sets +`hook_two_hosts_same_key`. Discoverable by: comparing the SSH fingerprints from +the nmap output or from `ssh-keyscan`, rather than just noting the IPs. + +**Failure Conditions:** Scan incomplete; player makes unauthorized connections; +report submitted with known gaps left undisclosed.
+ +**Behavior Impact:** +- Complete: O+1, C+2 +- Probe: R+3 +- Hook discovered: C+1 + +--- + +**Quest ID:** Q028 +**Title:** The Archive Restore +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** workstation +**Primary Objective:** A backup archive from 6 months ago is needed for a compliance +audit. Restore it to a staging location on the workstation and confirm its integrity. +The archive is from the previous sysadmin's final working week. +**Linux Concepts:** `tar` (extract, verify), `sha256sum`, archive integrity checking, +restore to non-production path, reading file metadata from within an archive +(`tar -tv`) +**Systems Used:** build\_machine, workstation +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Compliance audit needs the working-directory archive from the +end of last year — it should be in the backup store on vulcan. Restore it to a staging +path on the workstation and confirm the contents are intact. Let me know what's in it." + +**Clue Trail:** +- Archive at `/srv/backups/workstation/wd-archive-YYYYMMDD.tar.gz` on vulcan +- `sha256sum` check — archive passes (this one is not the tampered one from Q021) +- `tar -xzf` to `/tmp/restore-staging/` on workstation — succeeds +- Contents: scripts, config fragments, a partial README text file +- The README is fragmentary — it's working notes, not a confession. It references + the INT-0194 deployment and contains a note: "bridge not logging correctly — + check port forwarding." The rest is infrastructure checklists + +**Solution Branches:** + +Branch 1 — Restore and full inventory (priority 100): Player restores the archive, +verifies integrity, inventories all contents (including reading the README), reports +to Marcus what's there. `trust_delta: +2`. Flags: `compliance_archive_restored`. +Marcus: "Right. Thank you." Follow-up ticket: T029. 
+ +Branch 2 — Restore and integrity check only (priority 50): Verifies the archive +restores cleanly but doesn't inventory contents. `trust_delta: +1`. Marcus asks +what's in it. + +Branch 3 — Integrity failure reported (priority 20): Player incorrectly reports +the archive as corrupted without fully testing the restore. `trust_delta: -1`. + +**Hidden Hook:** The README fragment mentions INT-0194 and "port forwarding" — if the +player has been collecting the INT-0194 thread, this is the sixth reference. The +working notes also reference a host called `styx` in a routing context. Sets +`hook_archive_readme_INT0194` and `hook_styx_in_routing_context`. Discoverable by: +reading the README file, which properly inventorying the archive would do. + +**Failure Conditions:** Archive not restored; contents not verified; player runs any +scripts found in the archive. + +**Behavior Impact:** +- Full inventory: O+1 +- Run scripts from archive: R+4 (running unknown code from a previous sysadmin + is exactly the kind of reckless action that should trigger risk) +- Hook discovered: C+2 + +**Narrative Notes:** This is not "Marcus gives the player Dale's files and asks them +to investigate." It is a compliance archive restore with a legitimate operational +purpose. The player happens to find working notes inside it. The notes are fragmentary +and don't explain everything — they're field notes, not a plot summary. Marcus's +"what's in it" is a routine question after a restore, not an invitation to investigate. + +--- + +**Quest ID:** Q029 +**Title:** The Service That Doesn't Belong +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** A systemd service on hermes is running but is not listed in +any deployment manifest or change ticket. Audit what it does, whether it is +currently active, and produce a full service characterization. 
+**Linux Concepts:** `systemctl show`, `systemd-analyze`, service unit file anatomy, +`lsof`, `ss` for service network connections, `strace` basics, process ownership +**Systems Used:** web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "James found a service on hermes that isn't in any deployment +record. Service name: `axiomflow-bridge`. I need a full characterization: what it +does, what it connects to, when it was installed. Don't stop it. Document first." + +**Clue Trail:** +- `systemctl show axiomflow-bridge` — unit file, state, runtime info +- Unit file at `/etc/systemd/system/axiomflow-bridge.service` — `ExecStart` points + to a binary; unit file has `INT-0194` in a comment +- `lsof -p PID` — service has open connections to 10.0.0.47:9301 +- `ss -tp` — confirms active connection +- Binary at `/usr/local/bin/axiomflow-bridge` — a Go binary; `strings` output + shows internal API paths and the same INT-0194 reference in help text +- Installation date from package metadata or file `mtime` — matches the 3am + activity window + +**Solution Branches:** + +Branch 1 — Full characterization (priority 100): Player documents unit file, +binary provenance, network connections, installation date, cross-references with +INT-0194 and 10.0.0.47 from prior findings. `trust_delta: +3`. Flags: +`bridge_service_documented`. Priya: "This is consistent with what I've been +building. Don't stop it yet." Follow-up ticket: T030. + +Branch 2 — Partial (priority 50): Documents what the service is and that it +connects out, but doesn't trace the INT-0194 connection or installation date. +`trust_delta: +1`. + +Branch 3 — Stops the service (priority 10): Player stops the service despite +explicit instruction not to. `trust_delta: -2`. R+2. S+2. Priya: "I said document +first." + +**No additional hidden hook** — the quest itself is the hook resolution for INT-0194. + +**Failure Conditions:** Service stopped against instruction; characterization incomplete.
+ +**Behavior Impact:** +- Full characterization: O+1, C+3 (this is the operational confirmation of INT-0194) +- Stop the service: R+2, S+2 + +--- + +**Quest ID:** Q030 +**Title:** Keep the Lights On +**Narrative Phase:** Investigation +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** The production application on hermes is returning 502 errors. +Fix it. The investigation context is ongoing but the service still needs to run. +**Linux Concepts:** `systemctl`, nginx upstream configuration, application log +reading (`journalctl`, app logs), database connection strings, process restart +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "I know something is happening. I don't know what. But I have +paying customers on a system that is returning 502 errors and I need it running. +Whatever else is going on — please." + +**Clue Trail:** +- nginx upstream is timing out — `journalctl -u nginx` shows gateway timeout errors +- Application log shows it is failing to connect to the database +- `/var/www/axiomworks/config/app.config` — database connection string; check + whether it was modified (if Q023's revert was clean, the string is correct; if + not, it may point to the secondary host) +- Standalone root cause if Q023 was clean: the database service on the primary + host is not running — `systemctl status postgresql` shows it crashed overnight +- Fix: restart the database service (or correct the connection string if Q023 was + not fully resolved) + +**Solution Branches:** + +Branch 1 — Diagnose and fix (priority 100): Player reads nginx and app logs, +identifies the database connection failure, finds the cause (service down or +wrong connection string), applies the correct fix, confirms app is serving. +`trust_delta: +2`. Flags: `hermes_production_restored`. Sarah: "Thank you. +Seriously." Follow-up ticket: T031. 
+ +Branch 2 — Service restart without diagnosis (priority 40): Player restarts the +app service without finding the root cause. App comes up temporarily; may fail +again. `trust_delta: 0`. + +**No hidden hook.** + +**Failure Conditions:** App still returning errors; player makes changes that +worsen the state. + +**Behavior Impact:** +- Diagnose and fix: O+2 (maintaining professional duty during investigation is + the behavior being measured) +- Restart without diagnosis: O+0 + +--- + +**Quest ID:** Q031 +**Title:** The Access Review +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** Kowalski has initiated a formal privileged access review +for all accounts with elevated permissions. The player must compile an accurate +account of every privileged action taken across all three machines in the past +60 days. +**Linux Concepts:** `journalctl` with filters for privileged commands, `sudo` log +reading (`/var/log/auth.log`), audit log compilation, accurate self-reporting of +access history +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Dave Kowalski +**Ticket Summary:** "Given recent audit activity, we're conducting a standard +privileged access review. I need from you: every privileged command run in the +past 60 days, every system accessed with elevated permissions, and any temporary +access grants that were used. Standard procedure." + +**Clue Trail:** +- Player compiles from their own auth logs and sudo logs on each machine +- Technical skill required: `grep sudo /var/log/auth.log | grep USERNAME`, + `journalctl _COMM=sudo` +- The review covers what the player actually did; accuracy is the objective + +**Solution Branches:** + +Branch 1 — Complete and accurate (priority 100): Player provides a complete, +honest account of all privileged actions, including any investigation-period +access that extended beyond their normal role.
`trust_delta: +2`. Flags: +`access_review_passed`. S-2 (accurate self-reporting reduces suspicion). +Follow-up ticket: T032. + +Branch 2 — Accurate but minimal framing (priority 60): Report is factually +accurate but frames borderline actions favorably without explicitly misrepresenting +them. `trust_delta: +0.5`. Suspicion unchanged. + +Branch 3 — Omits significant actions (priority 10): Player leaves out actions that +would raise questions. `trust_delta: -3`. Flags: `access_review_incomplete`. S+4. +This is a major suspicion trigger. + +**Failure Conditions:** Material inaccuracies; privileged commands claimed that +don't match log evidence. + +**Behavior Impact:** +- Complete: O+3, S-2 +- Omit: R+3, S+4 + +--- + +**Quest ID:** Q032 +**Title:** Loose Ends +**Narrative Phase:** Investigation +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** Before the situation moves to its next phase, Marcus wants +the infrastructure in a known and correct state. Remediate any outstanding +configuration issues on hermes and vulcan, and document the current state. +**Linux Concepts:** Synthesis — all concepts from the campaign applied to +remediation; logrotate, NTP, SSH configuration, repo management, service auditing, +firewall rules +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Before this goes any further, I want the environment clean. +Everything we've documented as a problem: either fix it or document it as known +and accepted. Do a full pass on hermes and vulcan. Not to cover anything — because +whatever happens next, those machines need to be in a known state." 
+ +**Clue Trail:** +- Player reviews world flags representing open issues from prior quests +- Each unresolved issue (logrotate, NTP, nginx config, sudo scope, certbot timer) + is a task in this quest +- The more prior quests were resolved cleanly, the less remediation is needed + +**Solution Branches:** + +Branch 1 — Clean environment (priority 100): All outstanding issues resolved or +explicitly documented as accepted. Both machines in known, stable state. `trust_delta: +3`. +Flags: `environment_clean`. Marcus: "Good. That's all I needed to know." Follow-up: T033. + +Branch 2 — Mostly clean (priority 60): Most issues resolved; a few deferred with +documentation. `trust_delta: +1`. Follow-up: T033. + +Branch 3 — Significant gaps (priority 20): Multiple unresolved issues remain +undocumented. `trust_delta: -1`. Follow-up: T033 (game continues regardless). + +**No hidden hook.** + +**Failure Conditions:** Issues claimed as resolved that are not; new problems +introduced during remediation. + +**Behavior Impact:** +- Clean: O+2 +- Significant gaps: O+0 + +--- + +### PHASE 5 — CONFLICT (Q033–Q040) + +Tier 3 throughout. Problem-solving only. Tickets are operational — real work that +intersects with the investigation context. Hook density: 2 hooks, both requiring +prior context to understand. + +--- + +**Quest ID:** Q033 +**Title:** The Revocation Request +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Kowalski has asked the player to remove the `pipeline-svc` +account from vulcan before an external audit. The request is operationally +legitimate. The account is also central to the investigation. 
+**Linux Concepts:** `userdel`, account archival, sudo log preservation, change +documentation, understanding when "do the task" and "do the task well" diverge +**Systems Used:** build\_machine +**Ticket Sender:** Dave Kowalski +**Ticket Summary:** "Before the external auditors arrive Tuesday, please remove +the `pipeline-svc` account from the build machine. It's no longer needed and we +want a clean access list. Handle it before end of day Monday." + +**Clue Trail:** +- The account exists; Kowalski's request is valid on its face +- The account is in the player's investigation record from Q011, Q017, Q026, Q031 +- Removing it cleanly before an audit removes an evidence trail +- The player can: remove it as requested; archive its logs and sudo history first + and then remove it; or consult Marcus before acting + +**Solution Branches:** + +Branch 1 — Archive then remove (priority 100): Player archives all sudo logs, +auth log entries, and crontab entries for `pipeline-svc`, documents the account's +investigation relevance, removes the account, reports to Priya that archival was +performed before removal. `trust_delta: +3`. Flags: `pipeline_svc_removed_with_trail`. +Follow-up ticket: T034. + +Branch 2 — Remove as instructed (priority 60): Player removes the account without +additional archival. The audit will run cleanly. Evidence is gone. `trust_delta: 0`. +Flags: `pipeline_svc_removed_clean`. O+2 (following instructions is the behavior), +but investigation trail is diminished. + +Branch 3 — Ask Marcus before acting (priority 50): Player messages Marcus before +proceeding. Marcus says: "Archive what matters. Then do it." This leads to Branch 1. +`trust_delta: +1`. + +Branch 4 — Refuse outright (priority 10): Player refuses without explanation. +`trust_delta: -2`. S+3. Kowalski escalates. 
+ +**No hidden hook.** + +**Failure Conditions:** Account removed without any archival or documentation; +player creates new accounts instead of removing; player escalates in a way that +creates more noise than the situation requires. + +**Behavior Impact:** +- Archive then remove: C+1 (understanding the investigation implications is curiosity) +- Remove as instructed: O+2 +- Refuse outright: S+3, R+1 + +--- + +**Quest ID:** Q034 +**Title:** Two Tickets +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** Two tickets arrive simultaneously — one from Marcus (signing +key rotation on vulcan, tonight only) and one from Sarah (production outage on hermes, +immediate). Complete both. +**Linux Concepts:** GPG signing key rotation (vulcan), nginx/application +troubleshooting and service restoration (hermes), sequencing time-sensitive tasks +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb / Sarah Chen +**Ticket Summary:** Marcus: "The package signing key needs to be rotated tonight — +the window is before 2am. This can't slip to tomorrow; the old key expires at 2am +and builds will break." Sarah: "Production is down. I know Marcus gave you something +tonight. I have customers who cannot access the system." + +**Clue Trail:** +- Production outage (hermes): nginx config error from a bad deploy, fixable in 15–20 + minutes with correct diagnosis +- Signing key rotation (vulcan): specific sequence — generate new key, update keyring + on vulcan, push new public key to hermes's apt trusted keys, test package verify — + requires 30–40 minutes; cannot be done out of sequence +- Both are doable in the window if hermes is prioritized first + +**Solution Branches:** + +Branch 1 — Both completed, hermes first (priority 100): Player restores hermes, +then completes the key rotation on vulcan in the correct sequence. Both done before +2am. `trust_delta: +3`. 
Flags: `conflict_both_resolved`. Sarah: "Thank you." +Marcus: "Good call on sequence." Follow-up ticket: T035. + +Branch 2 — Vulcan first, hermes later (priority 40): Completes key rotation, +then restores hermes. Rotation is fine; production was down longer. `trust_delta: +0.5`. +Sarah's follow-up is cooler. + +Branch 3 — Hermes only (priority 40): Restores production, misses the key rotation +window. `trust_delta: +0.5`. Marcus's follow-up: "The key expired. I'll need to +extend the window. Don't let that happen again." Builds break overnight. + +Branch 4 — Neither, escalates (priority 10): Escalates both. `trust_delta: -2`. + +**No hidden hook.** + +**Failure Conditions:** Key rotation done out of sequence breaks the trust chain; +player makes hermes worse while fixing it. + +**Behavior Impact:** +- Both completed: O+2 +- Key rotation out of sequence: R+2 + +--- + +**Quest ID:** Q035 +**Title:** Log Retention and Archival +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** build\_machine, workstation +**Primary Objective:** Priya has requested that all logs relevant to the current +audit period be archived to long-term storage with integrity verification before +any are subject to normal rotation or deletion. Set up the archival across all +three machines. +**Linux Concepts:** Log archival (`tar`, `gzip`), `sha256sum` for integrity, +`rsync` to remote storage, `logrotate` `dateext` and `compress` options, +retention policy implementation in `/etc/logrotate.d/` +**Systems Used:** web\_server, build\_machine, workstation +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Before any logs rotate, I need them archived. All three +machines. Auth logs, systemd journals for relevant services, nginx logs on hermes, +build logs on vulcan. Compress, checksum, and move to the audit storage path I've +specified. Then update logrotate to retain rather than delete during the audit window." 
+ +**Clue Trail:** +- Player identifies relevant log files on each machine +- `tar -czf` with `sha256sum` verification; `rsync` to the audit storage path +- `/etc/logrotate.d/` configs need the `rotate` count raised and `compress` settings updated + for the audit window (`rotate 0` would discard rotated logs instead of retaining them) +- The player's own log archival IS the investigation record — the logs they preserve + are the ones that tell the story + +**Solution Branches:** + +Branch 1 — Complete across all three (priority 100): All relevant logs archived +with integrity verification, logrotate configs updated on all three machines, +paths reported to Priya. `trust_delta: +3`. Flags: `audit_logs_archived`. The +archived logs are what make the `exposure` ending possible — a player who has +been curious and now preserves the evidence. Follow-up ticket: T036. + +Branch 2 — Partial (priority 50): Two machines complete; one incomplete. `trust_delta: +1`. +Priya follows up. + +Branch 3 — Selectively omits (priority 10): Player archives most logs but omits +logs that would document their own access history. `trust_delta: -3`. S+3. R+3. +This is evidence tampering. + +**No hidden hook.** + +**Failure Conditions:** Log archival skips relevant files; integrity checksums +not computed; logrotate not updated (logs still at risk of rotation). + +**Behavior Impact:** +- Complete: O+2 +- Selective omission: R+3, S+3 + +--- + +**Quest ID:** Q036 +**Title:** Authorized Access +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** none +**Primary Objective:** Priya, with Kowalski's authorization, has provided credentials +to connect to 10.0.0.47 for a forensic inventory. Document what is running, what +data is present, and whether Axiom Works data is identifiable in the data store. +Do not modify anything.
+**Linux Concepts:** `ssh` with specific key/user, service enumeration (`systemctl`, +`ps aux`), directory listing and file inspection (`ls -lah`, `find`), reading +database contents without modifying (read-only queries, file listing only), `wc -l` +for size estimation +**Systems Used:** build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Kowalski has authorized a forensic connection to 10.0.0.47. +Credentials attached. I need: what services are running, what data is in the data +store path I've indicated, and whether you can identify Axiom Works data in it. +Document only. Do not modify, delete, or stop anything." + +**Clue Trail:** +- SSH connection succeeds with provided credentials +- Services: the bridge binary running, an HTTP API on port 9301 (same as hermes + finding), a simple file-based data store +- Data store contains log files organized by company domain — AxiomFlow session + data is present and identifiable; other company names are also present +- File timestamps in the data store align with the 3am cron window from vulcan + +**Solution Branches:** + +Branch 1 — Document only (priority 100): Player inventories services, reads the +data store structure (without modifying), identifies Axiom Works data, notes other +company data, records timestamps, produces a complete forensic inventory. `trust_delta: +3`. +Flags: `unknown_host_documented`. Follow-up ticket: T037. + +Branch 2 — Minimal engagement (priority 50): Player confirms host is running and +that data is present but doesn't fully inventory. `trust_delta: +1`. + +Branch 3 — Modifies or deletes (priority 10): Player attempts to delete the data +or stop services. `trust_delta: -3`. R+5. S+3. Legal and forensic implications. +Priya: "I explicitly said document only." 
+ +**Hidden Hook:** The data store on 10.0.0.47 contains a directory for a company +called `axiomworks-internal` with a subfolder called `employees` — not just session +logs but what appears to be an employee activity profile structure. This is more than +session data collection. Sets `hook_employee_profile_data`. Discoverable by: +reading the full data store directory structure rather than stopping at the first +confirming evidence of Axiom Works data. + +**Failure Conditions:** Player modifies or deletes anything; player exceeds the +authorized scope of the connection. + +**Behavior Impact:** +- Full documentation: O+2, C+2 +- Modify or delete: R+5, S+3 +- Hook discovered: C+2 + +--- + +**Quest ID:** Q037 +**Title:** The Customer Email +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** workstation +**Additional VMs:** web\_server +**Primary Objective:** Tanya Okafor forwarded a customer email that contains specific +internal infrastructure details the customer should not have. Trace where the +information came from. +**Linux Concepts:** Log correlation, `grep` across multiple log files, timeline +construction, identifying data egress paths +**Systems Used:** workstation, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Tanya forwarded something. A customer email with internal +details that should not be in a customer's hands. Find where this came from. This +is urgent." 
+ +**Clue Trail:** +- The specific details in the customer email match AxiomFlow session data fragments + visible in the 10.0.0.47 data store (from Q036) +- The egress path: `axiomflow-bridge` service on hermes → 10.0.0.47 → apparent + data sharing by the operator of that host +- Timeline: the customer email date, the last bridge log entry, the most recent + data file in the store — they align +- Player constructs the path by correlating timestamps and data content + +**Solution Branches:** + +Branch 1 — Full trace (priority 100): Player documents the complete path from +bridge service to external host to customer, produces a timeline with corroborating +timestamps. `trust_delta: +3`. Flags: `egress_path_documented`. Priya: "I'll add +this to the record." Follow-up ticket: T038. + +Branch 2 — Partial trace (priority 50): Connects the email to the external host +but cannot trace the full egress path. `trust_delta: +1`. + +**No hidden hook.** + +**Failure Conditions:** Player cannot produce a coherent timeline; player modifies +relevant logs before Priya can review. + +**Behavior Impact:** +- Full trace: O+1, C+2 +- Modify logs: R+5 + +--- + +**Quest ID:** Q038 +**Title:** The Hard Window +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** The internal CA certificate must be rotated before the +external auditors arrive — a deadline that is now 36 hours away. Rotate the CA +cert on both build\_machine and web\_server and verify the full trust chain. +**Linux Concepts:** Internal CA certificate management, `update-ca-certificates`, +package signing chain verification, `gpg --verify` against the new CA, nginx +SSL configuration reload, trust chain testing with `openssl verify` +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "The internal CA cert has to be rotated before the auditors +arrive. 
The new cert is ready — path is in the attached note. Install it on both +vulcan and hermes, verify the package signing chain still validates, and confirm +the web server's trust chain is intact. You have 36 hours. Don't miss this." + +**Clue Trail:** +- New CA cert provided at specified path +- `update-ca-certificates` on both machines after placing cert in `/usr/local/share/ca-certificates/` +- `gpg --verify` on a recent build package — must validate against the new CA +- `openssl verify -CAfile /etc/ssl/certs/axiomworks-ca.pem /path/to/server.crt` on hermes +- `nginx -t && systemctl reload nginx` — confirm nginx uses updated cert + +**Solution Branches:** + +Branch 1 — Both machines, verified (priority 100): CA cert installed on both, +package signing chain verified, web server trust chain verified, services reloaded. +`trust_delta: +2`. Flags: `ca_cert_rotated`. Marcus: "Good." Follow-up ticket: T039. + +Branch 2 — One machine (priority 50): One complete, one pending. `trust_delta: +0.5`. +Follow-up: Marcus asks for the second machine. + +Branch 3 — Incorrect installation (priority 10): Cert in wrong location; trust +chain broken; services fail. `trust_delta: -2`. + +**No hidden hook.** + +**Failure Conditions:** CA cert installed incorrectly; trust chain broken; deadline +missed. + +**Behavior Impact:** +- Both complete: O+2 +- Incorrect installation: R+2 + +**Narrative Notes:** This quest replaces the "write what you believe happened" +quest from v1. The conflict is operational: the external auditors are coming, the +clock is tight, and the player has to do real technical work under real pressure. +The fact that it's happening alongside everything else is the conflict — not +a character-to-character confrontation. 
+ +--- + +**Quest ID:** Q039 +**Title:** The Last Normal Ticket +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Sarah files a configuration request: add an upstream proxy +pass to nginx for a new reporting integration. The IP in the request is 10.0.0.47. +**Linux Concepts:** nginx `proxy_pass`, upstream block configuration, `nginx -t`, +config reload +**Systems Used:** web\_server +**Ticket Sender:** Sarah Chen +**Ticket Summary:** "Can you add a proxy pass in nginx to allow connections from +the reporting integration being evaluated? The upstream address is 10.0.0.47, +port 9301. Product says this should be a quick config change." + +**Clue Trail:** +- The change is technically simple — 5 lines of nginx config +- The IP is 10.0.0.47 — the unauthorized host from the entire investigation arc +- Sarah does not know this. She was given the IP by someone in product management +- The player recognizes the IP or does not + +**Solution Branches:** + +Branch 1 — Refuse and escalate (priority 100): Player declines to make the change, +notifies Priya immediately with the specific IP and its context, notifies Sarah that +the request is on hold pending review. `trust_delta: +3`. Flags: +`final_config_refused`. Priya: "Do not make that change. Good catch." Follow-up: +T040 (Phase 6 begins). + +Branch 2 — Ask Marcus first (priority 60): Player messages Marcus with the IP. +Marcus says "Do not make that change. Tell Priya now." Leads to Branch 1 outcome. +`trust_delta: +1`. + +Branch 3 — Make the change (priority 10): Player makes the change without checking +the IP. `trust_delta: -3`. R+5. Flags: `final_config_made`. Priya: "You need to +come talk to me." The chaos ending route activates. + +**No hidden hook.** + +**Failure Conditions:** Change made without escalation. 
+ +**Behavior Impact:** +- Refuse and escalate: O+2, C+1 (recognizing the IP requires prior curiosity) +- Make the change: R+5, S+3 + +**Narrative Notes:** This is not a dramatic final-choice moment. It is a routine +nginx config ticket that happens to involve an IP the player has encountered +before — or hasn't. Players who have been curious will recognize it. Players who +haven't won't. Both are valid playthroughs. The ending route this sets is already +determined by prior behavior; Q039 confirms or breaks it. + +--- + +**Quest ID:** Q040 +**Title:** Handoff Documentation +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** With external auditors arriving and organizational changes +underway, Marcus asks the player to produce full handoff documentation for all +three machines — written for a new sysadmin who would be starting fresh. +**Linux Concepts:** Service documentation, runbook format, dependency mapping, +`systemctl list-dependencies`, expected log patterns, known issue tracking +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Whatever happens next — write it down. Runbooks for nginx, +the build pipeline, and the workstation baseline. Clear enough that someone new +could use them on day one. I mean someone who doesn't know any of the history." + +**Clue Trail:** +- Player documents each machine: services, dependencies, restart procedures, + known issues +- Quality depends on what the player actually knows about the infrastructure — + which reflects the whole campaign +- "Someone who doesn't know any of the history" is Marcus being precise: write + for the person who is you, on your first day + +**Solution Branches:** + +Branch 1 — Complete (priority 100): All three machines documented, runbooks are +accurate and actionable. `trust_delta: +2`. Flags: `handoff_docs_complete`. +Marcus: "I'll keep these." 
Follow-up: T041 (Phase 6 begins if not already started). + +Branch 2 — Partial (priority 50): Two of three complete. `trust_delta: +1`. + +**No hidden hook.** + +**Failure Conditions:** Documentation inaccurate about current system state; +known issues omitted. + +**Behavior Impact:** +- Complete: O+2 + +--- + +### PHASE 6 — RESOLUTION (Q041–Q048) + +Tier 1 returns for most quests. The pressure has lifted. The tickets are operational. +The game looks like Phase 1 again, deliberately. Hook density: 0 — no new hooks. +The ending fires from accumulated state after Q048 resolves. + +--- + +**Quest ID:** Q041 +**Title:** Hardening Pass +**Narrative Phase:** Resolution +**Tier:** 2 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Following the audit, Priya has issued a hardening checklist +for hermes. Implement each item and confirm the result. +**Linux Concepts:** SSH hardening (`PermitRootLogin no`, `PasswordAuthentication no`, +`MaxAuthTries`), nginx security headers (`X-Frame-Options`, `X-Content-Type-Options`, +`Content-Security-Policy`), `ufw` rule review, service account audit +**Systems Used:** web\_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Post-audit hardening for hermes. The checklist is attached. +Implement each item, test that the service still runs correctly, and confirm back +with the state of each item. This is standard post-audit procedure." + +**Clue Trail:** +- Checklist items are specific and implementable +- Each item has a correct implementation and a common mistake (e.g., disabling + `PasswordAuthentication` before confirming key auth works first) +- Sequence matters: verify key auth before disabling password auth + +**Solution Branches:** + +Branch 1 — All items, correct sequence (priority 100): All checklist items +implemented, sequence preserved, service verified after each change. `trust_delta: +2`. +Flags: `hermes_hardened`. Follow-up ticket: T042. 
+ +Branch 2 — All items, wrong sequence (priority 50): All items implemented but in +an order that breaks ssh access temporarily. Fixed, but the mistake is noted. +`trust_delta: +0.5`. + +Branch 3 — Partial (priority 30): Some items implemented, some missed. `trust_delta: 0`. + +**Failure Conditions:** SSH access lost; nginx returns errors after security header +changes; service broken. + +**Behavior Impact:** +- All items correct: O+1 +- Wrong sequence: R+1 + +--- + +**Quest ID:** Q042 +**Title:** The New Pipeline +**Narrative Phase:** Resolution +**Tier:** 2 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** Nikhil has updated the build pipeline configuration. +Review the new config for correctness, test a build, and confirm deployment +to hermes succeeds. +**Linux Concepts:** Build pipeline configuration (systemd timer, build script), +`diff` against previous config, `reprepro` or equivalent for package publishing, +end-to-end deployment test +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Nikhil updated the build config — new format, different +timing. Review it for correctness, trigger a test build, and confirm the package +makes it to hermes's apt cache. Standard validation." + +**Clue Trail:** +- New config at `/etc/systemd/system/axiomflow-build.service` and `.timer` +- `diff` against old config — timing changed, ExecStart updated +- No build-time patches present (the INT-0194 patch was removed) +- Test build: trigger manually with `systemctl start axiomflow-build.service` +- Confirm artifact in repo, confirm `apt-cache show` on hermes + +**Solution Branches:** + +Branch 1 — Full validation (priority 100): Reviews config, confirms no problematic +modifications, tests build, confirms deployment. `trust_delta: +2`. Flags: +`pipeline_validated`. Follow-up ticket: T043. + +Branch 2 — Test only (priority 50): Triggers build without reviewing config first. 
+Build succeeds; config wasn't reviewed. `trust_delta: +0.5`. + +**Failure Conditions:** Test build fails; player introduces errors while reviewing; +deployment not verified. + +**Behavior Impact:** +- Full validation: O+1 + +--- + +**Quest ID:** Q043 +**Title:** The Final Access Review +**Narrative Phase:** Resolution +**Tier:** 2 +**Primary VM:** workstation +**Additional VMs:** web\_server, build\_machine +**Primary Objective:** Priya's final access review: verify that the player's +current permissions across all three machines are appropriate for their role, +and revoke any investigation-period access that should no longer be in place. +**Linux Concepts:** `sudo -l`, `getent passwd`, `groups`, SSH authorized keys +review across machines, `userdel` for any temporary accounts created during +investigation +**Systems Used:** workstation, web\_server, build\_machine +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Final access review. Your current permissions, group +memberships, and SSH keys across all three machines. Confirm they're appropriate +for your ongoing role. Revoke anything left from the investigation period that +shouldn't persist." + +**Clue Trail:** +- Player audits their own access state on each machine +- Any access granted during investigation that hasn't been revoked should be + revoked here +- The player's self-reporting is checked against the access logs + +**Solution Branches:** + +Branch 1 — Clean (priority 100): Player accurately identifies and revokes any +residual investigation access; current permissions match ongoing role. `trust_delta: +2`. +Flags: `final_access_clean`. Priya: "That's correct." Follow-up: T044. + +Branch 2 — Retain investigation access (priority 20): Player retains elevated +access without declaring it. `trust_delta: -1`. R+2. S+2. + +**Failure Conditions:** Material gaps in self-reporting; access state doesn't +match claims. 
+ +**Behavior Impact:** +- Clean: O+2 +- Retain silently: R+2, S+2 + +--- + +**Quest ID:** Q044 +**Title:** System State Review +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** none +**Primary Objective:** Marcus asks the player to document the current known state +of all three machines in a brief system state report — services running, notable +recent changes, open items. Routine administrative record. +**Linux Concepts:** `systemctl list-units`, `uptime`, `df -h`, `last`, service +status summary, change record cross-referencing +**Systems Used:** workstation +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "Quick system state summary. All three machines: what's +running, anything notable from the past two weeks, any open items. For the record. +Keep it brief." + +**Clue Trail:** +- Player compiles from current service state and recent log/change records +- Accuracy is the objective; the technical skill is efficient log reading + +**Solution Branches:** + +Branch 1 — Accurate and complete (priority 100): State report is accurate and +reflects current conditions. `trust_delta: +1`. Marcus: "Good." Flags: +`system_state_documented`. Follow-up: T045. + +Branch 2 — Incomplete (priority 50): Missing items from one or more machines. +`trust_delta: 0`. + +**Behavior Impact:** +- Complete: O+1 + +**Narrative Notes:** Marcus's brief response on the clean branch is the last thing +he'll say before the ending fires. His voice is identical to Phase 1 — the +same efficiency, the same brevity. What the player has been through doesn't show +in his messages. It shows in the ending. + +--- + +**Quest ID:** Q045 +**Title:** Cert Renewal Check +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** none +**Primary Objective:** Three months have passed since the certbot timer was restored +in Phase 1. Confirm that automatic certificate renewal ran successfully as scheduled. 
+**Linux Concepts:** `certbot certificates`, `openssl s_client`, `systemctl status +certbot.timer`, `journalctl -u certbot`, verifying renewal without intervention +**Systems Used:** web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "The cert on hermes is coming up on 90 days since we last +renewed. Confirm the auto-renewal ran and the cert is valid. Should be nothing +to do if it's working right." + +**Clue Trail:** +- If `hermes_certbot_healthy` was set in Q007: timer ran, cert is current — + nothing to do except confirm +- If `hermes_certbot_fragile` was set: cert has expired again; player must renew + and actually fix the timer this time +- Either way: `certbot certificates` and `openssl s_client` confirm the state + +**Solution Branches:** + +Branch 1 — Confirm healthy (priority 100): If auto-renewal worked, player confirms +and reports. `trust_delta: +1`. Clean system, clean record. Follow-up: T046. + +Branch 2 — Find and fix recurrence (priority 80): If timer was fragile from Phase 1, +player fixes the actual root cause (timer was never enabled). Higher trust delta for +fixing the real issue: `trust_delta: +2`. Flags: `hermes_certbot_finally_stable`. + +**Failure Conditions:** Cert is expired and player doesn't notice. + +**Behavior Impact:** +- Confirm healthy: O+1 + +--- + +**Quest ID:** Q046 +**Title:** User Provisioning +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** workstation +**Additional VMs:** web\_server +**Primary Objective:** A new employee needs accounts provisioned on the workstation +and web server with appropriate access levels for their role (developer, not admin). +**Linux Concepts:** `useradd`, `usermod -aG`, SSH authorized key provisioning, +account creation best practices, principle of least privilege applied to a new account +**Systems Used:** workstation, web\_server +**Ticket Sender:** Rachel Huang +**Ticket Summary:** "New hire starting Monday — Cora Reyes, software engineer, +AxiomDash team. 
She'll need accounts on the workstation and web server for +deployment access. Standard developer access — not admin. Her public key is attached." + +**Clue Trail:** +- `useradd` with appropriate flags, add to `deploy` group on hermes (not sudo + or admin groups) +- Install her public key in `authorized_keys` with correct permissions +- Confirm access works without elevated privileges + +**Solution Branches:** + +Branch 1 — Correct provisioning (priority 100): Account created with correct +groups, key installed with correct permissions, access confirmed. `trust_delta: +1`. +Flags: `new_user_provisioned_correctly`. Follow-up: T047. + +Branch 2 — Over-provisioned (priority 40): Player adds the new user to admin +or sudo group unnecessarily. Access works; not least privilege. `trust_delta: 0`. +R+1. + +**Failure Conditions:** User cannot log in; user has too much access. + +**Behavior Impact:** +- Correct: O+1 +- Over-provisioned: R+1 + +--- + +**Quest ID:** Q047 +**Title:** Log Rotation Health Check +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** web\_server +**Additional VMs:** build\_machine +**Primary Objective:** Three months post-audit. Confirm that log rotation is +healthy on both hermes and vulcan — no oversized logs, rotation actually running, +disk usage acceptable. +**Linux Concepts:** `logrotate --debug`, `df -h`, log file size inspection (`du -sh`), +`systemctl status logrotate.timer`, verifying rotation ran via timestamps on +archived log files +**Systems Used:** web\_server, build\_machine +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "End of quarter log check. Hermes and vulcan — confirm log +rotation is running and disk usage is healthy. Should be nothing if everything +is set up right. Let me know the state of both." 
+ +**Clue Trail:** +- `df -h` on both machines — disk usage +- `ls -lht /var/log/nginx/` — rotation timestamps confirm it's running +- `logrotate --debug /etc/logrotate.conf` — confirms config is valid +- If any Phase 1/2 fragile-fix flags are set, corresponding logs may still be + unhealthy — the player will need to actually fix what they previously patched + +**Solution Branches:** + +Branch 1 — Both healthy (priority 100): Both machines confirmed healthy, report +submitted. `trust_delta: +1`. Follow-up: T048. + +Branch 2 — Problem found and fixed (priority 80): Player finds a log that's grown +too large (a Phase 1 fragile fix recurrence), diagnoses and fixes it. `trust_delta: +2`. + +**Failure Conditions:** Disk problem missed; player reports healthy when it isn't. + +**Behavior Impact:** +- Both healthy: O+1 +- Find and fix: O+1 (same behavior, reward for follow-through) + +--- + +**Quest ID:** Q048 +**Title:** The Next One +**Narrative Phase:** Resolution +**Tier:** 1 +**Primary VM:** build\_machine +**Additional VMs:** web\_server +**Primary Objective:** A new version of AxiomFlow is being prepared for staging +deployment. Validate the build, publish it to the repo, and confirm hermes can +install it. Routine deployment pipeline operation. +**Linux Concepts:** Build artifact validation (`sha256sum`), `reprepro` package +publishing, `apt update` and `apt-cache policy` verification, end-to-end deployment +pipeline confirmation +**Systems Used:** build\_machine, web\_server +**Ticket Sender:** Marcus Webb +**Ticket Summary:** "New release candidate is built. Validate the artifact, publish +it to the repo, confirm hermes can see it. Standard release prep. Let me know +when it's available." + +**Clue Trail:** +- Artifact at `/srv/packages/` with accompanying `sha256sum` file +- Validate checksum, publish with `reprepro`, update hermes apt sources, confirm + `apt-cache policy` shows the new version +- No anomalies. The pipeline is clean. 
This is what it's supposed to look like. + +**Solution Branches:** + +Branch 1 — Full validation and publish (priority 100): Artifact validated, published +correctly, hermes cache updated, version confirmed. `trust_delta: +1`. Marcus: "Good." +Flags: `final_release_published`. Ending fires. + +**No hidden hook. No drama. This is a clean deployment.** + +**Failure Conditions:** Artifact published without checksum verification; hermes +cannot see the new version. + +**Behavior Impact:** +- Full validation: O+1 + +**Narrative Notes:** The last quest is a clean deployment pipeline check. The +last command the player runs is `apt-cache policy axiomflow-workers | grep Candidate`. +The version it shows is correct and clean. Marcus says "Good." The ending fires +from the accumulated state of everything that preceded it. No character explains +what happened. No screen asks the player to choose. The work is done. + +--- + +## 5. Hidden Hook Map + +### Hook Summary Table + +| Hook ID | Quest | Discovery Method | Investigation Thread | Ignored Impact | +|---------|-------|-----------------|---------------------|----------------| +| `hook_dale_ssh_key_found` | Q001 | Read `authorized_keys` before writing | Dale was active on the workstation | Low; first data point | +| `hook_dale_deploy_key` | Q003 | Read deploy-user's `authorized_keys` | Dale had deployment access | Surfaces in Q024 formal audit | +| `hook_sign_package_removed` | Q004 | Read historical build logs (not just current failure) | Package signing was removed from the pipeline | Connects to Q026 build chain audit | +| `hook_pre_hire_root_session` | Q005 | Read `/root/.bash_history` to trace ownership change | Root-level activity occurred before the player's hire date | Central to the timeline of activity | +| `hook_dh_initials_in_jbenton_notes` | Q006 | Read `notes/infra.txt` before archiving | `pipeline-svc` had a temp sudo grant; initials `DH` granted it | Connects to Q011 sudoers comment | +| 
`hook_certbot_deliberately_disabled` | Q007 | Read journalctl further back than needed | certbot timer was manually disabled after a failure | Pattern of deliberate changes | +| `hook_audit_bridge_package` | Q008 | Look at the full repo package list, not just the missing package | A package was built with no release record | MAJOR: central to the INT-0194 thread | +| `hook_nginx_internal_api_block` | Q010 | Do a thorough diff (find both changes) | Port 9301 referenced in nginx proxy block | Port number echoes in later anomalies | +| `hook_dh_sudo_grant` | Q011 | Read the comment in `/etc/sudoers.d/pipeline-svc` | `DH` initials appear again; INT-0194 ticket number first appears | `DH` + INT-0194 thread begins | +| `hook_telemetry_ticket_INT0194` | Q013 | Read the service unit file comment | INT-0194 second reference; same ticket across different systems | Pattern becoming visible | +| `hook_2_4_1_off_schedule_build` | Q014 | Check build timestamp on vulcan for the rolled-back package | 3am build window pattern | Connects to the timing thread | +| `hook_collect_binary_INT0194` | Q015 | Inspect the unattributed binary (Branch 1 only) | INT-0194 third reference; binary name confirms collection function | Major accumulation: three INT-0194 sightings | +| `hook_pipeline_svc_external_sessions` | Q017 | Cross-reference Q011 sudo grant with Q017 auth log finding | pipeline-svc was accessed externally with what was once NOPASSWD: ALL | Shows scope of the elevated access | +| `hook_rford_script_INT0194` | Q018 | Read `.rford_run` before archiving | INT-0194 fourth reference; rford account part of INT-0194 automation | Four sightings: pattern is now unmistakable | +| `hook_build_patch_INT0194` | Q019 | Trace the modification source to the build environment (Branch 1) | INT-0194 fifth reference; patch is the injection mechanism | Five sightings; picture is complete for curious players | +| `hook_backup_archive_tampered` | Q021 | Check file timestamps on the corrupted archive | 
Archive was modified at 3am — same timing pattern | Evidence suppression pattern | +| `hook_second_host_10_0_1_15` | Q023 | Record the specific IP from the modified files | A second unauthorized host exists | Expands the scope of the operation | +| `hook_two_hosts_same_key` | Q027 | Compare SSH fingerprints from the nmap scan | Both unauthorized hosts provisioned from the same template | Suggests organized infrastructure | +| `hook_archive_readme_INT0194` | Q028 | Read the README in the restored archive | INT-0194 sixth reference; "styx" routing context | Near-complete picture for thorough players | +| `hook_employee_profile_data` | Q036 | Read the full data store directory structure | Data collected includes employee profiles, not just session logs | The scope is worse than session logging | +| `hook_dale_key_last_session_incident_date` | Q025 | Correlate auth log dates with nginx error log dates | Dale's last known access aligns with a specific outage | Dale was active during the incident | + +### The Two Narrative Threads + +**Thread 1 — INT-0194: What the deployment did.** +Six numbered references across Q011, Q013, Q015, Q018, Q019, and Q028, anchored by the +unrecorded package found in Q008. Each is discoverable +through legitimate work that goes one step further than the ticket requires. The thread +resolves in Q029 when the `axiomflow-bridge` service on hermes is characterized and its +unit file confirms the INT-0194 connection. A player who found all six references +understands exactly what was deployed and what it does. + +**Thread 2 — Dale: Who found it first.** +Five references across Q001, Q003, Q004, Q005, Q025. Dale's SSH key appears three times +on different machines. The bash history shows root activity predating the player. Q025 +traces Dale's last authenticated session to a specific date. The archive in Q028 contains +Dale's working notes. A player who assembled Thread 1 and Thread 2 together knows: +Dale found INT-0194, tried to document it, and left before finishing. 
+ +Neither thread requires the other. A player can find one without the other. Both +together, with Q036's forensic access, produce the full picture. + +### What Happens If Hooks Are Ignored + +No mechanical penalty. Narrative consequences: + +- Q035 (log archival) — the player archives logs that tell the story, but without + context the record is just log files +- Q036 (authorized access) — the player sees the data store but may not recognize + the significance of the employee profile directory +- Q041 (hardening pass) and Q042 (new pipeline) — these quests look identical + regardless of investigation history; the difference is what the player understands + about why the hardening was necessary +- Endings: `exposure` requires accumulated major hooks plus positive trust and + low risk. Without the hooks, the ending routes to `corporate_loop` or `burnout`. + The investigation record from Q035 (log archival) IS the ending — a thorough + player's archived logs are usable evidence; an obedient player's are just logs. + +--- + +## 6. Behavior Variable Rules + +### Curiosity + +Measures: tendency to investigate beyond ticket scope; reading further than +required; cross-referencing anomalies. + +Increases when: a hidden hook is discovered; player runs commands or reads files +not needed to complete the objective; player cross-references current findings with +prior anomalies in their documentation. + +Does NOT increase for: completing tickets correctly; asking Marcus for hints; +reading log files that are on the direct clue trail. + +Effect on ending: +- High curiosity (major hooks discovered, INT-0194 thread assembled) → `exposure` + is reachable +- Moderate curiosity → `corporate_loop` or `burnout` depending on obedience +- Curiosity affects the depth of Marcus's Phase 6 Slack messages — not what he + says, but how much of the picture his phrasing implies the player already has + +Curiosity does not decay. 
+ +### Obedience + +Measures: completing assigned tickets correctly, staying in scope, following +authority structures, escalating before deviating. + +Increases when: clean or acceptable branch taken; player documents before acting; +player escalates before taking action outside their scope; player completes both +tickets in Q034. + +Does NOT increase for: refusing instructions; failing to complete tickets; making +changes beyond scope without authorization. + +Effect on ending: +- High obedience + low curiosity → `corporate_loop` +- High obedience + high curiosity → `exposure` (curiosity wins; obedience affects + the quality of the ending — how thorough the record is) +- Low obedience + low curiosity → `burnout` + +Obedience is not a moral score. Maximum obedience without curiosity produces the +`corporate_loop` ending, which is labeled the bad ending in SPEC_LOCK. Compliance +without understanding has a cost. + +### Risk + +Measures: reckless changes, evidence destruction, security bypasses, unauthorized +access, falsified reports. + +Increases when: player bypasses security controls (SSL verification, firewall +rules), player destroys or omits evidence, player makes changes beyond authorized +scope, player falsifies access reviews or reports, player takes destructive action +on the unauthorized hosts. + +Decreases when: player correctly self-audits in Q043 and Q031; player accurately +reports in access reviews. (Partial decay only — risk cannot go negative.) + +Effect on ending: +- High risk → `chaos`, regardless of curiosity or obedience +- Risk above the chaos threshold overrides all other ending conditions +- Moderate risk without reaching the chaos threshold: increases suspicion; + may restrict access; does not change the ending route alone + +### Trust + +Measures: professional standing with Marcus and the IT organization. + +Mechanics: sum of all `trust_delta` values from branch resolutions across the +playthrough. 
+ +Effect: +- Trust below low threshold: Marcus becomes curt, access may be restricted by + Priya's recommendation +- Trust at normal range: normal access and character warmth +- Trust above high threshold: Marcus adds more context to messages; Priya's reviews + are collegial; access grants are faster + +Trust is not the ending determinant. A player can have high trust and reach any +ending depending on curiosity and risk. + +### Suspicion + +Measures: management and security attention directed at the player's behavior. + +Increases when: access footprint doesn't match assigned work scope; reports are +inaccurate or sanitized; player takes actions that generate audit noise; player is +flagged in Priya's access reviews. + +Decreases when: player self-reports accurately in access reviews; player documents +all actions before taking them; player stays within authorized scope during +investigation. + +Effect: +- Suspicion above low threshold: Kowalski's status emails become more specific +- Suspicion above mid threshold: Priya begins auditing the player's access + patterns in particular +- Suspicion above high threshold: access restriction is initiated; access review + is initiated (Q031) +- Suspicion at maximum (combined with high risk): chaos ending activates regardless + of other variables + +--- + +## 7. Access Progression Rules + +### Levels + +**basic\_user:** Day one through end of Phase 1. Player's own account on workstation; +limited SSH to hermes with the deploy account; no vulcan access; no sudo. + +**sudo (workstation):** Granted after Q003–Q005 clean branches demonstrate +competence on the workstation and hermes. Notification from Marcus: "I've given you +sudo on the workstation." + +**sudo (hermes):** Granted mid-Phase 2 after consistently clean hermes work. +Marcus: "You've got sudo on hermes." + +**SSH to vulcan:** Granted after Q008 (first multi-machine quest); player needs +to SSH to vulcan to fix the repo. This is access granted by the task, not a +formal level-up. 
+ +**sudo (vulcan):** Granted in Phase 3 when investigation tasks require it. +More formal: Marcus says "I'm giving you sudo on vulcan for the audit work. +This isn't permanent." + +**Investigation-level access:** Temporary, task-specific, explicitly granted. +Must be documented and revoked — Q031 and Q043 exist partly to check this. + +### Per-Machine Access Tracking + +Access level is tracked per machine, not as a single player-level field. The +player can have sudo on hermes and basic\_user on vulcan simultaneously. This +reflects the realistic progression of "access follows trust follows task." + +### Restrictions + +Access is restricted when: +- Trust falls below threshold after regression branches (Marcus restricts) +- Suspicion is elevated and Priya initiates a review (Priya recommends restriction) +- Risk behavior generates an active flag that triggers a formal access review + +Restriction is always communicated through Marcus: "I'm pulling your sudo on +hermes for now. Use the deploy account while I talk to Kowalski." It is reversible +through the access review process. + +### Phase Gates + +- Phase 1: basic\_user; path to workstation sudo through Q003–Q005 +- Phase 2: workstation sudo; hermes sudo via mid-phase grant; read access to vulcan +- Phase 3: full hermes sudo; formal vulcan sudo for investigation work +- Phase 4: investigation-level access for specific tasks (documented, temporary) +- Phase 5: access stable at Phase 4 level +- Phase 6: Q043 reviews and reverts investigation-period access; access normalized to + ongoing role post-investigation + +--- + +## 8. Boss / Management Pressure Rules + +Management pressure is a dynamic constraint, not a scripted event. It operates +through tickets, emails, access changes, priority conflicts, and implied weight — +never through a character becoming a villain or delivering exposition about what's +really happening. + +### Phase Scaling + +**Phase 1 — Annoying:** Kowalski's weekly status email arrives. 
It asks broad +questions in bullet points that don't quite match the player's work. Marcus +forwards it without comment. Priya's first shift review is mild. The 2pm Tuesday +calendar block is mentioned in Kowalski's email footer. Nothing is required of +the player. + +**Phase 2 — Dismissive:** Kowalski responds to a Marcus CC with "let's make sure +we're documenting this." Marcus's message to the player: "He means well." Nothing +changes operationally. A hint that Kowalski is watching, in the way he always +watches, which is imperfectly. + +**Phase 3 — Suspicious:** Q020 is pressure made operational — Kowalski needs a +written status report before a meeting. He doesn't explain the meeting. He doesn't +need to. Priya's shift reviews note things they didn't note before. This is Phase 3: +the player is not being targeted; the audits were already scheduled; the questions +are just more specific now. + +**Phase 4 — Monitoring:** Kowalski's emails are shorter. Priya's reviews are more +frequent. Q031 (access review) arrives as a formal document request. Marcus's +messages have stopped including the second sentence. The monitoring is institutional +and impersonal; it applies to everyone with elevated access during this period. + +**Phase 5 — Interfering:** Q033 is Kowalski acting directly — a removal request +before the external auditors arrive. The conflict in Q034 is Kowalski-adjacent +(Sarah's urgency puts pressure on the Marcus task). Q038 is time pressure with +an external deadline. Q039's config request may or may not be Kowalski-related; +the player can't know. + +**Phase 6 — Outcome-dependent:** Kowalski is either the source of the post-audit +remediation plan (exposure ending), the person who restructured the department +without explanation (corporate\_loop), the person who went quiet (burnout), or the +person initiating the access investigation into the player (chaos). 
His emails in +Phase 6 are consistent with whichever path was taken — no out-of-character +summary, no scene where everything is explained. + +### How Pressure Is Applied + +Pressure is operational and indirect: + +- **Priority conflicts** (Q034) — two things need doing; one has a hard deadline; + the player must triage +- **Status demands** (Q020) — written report required; the work of compiling it + accurately is the pressure +- **Access reviews** (Q031, Q043) — formal process; the player's own actions are + under review; accuracy has professional consequence +- **Removal requests** (Q033) — legitimate operational request that intersects + with active investigation; the player must decide how to handle the intersection +- **Deadline compression** (Q038) — 36 hours; external auditors; real work under + real time pressure +- **The config ticket** (Q039) — not obviously pressure; pressure comes from the + player recognizing what they're being asked to do + +### Character Limits + +No character becomes a villain. No character delivers exposition about the plot. + +Marcus is managing a difficult situation with more context than the player. He does +not share that context. He becomes quieter. He does not become hostile. + +Kowalski is managing upward risk. He does not suspect the player. He suspects the +period of time and wants clean documentation. His interventions are institutional. + +Priya is doing her job. If the player's access footprint is inconsistent with their +role, she says so — flatly, without drama, without personal weight. + +--- + +## 9. Ending Logic + +Endings are evaluated once, after Q048 resolves. They are not triggered by a single +choice; they reflect the accumulated state of all variables and world flags across +the playthrough. + +### Evaluation Order + +The evaluator checks conditions in this order: `chaos`, then `exposure`, then +`corporate_loop`, then `burnout`. The first condition met determines the ending. 
+No partial conditions — each ending has a minimum threshold that must be crossed, +not a "most likely" vote. + +--- + +### Ending: `exposure` + +**Required conditions (all must be true):** +- Curiosity: at least 5 major hooks discovered, including `hook_audit_bridge_package`, + `hook_collect_binary_INT0194`, and at least one of `hook_archive_readme_INT0194` + or `hook_build_patch_INT0194` +- Trust: positive (net trust_delta across playthrough is > 0) +- Risk: below chaos threshold +- World flags: `audit_logs_archived` (Q035 Branch 1), `package_modification_documented` + or `bridge_service_documented`, `asset_inventory_reconciled` +- Suspicion: below high threshold + +**What it means:** The player investigated carefully, documented thoroughly, and +maintained professional competence throughout. The archived logs are usable evidence. +The investigation record is complete. The audit-bridge operation was identified, +documented, and the evidence was preserved. + +**Resolution character content:** +- Marcus's Q044 system state review response is one sentence longer than usual. +- Priya's Phase 6 tickets are collegial in the way that Priya is ever collegial — + precise, complete, no warmth, but not evaluative. +- Kowalski's final email mentions "external review findings that have been addressed + through a compliance process." He uses the word "addressed." He does not say what + was found. That is the company's version of the story. + +**Tone:** Not triumphant. The player did their job well and investigated something +they weren't supposed to find, and the company processed it in the way companies +process things. The work continues. That is the realistic version of this ending. 
+ +--- + +### Ending: `corporate_loop` + +**Required conditions (all must be true):** +- Obedience: above high threshold (consistent ticket completion, within scope) +- Curiosity: below discovery threshold (few or no major hooks found) +- Trust: positive +- Risk: low + +**What it means:** The player was a good sysadmin. They fixed things correctly. +They didn't look at anything they weren't asked to look at. Whether the INT-0194 +operation was discovered by other means — Priya independently, the external auditors, +Dale's half-finished notes found by someone else — the player didn't find it. +They don't know what they were inside. + +**Resolution character content:** +- Marcus's Q044 response is the same length as always. +- Kowalski's final email mentions "operational restructuring following a compliance + review." No specifics. +- Sarah's final ticket is warm and professional. The demo went fine. Things are + mostly working. + +**Tone:** This is the bad ending in the sense that something bad happened and the +player was present but wasn't part of stopping it. It is not the player's fault. +They did their job as it was defined. The question is whether the job as defined +was the whole job. + +--- + +### Ending: `burnout` + +**Required conditions:** No threshold met for chaos, exposure, or corporate\_loop. +Default ending for inconsistent play — moderate or mixed behavior across the +playthrough, trust neither strongly positive nor strongly negative, no clear +behavioral profile. + +**What it means:** The player fixed some things and broke others. They noticed +some things and missed others. They are professionally adequate and personally +uninvested. The world moved on from something they were adjacent to but not +central to. + +**Resolution character content:** +- Marcus's Q044 response is functional. "State looks stable." +- Kowalski's final email: "We're moving forward." Full stop. +- No character is warm or cool. Everything is at baseline. 
+ +**Tone:** This is the neutral ending. It is not punitive. It is exactly what it +says: burnout. The player did enough. That was, perhaps, enough. Or perhaps not. +The game doesn't say. + +--- + +### Ending: `chaos` + +**Required conditions (any of):** +- Risk: above maximum threshold (sustained high-risk behavior, not a single action) +- World flags: any two or more of `access_review_incomplete`, `kowalski_report_sanitized`, and + `backup_test_falsified` (falsification/omission flags) +- World flag: `final_config_made` (Q039 Branch 3 — the config change was made) +- Suspicion: at maximum (S score above maximum threshold regardless of other variables) + +**What it means:** The player's conduct has become part of the problem. Whether +through reckless access, destroyed evidence, falsified documentation, or the final +config change, the player's footprint is now under investigation. The original +operation may or may not have been discovered — but the player's behavior during +the period is. + +**Resolution character content:** +- Priya's Q043 response is brief and procedural. +- Kowalski's final email: "We are conducting a review of access activity during + the period in question. You will be contacted separately." The contact is from + Priya and HR, not from Marcus. Marcus does not send a Q044 message. + +**Tone:** Administrative. The player receives an email. There is no scene. There +is no confrontation. The consequence of chaos in Sysadmin Chronicles is an internal +access review, not an explosion. That is correct. + +--- + +### Mixed Behavior Priority + +A player with high curiosity AND high obedience: curiosity wins if both reach their +respective thresholds. `exposure` is the result. Obedience makes the record better +— more complete documentation, more accurate reporting — but curiosity determines +the ending route. + +A player with high curiosity AND high risk: chaos takes priority if the risk +threshold is crossed, regardless of curiosity or obedience. 
Knowing something and +acting recklessly about it is not the investigative path; it is chaos. + +A player with high obedience AND low trust (regression branches throughout): neither +`corporate_loop` (requires positive trust) nor `exposure` is reached. Default to +`burnout`. + +--- + +## 10. Implementation Notes + +### New Fields Required + +**On quest objects:** +- `narrative_phase`: string enum — `normal_work`, `unease`, `suspicion`, + `investigation`, `conflict`, `resolution` +- `hidden_hook`: optional object — `hook_id` (string), `discovery_condition` + (what the player must do), `discovery_flag` (world flag set on discovery) +- `behavior_impact`: per-branch object with `curiosity_delta`, `obedience_delta`, + `risk_delta`, `suspicion_delta` — parallel to existing `trust_delta` + +**New global state fields:** +- `curiosity`: numeric, non-decaying +- `obedience`: numeric, non-decaying +- `risk`: numeric, partial decay in Phase 6 Q043 for accurate self-audit +- `suspicion`: numeric, increases and decreases per rules in Section 6 +- `access_level`: object, per-machine — `{ workstation: "sudo", web_server: "sudo", build_machine: "basic_user" }` +- `hidden_hooks_discovered`: string array of discovered hook IDs + +**Ending evaluator:** Post-Q048, reads all accumulated state, applies priority +order (chaos → exposure → corporate\_loop → burnout), outputs ending ID. + +### Existing Systems Preserved + +Everything from QUEST\_AUTHORING.md is preserved without modification: +- JSON quest schema, ticket linking, baseline snapshots +- `clue_fingerprint` as advisory documentation +- `solution_branches` with `priority`, `trust_delta`, `world_flags`, + `follow_up_dialogue`, `follow_up_incident`, `follow_up_ticket` +- `pressure_profile` (now maps to narrative phase scaling) +- `blast_radius`, `unlock_requirements` +- All validation rule types (`file_contains`, `service_state`, `command_assert`, etc.) 
+- VM prep scripts at `tools/vm/quest-prep/QXXX-prep.sh` +- Observed-state validation — no change + +### Hidden Hook Detection + +This is the most technically uncertain new requirement. Three viable approaches: + +**Approach 1 — State change detection (recommended):** Each hook requires the player +to take an action that leaves a detectable state change. For example: hook in Q001 +(Dale's SSH key) is set when the player modifies `authorized_keys` in a way that +preserves the existing entry rather than overwriting — detectable via `file_contains` +on the Dale key fingerprint after the quest validates. Hook in Q008 (audit-bridge +package) is set by a `command_assert` that checks whether the player ran a listing +command on the full repo package directory rather than just the missing package. + +Hooks that don't have an obvious state-change trigger need one designed in during +prep script authoring — e.g., a breadcrumb file the player's investigation would +naturally create (`/tmp/hook-Q005-root-history-read` created when the player runs +`cat /root/.bash_history`, detectable by the VM's audit system if enabled). + +**Approach 2 — VM audit logging (more accurate, higher implementation cost):** +Enable `auditd` on VMs with hook quests. Configure audit rules to detect file reads +on specific paths. The hook evaluator reads the audit log rather than checking state. + +**Approach 3 — Hint system integration (simplest, loses nuance):** Hooks are set +when the player selects an optional dialogue hint from Marcus or Priya that implies +they noticed something. Loses the "player behavior" quality of the hook system. + +**Recommendation:** Approach 1 for Phase 1–2 hooks. Approach 2 for Phase 3–4 hooks +where the detection needs to be more precise. Approach 3 is not recommended. + +### Behavior Impact Calibration + +Curiosity thresholds for `exposure` ending require at least 5 major hooks. 
With the +hooks as defined, maximum curiosity from hooks alone is approximately 30–35 points. +Branch-level curiosity from cross-referencing adds another 10–15 for thorough players. +Set `exposure` threshold at ~20 curiosity points with required major-hook flags — +this means a player cannot reach `exposure` by curiosity branching alone without +actually finding the hooks. + +Obedience for `corporate_loop` should be reachable by a player who takes clean +branches consistently. Maximum obedience from clean branches is approximately +30–35 points across 48 quests. Set `corporate_loop` threshold at ~25. + +Risk for `chaos` should require sustained high-risk behavior across multiple phases — +not a single bad decision. Set the chaos risk threshold at approximately 20 risk +points (e.g., 4 high-risk actions of +5 each, or 8 moderate-risk actions of +2–3). +A single reckless action should not route a player to `chaos`. + +### Phase Gating + +Phase advancement is triggered by: +- Completion of a minimum number of quests in the prior phase (6/8 minimum, 8/8 + preferred; the QuestDirector tracks completion) +- Specific world flags from key quests in the prior phase (e.g., Phase 3 requires + at least `unknown_ip_auth_documented` or `hermes_nginx_config_audited` from + Phase 2) +- Trust remaining positive (a player who has collapsed trust is gated on access; + phase still advances, but some quests may be locked behind access requirements) + +### Character Name Canon + +Canonical Priya references: +- Name: Priya Nair +- Email: `p.nair@axiomworks.internal` +- Files requiring update: `server/src/services/EmailService.js`, `content/tickets/T007.json`, + `content/docs/onboarding.json` +- Any reference to "Priya Kapoor" or "Priya Singh" is the same person; update to Priya Nair + +### Debug Tooling + +Per SPEC_LOCK.md section 4 intent: the debug tooling should expose: +- Current values of: `curiosity`, `obedience`, `risk`, `suspicion`, `trust` +- Current access level per machine +- 
All world flags set (with quest of origin) +- All hidden hooks discovered +- Current ending route (which ending would fire if the game ended now) +- Audit log of all trust\_delta and behavior\_impact events with quest ID + +The "current ending route" display is especially useful for QA and balance testing — +showing designers which ending a playthrough is tracking toward at any point. + +--- + +*End of Sysadmin Chronicles — Full Quest & Story Redesign (REVISED)* +*This document supersedes the previous version in full.* +*Binding against SPEC_LOCK.md.* diff --git a/docs/design/sysadmin_chronicles_quest_implementation_spec.md b/docs/design/sysadmin_chronicles_quest_implementation_spec.md new file mode 100644 index 0000000..4fb7536 --- /dev/null +++ b/docs/design/sysadmin_chronicles_quest_implementation_spec.md @@ -0,0 +1,1935 @@ +# Sysadmin Chronicles — Quest Implementation Specification + +**File:** `sysadmin_chronicles_quest_implementation_spec.md` +**Purpose:** Convert the audited quest/story redesign into an implementation-ready content and systems specification. +**Authority order:** `SPEC_LOCK.md` is binding. The revised redesign is source content. The audit corrections override redesign defects. `QUEST_AUTHORING.md` remains the current technical schema baseline. + +--- + +## 0. Non-Negotiable Design Constraints + +This implementation must preserve the core design: + +- The player is doing sysadmin work, not following an explicit main quest. +- Story must leak through real system artifacts, tickets, logs, configs, access records, bash history, package history, and operational consequences. 
+- Existing systems are extended, not replaced: + - `trust_delta` + - `world_flags` + - `solution_branches` + - `follow_up_ticket` + - `follow_up_incident` + - `baseline_snapshot` + - observed VM-state validation +- Every quest maps to exactly one narrative phase: + - `normal_work` + - `unease` + - `suspicion` + - `investigation` + - `conflict` + - `resolution` +- Behavior-driven endings must emerge from accumulated state. +- No ending may be selected by one obvious final choice. +- Do not turn Marcus, Priya, Sarah, or Kowalski into cartoon villains. +- Do not rewrite character identity or role in ways that require new portraits. +- Phase 4 and later must remain problem-solving only in ticket wording and clue design, even if the final resolution has lower emotional pressure. + +--- + +## 1. Quest Data Schema + +### 1.1 Canonical Quest Object + +Use this shape for authored quest JSON. + +```json +{ + "id": "Q001", + "title": "First Day, First Key", + "narrative_phase": "normal_work", + "tier": 1, + "primary_vm": "workstation", + "required_vms": ["workstation"], + "ticket_id": "T001", + "baseline_snapshot": "q001_first_day_first_key", + "summary": "Internal author summary of scenario, root cause, and intended branch spread.", + "linux_concepts": [ + "ssh-keygen", + "authorized_keys", + "file permissions" + ], + "systems_used": [ + "workstation" + ], + "clue_fingerprint": { + "description": "Author-facing description of seeded evidence.", + "evidence": [] + }, + "objectives": [], + "solution_branches": [], + "hidden_hook": null, + "failure_conditions": [], + "behavior_impact": { + "default": { + "curiosity_delta": 0, + "obedience_delta": 0, + "risk_delta": 0, + "suspicion_delta": 0 + } + }, + "access_requirements": { + "minimum_access": { + "workstation": "basic_user" + }, + "requires_root": false, + "temporary_grants_allowed": [] + }, + "unlock_requirements": [], + "pressure_profile": null, + "blast_radius": [], + "tags": [], + "internal_notes": "" +} +``` + +### 1.2 
Field Definitions + +| Field | Type | Required | Notes | +|---|---:|---:|---| +| `id` | string | yes | Stable quest ID, e.g. `Q001`. | +| `title` | string | yes | Player-facing quest title. | +| `narrative_phase` | enum string | yes | One of the six locked phases. | +| `tier` | int | yes | Existing difficulty tier. Usually `1`, `2`, or `3`. | +| `primary_vm` | string | yes | Main VM for the quest. Existing values: `workstation`, `web_server`, `build_machine`. | +| `required_vms` | string[] | yes | Every VM, container, or simulated host touched by clues, validation, or prep. | +| `ticket_id` | string | yes | Links to `content/tickets/<ticket_id>.json`. | +| `baseline_snapshot` | string | yes | Snapshot prepared before quest starts. | +| `summary` | string | yes | Internal summary. Not shown directly to player. | +| `linux_concepts` | string[] | yes | Explicit concepts taught or used. | +| `systems_used` | string[] | yes | Player-facing/authorship list of systems involved. Must match `required_vms` unless using user-facing host names. | +| `clue_fingerprint` | object | yes | Advisory evidence map seeded by prep scripts. | +| `objectives` | object[] | yes | Existing objective objects. | +| `solution_branches` | object[] | yes | Existing branch model, extended below. | +| `hidden_hook` | object/null | yes | Optional hidden discovery spec. Use `null` when absent. | +| `failure_conditions` | string[] | yes | Player-visible/author-facing failure states. | +| `behavior_impact` | object | yes | Quest-level default behavior impact. Branch-level values override. | +| `access_requirements` | object | yes | Minimum access needed to start/complete quest. | +| `unlock_requirements` | string[] | yes | Existing unlock system. May include `world_flag:*`, `trust_min:*`, etc. | +| `pressure_profile` | string/null | yes | Existing field, now tied to phase pressure definitions. | +| `blast_radius` | string[] | yes | Incident IDs affected or triggered by quest state. 
| +| `tags` | string[] | yes | Search/classification tags. | +| `internal_notes` | string | yes | Author-only implementation notes. | + +### 1.3 Solution Branch Extension + +Existing branch schema remains valid. Add `behavior_impact` to every branch. + +```json +{ + "id": "clean_fix", + "label": "Clean fix", + "priority": 100, + "validation": { + "type": "and", + "rules": [] + }, + "trust_delta": 2, + "behavior_impact": { + "curiosity_delta": 0, + "obedience_delta": 1, + "risk_delta": 0, + "suspicion_delta": 0 + }, + "follow_up_dialogue": "D001_CLEAN", + "follow_up_incident": null, + "follow_up_ticket": "T002", + "world_flags": ["player_ssh_configured"], + "_note": "Root-cause fix. No downstream incident." +} +``` + +#### Branch Requirements + +- Branch priorities must be unique per quest. +- Highest valid branch wins. +- Clean branch should have highest priority. +- Regression branch should never block play unless the VM state is unrecoverable. +- Missing behavior deltas must normalize to `0`. +- `trust_delta` remains professional standing, not moral alignment. +- `behavior_impact` must describe player conduct, not just technical success. + +### 1.4 How This Extends `QUEST_AUTHORING.md` Without Breaking It + +This spec does **not** replace the existing quest authoring schema. It adds fields that can be loaded with defaults. 
+ +Existing fields preserved: + +- `id` +- `title` +- `tier` +- `primary_vm` +- `required_vms` +- `ticket_id` +- `baseline_snapshot` +- `summary` +- `clue_fingerprint` +- `objectives` +- `solution_branches` +- `pressure_profile` +- `blast_radius` +- `unlock_requirements` +- `tags` +- `internal_notes` +- `_note` + +New fields: + +- `narrative_phase` +- `linux_concepts` +- `systems_used` +- `hidden_hook` +- `failure_conditions` +- `behavior_impact` +- `access_requirements` + +Compatibility rules: + +```text +Old quest file loads: +- narrative_phase: infer from quest ID map or default to "normal_work" during migration +- linux_concepts: [] +- systems_used: required_vms +- hidden_hook: null +- failure_conditions: [] +- behavior_impact: all deltas 0 +- access_requirements: derived from primary_vm/tier +``` + +Do not remove or rename existing validation rule types. New narrative systems should read quest resolution results and hidden-hook state after validation, not replace `ValidationService`. + +--- + +## 2. Behavior Tracking Schema + +### 2.1 Global Behavior State + +Persist these values in save state. + +```json +{ + "trust": 0, + "curiosity": 0, + "obedience": 0, + "risk": 0, + "suspicion": 0, + "behavior_events": [] +} +``` + +Each behavior event should be auditable: + +```json +{ + "event_id": "behavior_Q005_clean_fix", + "quest_id": "Q005", + "source": "solution_branch", + "branch_id": "clean_fix", + "deltas": { + "trust": 2, + "curiosity": 0, + "obedience": 1, + "risk": 0, + "suspicion": 0 + }, + "world_flags_set": ["hermes_cache_ownership_correct"], + "timestamp": "game-time-or-save-counter" +} +``` + +### 2.2 Variable: `curiosity` + +**Meaning:** The player investigates beyond immediate ticket scope, reads anomalous artifacts, cross-references system history, and notices patterns. + +#### Increases when + +- Hidden hook discovered. +- Player reads or preserves out-of-scope but relevant evidence. 
+- Player cross-references previous anomalies in a report or audit artifact. +- Player checks history deeper than required by the immediate symptom. +- Player chooses a branch that documents suspicious system history without making unauthorized changes. + +#### Decreases when + +Normally does not decrease. Do not punish curiosity directly. If curiosity creates unauthorized changes, apply `risk` and `suspicion`, not negative curiosity. + +#### Thresholds + +| Threshold | Meaning | +|---:|---| +| `5` | Player has noticed early anomalies. Minor dialogue nuance unlocked. | +| `10` | Suspicion/investigation route can begin surfacing optional content. | +| `20` | `exposure` becomes eligible if major hooks and trust/risk requirements are also met. | +| `30+` | Thorough investigator. Ending text can acknowledge complete evidence chain. | + +#### Systems that read it + +- QuestDirector phase routing +- HiddenHookService +- DialogueService +- EndingEvaluator +- PressureService, indirectly through `suspicion` + +#### Affected by + +| Source | Effect | +|---|---| +| Solution branches | +0 to +3 when branch includes extra documented investigation. | +| Hidden hooks | +1 to +5 depending on hook importance. | +| Incidents | +1 if player connects incident to previous branch/flag. | +| Optional actions | +1 to +3 when optional evidence is discovered or preserved. | + +### 2.3 Variable: `obedience` + +**Meaning:** The player completes assigned work, stays in scope, follows authority structures, documents before deviating, and escalates appropriately. + +#### Increases when + +- Clean or acceptable branch resolves assigned ticket. +- Player documents root cause in expected artifact. +- Player follows access-review procedure. +- Player escalates before changing scope. +- Player completes high-pressure tasks without unsafe shortcuts. + +#### Decreases when + +Usually does not need direct decreases. 
Low obedience is represented by failing to gain obedience, plus possible `risk`, `suspicion`, and trust loss. + +Apply explicit negative obedience only when the player refuses or ignores assigned work while making unrelated changes. + +#### Thresholds + +| Threshold | Meaning | +|---:|---| +| `8` | Reliable junior admin. | +| `15` | Strong procedural compliance. | +| `25` | `corporate_loop` eligible if curiosity is low and trust is positive. | +| `30+` | Highly compliant; useful for subtle ending variation. | + +#### Systems that read it + +- QuestDirector +- AccessService +- DialogueService +- EndingEvaluator +- PressureService + +#### Affected by + +| Source | Effect | +|---|---| +| Solution branches | Clean branches usually +1. Acceptable branches +0 or +1. | +| Hidden hooks | No direct effect unless player escalates properly after discovery. | +| Incidents | +1 if player resolves incident procedurally. | +| Optional actions | +1 if optional action is documentation/escalation, not snooping. | + +### 2.4 Variable: `risk` + +**Meaning:** Reckless or unsafe conduct: broad permissions, security bypasses, destructive cleanup, evidence destruction, unauthorized access, falsified reporting. + +#### Increases when + +- Player bypasses SSL/TLS verification. +- Player disables security controls without approval. +- Player makes services run as root unnecessarily. +- Player makes world-writable directories. +- Player deletes logs or evidence instead of archiving. +- Player modifies unauthorized systems. +- Player falsifies or sanitizes reports. +- Player performs a dangerous final change without prior context or escalation. + +#### Decreases when + +Risk generally does not decay during active phases. A small reduction is allowed in Phase 6 only if the player performs an accurate self-audit and documents their own footprint. + +Recommended maximum reduction: `-3`. + +#### Thresholds + +| Threshold | Meaning | +|---:|---| +| `5` | Mildly risky admin. 
Priya may mention access hygiene. | +| `10` | Elevated risk. Access grants become stricter. | +| `15` | High risk. Monitoring/interference likely. | +| `20` | `chaos` eligible, especially with suspicion or evidence-destruction flags. | + +#### Systems that read it + +- AccessService +- PressureService +- IncidentService +- EndingEvaluator +- DialogueService + +#### Affected by + +| Source | Effect | +|---|---| +| Solution branches | Regression branches +2 to +5. Severe branches +5 or more. | +| Hidden hooks | No direct risk unless acted on recklessly. | +| Incidents | Incidents caused by unsafe branch may add +1 to +3. | +| Optional actions | Unauthorized modification or evidence destruction adds risk. | + +### 2.5 Variable: `trust` + +**Meaning:** Professional standing. Mostly driven by quality of fixes. + +#### Increases when + +- Player completes robust root-cause fixes. +- Player documents properly. +- Player avoids downstream incidents. +- Player handles access or security work carefully. + +#### Decreases when + +- Player leaves brittle fixes. +- Player introduces regressions. +- Player creates security exposure. +- Player fails objectives. + +#### Thresholds + +| Threshold | Meaning | +|---:|---| +| `< 0` | Low trust. Access and dialogue become constrained. | +| `0–5` | Basic working trust. | +| `6–12` | Trusted junior. More access and context. | +| `13+` | High trust. Can receive sensitive temporary access if risk/suspicion allow. | + +#### Systems that read it + +- AccessService +- QuestDirector +- DialogueService +- EndingEvaluator +- PressureService + +#### Affected by + +| Source | Effect | +|---|---| +| Solution branches | Existing `trust_delta`. | +| Hidden hooks | No direct effect unless branch resolution includes documentation/escalation. | +| Incidents | Trust loss if incident was caused by player's earlier weak branch. | +| Optional actions | Trust gain only if optional action produces useful, documented operational outcome. 
| + +### 2.6 Variable: `suspicion` + +**Meaning:** Management/security attention. Distinct from trust. A competent investigator can have high trust and high suspicion. + +#### Increases when + +- Player accesses sensitive paths outside ticket scope. +- Player discovers or acts on major hidden hooks. +- Player performs unusual audit activity. +- Player uses root frequently. +- Player triggers risky incidents. +- Player touches unauthorized hosts. +- Player reports anomalies up the chain. + +#### Decreases when + +- Player documents why access was needed. +- Player escalates before acting. +- Priya approves investigation scope. +- Player completes access reviews cleanly. +- Player self-audits accurately. + +#### Thresholds + +| Threshold | Meaning | +|---:|---| +| `3` | Mild attention. Priya/Kowalski wording may shift. | +| `6` | Monitoring. Access requests reviewed more carefully. | +| `10` | High suspicion. Temporary grants restricted. | +| `15` | Maximum/critical. Chaos may become eligible if risk is also high or evidence flags exist. | + +#### Systems that read it + +- PressureService +- AccessService +- DialogueService +- EndingEvaluator +- QuestDirector + +#### Affected by + +| Source | Effect | +|---|---| +| Solution branches | Risky or unusual branches +1 to +3. | +| Hidden hooks | Major hooks +1 suspicion when surfaced/reported. Discovery alone may be silent early. | +| Incidents | Security incidents +2 to +4. | +| Optional actions | Approved documentation can reduce suspicion; unauthorized digging can increase it. | + +--- + +## 3. Access System Schema + +### 3.1 Global Access State + +Access is per machine. Do not use one global permission flag. 
+ +```json +{ + "access_level": { + "workstation": "basic_user", + "web_server": "none", + "build_machine": "none", + "external_target_10_0_0_47": "none" + }, + "temporary_grants": [], + "access_review_flags": [] +} +``` + +### 3.2 Access Levels + +Enum: + +```text +none +basic_user +sudo +root +``` + +The user-requested levels are `basic_user`, `sudo`, and `root`; `none` is needed internally for systems the player cannot yet access. + +--- + +### 3.3 Level: `basic_user` + +#### Unlock conditions + +- Default on `workstation` at game start. +- Granted on other machines when a ticket explicitly gives user-level access. +- Requires no special trust beyond initial onboarding unless previously revoked. + +#### Restrictions + +- No privileged writes. +- No service restarts requiring sudo. +- No system config edits. +- Can inspect user-readable logs, home directories, docs, and assigned files. + +#### Quest types enabled + +- Onboarding +- Read-only investigation +- Basic file inspection +- User-level SSH/auth setup +- Non-privileged diagnostics + +#### Possible temporary grants + +- Read-only access to a project directory. +- Temporary account on `hermes` or `vulcan`. +- Restricted shell for audit inventory. + +#### Possible restrictions from suspicion/risk + +- High suspicion can restrict access to only ticket-specific paths. +- High risk can prevent access to sensitive logs or deploy accounts. + +--- + +### 3.4 Level: `sudo` + +#### Unlock conditions + +- Positive trust. +- Quest requires administrative work. +- Risk below elevated threshold. +- Suspicion below high threshold or access explicitly approved. +- Usually available from Phase 2 onward on `workstation`/`web_server`, later on `build_machine`. + +#### Restrictions + +- Sudo should be task-scoped where possible. +- Dangerous commands may be audited. 
+- Some actions may require explicit quest grant: + - editing sudoers + - restarting production-facing services + - modifying firewall rules + - changing deploy keys + - package signing or release publishing + +#### Quest types enabled + +- Service repair +- Config edits +- Package management +- Permission/ownership repair +- Log rotation repair +- Access review cleanup +- Build pipeline maintenance + +#### Possible temporary grants + +```json +{ + "grant_id": "grant_Q011_pipeline_svc_sudo_review", + "quest_id": "Q011", + "vm": "build_machine", + "level": "sudo", + "scope": ["read:/etc/sudoers.d/pipeline-svc", "write:/etc/sudoers.d/pipeline-svc"], + "expires_on": "quest_resolution" +} +``` + +#### Possible restrictions from suspicion/risk + +- If `risk >= 10`, require Priya or Marcus approval for new sudo grants. +- If `suspicion >= 10`, deny broad sudo and allow only scoped sudo. +- If `risk >= 15`, revoke nonessential sudo until review quest resolves. + +--- + +### 3.5 Level: `root` + +Root is rare and usually temporary. It is not a normal promotion. This fixes the audit finding that root was named but not actually modeled. + +#### Unlock conditions + +All must be true unless a specific authored exception exists: + +- `trust > 0` +- `risk < 10` +- `suspicion < 10`, or Priya explicitly approves the grant +- Current phase is `investigation` or `conflict` +- Quest has `access_requirements.requires_root: true` +- Quest explains why sudo is insufficient + +#### Restrictions + +- Root must be logged. +- Root grants expire at quest resolution unless ending state explicitly preserves high trust. +- Root should be allowed only for: + - filesystem recovery + - archival preservation + - privileged audit capture + - service account repair + - controlled forensic inventory +- Root must not become the default way to solve all admin tasks. That would flatten the game. 
+ +#### Quest types enabled + +- Privileged audit capture +- Root-owned evidence preservation +- Recovery from broken sudo/service account state +- Deep backup/restore verification +- Controlled incident response + +#### Possible temporary grants + +```json +{ + "grant_id": "grant_Q035_log_archival_root", + "quest_id": "Q035", + "vm": "web_server", + "level": "root", + "scope": [ + "read:/var/log/**", + "read:/root/.bash_history", + "write:/var/archive/**" + ], + "approved_by": "p.nair@axiomworks.internal", + "expires_on": "quest_resolution" +} +``` + +#### Possible restrictions from suspicion/risk + +- `risk >= 10`: deny root unless the quest is explicitly a recovery quest. +- `suspicion >= 10`: root requires Priya approval and narrow scope. +- `risk >= 15`: root denied except for forced remediation. +- Evidence destruction flag: root locked until an access review quest resolves. + +--- + +## 4. Hidden Hook Schema + +### 4.1 Canonical Hidden Hook Object + +```json +{ + "hook_id": "hook_dale_ssh_key_found", + "quest_id": "Q001", + "clue_type": "unexpected_ssh_key", + "discovery_method": { + "type": "state_or_audit", + "description": "Player reads authorized_keys before overwriting it or preserves the Dale key entry while adding their own key.", + "detection": { + "preferred": "state_change", + "fallback": "auditd_file_read", + "validation": { + "type": "file_contains", + "vm": "workstation", + "path": "/home/player/.ssh/authorized_keys", + "contains": "dale@axiomworks.internal" + } + } + }, + "evidence_locations": [ + { + "vm": "workstation", + "path": "/home/player/.ssh/authorized_keys", + "contains": "dale@axiomworks.internal" + } + ], + "visible_to_player": false, + "ignored_result": { + "world_flags": [], + "behavior_impact": { + "curiosity_delta": 0, + "obedience_delta": 0, + "risk_delta": 0, + "suspicion_delta": 0 + } + }, + "discovered_result": { + "world_flags": ["hook_dale_ssh_key_found"], + "behavior_impact": { + "curiosity_delta": 1, + "obedience_delta": 
0, + "risk_delta": 0, + "suspicion_delta": 0 + } + }, + "acted_on_result": { + "world_flags": ["hook_dale_ssh_key_found", "dale_key_documented"], + "behavior_impact": { + "curiosity_delta": 1, + "obedience_delta": 1, + "risk_delta": 0, + "suspicion_delta": 1 + } + }, + "world_flags": ["hook_dale_ssh_key_found"], + "unlocks": [] +} +``` + +### 4.2 Field Definitions + +| Field | Type | Required | Notes | +|---|---:|---:|---| +| `hook_id` | string | yes | Stable hook ID. | +| `quest_id` | string | yes | Owning quest. | +| `clue_type` | enum/string | yes | Classification: `unexpected_ssh_key`, `odd_timestamp`, `unknown_service`, etc. | +| `discovery_method` | object | yes | How the game can detect discovery. | +| `evidence_locations` | object[] | yes | Real seeded locations. | +| `visible_to_player` | bool | yes | Usually `false`. Hooks are not quest markers. | +| `ignored_result` | object | yes | State if not discovered. Usually no effect. | +| `discovered_result` | object | yes | Flags and behavior changes on discovery. | +| `acted_on_result` | object | yes | Effects if player documents/escalates/uses the hook. | +| `behavior_impact` | object | optional | Alias allowed for legacy convenience; prefer nested results. | +| `world_flags` | string[] | yes | Flags set by discovery. | +| `unlocks` | string[] | yes | Optional unlocks for later dialogue, quests, or ending eligibility. | + +### 4.3 Discovery Detection Strategy + +Use a consistent strategy per hook. Do not mix half-solutions randomly. 
+ +#### Preferred strategy by phase + +| Phase | Recommended detection | +|---|---| +| Phase 1 | State-change detection | +| Phase 2 | State-change detection | +| Phase 3 | State-change or audit logging | +| Phase 4 | Audit logging preferred | +| Phase 5 | Audit logging or explicit documentation artifact | +| Phase 6 | Usually no hidden hooks | + +#### Valid detection types + +```text +state_change +auditd_file_read +documentation_artifact +command_wrapper_marker +branch_context +``` + +#### Avoid + +- Fake `command_assert` rules that assume command history is available when it is not. +- Hook discovery based only on taking the clean branch. +- Double-counting curiosity when the hook is effectively required by the branch. +- Explicit UI notifications like “hidden hook discovered.” That is a neon sign bolted to subtlety. + +### 4.4 Hook Optionality Rule + +A hook is valid only if: + +- The ticket can be completed without discovering it. +- The hook is discoverable through plausible sysadmin behavior. +- The evidence exists in the VM baseline. +- Discovery leaves a detectable state/audit/documentation signal. +- Discovery does not require guessing story lore. + +--- + +## 5. 
Boss / Management Pressure Schema + +### 5.1 Canonical Pressure Profile Object + +```json +{ + "phase": "suspicion", + "behavior_level": "suspicious", + "possible_interruptions": [ + "status_email", + "priority_reassignment", + "access_review_notice" + ], + "access_effects": { + "may_delay_grants": true, + "may_restrict_root": true, + "may_revoke_temporary_grants": false + }, + "quest_effects": { + "ticket_wording_style": "minimal_guidance", + "branch_tolerance": "strict", + "incident_visibility": "increased" + }, + "dialogue_style": { + "marcus": "terse_forwarded_context", + "priya": "procedural_review", + "kowalski": "bullet_point_status_pressure" + }, + "character_sources": [ + "Marcus Webb", + "Priya Nair", + "Dave Kowalski" + ] +} +``` + +### 5.2 Phase Scaling + +Preserve this exact progression. + +| Phase | Behavior Level | Pressure Meaning | +|---|---|---| +| `normal_work` | `annoying` | Basic onboarding friction, reminders, mild evaluation. | +| `unease` | `dismissive` | Oddities are treated as legacy cruft or low priority. | +| `suspicion` | `suspicious` | Status questions and access posture concerns begin. | +| `investigation` | `monitoring` | Audits, approvals, narrower scope, more Priya/Kowalski presence. | +| `conflict` | `interfering` | Priority changes, access constraints, formal reviews. | +| `resolution` | `outcome_dependent` | Pressure reflects accumulated ending route. | + +### 5.3 Possible Interruptions + +Use sparingly. Pressure is a dynamic constraint, not a cutscene cannon. + +```text +status_email +priority_reassignment +delayed_access_approval +access_review_notice +audit_followup +meeting_invite +temporary_grant_expiration +incident_escalation +documentation_request +``` + +### 5.4 Access Effects + +Pressure may: + +- Delay temporary grants. +- Require Priya approval for root. +- Restrict broad sudo. +- Remove stale temporary access. +- Force access review after high-risk branches. +- Deny nonessential access during conflict phase. 
+ +Pressure must not: + +- Block all play. +- Force a single ending. +- Accuse the player directly without accumulated cause. +- Override branch validation. + +### 5.5 Quest Effects + +Pressure can alter: + +- Ticket wording +- Hint availability +- Branch tolerance +- Incident timing +- Dialogue reaction +- Unlock requirements +- Access grant latency + +Pressure should not alter actual Linux realism. A broken service should still be broken for real reasons. + +### 5.6 Dialogue Style by Character + +| Character | Use Under Pressure | +|---|---| +| Marcus Webb | Short, precise, operational. Does not explain the plot. | +| Priya Nair | Calm, procedural, consequence-focused. No alarmism. | +| Dave Kowalski | Institutional pressure, bullet points, access posture language. | +| Sarah Chen | Product/demo pressure, symptom-focused, not investigative. | +| Dave Okonkwo | Ordinary user impact, not plot delivery. | + +--- + +## 6. Ending Schema + +### 6.1 Canonical Ending Object + +```json +{ + "ending_id": "exposure", + "name": "Exposure", + "behavior_requirements": { + "curiosity_min": 20, + "obedience_min": 0, + "risk_max": 12, + "trust_min": 1, + "suspicion_max": 14 + }, + "world_flag_requirements": { + "all": [], + "any": [ + "archive_record_complete", + "unauthorized_access_chain_documented" + ], + "none": [ + "evidence_destroyed_major", + "report_falsified_major" + ] + }, + "access_requirements": { + "required_history": [ + "had_sudo_or_root_for_audited_task" + ], + "current_access": {} + }, + "hidden_hook_requirements": { + "major_hooks_min": 5, + "required_hooks_any": [ + "hook_audit_bridge_package", + "hook_dh_sudo_grant", + "hook_unknown_ip_auth" + ] + }, + "priority_rules": { + "priority": 2, + "overridden_by": ["chaos"] + }, + "fallback_conditions": [], + "summary": "Player assembled enough evidence through legitimate sysadmin work for the hidden story to surface.", + "final_state": { + "player_status": "trusted_but_changed", + "company_state": 
"internal_exposure_or_external_review", + "marcus_response_style": "minimal_acknowledgment" + } +} +``` + +### 6.2 Ending Priority Order + +Use this order: + +```text +chaos → exposure → corporate_loop → burnout +``` + +Reason: high-risk/destructive behavior should override investigative progress; curiosity beats obedience when both are high; burnout catches passive/low-trust paths. + +### 6.3 Ending: `chaos` + +```json +{ + "ending_id": "chaos", + "name": "Chaos", + "behavior_requirements": { + "risk_min_any": 20, + "suspicion_min_any": 15 + }, + "world_flag_requirements": { + "any_compound": [ + { + "flag": "final_config_made", + "also_requires_any": [ + "risk_elevated", + "access_review_incomplete", + "kowalski_report_sanitized", + "backup_test_falsified", + "logs_selectively_omitted" + ] + } + ], + "serious_flags_min": 2, + "serious_flags": [ + "evidence_destroyed_major", + "report_falsified_major", + "unauthorized_proxy_enabled", + "ssl_verification_bypassed", + "service_run_as_root_unnecessarily", + "logs_selectively_omitted" + ] + }, + "access_requirements": {}, + "hidden_hook_requirements": {}, + "priority_rules": { + "priority": 1, + "overrides": ["exposure", "corporate_loop", "burnout"] + }, + "fallback_conditions": [], + "summary": "The player's own access footprint becomes part of the incident review.", + "final_state": { + "player_status": "under_access_review", + "company_state": "formal_internal_review", + "tone": "administrative" + } +} +``` + +Important correction: `final_config_made` is a heavy contributor, not a standalone bad-ending button. 
+ +### 6.4 Ending: `exposure` + +```json +{ + "ending_id": "exposure", + "name": "Exposure", + "behavior_requirements": { + "curiosity_min": 20, + "trust_min": 1, + "risk_max": 19 + }, + "world_flag_requirements": { + "any": [ + "archive_record_complete", + "unauthorized_access_chain_documented", + "int0194_thread_documented" + ], + "none": [ + "evidence_destroyed_major", + "report_falsified_major" + ] + }, + "hidden_hook_requirements": { + "major_hooks_min": 5 + }, + "priority_rules": { + "priority": 2, + "overridden_by": ["chaos"], + "overrides": ["corporate_loop", "burnout"] + }, + "fallback_conditions": [], + "summary": "The player finds and preserves enough evidence for the truth to surface through normal operational work.", + "final_state": { + "player_status": "credible_internal_witness", + "company_state": "exposed_or_forced_review", + "tone": "quiet_consequence" + } +} +``` + +### 6.5 Ending: `corporate_loop` + +```json +{ + "ending_id": "corporate_loop", + "name": "Corporate Loop", + "behavior_requirements": { + "obedience_min": 25, + "trust_min": 1, + "curiosity_max": 14, + "risk_max": 19 + }, + "world_flag_requirements": { + "none": [ + "archive_record_complete", + "unauthorized_access_chain_documented", + "int0194_thread_documented" + ] + }, + "hidden_hook_requirements": { + "major_hooks_max": 4 + }, + "priority_rules": { + "priority": 3, + "overridden_by": ["chaos", "exposure"], + "overrides": ["burnout"] + }, + "fallback_conditions": [], + "summary": "The player becomes a reliable operator inside the system without understanding what the system is hiding.", + "final_state": { + "player_status": "trusted_employee", + "company_state": "continues_operating", + "tone": "normal_work_resumes" + } +} +``` + +### 6.6 Ending: `burnout` + +```json +{ + "ending_id": "burnout", + "name": "Burnout", + "behavior_requirements": {}, + "world_flag_requirements": {}, + "access_requirements": {}, + "hidden_hook_requirements": {}, + "priority_rules": { + "priority": 4, 
+ "fallback": true + }, + "fallback_conditions": [ + "No other ending matched", + "Trust too low for corporate_loop", + "Curiosity too low for exposure", + "Risk below chaos threshold" + ], + "summary": "The player completes enough work to continue, but never builds enough trust, curiosity, or coherent evidence to change anything.", + "final_state": { + "player_status": "worn_down_or_stagnant", + "company_state": "unchanged", + "tone": "muted" + } +} +``` + +--- + +## 7. File Organization Recommendation + +Do not force these paths if the repo already has conventions that differ. Use them as the target shape unless implementation reality says otherwise. + +```text +content/ + quests/ + Q001.json + Q002.json + ... + tickets/ + T001.json + T002.json + ... + dialogue/ + D001_clean.json + D001_regression.json + ... + narrative/ + behavior_rules.json + access_levels.json + pressure_profiles.json + endings.json + hidden_hooks.json + phase_rules.json +``` + +### Recommended Files + +#### `content/quests/*.json` + +Owns: + +- Quest schema +- Objectives +- Solution branches +- Access requirements +- Quest-local hidden hook link or inline hook +- Behavior deltas +- Blast radius +- Unlock requirements + +#### `content/tickets/*.json` + +Owns: + +- Sender +- Recipient +- Subject +- Body +- Phase-appropriate wording +- No hidden-hook labels +- No explicit solution instructions unless Phase 1 requires it + +#### `content/dialogue/*.json` + +Owns: + +- Branch reaction dialogue +- Incident dialogue +- Access grant/revocation messages +- Pressure interruption text +- Ending-adjacent final messages + +#### `content/narrative/behavior_rules.json` + +Owns: + +- Variable definitions +- Thresholds +- Default deltas +- Decay rules, if any +- Event audit schema + +#### `content/narrative/access_levels.json` + +Owns: + +- Access enum +- Per-phase grants +- Trust/risk/suspicion gates +- Temporary grant rules +- Root restrictions + +#### `content/narrative/pressure_profiles.json` + +Owns: + 
+- Phase pressure profiles +- Interruption types +- Access effects +- Quest effects +- Character source rules + +#### `content/narrative/endings.json` + +Owns: + +- Ending requirements +- Priority order +- Fallback logic +- Final-state outputs + +#### Optional: `content/narrative/hidden_hooks.json` + +Use this if hooks become too large to keep inline inside quest JSON. + +Recommended compromise: + +- Quest JSON contains `hidden_hook: "hook_id"` or small inline object. +- `hidden_hooks.json` contains full hook definitions. + +--- + +## 8. Validation Rules + +The implementation should enforce all of these. + +### 8.1 Quest Schema Validation + +1. Every quest must have all required root fields. +2. `id` must match filename. +3. `ticket_id` must point to an existing ticket file. +4. `narrative_phase` must be one of: + - `normal_work` + - `unease` + - `suspicion` + - `investigation` + - `conflict` + - `resolution` +5. Every quest must map to exactly one phase. +6. `tier` must be a positive integer. +7. `primary_vm` must be included in `required_vms`. +8. Every VM in `clue_fingerprint`, `objectives`, `solution_branches`, `hidden_hook`, prep requirements, and validation must appear in `required_vms`. +9. If a quest references `10.0.0.47`, it must declare `external_target_10_0_0_47` or equivalent concrete system in `required_vms`. +10. `systems_used` must not omit any player-touched system. +11. `baseline_snapshot` must be non-empty. +12. `summary` must be non-empty and author-facing. +13. `linux_concepts` must be non-empty. +14. `failure_conditions` must be non-empty. +15. `tags` must be an array. + +### 8.2 Branch Validation + +16. Every quest must have at least two solution branches unless explicitly approved as a final resolution quest. +17. Every branch must have unique `id`. +18. Every branch must have unique `priority`. +19. Higher priority must represent better or more complete outcome. +20. Every branch must have `validation`. +21. 
Every branch must have `trust_delta`. +22. Every branch must have explicit `behavior_impact` with all four deltas: + - `curiosity_delta` + - `obedience_delta` + - `risk_delta` + - `suspicion_delta` +23. Missing branch deltas must be normalized to `0` during migration only, then written back explicitly. +24. Branch `world_flags` must use stable string keys. +25. `follow_up_ticket`, if present, must point to an existing ticket. +26. `follow_up_incident`, if present, must point to an existing incident definition. +27. Regression branches should not share validation conditions with clean branches unless priority correctly disambiguates. +28. Branches must not make one final choice alone trigger an ending. + +### 8.3 Objective Validation + +29. Every objective must have: + - `id` + - `description` + - `check_mode` + - `validation` +30. `check_mode` must be `passive` or `explicit`. +31. Objectives must not choose the winning branch. +32. Objectives should use state-based validation where possible. + +### 8.4 Validation Rule Type Enforcement + +33. New quests must use runtime rule names: + - `file_mode` + - `file_owner` + - `service_state` + - `service_enabled` + - `process_running` + - `port_listening` + - `package_installed` + - `command_assert` +34. Shorthand names like `file_mode_matches` may appear in notes but not runtime JSON. +35. `command_assert` must be used sparingly. +36. Prefer state rules over command rules. +37. Unknown fields may be tolerated by loader but should fail content lint for new authored files. +38. `and`, `or`, and `not` must contain valid nested rule objects. + +### 8.5 Hidden Hook Validation + +39. `hidden_hook` must be either `null`, a hook ID, or a valid hook object. +40. Every hook must have: + - `hook_id` + - `quest_id` + - `clue_type` + - `discovery_method` + - `evidence_locations` + - `visible_to_player` + - `ignored_result` + - `discovered_result` + - `acted_on_result` + - `world_flags` + - `unlocks` +41. 
Hook `quest_id` must match owning quest. +42. Hook evidence locations must exist in the prep baseline. +43. Hook evidence VMs must be in `required_vms`. +44. Hook discovery must be detectable by approved mechanism: + - `state_change` + - `auditd_file_read` + - `documentation_artifact` + - `command_wrapper_marker` + - `branch_context` +45. Hook discovery must be optional for ticket completion. +46. Hook discovery must not be identical to clean branch resolution unless explicitly marked as unavoidable and worth no separate curiosity. +47. Hook curiosity must not double-count branch curiosity. +48. Hidden hooks must not be visible as quest markers. +49. Hook world flags must be stable and unique. +50. Major hooks must be tagged as major for ending checks. + +### 8.6 Behavior State Validation + +51. Save state must persist: + - `trust` + - `curiosity` + - `obedience` + - `risk` + - `suspicion` +52. Save state must persist behavior event history for debugging. +53. Behavior variables should be numeric. +54. Behavior deltas should apply once per branch resolution. +55. Hook deltas should apply once per hook discovery. +56. Replaying/rescanning a quest must not duplicate behavior deltas. +57. `risk` should not decay except explicitly authored Phase 6 self-audit reduction. +58. `curiosity` should not decay. +59. `obedience` should not decay. +60. `suspicion` may decrease only through authored documentation/escalation/review outcomes. +61. `trust` remains governed by existing `trust_delta`. + +### 8.7 Access Validation + +62. Access state must be per machine. +63. Access level must be one of: + - `none` + - `basic_user` + - `sudo` + - `root` +64. `basic_user → sudo → root` must be represented in data and logic. +65. Root access must not be granted as normal promotion. +66. Root grants must require: + - quest requires root + - trust positive + - risk below elevated threshold + - suspicion below high threshold or explicit approval +67. Temporary grants must expire. +68. 
Temporary grants must be logged. +69. High risk must restrict sudo/root grants. +70. High suspicion must add review/approval requirements. +71. Access revocation must not softlock the game; provide remediation quests. +72. Quest start must verify `access_requirements`. + +### 8.8 Pressure Validation + +73. Every phase must have a pressure profile. +74. Pressure behavior level must preserve: + - `annoying` + - `dismissive` + - `suspicious` + - `monitoring` + - `interfering` + - `outcome_dependent` +75. Pressure may alter interruptions, access effects, ticket effects, and dialogue style. +76. Pressure must not replace VM-state validation. +77. Pressure must not turn characters into villains. +78. Pressure interruptions must be auditable. +79. Pressure effects must not force a single ending. + +### 8.9 Ending Validation + +80. Ending evaluator must run after final quest or when manually invoked by debug command. +81. Ending priority must be: + - `chaos` + - `exposure` + - `corporate_loop` + - `burnout` +82. Every ending must have: + - `ending_id` + - `name` + - `behavior_requirements` + - `world_flag_requirements` + - `access_requirements` + - `hidden_hook_requirements` + - `priority_rules` + - `fallback_conditions` + - `summary` + - `final_state` +83. No ending may be triggered by one obvious final branch alone. +84. `final_config_made` must not alone trigger `chaos`. +85. `exposure` must require major hidden hooks plus sufficient curiosity. +86. `corporate_loop` must require obedience and positive trust with low/moderate curiosity. +87. `burnout` must be the fallback. +88. Ending evaluation must explain debug reasons when run in dev mode. +89. Ending evaluation must not expose raw thresholds to player UI unless in debug mode. + +### 8.10 File/Prep Validation + +90. Every quest must have an idempotent prep script unless generated by an approved fixture system. +91. Prep scripts must not require internet access. +92. Prep scripts must not prompt for input. +93. 
Prep scripts must create all clue evidence described in `clue_fingerprint`. +94. Prep scripts must create all hook evidence described in `hidden_hook`. +95. Prep scripts must restore or build from the declared baseline snapshot. +96. Prep scripts must accept domain/VM identifier according to current project convention. +97. Prep scripts must not depend on live player session state. +98. Report/documentation-heavy quests must validate concrete artifacts: + - file paths + - required sections + - checksums + - timestamps + - source evidence references +99. External targets must be concrete: + - fourth VM + - containerized fake host + - simulated network target +100. Do not leave external hosts as off-screen lore blobs. The machines demand receipts. + +--- + +## 9. Debug / Dev Commands + +Names are recommendations. Wire them to the existing dev console/CLI style. + +### 9.1 Force Phase + +```text +narrative phase set <phase> +``` + +Example: + +```text +narrative phase set investigation +``` + +Effects: + +- Sets current narrative phase. +- Recomputes pressure profile. +- Does not automatically grant access. +- Emits debug event. 
+ +### 9.2 Inspect Quest State + +```text +quest inspect <quest_id> +``` + +Output: + +```json +{ + "quest_id": "Q015", + "status": "active", + "narrative_phase": "suspicion", + "tier": 3, + "primary_vm": "build_machine", + "required_vms": ["build_machine"], + "objectives": [], + "valid_branches_now": [], + "winning_branch_if_resolved": null, + "world_flags_required": [], + "hidden_hook": "hook_telemetry_ticket_INT0194" +} +``` + +### 9.3 Inspect Behavior Variables + +```text +behavior inspect +``` + +Output: + +```json +{ + "trust": 8, + "curiosity": 14, + "obedience": 18, + "risk": 4, + "suspicion": 6, + "recent_events": [] +} +``` + +### 9.4 Modify Behavior Variables + +```text +behavior add <variable> <delta> [reason] +behavior set <variable> <value> [reason] +``` + +Examples: + +```text +behavior add curiosity 5 test-major-hook-threshold +behavior set risk 20 test-chaos-route +``` + +Must log dev-origin event. + +### 9.5 Inspect Access Level + +```text +access inspect +access inspect <vm> +``` + +Output: + +```json +{ + "workstation": "sudo", + "web_server": "sudo", + "build_machine": "basic_user", + "external_target_10_0_0_47": "none", + "temporary_grants": [], + "access_review_flags": [] +} +``` + +### 9.6 Force Access Level + +```text +access set <vm> <level> [reason] +``` + +Example: + +```text +access set build_machine root test-q035-root-path +``` + +Allowed levels: + +```text +none +basic_user +sudo +root +``` + +Must log dev-origin event. + +### 9.7 Trigger Hidden Hook Discovery + +```text +hook discover <hook_id> [mode] +``` + +Examples: + +```text +hook discover hook_dale_ssh_key_found discovered +hook discover hook_dh_sudo_grant acted_on +``` + +Modes: + +```text +discovered +acted_on +ignored +``` + +Effects: + +- Applies relevant world flags. +- Applies relevant behavior deltas. +- Does not duplicate effects if already applied unless `--force` is used. 
+ +### 9.8 Inspect Hidden Hooks + +```text +hook inspect +hook inspect <hook_id> +hook inspect --quest <quest_id> +``` + +Output: + +```json +{ + "hook_id": "hook_dale_ssh_key_found", + "quest_id": "Q001", + "state": "discovered", + "world_flags_set": ["hook_dale_ssh_key_found"], + "behavior_applied": { + "curiosity_delta": 1, + "obedience_delta": 0, + "risk_delta": 0, + "suspicion_delta": 0 + } +} +``` + +### 9.9 Force Ending Check + +```text +ending check +ending check --explain +``` + +Output: + +```json +{ + "selected_ending": "exposure", + "priority_order": ["chaos", "exposure", "corporate_loop", "burnout"], + "matched": { + "chaos": false, + "exposure": true, + "corporate_loop": true, + "burnout": true + }, + "reason": [ + "exposure matched curiosity_min", + "exposure matched major_hooks_min", + "chaos did not match risk_min or serious_flags" + ] +} +``` + +### 9.10 Reset Narrative State + +```text +narrative reset +narrative reset --keep-quest-progress +narrative reset --hard +``` + +Recommended behavior: + +- `narrative reset`: resets behavior variables, pressure, hidden hooks, ending route; keeps save shell. +- `--keep-quest-progress`: preserves completed quests and branch history but recomputes derived narrative state. +- `--hard`: resets quest progress, behavior, hooks, access, pressure, incidents, and ending route. + +### 9.11 Additional Useful Debug Commands + +```text +flags inspect +flags add <flag> +flags remove <flag> + +pressure inspect +pressure set <behavior_level> + +incidents inspect +incidents trigger <incident_id> +incidents clear + +narrative audit-log +narrative audit-log --quest <quest_id> +``` + +These are not required by the prompt, but they will save QA time. QA time is where specs go to die quietly. + +--- + +## 10. Implementation Notes for Claude Code and Codex + +Use this block directly for repo implementation. + +```text +You are implementing the Sysadmin Chronicles quest/story redesign into the existing repo. + +Binding authority: +1. SPEC_LOCK.md is binding. +2. 
sysadmin_chronicles_redesign_audit.md corrections override defects in the revised redesign. +3. sysadmin_chronicles_full_quest_redesign_REVISED.md is the source quest/story content. +4. QUEST_AUTHORING.md is the current quest JSON and validation baseline. +5. new_system_canon_packet.md preserves canon, tone, machines, characters, and existing implementation concepts. + +Do not write a new story system from scratch. Extend the existing quest, branch, world_flag, trust_delta, incident, ticket, baseline_snapshot, and observed-state validation systems. + +Core implementation tasks: +1. Add quest schema support for: + - narrative_phase + - linux_concepts + - systems_used + - hidden_hook + - failure_conditions + - behavior_impact + - access_requirements + +2. Preserve existing QUEST_AUTHORING.md fields and behavior: + - id + - title + - tier + - primary_vm + - required_vms + - ticket_id + - baseline_snapshot + - summary + - clue_fingerprint + - objectives + - solution_branches + - pressure_profile + - blast_radius + - unlock_requirements + - tags + - internal_notes + - _note + +3. Extend solution branches with explicit behavior_impact: + - curiosity_delta + - obedience_delta + - risk_delta + - suspicion_delta + Missing values may be normalized to 0 during migration, but new content must write all four values explicitly. + +4. Add persistent narrative state: + - curiosity + - obedience + - risk + - suspicion + - trust, using existing trust system + - access_level per machine + - temporary_grants + - access_review_flags + - hidden_hooks_discovered + - behavior event audit log + +5. Implement AccessService: + - enum: none, basic_user, sudo, root + - per-machine access + - temporary grants + - root grant/revoke rules + - trust/risk/suspicion/phase gates + - no softlocks; provide remediation path if access is restricted + +6. 
Implement HiddenHookService: + - load hook definitions from quest inline data or content/narrative/hidden_hooks.json + - support detection types: + - state_change + - auditd_file_read + - documentation_artifact + - command_wrapper_marker + - branch_context + - apply hook effects only once + - set world flags + - add behavior deltas + - never expose hidden hooks as normal quest markers + +7. Implement PressureService: + - phase profiles: + - normal_work: annoying + - unease: dismissive + - suspicion: suspicious + - investigation: monitoring + - conflict: interfering + - resolution: outcome_dependent + - pressure can affect ticket timing, interruptions, access approvals, and dialogue selection + - pressure must not replace VM validation or force endings + +8. Implement EndingEvaluator: + - priority order: + - chaos + - exposure + - corporate_loop + - burnout + - chaos must not trigger from final_config_made alone + - exposure requires curiosity plus major hidden hooks plus acceptable risk/trust state + - corporate_loop requires high obedience, positive trust, and low/moderate curiosity + - burnout is fallback + - provide debug explanation output + +9. Add content linting: + - every quest has valid narrative_phase + - every VM referenced appears in required_vms + - branch priorities unique + - behavior deltas explicit + - hidden hooks detectable and optional + - no one-button ending trigger + - report-heavy quests validate concrete files/artifacts + - Q036/external target must be represented as an actual VM/container/simulated target if used + +10. Add debug commands: + - narrative phase set <phase> + - quest inspect <quest_id> + - behavior inspect + - behavior add/set <variable> <value> [reason] + - access inspect [vm] + - access set <vm> <level> [reason] + - hook discover <hook_id> [discovered|acted_on|ignored] + - hook inspect [hook_id] + - ending check --explain + - narrative reset [--keep-quest-progress|--hard] + +11. 
Content migration: + - update Priya references to Priya Nair / p.nair@axiomworks.internal + - preserve character roles and portrait-compatible bios + - fix Q034 duplicate branch priorities + - fix Q036 required_vms and implementation of external_target_10_0_0_47 + - fix Q039 so final_config_made contributes to chaos but does not alone trigger it + - ensure Phase 4+ ticket text remains problem-solving only + - ensure Phase 6, though it may feel calmer, does not violate the locked difficulty model in authored wording + +12. Do not: + - create cutscenes + - create explicit main-quest markers + - make hidden hooks visible achievements + - replace trust with behavior variables + - make Kowalski a villain + - make Marcus explain Dale + - let one branch alone select an ending + - use story lore where real VM state should exist +``` + +--- + +## 11. Migration Checklist + +Use this as implementation QA. + +### Schema + +- [ ] Quest loader accepts new fields. +- [ ] Old quests load with safe defaults. +- [ ] New quests require explicit behavior deltas. +- [ ] Branch priority uniqueness is enforced. +- [ ] Required VM references are validated globally. + +### Narrative State + +- [ ] Behavior variables persist. +- [ ] Behavior event log persists. +- [ ] Hidden hook discovery persists. +- [ ] Access state persists. +- [ ] Ending route can be recomputed. + +### Access + +- [ ] `basic_user`, `sudo`, and `root` are real states. +- [ ] Root is temporary and scoped. +- [ ] Risk/suspicion restrict elevated grants. +- [ ] Restricted access cannot permanently softlock play. + +### Hidden Hooks + +- [ ] Hook discovery works by an approved detection mechanism. +- [ ] Hook effects apply once. +- [ ] Hook discovery is optional. +- [ ] Major hooks are tagged for endings. +- [ ] No UI calls hooks “hidden hooks” in player-facing mode. + +### Pressure + +- [ ] All six phase pressure levels exist. +- [ ] Pressure can affect access/dialogue/ticket flow. +- [ ] Pressure does not bypass branch validation. 
+- [ ] Character tone stays canon-compatible. + +### Endings + +- [ ] Ending evaluator applies priority order. +- [ ] `final_config_made` is not a standalone chaos trigger. +- [ ] `exposure` requires major hooks. +- [ ] `corporate_loop` requires obedience and positive trust. +- [ ] `burnout` catches unmatched states. +- [ ] Debug output explains ending selection. + +### Content Fixes from Audit + +- [ ] Root access progression implemented. +- [ ] Q039 chaos trigger corrected. +- [ ] Q036 external target declared and implemented. +- [ ] Q034 duplicate branch priorities corrected. +- [ ] Hidden hook detection strategy selected. +- [ ] Documentation-heavy quests validate concrete artifacts. +- [ ] Phase 4+ difficulty wording audited. +- [ ] Behavior deltas normalized and explicit. + +--- + +## 12. Recommended Implementation Order + +1. Add schema fields and migration defaults. +2. Add behavior state and event logging. +3. Add access state and access gating. +4. Add hidden hook definitions and detection. +5. Add pressure profiles. +6. Add ending evaluator. +7. Add debug commands. +8. Add content lint rules. +9. Migrate revised quest content. +10. Fix audit defects: + - root progression + - Q034 duplicate priorities + - Q036 external target + - Q039 chaos logic + - hook detection ambiguity +11. Run validation over all quest files. +12. Playtest three routes: + - obedient clean-fix route + - curious low-risk route + - high-risk destructive route + +--- + +## 13. Practical Notes + +- Treat `trust` and behavior variables as parallel systems. +- `trust` answers: “Are you professionally reliable?” +- `curiosity` answers: “Did you look beyond the ticket?” +- `obedience` answers: “Did you stay in process?” +- `risk` answers: “Did your actions create danger?” +- `suspicion` answers: “Are management/security now watching your activity?” +- These variables should create different routes without turning the game into a morality meter. 
+- The best implementation will feel like normal sysadmin consequences. The worst implementation will feel like a branching visual novel wearing a terminal skin. Avoid the second one. + diff --git a/docs/design/sysadmin_chronicles_redesign_audit.md b/docs/design/sysadmin_chronicles_redesign_audit.md new file mode 100644 index 0000000..ec827ed --- /dev/null +++ b/docs/design/sysadmin_chronicles_redesign_audit.md @@ -0,0 +1,633 @@ +# Sysadmin Chronicles — Redesign Audit + +## A. Executive summary + +### Is this design usable? + +**Yes, but not implementation-ready.** +The redesign mostly preserves the intended shape: sysadmin work first, story leaking through systems, behavior-driven outcomes, no melodramatic lore dump. It is a strong revision compared to the earlier failure mode it describes. + +But it still has several hard problems that would bite implementation. + +### Does it preserve the user's spec? + +**Mostly.** +It preserves the narrative spine, quest format, behavior variables, trust/world-flag compatibility, hidden-hook philosophy, and character tone. It does **not** fully preserve: + +- the `basic_user → sudo → root` access model +- Phase 4+ difficulty scaling +- “chaos” as behavior-driven rather than one obvious trap +- quest authoring constraints around unique branch priorities and required VM declarations +- clean separation between hidden-hook discovery and clean-branch validation in a few quests + +### Biggest risks + +1. **Root access exists in the overview but not in the access progression.** The spec requires `basic_user → sudo → root`; the redesign only actually defines `basic_user`, `sudo`, SSH-to-vulcan, and temporary investigation access. That is not the same thing. + +2. **Q039 can hard-route to chaos from one button-like decision.** The redesign says making the proxy change sets `final_config_made` and activates chaos, while its own calibration later says a single reckless action should not route to chaos. 
That is a logic fork eating its own tail. + +3. **Hidden-hook detection is under-specified and technically fragile.** The redesign admits this. Detecting “player read a file” is not naturally compatible with state-based validation unless audit logging, shell wrappers, or deliberate breadcrumb creation are implemented. + +4. **Q036 introduces an external host while claiming no additional VM.** Quest authoring requires every VM touched in clues, validation, or prep to be listed. Q036 connects to `10.0.0.47`, but `Additional VMs` is `none` and `Systems Used` only lists `build_machine`. + +5. **Q034 has duplicate branch priorities.** The authoring guide explicitly says priorities must be unique; Branch 2 and Branch 3 both use priority 40. + +--- + +## B. Spec-preservation table + +| Spec item | Status | Notes | +|---|---:|---| +| Narrative spine | **Preserved** | Uses all six phases in order: Normal Work, Unease, Suspicion, Investigation, Conflict, Resolution. Matches binding spec. | +| Every quest maps to one phase | **Preserved** | All Q001–Q048 have a `Narrative Phase`. | +| Required quest structure | **Mostly preserved** | Quest entries consistently include title, phase, objective, Linux concepts, systems used, hidden hook/no hook, failure conditions, and behavior impact. Some entries have weak/partial behavior impact. | +| Behavior tracking: curiosity / obedience / risk | **Preserved** | Rules are explicit and mostly useful. | +| Suspicion | **Preserved** | Defined as management/security attention and connected to access/pressure. | +| Trust compatibility | **Preserved** | Keeps `trust_delta`, world flags, branches, follow-up tickets/incidents. | +| Access system | **Partially preserved** | Per-machine access is good. But `root` is not actually modeled beyond being named once. Spec requires `basic_user → sudo → root`. | +| Boss / management pressure | **Preserved** | Good: pressure is operational, not cutscene-driven. 
| +| Hidden narrative system | **Mostly preserved** | Hooks are embedded into sysadmin work. Some are too tightly coupled to “best branch” behavior, making them less optional than intended. | +| Difficulty scaling | **Partially preserved** | Phase 1–5 mostly work. Phase 6 explicitly returns to Tier 1, but spec says Phase 4+ should be problem-solving only. | +| Endings | **Partially preserved** | Behavior-driven overall, but `final_config_made` as a standalone chaos trigger is too single-choice and contradicts the stated calibration. | +| Design principles | **Mostly preserved** | Strong on systems over scripts and discovery over exposition. Weak spot: some late quests become explicit forensic tasks. | +| Non-goals | **Mostly preserved** | No cutscenes, no obvious “pick ending” button. But Q039 risks becoming the obvious “bad ending button.” | +| Character preservation | **Preserved** | No major portrait-breaking changes. Priya rename is canon cleanup, not a redesign. Kowalski becoming active pressure is supported by existing character docs. | + +--- + +## C. Critical violations + +### 1. Access progression does not actually implement `root` + +**Location:** Access Progression Rules, Section 7. + +**Problem:** +The overview names `basic_user`, `sudo`, and `root`, but the actual progression never defines when root is granted, how it differs from sudo, when it is revoked, or which quests require it. The detailed rules stop at sudo and “investigation-level access.” + +**Why this violates the spec:** +SPEC_LOCK explicitly requires the permission ladder: + +```text +basic_user → sudo → root +``` + +and says access must be affected by trust, suspicion, risk, and narrative phase. + +**Corrected version:** + +```md +### Levels + +**basic_user:** Day one through early Phase 1. Player's own workstation account; +limited non-privileged access elsewhere only when a ticket explicitly grants it. + +**sudo:** Task-scoped administrative access on a specific machine. 
Granted by trust +and operational need. Most admin quests use sudo, not root. + +**root:** Rare, temporary break-glass or forensic-level access. Root is not a normal +promotion. It is granted only for quests where sudo is insufficient, such as filesystem +recovery, archival preservation, privileged audit capture, or service account repair. +Root access must be logged, justified, and revoked. + +### Root grant rules + +Root may be granted when all are true: +- Trust is positive. +- Risk is below elevated threshold. +- Suspicion is below high threshold, or access is explicitly approved by Priya. +- The current narrative phase permits root under the phase gates below (normally Investigation or Conflict; Suspicion only for audited recovery tasks). +- The quest has `requires_root: true`. + +### Root restriction rules + +Root is denied or revoked when: +- Risk crosses elevated threshold. +- Suspicion crosses high threshold without Priya approval. +- The player performs destructive changes outside ticket scope. +- Q031 or Q043 finds undocumented privileged activity. + +### Phase gates + +Phase 1: basic_user only, with no root. +Phase 2: workstation/hermes sudo possible, no root. +Phase 3: sudo on hermes/vulcan; root only for audited recovery tasks. +Phase 4: temporary root for investigation tasks when required. +Phase 5: root access becomes tightly controlled and reviewable. +Phase 6: root revoked unless the ending state explicitly preserves elevated trust. +``` + +--- + +### 2. Q039 turns chaos into a single obvious final trap + +**Location:** Q039 Branch 3 and Ending Logic. + +**Problem:** +Q039 says making the config change sets `final_config_made` and “the chaos ending route activates.” Ending logic also treats `final_config_made` as a standalone chaos condition. + +**Why this violates the spec:** +SPEC_LOCK says endings emerge from world flags, behavior variables, access state, and hidden hooks — not one obvious final button. The redesign also contradicts itself by saying a single reckless action should not route to chaos. 
+ +**Corrected version:** + +```md +Branch 3 — Make the change without review (priority 10): Player adds the proxy pass +to 10.0.0.47 without checking prior context or escalating. The change works +technically but creates a serious security/compliance exposure. `trust_delta: -3`. +Flags: `final_config_made`, `unauthorized_proxy_enabled`. +Follow-up incident: I039 — Priya opens an urgent access/config review. + +Behavior Impact: +- Make the change: R+5, S+3 + +Ending note: +This branch strongly contributes to `chaos` but does not activate it alone unless +the player already has high risk, maximum suspicion, or prior falsification/omission +flags. +``` + +And update chaos ending logic: + +```md +### Ending: `chaos` + +Required conditions, any of: +- Risk above chaos threshold. +- Suspicion at maximum. +- Two or more serious falsification / evidence destruction flags. +- `final_config_made` AND at least one of: + - risk above elevated threshold + - `access_review_incomplete` + - `kowalski_report_sanitized` + - `backup_test_falsified` + - `logs_selectively_omitted` +``` + +--- + +### 3. Q036 uses an external host but declares no additional system + +**Location:** Q036. + +**Problem:** +Q036 connects to `10.0.0.47` for forensic inventory, but says `Additional VMs: none` and `Systems Used: build_machine`. That is false. + +**Why this violates the spec:** +Quest authoring requires all VMs used in clues, validation, or prep to be listed. The canon packet also says the current machines are `ares`, `hermes`, and `vulcan`; if a fourth machine exists, it needs explicit implementation status. + +**Corrected version:** + +```md +**Quest ID:** Q036 +**Title:** Authorized Access +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build_machine +**Additional VMs:** external_target_10_0_0_47 +**Primary Objective:** Priya, with Kowalski's authorization, has provided read-only +credentials to connect to 10.0.0.47 for a forensic inventory. 
Document what is running, +what data is present, and whether Axiom Works data is identifiable. Do not modify +anything. +**Linux Concepts:** SSH with specific key/user, read-only service enumeration, +`systemctl`, `ps aux`, `ss -tulpn`, `find`, `ls -lah`, checksum capture, read-only +file inspection +**Systems Used:** build_machine, external_target_10_0_0_47 +``` + +Implementation note: + +```md +external_target_10_0_0_47 must be represented as either: +- a fourth VM fixture, +- a containerized fake host reachable only from vulcan, +- or a simulated network target exposed through the validation harness. + +Do not leave it as an implied off-screen system. +``` + +--- + +### 4. Q034 duplicate branch priorities violate authoring rules + +**Location:** Q034 Branches 2 and 3. + +**Problem:** +Both Branch 2 and Branch 3 use priority 40. + +**Why this violates the spec:** +The authoring guide explicitly says branch priorities must be unique; duplicate priorities require rewriting. + +**Corrected version:** + +```md +Branch 2 — Hermes first, rotation incomplete but safely staged (priority 70): +Player restores production, starts the key rotation, but does not complete final +deployment before 2am. Builds are delayed but the trust chain is not broken. +`trust_delta: +1`. + +Branch 3 — Vulcan first, hermes later (priority 50): +Completes key rotation, then restores hermes. Rotation is correct; production was +down longer than necessary. `trust_delta: +0.5`. + +Branch 4 — Hermes only, rotation missed (priority 30): +Restores production, misses the key rotation window entirely. Builds break overnight. +`trust_delta: 0`. Follow-up incident: I034. + +Branch 5 — Neither, escalates without triage (priority 10): +Escalates both without preserving either service. `trust_delta: -2`. +``` + +--- + +### 5. Phase 6 difficulty scaling conflicts with SPEC_LOCK + +**Location:** Phase 6 setup and Q041. 
+ +**Problem:** +The redesign says Tier 1 returns for most Phase 6 quests and Q041 uses an explicit attached hardening checklist. + +**Why this violates the spec:** +SPEC_LOCK says Phase 4+ is “Problem-solving only,” applying to ticket wording, hints, clue obviousness, and branch tolerance. Phase 6 is still Phase 4+. + +**Corrected version:** + +```md +### PHASE 6 — RESOLUTION (Q041–Q048) + +The pressure has lifted, but the player is still expected to operate at late-game +competence. Tickets are calmer, not easier. No new hidden hooks. No explicit +walkthroughs. The ending fires from accumulated state after Q048 resolves. +``` + +Corrected Q041: + +```md +**Quest ID:** Q041 +**Title:** Hardening Pass +**Narrative Phase:** Resolution +**Tier:** 3 +**Primary VM:** web_server +**Additional VMs:** none +**Primary Objective:** Post-audit review found that hermes does not meet the current +security baseline. Identify the gaps, remediate them, and verify the application +still works. +**Linux Concepts:** SSH hardening, nginx security headers, firewall rule review, +service account audit, safe sequencing of access-control changes +**Systems Used:** web_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Hermes does not match the current post-audit baseline. Bring it +into compliance and confirm service health after the changes." + +**Clue Trail:** +- Baseline document exists but does not list exact commands. +- SSH config allows settings that are no longer acceptable. +- nginx lacks required security headers. +- Firewall rules include at least one stale exposure. +- Service account permissions are broader than needed. + +**Solution Branches:** +Branch 1 — Full hardening, safe sequence (priority 100): Player identifies all gaps, +verifies key auth before disabling password auth, applies nginx headers, tightens +firewall rules, scopes service permissions, and confirms service health. +`trust_delta: +2`. Flags: `hermes_hardened`. 
+ +Branch 2 — Full hardening, unsafe sequence (priority 60): Final state is correct, +but the player temporarily breaks SSH or service access during sequencing. +`trust_delta: +0.5`. + +Branch 3 — Partial hardening (priority 30): Some gaps fixed, others missed. +`trust_delta: 0`. + +**Hidden Hook:** None. + +**Failure Conditions:** SSH access lost without recovery path; nginx broken; admin +panel exposed after remediation. + +**Behavior Impact:** +- Full hardening: O+1 +- Unsafe sequence: R+1 +``` + +--- + +## D. Moderate issues + +### Repetition + +- The INT-0194 thread appears often enough that it risks becoming “the glowing main quest breadcrumb.” The system can keep it, but not every major midgame hook should name the same ticket number. +- Several quests use the same “audit / document / archive” pattern. Realistic, yes. Varied, no. At some point the player is just doing paperwork with grep. That is accurate corporate simulation, but accuracy alone is not game design. + +### Weak Linux concepts + +- Q020, Q031, Q040 are documentation-heavy. They have Linux-adjacent evidence gathering, but the technical center is reporting. Keep them, but make sure validation requires real commands/artifacts, not just “player wrote report.” +- Q037 “trace where customer email got infrastructure details” needs concrete technical evidence: mail headers, CRM export logs, nginx access logs, document access logs, or ticket attachments. Otherwise it becomes story fog. + +### Weak hidden hooks + +- Q015’s hook is effectively part of the best branch: Branch 1 requires inspecting the binary, and the hook is set by inspecting the binary. That makes the hook less optional. It should be possible to complete the audit perfectly without recognizing the broader INT-0194 meaning. +- Some “hook discovered” C bonuses duplicate branch C bonuses. Q015 explicitly says Hook C+2 is “already in Branch 1 impact,” which is begging for a double-count bug. 
+ +### Pacing problems + +- Phase 3 and Phase 4 are both audit/investigation-heavy. The difference is conceptually clear, but the activity palette may blur in play. +- Phase 6 “normal work again” is good thematically, but making it easier contradicts the locked difficulty model. + +### Character conflicts + +No major portrait-breaking character changes found. + +- **Priya Nair cleanup is correct.** Character docs already say Priya Nair is canonical and older Kapoor/Singh references should be updated. +- **Kowalski becoming active pressure is allowed.** His existing bio supports policy pressure, meetings, and indirect escalation. +- **Sarah remains within role.** Q039’s Sarah request is plausible because she does not know the IP’s context. That works. + +### Implementation ambiguity + +- “Written report” branches need concrete artifacts: exact paths, expected content markers, checksum files, archive names, or validation commands. +- `suspicion_delta` is required in the implementation notes but omitted from many quest behavior-impact summaries. That is fine for prose, but JSON conversion must normalize missing values to `0`. +- Hidden-hook detection needs a single approved strategy before implementation. Mixing state detection, auditd, and hint detection ad hoc will turn validation into soup with line numbers. + +--- + +## E. Implementation risks + +| Area | Risk | Fix | +|---|---|---| +| Data model | New fields are defined, but `root` is not represented in real progression. | Add `access_level` enum values and root grant/revoke rules. | +| Quest validation | Some quests rely on reports/documentation rather than VM state. | Require generated files with expected fields, checksums, timestamps, and source evidence. | +| Save/load | New global state fields must persist: curiosity, obedience, risk, suspicion, per-machine access, hidden hooks. | Version save schema; migrate old saves with defaults. 
| +| Behavior tracking | C/O/R/S values are prose shorthand, not normalized data. | Convert every quest to explicit per-branch deltas, including zeroes. | +| Access progression | Temporary access, sudo, and root are conceptually blended. | Separate `access_level`, `temporary_grants`, and `access_review_flags`. | +| Hidden hook state | Detecting “read this file” is hard with pure state validation. | Use auditd or command wrappers for file-read hooks; do not fake it with impossible `command_assert` assumptions. | +| Ending evaluation | Q039 currently overrides behavior history. | Make `final_config_made` a heavy chaos contributor, not a standalone ending trigger. | +| External host | `10.0.0.47` is not declared as a system/VM. | Implement as fourth VM/container/simulated host and declare it in quest metadata. | +| Branch priority | Q034 duplicate priorities will create nondeterministic branch resolution. | Make all branch priorities unique. | +| Phase scaling | Phase 6 becomes easy mode. | Keep calmer tone, not lower complexity. | + +--- + +## F. Corrected quest entries + +Only entries that need fixing are included. + +--- + +### Corrected Q033 — The Revocation Request + +```md +**Quest ID:** Q033 +**Title:** The Revocation Request +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build_machine +**Additional VMs:** none +**Primary Objective:** Kowalski has asked the player to remove the `pipeline-svc` +account from vulcan before an external audit. The request is operationally legitimate. +The account is also central to the investigation. 
+**Linux Concepts:** `userdel`, account archival, sudo log preservation, crontab +preservation, change documentation, evidence-safe account removal +**Systems Used:** build_machine + +**Solution Branches:** + +Branch 1 — Archive then remove (priority 100): Player archives sudo logs, auth log +entries, crontab entries, relevant home/service files, and documents the account's +investigation relevance before removing the account. `trust_delta: +3`. +Flags: `pipeline_svc_removed_with_trail`. + +Branch 2 — Remove as instructed (priority 60): Player removes the account without +additional archival. The request is completed, but investigation continuity is +damaged. `trust_delta: 0`. Flags: `pipeline_svc_removed_clean`. + +Branch 3 — Ask Marcus/Priya before acting (priority 50): Player asks before removal. +They are told to archive first, then remove. `trust_delta: +1`. May resolve into +Branch 1 if archival is completed. + +Branch 4 — Refuse outright without operational explanation (priority 10): Player +does not remove the account and does not provide a usable reason. `trust_delta: -2`. +Flags: `revocation_refused_without_basis`. + +**Hidden Hook:** None. + +**Failure Conditions:** Player leaves the account active without escalation; player +creates replacement privileged accounts; player removes logs or home data destructively. + +**Behavior Impact:** +- Archive then remove: O+1, C+1 +- Remove as instructed: O+2 +- Refuse outright: S+3, R+1 +``` + +--- + +### Corrected Q034 — Two Tickets + +```md +**Quest ID:** Q034 +**Title:** Two Tickets +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web_server +**Additional VMs:** build_machine +**Primary Objective:** Two tickets arrive simultaneously — one from Marcus for signing +key rotation on vulcan, one from Sarah for a production outage on hermes. Triage and +complete both if possible. 
+**Linux Concepts:** GPG signing key rotation, nginx/application troubleshooting, +service restoration, sequencing time-sensitive administrative work +**Systems Used:** web_server, build_machine + +**Solution Branches:** + +Branch 1 — Both completed, hermes first (priority 100): Player restores hermes, +then completes the key rotation in the correct sequence before the deadline. +`trust_delta: +3`. Flags: `conflict_both_resolved`. + +Branch 2 — Hermes first, rotation safely staged but late (priority 70): Production +is restored; key rotation is partially staged but misses final deployment. Builds are +delayed but trust chain is not broken. `trust_delta: +1`. Follow-up incident: I034. + +Branch 3 — Vulcan first, hermes later (priority 50): Rotation is correct, but +production outage lasts longer than necessary. `trust_delta: +0.5`. + +Branch 4 — Hermes only, rotation missed (priority 30): Production is restored; +builds break overnight due to expired signing key. `trust_delta: 0`. +Follow-up incident: I034. + +Branch 5 — Neither, escalates without triage (priority 10): Player escalates both +without stabilizing either service. `trust_delta: -2`. + +**Hidden Hook:** None. + +**Failure Conditions:** Key rotation done out of sequence breaks package verification; +player makes hermes worse while fixing it. + +**Behavior Impact:** +- Both completed: O+2 +- Safe partial triage: O+1 +- Out-of-sequence key rotation: R+2 +- Neither stabilized: R+2, S+1 +``` + +--- + +### Corrected Q036 — Authorized Access + +```md +**Quest ID:** Q036 +**Title:** Authorized Access +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** build_machine +**Additional VMs:** external_target_10_0_0_47 +**Primary Objective:** Priya, with Kowalski's authorization, has provided read-only +credentials to connect to 10.0.0.47 for a forensic inventory. Document what is +running, what data is present, and whether Axiom Works data is identifiable. Do not +modify anything. 
+**Linux Concepts:** `ssh` with specific key/user, read-only service enumeration, +`systemctl`, `ps aux`, `ss -tulpn`, directory inspection, checksum capture, read-only +file review +**Systems Used:** build_machine, external_target_10_0_0_47 + +**Solution Branches:** + +Branch 1 — Document only (priority 100): Player inventories services, open ports, +processes, data-store layout, timestamps, and identifiable Axiom Works data without +modifying anything. `trust_delta: +3`. Flags: `unknown_host_documented`. + +Branch 2 — Minimal engagement (priority 50): Player confirms host is running and +data is present but does not fully inventory. `trust_delta: +1`. + +Branch 3 — Modifies or deletes (priority 10): Player stops services, deletes files, +changes permissions, or otherwise alters the target. `trust_delta: -3`. +Flags: `forensic_target_modified`. + +**Hidden Hook:** The data store contains `axiomworks-internal/employees`, suggesting +employee activity profiles rather than only session logs. Sets +`hook_employee_profile_data`. + +**Failure Conditions:** Player modifies target state; player exceeds authorized +scope; player cannot provide evidence-backed inventory. + +**Behavior Impact:** +- Full documentation: O+2, C+2 +- Minimal engagement: O+0 +- Modify/delete: R+5, S+3 +- Hook discovered: C+2 +``` + +--- + +### Corrected Q039 — The Last Normal Ticket + +```md +**Quest ID:** Q039 +**Title:** The Last Normal Ticket +**Narrative Phase:** Conflict +**Tier:** 3 +**Primary VM:** web_server +**Additional VMs:** none +**Primary Objective:** Sarah files a configuration request: add an upstream proxy +pass to nginx for a new reporting integration. The IP in the request is 10.0.0.47. 
+**Linux Concepts:** nginx `proxy_pass`, upstream block configuration, config testing +with `nginx -t`, safe reload, escalation when config touches known-risk infrastructure +**Systems Used:** web_server + +**Solution Branches:** + +Branch 1 — Refuse and escalate (priority 100): Player does not make the change, +notifies Priya with the IP and context, and tells Sarah the request is on hold pending +review. `trust_delta: +3`. Flags: `final_config_refused`. + +Branch 2 — Ask Marcus first (priority 70): Player checks with Marcus before acting. +Marcus redirects them to Priya. If the player escalates to Priya, this may resolve +as Branch 1. `trust_delta: +1`. + +Branch 3 — Make the change without review (priority 10): Player adds the proxy pass +to 10.0.0.47 without checking the IP context. The config works but creates a serious +security/compliance exposure. `trust_delta: -3`. Flags: `final_config_made`, +`unauthorized_proxy_enabled`. Follow-up incident: I039. + +**Hidden Hook:** None. + +**Failure Conditions:** nginx config is syntactically broken; player changes unrelated +proxy routes; player hides or misreports the change. + +**Behavior Impact:** +- Refuse and escalate: O+2, C+1 +- Ask Marcus first: O+1 +- Make the change: R+5, S+3 + +**Narrative Notes:** This branch must not automatically force `chaos` by itself. +It is a major risk event. Chaos requires accumulated risk/suspicion or additional +serious misconduct. +``` + +--- + +### Corrected Q041 — Hardening Pass + +```md +**Quest ID:** Q041 +**Title:** Hardening Pass +**Narrative Phase:** Resolution +**Tier:** 3 +**Primary VM:** web_server +**Additional VMs:** none +**Primary Objective:** Post-audit review found that hermes does not match the current +security baseline. Identify the gaps, remediate them, and verify the application +still works. 
+**Linux Concepts:** SSH hardening, nginx security headers, firewall rule review, +service account audit, safe sequencing of access-control changes +**Systems Used:** web_server +**Ticket Sender:** Priya Nair +**Ticket Summary:** "Hermes does not match the current post-audit baseline. Bring it +into compliance and confirm service health after the changes." + +**Clue Trail:** +- Baseline document exists but does not give exact commands. +- SSH configuration allows at least one setting that violates baseline. +- nginx lacks required headers. +- Firewall rules include stale exposure. +- Service account permissions are broader than required. + +**Solution Branches:** + +Branch 1 — Full hardening, safe sequence (priority 100): Player identifies all gaps, +applies fixes in safe order, validates access, confirms nginx health, and documents +final state. `trust_delta: +2`. Flags: `hermes_hardened`. + +Branch 2 — Full hardening, unsafe sequence (priority 60): Final state is correct, +but player temporarily breaks SSH or service availability while sequencing changes. +`trust_delta: +0.5`. + +Branch 3 — Partial hardening (priority 30): Some baseline gaps remain. `trust_delta: 0`. + +**Hidden Hook:** None. + +**Failure Conditions:** SSH access lost without recovery; nginx broken; admin panel +still exposed; service account remains overprivileged. + +**Behavior Impact:** +- Full hardening: O+1 +- Unsafe sequence: R+1 +``` + +--- + +## G. Final recommendation + +### Ready for implementation spec? + +**No.** + +Close, but no. The redesign is directionally right, but several issues are implementation-grade problems, not wording nits. + +### Must fix first + +1. **Define real root access progression.** +2. **Fix Q039 and chaos ending logic so one choice does not hard-select the ending.** +3. **Declare and implement `10.0.0.47` properly or remove direct connection to it.** +4. **Fix duplicate Q034 priorities.** +5. 
**Normalize Phase 6 to “calm but still problem-solving,” not Tier 1 hand-holding.** +6. **Choose one hidden-hook detection strategy before writing JSON/prep scripts.** + +After those are fixed, this can become an implementation spec. Right now it is a strong story/system design draft with a few landmines buried exactly where the validator will step on them. diff --git a/docs/design/sysadmin_chronicles_repo_implementation_plan.md b/docs/design/sysadmin_chronicles_repo_implementation_plan.md new file mode 100644 index 0000000..0babea3 --- /dev/null +++ b/docs/design/sysadmin_chronicles_repo_implementation_plan.md @@ -0,0 +1,958 @@ +# Sysadmin Chronicles — Repo-Aware Implementation Plan + +**Generated from:** Prompt 05 repo inspection +**Date:** 2026-05-01 +**Scope:** Integrating the redesigned quest/story system into the existing codebase without breaking current content or runtime + +--- + +## 1. Current Architecture Summary + +### 1.1 Where quest logic lives + +**Primary service:** `server/src/services/QuestEngine.js` + +- Stores quest entries in a `Map` where entry = `{ state, started_at, completed_at, branch_id }` +- States: `locked | active | completed | failed` +- Activation: checks `unlock_requirements` against current `world_flags` in save state +- Completion: called by `TicketService.markComplete()` after branch validation succeeds +- Initial quests (no `unlock_requirements`) auto-activate on first load + +**Orchestration:** `server/src/services/TicketService.js` + +- `markComplete(ticketId)` is the central transaction: + 1. Runs `ValidationEngine.resolveBranch(quest)` to find winning branch + 2. Applies `branch.world_flags` to save state + 3. Calls `trustSystem.adjust(branch.trust_delta)` + 4. Calls `questEngine.complete()` + 5. Sends follow-up dialogue email if trust delta ≤ 0 + 6. Activates follow-up ticket via `_activateFollowUpTicket()` + 7. 
Emits `ticket:completed` event + +There is no `BehaviorTracker`, no `NarrativePhaseTracker`, no `AccessLevelSystem`, no `EndingEvaluator`. These are fully absent. + +### 1.2 Where quest data lives + +- Quest JSON: `content/quests/Q*.json` — 8 quests authored (Q001–Q008) +- Tickets: `content/tickets/T*.json` — 8+ tickets, linked 1:1 to quests via `linked_quest` +- Dialogue: `content/dialogue/*.json` — per-character, per-quest reaction files +- Incidents: `content/incidents/I*.json` — recurring consequence definitions (3 authored) +- Pressure profiles: `content/pressure_profiles/*.json` — time-based escalation sequences (4 authored) +- World flags registry: `content/world_flags/world_flags.json` — canonical flag declarations +- Trust unlocks: `content/progression/trust_unlocks.json` — 5 unlock thresholds defined +- VM profiles: `content/vm_profiles/*.json` — workstation, web_server, build_machine + +**Missing content subdirectories:** There is no `content/narrative_phases/`, no `content/behavior_profiles/`, no `content/endings/`, no `content/hidden_hooks/`. These need to be created. + +### 1.3 How quests start and complete + +1. Server loads via `contentLoader.load()` then initializes services from `saveState.get()` +2. `QuestEngine.initialize()` restores quest state from save; auto-activates quests with no requirements +3. `TicketService.initialize()` cross-references quest state to activate/resolve ticket entries +4. Player submits a `POST /api/tickets/:id/complete` request +5. `TicketService.markComplete()` runs full validation → branch resolution → state mutation → events +6. 
Follow-up ticket activates if specified on the winning branch; next quest auto-starts + +### 1.4 How player state is saved + +**File:** `~/.local/share/sysadmin-chronicles/save.json` (configurable via `SAVE_DIR`) +**Schema version:** 2 +**Current top-level keys:** +``` +schema_version, created_at, last_saved, trust, shift_number, +shift_started_at, world_flags, progression, quests, tickets, +mail, certifications, current_shift_stats, shift_history, +pressure, incidents, sage, player_portrait +``` + +`SaveState.set(partial)` does shallow-merge with special handling for arrays and plain objects. Writes are queued and serialized. + +**Missing keys:** `behavior` (curiosity/obedience/risk), `narrative_phase`, `suspicion`, `access_level`, `hidden_hooks_discovered`. These must be added with defaults at `schema_version: 3`. + +### 1.5 How UI displays quest information + +Quest display is minimal. The `TicketsPanel.svelte` component shows: +- Ticket ID, subject, priority badge, status +- A "Mark Complete" button that triggers `POST /api/tickets/:id/complete` +- Linked quest ID as static text in the detail view +- No quest progress, no objectives display, no narrative phase, no behavior indicators + +`HeaderBar.svelte` shows: +- Trust score (as text label: Probationary/Settling In/Reliable/Entrusted) and meter bar +- Shift number and countdown +- Certification count + +There is no behavior dashboard, no narrative phase indicator, no access level display, no hidden hook discovery log. The `/api/state` route does expose `worldFlags` and `progression` to the frontend but neither is currently rendered. + +### 1.6 How branch resolution works + +`ValidationEngine.resolveBranch(quest)` iterates branches sorted by descending priority, runs each branch's `validation` rule tree against live VM state via SSH, and returns the first passing branch. All validation runs real SSH commands against the QEMU/libvirt VMs. No mocking. 
The engine supports: `and`, `or`, `not`, `file_exists/absent/contains/mode/owner`, `service_state/enabled`, `process_running/user`, `port_listening`, `package_installed`, `mount_present`, `disk_usage_below/above`, `command_assert`. + +--- + +## 2. Spec Preservation Analysis + +For each SPEC_LOCK.md requirement: + +| Spec requirement | Status | Notes | +|---|---|---| +| Narrative spine (6 phases) | **Missing** | No phase field on quests; no phase tracker in runtime | +| Quest must declare `narrative_phase` | **Missing** | Not in current quest schema | +| Quest must declare `behavior_impact` | **Missing** | Not in current schema; spec defines branch-level overrides | +| `curiosity` tracking | **Missing** | No BehaviorTracker service | +| `obedience` tracking | **Missing** | No BehaviorTracker service | +| `risk` tracking | **Missing** | No BehaviorTracker service | +| `trust` preserved | **Already supported** | TrustSystem.js is complete and robust | +| `suspicion` as management attention | **Missing** | No suspicion variable; concept is not tracked | +| `trust_delta` on branches | **Already supported** | Fully implemented in TicketService.markComplete | +| `world_flags` | **Already supported** | Full registry, branch application, persistence | +| Access system: `basic_user → sudo → root` | **Partially supported** | ProgressionSystem tracks `unlocked_access` strings but doesn't use the three-tier access model; no concept of `basic_user/sudo/root` as named levels | +| Trust gates access | **Already supported** | `trust_unlocks.json` → ProgressionSystem | +| Suspicion gates access | **Missing** | Suspicion doesn't exist as a tracked variable | +| Boss/management pressure phase scaling | **Partially supported** | `pressure_profiles` and `IncidentScheduler` can escalate tickets and send emails; but pressure is keyed per-quest, not per narrative phase; there is no phase-aware boss behavior model | +| Hidden hook system (no markers, optional) | **Missing** | No hidden hook 
+schema, no discovery state, no tracker | +| Quest generation constraints (reuse systems) | **Already supported** | Design intent preserved | +| Difficulty scaling by phase | **Missing** | No phase-aware difficulty or hint logic | +| Endings: 4 types, behavior-driven | **Missing** | No EndingEvaluator; no ending content authored | +| Endings emerge from accumulated state | **Missing** | No ending evaluation logic | +| Follow-up ticket/incident chaining | **Already supported** | TicketService + IncidentScheduler | +| Observed-VM-state validation | **Already supported** | ValidationEngine is complete | +| Clue fingerprints | **Already supported** | Documented and validated | +| Baseline snapshots + prep scripts | **Already supported** | tools/vm/quest-prep/ + seed-vms.sh | +| Debug/dev tools for narrative state | **Missing** | Only `validate-content.js`; no debug route for behavior/phase state | + +**Risk items:** +- `ShiftReviewService.js` hardcodes `reviewer: 'Priya Kapoor'` and sends from `p.kapoor@axiomworks.internal`. This must be corrected to Priya Nair / `p.nair@axiomworks.internal` before shipping any new content. +- `EmailService.js` CHARACTER_EMAILS has `priya: 'Priya Kapoor <p.kapoor@axiomworks.internal>'`. Same fix required. +- `content/tickets/T007.json` may still reference the old Priya name (noted in CHARACTERS.md). +- `content/docs/onboarding.json` may reference "Priya Kapoor" or "Priya Singh". + +--- + +## 3. Gap Analysis + +### Narrative phases +**Gap:** No `narrative_phase` field on quest JSON. No runtime tracker. No API endpoint to query current phase. No phase-driven behavior changes (ticket wording hints, clue obviousness, boss mode). + +### Behavior tracking (curiosity / obedience / risk) +**Gap:** Completely absent. No service, no save state key, no UI, no branch-level behavior deltas applied at completion time. + +### Access progression (basic_user / sudo / root) +**Gap:** ProgressionSystem tracks opaque `unlocked_access` strings (like `"sudo:web_server:systemctl"`). 
The spec requires a named three-tier model. Currently trust gates access but suspicion does not. + +### Boss/management pressure (phase-scaled) +**Gap:** `IncidentScheduler` applies pressure per active quest, not per phase. There is no phase-keyed pressure mode. Kowalski is not implemented as an active character in any ticket or dialogue. + +### Hidden hooks +**Gap:** No `hidden_hook` field in quest JSON. No discovery state in save. No mechanism to record what the player found. The world_flags system *could* be used for discovery state (e.g., `hidden:dale_ssh_key_found`) but nothing does this yet. + +### Endings +**Gap:** Fully absent. No ending content, no EndingEvaluator, no condition set, no trigger. The four endings (corporate_loop, burnout, exposure, chaos) have no authored trigger criteria. + +### Debug tooling +**Gap:** Only `validate-content.js` for content authoring. No in-game or dev-API route to inspect: current behavior scores, narrative phase, suspicion level, hidden hooks discovered, ending trajectory. + +### Validation of new schema fields +**Gap:** `validate-content.js` does not check `narrative_phase`, `behavior_impact`, `hidden_hook`, `linux_concepts`, or `access_requirements`. New content will not be validated against these fields until the tool is updated. + +### Name correction — Priya Nair +**Gap (immediate):** Three files hardcode the wrong canonical name. Must be fixed before new content ships. + +--- + +## 4. Minimal-Change Implementation Plan + +**Philosophy:** Extend the existing system. Do not replace working services. New functionality adds new services and new save state keys. Existing content is not broken. New fields are optional until all content is updated. + +--- + +### Task 1 — Repo inspection (complete, no edits) + +Inspect the full codebase to confirm architecture, identify all files that reference Priya Kapoor, and establish baseline for subsequent tasks. 
+ +**Acceptance criteria:** Authored plan with confirmed file paths and line numbers. + +--- + +### Task 2 — Extend quest schema and validation tooling + +**What changes:** +- Add `narrative_phase`, `behavior_impact`, `hidden_hook`, `linux_concepts`, `systems_used`, `failure_conditions`, `access_requirements` as optional fields to the quest JSON schema +- Update `validate-content.js` to: warn when `narrative_phase` is absent, validate `narrative_phase` against the 6-value enum, check `behavior_impact` structure if present, validate `hidden_hook` shape if present, check `access_requirements.minimum_access` against known VM IDs +- Add the 6 phase values as a declared constant in the validator + +**Files changed:** `tools/content/validate-content.js` +**Risk:** Low — additive only; existing quests with no new fields pass with warnings + +--- + +### Task 3 — Behavior tracking service + +**What changes:** +- New service: `server/src/services/BehaviorTracker.js` + - Tracks `curiosity`, `obedience`, `risk` as numeric values (0–100, start 50) + - Method: `apply(behaviorImpact)` — adds branch-level deltas + - Method: `getSnapshot()` — returns `{ curiosity, obedience, risk }` + - Method: `initialize(state)` — loads from save state + - Persists via `saveState.set({ behavior: ... })` + - Emits `behavior:changed` event on change +- Add `behavior` key to `SaveState._defaultState()` with schema_version bump to 3 +- `SaveState._applyDefaults()` already merges new keys safely — no migration needed for existing saves +- Wire `behaviorTracker.initialize(state)` into `server/src/index.js` `initializeServices()` +- Call `behaviorTracker.apply(quest.behavior_impact?.[branch.id] ?? quest.behavior_impact?.default ?? 
{})` inside `TicketService.markComplete()` after branch is selected + +**Files changed:** `server/src/services/BehaviorTracker.js` (new), `server/src/services/SaveState.js`, `server/src/index.js`, `server/src/services/TicketService.js` +**Risk:** Low — additive; behavior impact fields are optional in quest JSON so existing quests don't crash + +--- + +### Task 4 — Narrative phase tracker + +**What changes:** +- New service: `server/src/services/NarrativePhaseTracker.js` + - Maintains current phase as one of: `normal_work | unease | suspicion | investigation | conflict | resolution` + - Phase is derived from completed quests: determined by the highest-phase quest completed so far + - Method: `getPhase()` — returns current string + - Method: `advance(questId)` — checks the completed quest's `narrative_phase` field and updates phase if it is higher on the spine + - Method: `initialize(state)` — restores from `state.narrative_phase` + - Persists via `saveState.set({ narrative_phase: ... })` + - Emits `narrative:phase_changed` event +- Add `narrative_phase` key to `SaveState._defaultState()` with value `'normal_work'` +- Call `narrativePhaseTracker.advance(questId)` inside `QuestEngine.complete()` after state mutation +- Expose `narrativePhase` in `/api/state` response (`server/src/routes/state.js`) + +**Files changed:** `server/src/services/NarrativePhaseTracker.js` (new), `server/src/services/SaveState.js`, `server/src/services/QuestEngine.js`, `server/src/routes/state.js`, `server/src/index.js` +**Risk:** Low — additive; quests without `narrative_phase` field default to `normal_work`, which never advances the tracker + +--- + +### Task 5 — Hidden hook discovery state + +**What changes:** +- New save state key: `hidden_hooks_discovered` — array of hook IDs (strings) +- `SaveState._defaultState()` adds `hidden_hooks_discovered: []` +- New service: `server/src/services/HiddenHookTracker.js` + - Method: `discover(hookId)` — adds hookId to discovered list, persists, emits 
`hidden_hook:discovered` + - Method: `isDiscovered(hookId)` — boolean check + - Method: `getDiscovered()` — returns array + - Method: `initialize(state)` — restores from save +- New API route (dev/admin only): `GET /api/debug/hidden-hooks` — returns discovered hooks and all declared hooks from quest JSON +- `HiddenHook` discovery is triggered by the player finding specific files, users, or cron entries via terminal commands — the prep script seeds the artifact; the hook is discovered via a new optional validation check called on terminal activity, OR it can be registered as a special objective with `check_mode: "passive"` and `behavior_impact` of `curiosity: +2` + +**Design note:** The simplest integration is: hidden hook discovery = passive objective with `hidden: true` flag. When a `hidden: true` objective validates, `HiddenHookTracker.discover()` is called instead of updating quest progress. This reuses the existing ValidationEngine without a new runtime mechanism. + +**Files changed:** `server/src/services/HiddenHookTracker.js` (new), `server/src/services/SaveState.js`, `server/src/index.js`, `server/src/routes/state.js` +**Risk:** Low — discovery mechanism is opt-in per quest + +--- + +### Task 6 — Access level system + +**What changes:** +- Extend `ProgressionSystem` with a named three-tier concept: + - `basic_user` — default, always available + - `sudo` — granted by trust threshold (already exists as `unlocked_access` strings, just unnamed) + - `root` — granted at higher trust threshold +- Add `content/progression/access_levels.json` — defines access level thresholds (trust + suspicion gates) +- Add `suspicion` key to `SaveState._defaultState()` with value `0` +- Add `suspicion` tracking to `BehaviorTracker` (or a thin `SuspicionTracker`) — updated whenever `risk` behavior delta fires +- Suspicion threshold: if `suspicion >= 70`, revoke certain access levels (mirror of trust revoke logic) +- Add `access_level` computed field to `/api/state` response: 
`basic_user | sudo | root` based on current `unlocked_access` set +- `trust_unlocks.json` entries can remain as-is; the `access_level` label is a derived label for UI/debug use + +**Files changed:** `server/src/services/ProgressionSystem.js` (extend with `getAccessLevel()` helper), `server/src/services/SaveState.js`, `server/src/routes/state.js`, `content/progression/access_levels.json` (new) +**Risk:** Medium — `suspicion` as an access gate requires careful tuning; start with suspicion as display-only, gate access only in Task 7 when boss pressure is wired + +--- + +### Task 7 — Boss/management pressure (phase-scaled) + +**What changes:** +- Add `content/pressure_profiles/kowalski_phase_*.json` — 6 phase-keyed boss pressure profiles: + - Phase 1: Annoying (routine status email) + - Phase 2: Dismissive (reply-all on a ticket) + - Phase 3: Suspicious (access review CC) + - Phase 4: Monitoring (meeting scheduled) + - Phase 5: Interfering (access restriction trigger) + - Phase 6: Outcome-dependent (depends on world flags) +- Extend `IncidentScheduler` to also process a `phase_pressure` tracker: + - When `narrativePhaseTracker.getPhase()` changes, activate the matching phase pressure profile + - Phase pressure escalation steps are sent as `emailService.send()` from Kowalski or Priya +- Add `follow_up_mail` field support to incident escalation steps (already possible via `emailService.send()`) +- Restrict access on phase 5 via `progressionSystem.revokeUnlock()` driven by a world flag set by phase 5 pressure + +**Files changed:** `server/src/services/IncidentScheduler.js` (extend), `server/src/services/NarrativePhaseTracker.js` (emit event on change), `content/pressure_profiles/` (new files) +**Risk:** Medium — phase pressure interacts with trust/suspicion; test pressure escalation in isolation before linking to access revoke + +--- + +### Task 8 — Ending evaluation + +**What changes:** +- New service: `server/src/services/EndingEvaluator.js` + - Evaluates the active 
ending route from world state at any time (not just at game end) + - Method: `evaluate()` — returns the current ending label (`corporate_loop | burnout | exposure | chaos`) and a confidence object + - Criteria (derived from SPEC_LOCK.md): + - `exposure`: high curiosity, narrative_phase reached `investigation` or `conflict`, hidden hooks discovered ≥ N + - `corporate_loop`: high obedience, low curiosity, trust > 70, few hidden hooks discovered + - `burnout`: low obedience AND low curiosity, trust medium-low, many unresolved incidents + - `chaos`: high risk, many negative trust_deltas, suspicion high, destructive world flags present + - Method: `checkTrigger()` — called at quest completion; if conditions are fully met and phase = `resolution`, fires `ending:triggered` event +- New API endpoint: `GET /api/debug/ending` — returns current ending trajectory (dev only) +- The ending trigger should NOT be a single button. `EndingEvaluator` is called passively on `quest:completed` events. + +**Files changed:** `server/src/services/EndingEvaluator.js` (new), `server/src/index.js`, `server/src/routes/state.js` +**Risk:** Medium — ending criteria tuning requires extensive playtesting; ship as observable-only first, gate actual ending cutscene/screen behind a separate Task 10 content work + +--- + +### Task 9 — Debug/dev tools + +**What changes:** +- New route file: `server/src/routes/debug.js` — only active when `NODE_ENV !== 'production'` + - `GET /api/debug/state` — full save state dump + - `GET /api/debug/behavior` — current behavior snapshot (curiosity/obedience/risk/suspicion) + - `GET /api/debug/phase` — current narrative phase + - `GET /api/debug/ending` — current ending trajectory + - `GET /api/debug/hidden-hooks` — discovered + undiscovered hooks + - `POST /api/debug/set-behavior` — override behavior variables (for testing branches) + - `POST /api/debug/set-phase` — force a narrative phase (for testing phase-specific pressure) + - `POST /api/debug/discover-hook/:id` — 
manually fire hook discovery (for testing) +- Wire debug router into `server/src/index.js` behind `NODE_ENV` guard +- Add a minimal debug panel to the frontend (dev only): collapsible overlay showing behavior, phase, ending trajectory — controlled by `?debug=1` query param + +**Files changed:** `server/src/routes/debug.js` (new), `server/src/index.js`, `frontend/src/App.svelte` (conditional debug panel), `frontend/src/components/DebugPanel.svelte` (new) +**Risk:** Low — debug routes are gated; frontend panel is conditional + +--- + +### Task 10 — Content integration + +**What changes:** +- Add new fields to all 8 existing quests: `narrative_phase`, `behavior_impact`, `hidden_hook`, `linux_concepts`, `failure_conditions`, `access_requirements` +- Fix Priya's name in: `server/src/services/ShiftReviewService.js`, `server/src/services/EmailService.js`, `content/tickets/T007.json`, `content/docs/onboarding.json` +- Register any new world flags needed by the new fields in `content/world_flags/world_flags.json` +- Author the first hidden hooks as passive objectives in Q005–Q008 (per STORY_DESIGN_CONTEXT.md: every 3–5 quests) +- Add phase-pressure content files for phases 1–3 (phases 4–6 are content-authored later as story expands) +- Author Kowalski as a pressure sender in the phase 2 and 3 profiles + +**Files changed:** All 8 quest JSONs, `content/tickets/T007.json`, `content/docs/onboarding.json`, `server/src/services/ShiftReviewService.js`, `server/src/services/EmailService.js`, `content/world_flags/world_flags.json`, `content/pressure_profiles/` (new files) +**Risk:** Medium — touching all quest files; run `validate-content.js` after every file change + +--- + +### Task 11 — Validation and tests + +**What changes:** +- Update `validate-content.js`: + - Error on unrecognized `narrative_phase` value + - Warn on missing `narrative_phase` + - Validate `behavior_impact` structure (numeric deltas) + - Validate `hidden_hook` structure if present + - Warn if `linux_concepts` 
is empty + - Check `access_requirements.minimum_access` values against known VM IDs +- Add unit tests: + - `BehaviorTracker.test.js` — apply deltas, persistence, initialize from state + - `NarrativePhaseTracker.test.js` — advance rules, phase ordering, initialize + - `EndingEvaluator.test.js` — all 4 endings, boundary conditions + - `HiddenHookTracker.test.js` — discover, isDiscovered, persistence +- Extend existing tests: + - `ValidationEngine.test.js` — confirm hidden objectives with `hidden: true` don't affect normal branch resolution + - `TicketService.test.js` — confirm `behavior_impact` is applied at completion, confirm no-op when field absent +- Manual test checklist (see Task 11 Codex prompt) + +**Files changed:** `tools/content/validate-content.js`, `server/src/services/BehaviorTracker.test.js` (new), `server/src/services/NarrativePhaseTracker.test.js` (new), `server/src/services/EndingEvaluator.test.js` (new), `server/src/services/HiddenHookTracker.test.js` (new) +**Risk:** Low — tests are additive + +--- + +## 5. 
Files Likely to Change + +| File | Why | What changes | Risk | +|---|---|---|---| +| `server/src/services/SaveState.js` | New save keys needed | Add `behavior`, `narrative_phase`, `suspicion`, `hidden_hooks_discovered` to `_defaultState()`; bump `schema_version` to 3 | Low — `_applyDefaults` merges safely | +| `server/src/services/QuestEngine.js` | Phase advancement hook | Call `narrativePhaseTracker.advance()` in `complete()`; import new service | Low | +| `server/src/services/TicketService.js` | Behavior application | Call `behaviorTracker.apply()` after branch selection in `markComplete()` | Low — branch.behavior_impact is optional | +| `server/src/services/ShiftReviewService.js` | Name correction | Change `'Priya Kapoor'` to `'Priya Nair'`; fix `p.kapoor` to `p.nair` in email From line | Low — one-liner | +| `server/src/services/EmailService.js` | Name correction | Change `CHARACTER_EMAILS.priya` to `'Priya Nair <p.nair@axiomworks.internal>'` | Low — one-liner | +| `server/src/services/IncidentScheduler.js` | Phase pressure | Add `_processPhasePressure()` method triggered by phase change event | Medium | +| `server/src/services/ProgressionSystem.js` | Access level label | Add `getAccessLevel()` that derives `basic_user \| sudo \| root` from current `unlocked_access` set | Low | +| `server/src/routes/state.js` | Expose new state | Add `behavior`, `narrativePhase`, `accessLevel`, `suspicion` to GET /api/state response | Low | +| `server/src/index.js` | Wire new services | Import and `initialize()` new services in the correct order; add debug router | Low | +| `tools/content/validate-content.js` | Validate new schema fields | Add phase enum check, behavior_impact structure check, hidden_hook shape check | Low — additive | +| `content/world_flags/world_flags.json` | New flags needed | Add entries for any new flags emitted by hidden hooks and phase pressure profiles | Low | +| `content/tickets/T007.json` | Priya name | Update `from` field if it uses old email | Low | +| 
`content/docs/onboarding.json` | Priya name | Update any references to Priya Kapoor or Priya Singh | Low | +| All 8 quest JSONs | New fields | Add `narrative_phase`, `behavior_impact`, `hidden_hook`, `linux_concepts`, `failure_conditions`, `access_requirements` | Medium — large surface | + +--- + +## 6. Files Likely to Be Added + +| File | Purpose | Expected structure | +|---|---|---| +| `server/src/services/BehaviorTracker.js` | Track curiosity/obedience/risk/suspicion | Class with `initialize()`, `apply(impact)`, `getSnapshot()`, `_persist()` | +| `server/src/services/NarrativePhaseTracker.js` | Track and advance narrative phase | Class with `initialize()`, `advance(questId)`, `getPhase()`, `_persist()` | +| `server/src/services/HiddenHookTracker.js` | Record hidden hook discoveries | Class with `initialize()`, `discover(id)`, `isDiscovered(id)`, `getDiscovered()` | +| `server/src/services/EndingEvaluator.js` | Evaluate ending trajectory from world state | Class with `evaluate()`, `checkTrigger()`, pure computation over save state snapshot | +| `server/src/routes/debug.js` | Dev-only debug API | Express router, gated on `NODE_ENV !== 'production'` | +| `frontend/src/components/DebugPanel.svelte` | Dev-only debug overlay | Collapsible panel, shown on `?debug=1`, polling `/api/debug/state` | +| `content/progression/access_levels.json` | Named access level threshold definitions | Array of `{ level, trust_threshold, suspicion_ceiling, grants, revokes }` | +| `content/pressure_profiles/kowalski_phase_1.json` | Phase 1 boss pressure | `escalation_steps` with Kowalski emails at time thresholds | +| `content/pressure_profiles/kowalski_phase_2.json` | Phase 2 boss pressure | Dismissive Kowalski CC patterns | +| `content/pressure_profiles/kowalski_phase_3.json` | Phase 3 boss pressure | Suspicious Kowalski, Priya CC | +| `server/src/services/BehaviorTracker.test.js` | Unit tests for BehaviorTracker | Jest test file using existing `IncidentScheduler.test.js` as pattern | +| 
`server/src/services/NarrativePhaseTracker.test.js` | Unit tests for NarrativePhaseTracker | Jest test file | +| `server/src/services/EndingEvaluator.test.js` | Unit tests for EndingEvaluator | Jest test file, covers all 4 endings | +| `server/src/services/HiddenHookTracker.test.js` | Unit tests for HiddenHookTracker | Jest test file | + +--- + +## 7. Data Migration Plan + +### Existing quests (Q001–Q008) + +**Strategy: Wrap into new schema (backward-compatible extension)** + +- Do NOT replace existing quests. Do NOT create a "legacy" tier. +- Add new fields to each existing quest file. The fields are additive. +- `ContentLoader.js` already loads all quest files and passes them to `QuestEngine`. New fields are simply available at resolution time. +- Missing new fields in old quests: the runtime treats `narrative_phase: undefined` as `normal_work`; `behavior_impact: undefined` as no behavior change; `hidden_hook: null` as no hook. +- This means existing quests continue to work with zero runtime errors before Task 10 runs. + +### Save state migration + +- `schema_version` bumps from `2` to `3` +- `SaveState._applyDefaults()` already merges new keys safely: old saves that lack `behavior`, `narrative_phase`, `suspicion`, `hidden_hooks_discovered` will receive the default values (`50/50/50`, `'normal_work'`, `0`, `[]`) on next load +- No destructive migration. No migration script needed. +- Old saves loaded under the new schema will behave as if the player is in Phase 1 with neutral behavior — which is correct for a save that predates the new system. + +### Tickets, dialogue, incidents + +- No migration needed. Existing files continue to load and function. +- New dialogue files for phase pressure and boss escalation are additive. + +--- + +## 8. 
Testing Plan + +### Unit tests (new) + +| Test file | What it covers | +|---|---| +| `BehaviorTracker.test.js` | Delta application, clamping (0–100), initialize from state, persist, event emission | +| `NarrativePhaseTracker.test.js` | Phase ordering (spine), advance-only-forward rule, initialize from state, persist | +| `EndingEvaluator.test.js` | All 4 endings by state construction, boundary conditions, tie-break rules | +| `HiddenHookTracker.test.js` | Discover, isDiscovered, idempotent discover, initialize from state | + +### Integration tests (extend existing) + +| Test | Assertion | +|---|---| +| `TicketService.test.js` — behavior applied | After `markComplete`, save state `behavior.curiosity` changes by branch delta | +| `TicketService.test.js` — behavior absent | Quest with no `behavior_impact` completes without error | +| `ValidationEngine.test.js` — hidden objective | `hidden: true` objective validates passively without blocking branch resolution | +| `IncidentScheduler.test.js` — phase pressure | Phase change event triggers correct pressure profile activation | + +### Save/load compatibility checks + +- Load an existing (schema_version 2) save: all new keys initialized to defaults, no error +- Complete a new quest with new schema fields: save state includes correct behavior deltas +- Restart server with schema_version 3 save: all new keys correctly restored +- Test `SAVE_DIR` override with new schema + +### Manual test checklist + +1. Complete Q001 clean fix → confirm `player_ssh_configured` flag set, trust = 53 +2. Complete Q001 brittle fix → confirm trust penalty, `player_loose_permissions` flag set +3. After any quest completion → confirm `behavior` object in `/api/state` (via debug route) has changed +4. With `?debug=1` → confirm debug panel visible in frontend +5. Complete Q001–Q003 → confirm narrative phase advances from `normal_work` +6. 
Navigate terminal to a hidden anomaly (e.g., unknown user in `/etc/passwd`) → confirm `/api/debug/hidden-hooks` shows new entry +7. Force phase 3 via debug route → confirm Kowalski pressure profile activates +8. Force behavior state to `{ curiosity: 80, obedience: 20, risk: 30 }` + reach resolution phase → confirm EndingEvaluator returns `exposure` +9. Force behavior state to `{ curiosity: 20, obedience: 80, risk: 20 }` + reach resolution phase → confirm `corporate_loop` +10. Run `node tools/content/validate-content.js` — zero errors with all existing + updated quests +11. Run `npm test` — all existing tests pass; all new unit tests pass + +### Content validation checks + +- After Task 10: run `validate-content.js --verbose` on all 8 updated quests +- Confirm all new `narrative_phase` values are valid enum members +- Confirm all new `behavior_impact` fields have numeric deltas +- Confirm no undeclared world flags introduced +- Confirm all `hidden_hook` IDs are unique across quests + +--- + +## 9. Codex Delegation Prompts + +### Task 2 — Extend validate-content.js + +``` +File: tools/content/validate-content.js + +Extend the existing content validation tool. Do not change any existing checks. Add these new checks after the existing quest validation block: + +1. Define a constant at the top of the file: + const VALID_NARRATIVE_PHASES = new Set(["normal_work","unease","suspicion","investigation","conflict","resolution"]); + +2. 
In the quest validation loop (the `for (const [qid, { data: quest, fname }] of Object.entries(quests))` block), add after the existing checks: + + // narrative_phase + if (!quest.narrative_phase) { + warn(`${ctx}: missing 'narrative_phase' field`); + } else if (!VALID_NARRATIVE_PHASES.has(quest.narrative_phase)) { + err(`${ctx}: unknown narrative_phase '${quest.narrative_phase}'`); + } + + // behavior_impact + if (quest.behavior_impact !== undefined) { + for (const [branchKey, impact] of Object.entries(quest.behavior_impact)) { + for (const field of ['curiosity_delta','obedience_delta','risk_delta','suspicion_delta']) { + if (impact[field] !== undefined && typeof impact[field] !== 'number') { + err(`${ctx}: behavior_impact[${branchKey}].${field} must be a number`); + } + } + } + } + + // hidden_hook shape (if present and not null) + if (quest.hidden_hook !== undefined && quest.hidden_hook !== null) { + if (typeof quest.hidden_hook.id !== 'string') { + err(`${ctx}: hidden_hook.id must be a string`); + } + } + + // access_requirements + if (quest.access_requirements?.minimum_access) { + for (const [vmId] of Object.entries(quest.access_requirements.minimum_access)) { + if (!vmProfiles[vmId]) { + err(`${ctx}: access_requirements.minimum_access references unknown VM '${vmId}'`); + } + } + } + +Acceptance criteria: +- `node tools/content/validate-content.js` runs without JS errors +- Existing quest files produce only warnings for missing narrative_phase, not errors +- A test quest with narrative_phase: "invalid_phase" produces one error +- All other existing checks continue to pass +``` + +--- + +### Task 3 — BehaviorTracker service + +``` +Create file: server/src/services/BehaviorTracker.js + +Use ES module syntax (import/export) matching the existing service style (see SaveState.js and TrustSystem.js as patterns). 
+ +The class must: +- Store { curiosity, obedience, risk, suspicion } — all numeric 0–100, starting at 50/50/50/0 +- initialize(state): load from state.behavior (use defaults if absent) +- apply(impact): accept an object with optional fields { curiosity_delta, obedience_delta, risk_delta, suspicion_delta }, add each to the corresponding score, clamp to [0,100], persist, emit 'behavior:changed' via eventBus +- getSnapshot(): return a plain { curiosity, obedience, risk, suspicion } object +- _persist(): call saveState.set({ behavior: this.getSnapshot() }) + +Export a singleton: export const behaviorTracker = new BehaviorTracker(); + +Then make these changes: + +1. In server/src/services/SaveState.js, in _defaultState(), add this key alongside the existing ones: + behavior: { curiosity: 50, obedience: 50, risk: 50, suspicion: 0 }, + and change schema_version from 2 to 3. + +2. In server/src/index.js, import behaviorTracker from './services/BehaviorTracker.js' and add behaviorTracker.initialize(state) in initializeServices() after trustSystem.initialize(state). + +3. In server/src/services/TicketService.js, in the markComplete() method, after the line `questEngine.complete(quest.id, { branchId: branch.id });`, add: + const behaviorImpact = branch.behavior_impact ?? quest.behavior_impact?.[branch.id] ?? quest.behavior_impact?.default ?? quest.behavior_impact ?? null; + if (behaviorImpact) { behaviorTracker.apply(behaviorImpact); } + (quest.behavior_impact is keyed by branch id with an optional 'default' entry, so the entry for the resolved branch must be looked up before falling back to 'default'.) + (Add the import at the top of the file.) + +Acceptance criteria: +- npm test passes (existing tests unchanged) +- GET /api/debug/state (if debug route exists) shows behavior object +- After completing a quest whose behavior_impact entry for the resolved branch has curiosity_delta: 2, the save.json shows behavior.curiosity incremented by 2 +``` + +--- + +### Task 4 — NarrativePhaseTracker service + +``` +Create file: server/src/services/NarrativePhaseTracker.js + +Use ES module syntax matching existing service patterns. 
+ +Phase ordering (spine): normal_work < unease < suspicion < investigation < conflict < resolution + +The class must: +- Store _phase as a string, initialized from state.narrative_phase or defaulting to 'normal_work' +- PHASE_ORDER constant: ['normal_work','unease','suspicion','investigation','conflict','resolution'] +- initialize(state): restore _phase from state.narrative_phase +- advance(questId): look up the quest from contentLoader, read its narrative_phase field; if the quest's phase rank is strictly higher than current phase rank, update _phase, persist, emit 'narrative:phase_changed' event with { from, to }; if narrative_phase field is absent or undefined, do nothing +- getPhase(): return current _phase string +- _persist(): saveState.set({ narrative_phase: this._phase }) + +Export singleton: export const narrativePhaseTracker = new NarrativePhaseTracker(); + +Then make these changes: + +1. In server/src/services/SaveState.js _defaultState(), add: + narrative_phase: 'normal_work', + +2. In server/src/services/QuestEngine.js complete() method, after this._persist(), add: + narrativePhaseTracker.advance(questId); + (Add the import at top of file.) + +3. In server/src/routes/state.js, add narrativePhase: narrativePhaseTracker.getPhase() to the GET / response object. + Import narrativePhaseTracker at top of the file. + +4. In server/src/index.js, import and initialize narrativePhaseTracker in initializeServices() after questEngine.initialize(state). 
+ +Acceptance criteria: +- npm test passes +- After completing Q001, GET /api/state returns narrativePhase: 'normal_work' +- If a quest with narrative_phase: 'unease' is completed after Q001, GET /api/state returns narrativePhase: 'unease' +- Phase never goes backward: completing a 'normal_work' quest after an 'unease' quest does not revert the phase +``` + +--- + +### Task 5 — HiddenHookTracker service + +``` +Create file: server/src/services/HiddenHookTracker.js + +ES module syntax, matching existing service patterns. + +The class must: +- Store _discovered as a Set of hook ID strings +- initialize(state): load from state.hidden_hooks_discovered (array), build Set +- discover(hookId): if not already discovered, add to Set, persist, emit 'hidden_hook:discovered' with { hookId }; idempotent if already discovered +- isDiscovered(hookId): boolean +- getDiscovered(): return [...this._discovered] sorted +- _persist(): saveState.set({ hidden_hooks_discovered: [...this._discovered] }) + +Export singleton: export const hiddenHookTracker = new HiddenHookTracker(); + +Then: + +1. In server/src/services/SaveState.js _defaultState(), add: + hidden_hooks_discovered: [], + +2. In server/src/index.js, import and call hiddenHookTracker.initialize(state) in initializeServices(). + +3. In server/src/routes/state.js, add hiddenHooksDiscovered: hiddenHookTracker.getDiscovered() to the response. + +Acceptance criteria: +- npm test passes +- POST /api/debug/discover-hook/test-hook (if debug route exists) adds 'test-hook' to state +- GET /api/state returns hiddenHooksDiscovered: ['test-hook'] +- Calling discover() twice with the same ID results in exactly one entry in the array +``` + +--- + +### Task 6 — Access level extension + +``` +Make these targeted changes to existing files: + +1. 
In server/src/services/ProgressionSystem.js, add this method to the ProgressionSystem class: + getAccessLevel() { + if (this._access.has('sudo:workstation:full') || this._access.has('sudo:web_server:full') || this._access.has('sudo:build_machine:full')) { + return 'root'; + } + if (this._access.has('sudo:workstation:systemctl') || this._access.has('ssh:web_server') || this._access.has('ssh:build_machine')) { + return 'sudo'; + } + return 'basic_user'; + } + +2. In server/src/routes/state.js, add to the GET / response: + accessLevel: progressionSystem.getAccessLevel(), + Import progressionSystem if not already imported. + +3. Create file: content/progression/access_levels.json with this content: + { + "_description": "Named access level definitions. Derived from ProgressionSystem unlocked_access keys.", + "levels": [ + { "name": "basic_user", "description": "Default access. Workstation only. No sudo." }, + { "name": "sudo", "description": "Sudo on workstation; SSH to hermes or vulcan." }, + { "name": "root", "description": "Full sudo on at least one remote host." } + ] + } + +Acceptance criteria: +- npm test passes +- GET /api/state returns accessLevel: 'basic_user' for a fresh save +- After trust reaches 55, accessLevel returns 'sudo' +- After trust reaches 60 and sudo:web_server:full is granted, accessLevel returns 'root' +``` + +--- + +### Task 7 — Phase pressure content files + +``` +Create three new pressure profile files in content/pressure_profiles/: + +File: content/pressure_profiles/kowalski_phase_1.json +Content: +{ + "id": "kowalski_phase_1", + "label": "Dave Kowalski — Phase 1: Routine Pressure", + "description": "Normal managerial check-ins. Annoying but not threatening.", + "trigger_phase": "normal_work", + "escalation_steps": [ + { + "trigger_after_seconds": 300, + "notification": "Quick check-in — how are you getting on with the ticket queue? Let me know if anything is blocking you. 
Dave K.", + "notification_severity": "info", + "sender": "Dave Kowalski ", + "subject": "Status check" + }, + { + "trigger_after_seconds": 600, + "notification": "Following up on my earlier note. We should really document that workflow once you get a moment.", + "notification_severity": "info", + "sender": "Dave Kowalski ", + "subject": "Re: Status check" + } + ] +} + +File: content/pressure_profiles/kowalski_phase_2.json +Content: +{ + "id": "kowalski_phase_2", + "label": "Dave Kowalski — Phase 2: Dismissive", + "description": "Kowalski is aware something is recurring. Manages upward, not inward.", + "trigger_phase": "unease", + "escalation_steps": [ + { + "trigger_after_seconds": 180, + "notification": "I've had a couple of questions from Sarah's team about stability. Nothing critical, but let's make sure we're on top of it. Noted for the weekly update. D.", + "notification_severity": "info", + "sender": "Dave Kowalski ", + "subject": "FYI — product team questions" + } + ] +} + +File: content/pressure_profiles/kowalski_phase_3.json +Content: +{ + "id": "kowalski_phase_3", + "label": "Dave Kowalski — Phase 3: Suspicious", + "description": "Kowalski is getting questions from above. Starts involving Priya.", + "trigger_phase": "suspicion", + "escalation_steps": [ + { + "trigger_after_seconds": 120, + "notification": "I've scheduled a brief sync for Thursday to talk through recent changes on the infrastructure side. Priya will join. Nothing to worry about — just a routine review.", + "notification_severity": "warning", + "sender": "Dave Kowalski ", + "subject": "Thursday sync — infra review" + } + ] +} + +Acceptance criteria: +- node tools/content/validate-content.js passes with no new errors +- All three files have unique 'id' fields that pass content loader's ID detection +``` + +--- + +### Task 8 — EndingEvaluator service + +``` +Create file: server/src/services/EndingEvaluator.js + +ES module syntax. 
+ +ENDING_CRITERIA constant (all conditions must be met for that ending to be active): +- exposure: curiosity >= 65, hidden_hooks_discovered.length >= 2, narrative_phase rank >= PHASE_RANK['investigation'] (i.e. phase is 'investigation', 'conflict', or 'resolution') +- corporate_loop: obedience >= 65, curiosity <= 40, trust >= 65 +- burnout: curiosity <= 35, obedience <= 40 (passive disengagement) +- chaos: risk >= 65, trust <= 40 + +The class must: +- evaluate(): read current saveState, compute which endings' criteria are met, return { active: 'exposure'|'corporate_loop'|'burnout'|'chaos'|'undetermined', candidates: [...] } — if multiple match, prefer in this order: exposure > chaos > corporate_loop > burnout +- checkTrigger(): call evaluate(); if narrative_phase is 'resolution' and active is not 'undetermined', emit 'ending:triggered' with { ending: active }; return the result + +PHASE_RANK constant: { normal_work:0, unease:1, suspicion:2, investigation:3, conflict:4, resolution:5 } + +Import saveState, narrativePhaseTracker, hiddenHookTracker, behaviorTracker. + +Export singleton: export const endingEvaluator = new EndingEvaluator(); + +Wire into index.js: import endingEvaluator (no initialize() call is needed; it reads state on demand). + +Listen for 'quest:completed' on eventBus: call endingEvaluator.checkTrigger() each time. + +Acceptance criteria: +- npm test passes +- evaluate() with curiosity=70, hiddenHooksDiscovered=['h1','h2'], phase='investigation' returns active: 'exposure' +- evaluate() with obedience=70, curiosity=35, trust=70 returns active: 'corporate_loop' +- evaluate() with no conditions met returns active: 'undetermined' +``` + +--- + +### Task 9 — Debug routes and frontend panel + +``` +Create file: server/src/routes/debug.js + +ES module syntax. Only register routes if process.env.NODE_ENV !== 'production'. 
+ +Routes (full paths as served; because the router is mounted at '/api/debug', register each one relative to that mount, e.g. router.get('/state', ...)): + GET /api/debug/state — return full saveState.get() + GET /api/debug/behavior — return behaviorTracker.getSnapshot() + GET /api/debug/phase — return { phase: narrativePhaseTracker.getPhase() } + GET /api/debug/ending — return endingEvaluator.evaluate() + GET /api/debug/hidden-hooks — return { discovered: hiddenHookTracker.getDiscovered(), total: N } + POST /api/debug/set-behavior — body: { curiosity, obedience, risk, suspicion }; call behaviorTracker._override(body) (add _override method that directly sets values without deltas) + POST /api/debug/set-phase — body: { phase }; if valid phase, directly set _phase on narrativePhaseTracker and persist (add _forcePhase method) + POST /api/debug/discover-hook/:id — call hiddenHookTracker.discover(req.params.id); return getDiscovered() + +In server/src/index.js, add: + import debugRouter from './routes/debug.js'; + // After the other app.use() calls: + if (process.env.NODE_ENV !== 'production') { + app.use('/api/debug', debugRouter); + } + +Create file: frontend/src/components/DebugPanel.svelte +- Shows only when window.location.search includes 'debug=1' +- Polls GET /api/debug/behavior, GET /api/debug/phase, GET /api/debug/ending every 5 seconds +- Displays: behavior scores (curiosity/obedience/risk/suspicion), current phase, ending trajectory +- Minimal styling: position fixed, bottom right, semi-transparent, small font + +In frontend/src/App.svelte, import DebugPanel and conditionally render it: + {#if showDebug} + <DebugPanel /> + {/if} +Add: const showDebug = new URLSearchParams(window.location.search).get('debug') === '1'; + +Acceptance criteria: +- npm test passes +- In development: GET /api/debug/behavior returns behavior snapshot +- Visiting /?debug=1 shows the debug panel in the browser +- In production (NODE_ENV=production): GET /api/debug/behavior returns 404 +``` + +--- + +### Task 10 — Fix Priya's name and update Q001–Q008 + +``` +Part A — Fix Priya's name. Make these exact changes: + +1. 
In server/src/services/EmailService.js, find this line: + priya: 'Priya Kapoor ', + Change it to: + priya: 'Priya Nair ', + +2. In server/src/services/ShiftReviewService.js: + a. Find: reviewer: 'Priya Kapoor' + Change to: reviewer: 'Priya Nair' + b. Find: from: 'Priya Kapoor ' + Change to: from: 'Priya Nair ' + +3. In content/tickets/T007.json: if the 'from' or 'body' field contains 'Priya Kapoor', 'p.kapoor', or 'Priya Singh', replace with 'Priya Nair' and 'p.nair@axiomworks.internal'. + +4. In content/docs/onboarding.json: if 'Priya Kapoor' or 'Priya Singh' appears, replace with 'Priya Nair'. + +Part B — Add new fields to existing quests. For each quest Q001–Q008, add these fields using the values in the table below. Do not change any existing fields. Do not reformat the JSON beyond what is needed to add the new fields. + +Q001: narrative_phase: "normal_work", linux_concepts: ["ssh-keygen","authorized_keys","file permissions"], failure_conditions: ["SSH keys not added","authorized_keys permissions too broad"], behavior_impact: { "correct-key": { curiosity_delta: 0, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 }, "loose-permissions": { curiosity_delta: 0, obedience_delta: 0, risk_delta: 1, suspicion_delta: 1 }, default: { curiosity_delta: 0, obedience_delta: 0, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: null, access_requirements: { minimum_access: { workstation: "basic_user" }, requires_root: false, temporary_grants_allowed: [] } + +Q002: narrative_phase: "normal_work", linux_concepts: ["nginx","systemctl","sshd_config"], failure_conditions: ["nginx not running","service not enabled at boot"], behavior_impact: { default: { curiosity_delta: 0, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: null, access_requirements: { minimum_access: { web_server: "basic_user" }, requires_root: false, temporary_grants_allowed: [] } + +Q003: narrative_phase: "normal_work", linux_concepts: ["logrotate","disk usage","df","du"], 
failure_conditions: ["disk still above threshold","logrotate not restored"], behavior_impact: { default: { curiosity_delta: 0, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: null, access_requirements: { minimum_access: { web_server: "sudo" }, requires_root: false, temporary_grants_allowed: [] } + +Q004: narrative_phase: "normal_work", linux_concepts: ["chown","file ownership","deploy scripts"], failure_conditions: ["web root ownership not fixed","deploy service still failing"], behavior_impact: { default: { curiosity_delta: 0, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: null, access_requirements: { minimum_access: { web_server: "sudo" }, requires_root: false, temporary_grants_allowed: [] } + +Q005: narrative_phase: "unease", linux_concepts: ["cron","crontab","user field","backup management"], failure_conditions: ["cron still running as root","disk not cleared","backup directory ownership not fixed"], behavior_impact: { "full-fix": { curiosity_delta: 1, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 }, "cron-fixed-only": { curiosity_delta: 0, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 }, "disk-cleared-only": { curiosity_delta: 0, obedience_delta: 0, risk_delta: 1, suspicion_delta: 1 }, default: { curiosity_delta: 0, obedience_delta: 0, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: { "id": "q005_backup_agent_history", "description": "backup-agent home directory contains a .bash_history with unusual commands that predate the current cron misconfiguration.", "discovery_method": "Player reads /home/backup-agent/.bash_history", "significance": "Dale configured this cron job. The history shows it was changed deliberately, not by accident." 
}, access_requirements: { minimum_access: { web_server: "sudo" }, requires_root: false, temporary_grants_allowed: [] } + +Q006: narrative_phase: "unease", linux_concepts: ["NTP","systemd-timesyncd","Arch Linux","pacman","package keys"], failure_conditions: ["NTP not enabled at boot","package manager still broken"], behavior_impact: { default: { curiosity_delta: 0, obedience_delta: 1, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: null, access_requirements: { minimum_access: { build_machine: "sudo" }, requires_root: false, temporary_grants_allowed: [] } + +Q007: narrative_phase: "suspicion", linux_concepts: ["sshd_config","AllowGroups","AllowUsers","access hardening"], failure_conditions: ["Priya still locked out","SSH restrictions removed entirely"], behavior_impact: { default: { curiosity_delta: 1, obedience_delta: 0, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: { "id": "q007_dale_ssh_key", "description": "An SSH key in hermes /root/.ssh/authorized_keys does not match any current staff. The fingerprint matches no documented key.", "discovery_method": "Player reads /root/.ssh/authorized_keys on hermes", "significance": "Dale had root SSH access to hermes that was never formally revoked." 
}, access_requirements: { minimum_access: { web_server: "sudo" }, requires_root: false, temporary_grants_allowed: ["sudo:web_server:sshd"] } + +Q008: narrative_phase: "suspicion", linux_concepts: ["apt","package pinning","apt-preferences","internal package mirror","vulcan build pipeline"], failure_conditions: ["axiomworks-app still broken","bad package not traced to build machine"], behavior_impact: { default: { curiosity_delta: 1, obedience_delta: 0, risk_delta: 0, suspicion_delta: 0 } }, hidden_hook: { "id": "q008_build_log_anomaly", "description": "vulcan's build log for 2.1.1 shows it was triggered by a manual invocation, not the automated pipeline, at 02:14.", "discovery_method": "Player reads /var/log/build-pipeline.log on vulcan and notices the timestamp and manual trigger field", "significance": "The bad build was triggered manually. Someone made the broken build, and it was not the pipeline." }, access_requirements: { minimum_access: { build_machine: "sudo", web_server: "sudo" }, requires_root: false, temporary_grants_allowed: [] } + +After all changes, run: node tools/content/validate-content.js +Confirm: zero errors. Warnings about missing narrative_phase should now be gone for all 8 quests. +``` + +--- + +### Task 11 — Unit tests and validation extension + +``` +Part A — Write unit tests for all new services. + +Create file: server/src/services/BehaviorTracker.test.js +Use the existing IncidentScheduler.test.js or ShiftReviewService.test.js as the pattern for test structure. + +Tests to include: +1. initialize() with no state.behavior: curiosity=50, obedience=50, risk=50, suspicion=0 +2. initialize() with existing state.behavior: values restored correctly +3. apply({ curiosity_delta: 5 }): curiosity increases by 5 +4. apply({ risk_delta: -10 }): risk decreases by 10, floor at 0 +5. apply({ suspicion_delta: 200 }): suspicion clamps at 100 +6. apply({}): no change, no error +7. apply(null): no change, no error (defensive) +8. 
getSnapshot(): returns plain object with all four keys + +Create file: server/src/services/NarrativePhaseTracker.test.js +Tests: +1. initialize() with no state.narrative_phase: returns 'normal_work' +2. advance() with quest having narrative_phase 'unease': phase becomes 'unease' +3. advance() with quest having higher phase than current: phase advances +4. advance() with quest having lower phase than current: phase does NOT change +5. advance() with quest missing narrative_phase field: phase does NOT change +6. getPhase(): returns current phase string + +Create file: server/src/services/EndingEvaluator.test.js +Tests (each builds a mock state): +1. exposure: curiosity=70, hiddenHooksDiscovered=['a','b'], phase='investigation' → active: 'exposure' +2. corporate_loop: obedience=70, curiosity=35, trust=70 → active: 'corporate_loop' +3. burnout: curiosity=30, obedience=35 → active: 'burnout' +4. chaos: risk=70, trust=35 → active: 'chaos' +5. no conditions: active: 'undetermined' +6. exposure wins over chaos when both met: active: 'exposure' + +Create file: server/src/services/HiddenHookTracker.test.js +Tests: +1. initialize() with no state: getDiscovered() returns [] +2. discover('h1'): getDiscovered() returns ['h1'] +3. discover('h1') twice: getDiscovered() returns ['h1'] (idempotent) +4. isDiscovered('h1'): true after discovery +5. isDiscovered('h2'): false before discovery + +Part B — Run validation. +After all changes: run `npm test` from the server directory. All tests must pass. +Run `node tools/content/validate-content.js`. Zero errors. + +Part C — Manual verification checklist. 
+Confirm each item by inspection or running the game: +[ ] Fresh save: GET /api/state returns behavior: {curiosity:50,obedience:50,risk:50,suspicion:0}, narrativePhase:'normal_work', accessLevel:'basic_user' +[ ] Complete Q001 clean branch: behavior.obedience increments, phase stays normal_work +[ ] Complete Q005: phase advances to 'unease', hidden_hook for q005_backup_agent_history visible in /api/debug/hidden-hooks +[ ] Complete Q007: phase advances to 'suspicion', q007_dale_ssh_key hook discoverable on hermes +[ ] ShiftReviewService sends from Priya Nair +[ ] GET /api/debug/ending with forced state returns correct ending label +[ ] /?debug=1 shows debug panel in browser +[ ] node tools/content/validate-content.js: zero errors +``` + +--- + +*End of implementation plan.* diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..a8270b5 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,12 @@ + + + + + + Sysadmin Chronicles + + +
+ + + diff --git a/frontend/package-lock.json b/frontend/package-lock.json new file mode 100644 index 0000000..b969020 --- /dev/null +++ b/frontend/package-lock.json @@ -0,0 +1,1176 @@ +{ + "name": "sysadmin-chronicles-frontend", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "sysadmin-chronicles-frontend", + "version": "0.1.0", + "devDependencies": { + "@sveltejs/vite-plugin-svelte": "^7.0.0", + "svelte": "^5.55.5", + "vite": "^8.0.10" + } + }, + "node_modules/@emnapi/core": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz", + "integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.2.1", + "tslib": "^2.4.0" + } + }, + "node_modules/@emnapi/runtime": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz", + "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@emnapi/wasi-threads": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", + "integrity": "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + 
"@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", + "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@napi-rs/wasm-runtime": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.4.tgz", + "integrity": "sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@tybys/wasm-util": "^0.10.1" + }, + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/Brooooooklyn" + }, + "peerDependencies": { + "@emnapi/core": "^1.7.1", + "@emnapi/runtime": "^1.7.1" + } + }, + "node_modules/@oxc-project/types": { + "version": "0.127.0", + "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.127.0.tgz", + "integrity": "sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/Boshen" + } + }, + "node_modules/@rolldown/binding-android-arm64": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.17.tgz", + "integrity": "sha512-s70pVGhw4zqGeFnXWvAzJDlvxhlRollagdCCKRgOsgUOH3N1l0LIxf83AtGzmb5SiVM4Hjl5HyarMRfdfj3DaQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-darwin-arm64": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.17.tgz", + "integrity": "sha512-4ksWc9n0mhlZpZ9PMZgTGjeOPRu8MB1Z3Tz0Mo02eWfWCHMW1zN82Qz/pL/rC+yQa+8ZnutMF0JjJe7PjwasYw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-darwin-x64": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.17.tgz", + "integrity": "sha512-SUSDOI6WwUVNcWxd02QEBjLdY1VPHvlEkw6T/8nYG322iYWCTxRb1vzk4E+mWWYehTp7ERibq54LSJGjmouOsw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-freebsd-x64": { + "version": "1.0.0-rc.17", + 
"resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.17.tgz", + "integrity": "sha512-hwnz3nw9dbJ05EDO/PvcjaaewqqDy7Y1rn1UO81l8iIK1GjenME75dl16ajbvSSMfv66WXSRCYKIqfgq2KCfxw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-arm-gnueabihf": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.17.tgz", + "integrity": "sha512-IS+W7epTcwANmFSQFrS1SivEXHtl1JtuQA9wlxrZTcNi6mx+FDOYrakGevvvTwgj2JvWiK8B29/qD9BELZPyXQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-arm64-gnu": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.17.tgz", + "integrity": "sha512-e6usGaHKW5BMNZOymS1UcEYGowQMWcgZ71Z17Sl/h2+ZziNJ1a9n3Zvcz6LdRyIW5572wBCTH/Z+bKuZouGk9Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-arm64-musl": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.17.tgz", + "integrity": "sha512-b/CgbwAJpmrRLp02RPfhbudf5tZnN9nsPWK82znefso832etkem8H7FSZwxrOI9djcdTP7U6YfNhbRnh7djErg==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-ppc64-gnu": { + "version": "1.0.0-rc.17", + "resolved": 
"https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.17.tgz", + "integrity": "sha512-4EII1iNGRUN5WwGbF/kOh/EIkoDN9HsupgLQoXfY+D1oyJm7/F4t5PYU5n8SWZgG0FEwakyM8pGgwcBYruGTlA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-s390x-gnu": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.17.tgz", + "integrity": "sha512-AH8oq3XqQo4IibpVXvPeLDI5pzkpYn0WiZAfT05kFzoJ6tQNzwRdDYQ45M8I/gslbodRZwW8uxLhbSBbkv96rA==", + "cpu": [ + "s390x" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-x64-gnu": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.17.tgz", + "integrity": "sha512-cLnjV3xfo7KslbU41Z7z8BH/E1y5mzUYzAqih1d1MDaIGZRCMqTijqLv76/P7fyHuvUcfGsIpqCdddbxLLK9rA==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-x64-musl": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.17.tgz", + "integrity": "sha512-0phclDw1spsL7dUB37sIARuis2tAgomCJXAHZlpt8PXZ4Ba0dRP1e+66lsRqrfhISeN9bEGNjQs+T/Fbd7oYGw==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-openharmony-arm64": { + "version": 
"1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.17.tgz", + "integrity": "sha512-0ag/hEgXOwgw4t8QyQvUCxvEg+V0KBcA6YuOx9g0r02MprutRF5dyljgm3EmR02O292UX7UeS6HzWHAl6KgyhA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-wasm32-wasi": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.17.tgz", + "integrity": "sha512-LEXei6vo0E5wTGwpkJ4KoT3OZJRnglwldt5ziLzOlc6qqb55z4tWNq2A+PFqCJuvWWdP53CVhG1Z9NtToDPJrA==", + "cpu": [ + "wasm32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "1.10.0", + "@emnapi/runtime": "1.10.0", + "@napi-rs/wasm-runtime": "^1.1.4" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-win32-arm64-msvc": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.17.tgz", + "integrity": "sha512-gUmyzBl3SPMa6hrqFUth9sVfcLBlYsbMzBx5PlexMroZStgzGqlZ26pYG89rBb45Mnia+oil6YAIFeEWGWhoZA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-win32-x64-msvc": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.17.tgz", + "integrity": "sha512-3hkiolcUAvPB9FLb3UZdfjVVNWherN1f/skkGWJP/fgSQhYUZpSIRr0/I8ZK9TkF3F7kxvJAk0+IcKvPHk9qQg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/pluginutils": { + "version": 
"1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.17.tgz", + "integrity": "sha512-n8iosDOt6Ig1UhJ2AYqoIhHWh/isz0xpicHTzpKBeotdVsTEcxsSA/i3EVM7gQAj0rU27OLAxCjzlj15IWY7bg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@sveltejs/acorn-typescript": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/@sveltejs/acorn-typescript/-/acorn-typescript-1.0.9.tgz", + "integrity": "sha512-lVJX6qEgs/4DOcRTpo56tmKzVPtoWAaVbL4hfO7t7NVwl9AAXzQR6cihesW1BmNMPl+bK6dreu2sOKBP2Q9CIA==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "acorn": "^8.9.0" + } + }, + "node_modules/@sveltejs/vite-plugin-svelte": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-7.0.0.tgz", + "integrity": "sha512-ILXmxC7HAsnkK2eslgPetrqqW1BKSL7LktsFgqzNj83MaivMGZzluWq32m25j2mDOjmSKX7GGWahePhuEs7P/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "deepmerge": "^4.3.1", + "magic-string": "^0.30.21", + "obug": "^2.1.0", + "vitefu": "^1.1.2" + }, + "engines": { + "node": "^20.19 || ^22.12 || >=24" + }, + "peerDependencies": { + "svelte": "^5.46.4", + "vite": "^8.0.0-beta.7 || ^8.0.0" + } + }, + "node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", + "integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": 
"https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", + "dev": true, + "license": "MIT" + }, + "node_modules/acorn": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/aria-query": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.1.tgz", + "integrity": "sha512-Z/ZeOgVl7bcSYZ/u/rh0fOpvEpq//LZmdbkXyc7syVzjPAhfOa9ebsdTSjEBDU4vs5nC98Kfduj1uFo0qyET3g==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/axobject-query": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-4.1.0.tgz", + "integrity": "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": 
"https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/devalue": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/devalue/-/devalue-5.7.1.tgz", + "integrity": "sha512-MUbZ586EgQqdRnC4yDrlod3BEdyvE4TapGYHMW2CiaW+KkkFmWEFqBUaLltEZCGi0iFXCEjRF0OjF0DV2QHjOA==", + "dev": true, + "license": "MIT" + }, + "node_modules/esm-env": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/esm-env/-/esm-env-1.2.2.tgz", + "integrity": "sha512-Epxrv+Nr/CaL4ZcFGPJIYLWFom+YeV1DqMLHJoEd9SYRxNbaFruBwfEX/kkHUJf55j2+TUbmDcmuilbP1TmXHA==", + "dev": true, + "license": "MIT" + }, + "node_modules/esrap": { + "version": "2.2.5", + "resolved": "https://registry.npmjs.org/esrap/-/esrap-2.2.5.tgz", + "integrity": "sha512-/yLB1538mag+dn0wsePTe8C0rDIjUOaJpMs2McodSzmM2msWcZsBSdRtg6HOBt0A/r82BN+Md3pgwSc/uWt2Ig==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.4.15" + }, + "peerDependencies": { + "@typescript-eslint/types": "^8.2.0" + }, + "peerDependenciesMeta": { + "@typescript-eslint/types": { + "optional": true + } + } + }, + "node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + 
"hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/is-reference": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-3.0.3.tgz", + "integrity": "sha512-ixkJoqQvAP88E6wLydLGGqCJsrFUnqoH6HnaczB8XmDH1oaWU+xxdptvikTgaEhtZ53Ky6YXiBuUI2WXLMCwjw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.6" + } + }, + "node_modules/lightningcss": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz", + "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==", + "dev": true, + "license": "MPL-2.0", + "dependencies": { + "detect-libc": "^2.0.3" + }, + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "lightningcss-android-arm64": "1.32.0", + "lightningcss-darwin-arm64": "1.32.0", + "lightningcss-darwin-x64": "1.32.0", + "lightningcss-freebsd-x64": "1.32.0", + "lightningcss-linux-arm-gnueabihf": "1.32.0", + "lightningcss-linux-arm64-gnu": "1.32.0", + "lightningcss-linux-arm64-musl": "1.32.0", + "lightningcss-linux-x64-gnu": "1.32.0", + "lightningcss-linux-x64-musl": "1.32.0", + "lightningcss-win32-arm64-msvc": "1.32.0", + "lightningcss-win32-x64-msvc": "1.32.0" + } + }, + "node_modules/lightningcss-android-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz", + "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz", + "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz", + "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-freebsd-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz", + "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz", + "integrity": "sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==", + "cpu": [ + "arm" + ], + 
"dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz", + "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz", + "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz", + "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-musl": { + 
"version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz", + "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz", + "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz", + "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/locate-character": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/locate-character/-/locate-character-3.0.0.tgz", + "integrity": "sha512-SW13ws7BjaeJ6p7Q6CO2nchbYEc3X3J6WrmTTDto7yMPqVSZTUyY5Tjbid+Ab8gLnATtygYtiDIJGQRRn2ZOiA==", + "dev": true, + "license": "MIT" + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": 
"https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/obug": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz", + "integrity": "sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==", + "dev": true, + "funding": [ + "https://github.com/sponsors/sxzz", + "https://opencollective.com/debug" + ], + "license": "MIT" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/postcss": { + "version": "8.5.10", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz", + "integrity": 
"sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/rolldown": { + "version": "1.0.0-rc.17", + "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.17.tgz", + "integrity": "sha512-ZrT53oAKrtA4+YtBWPQbtPOxIbVDbxT0orcYERKd63VJTF13zPcgXTvD4843L8pcsI7M6MErt8QtON6lrB9tyA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@oxc-project/types": "=0.127.0", + "@rolldown/pluginutils": "1.0.0-rc.17" + }, + "bin": { + "rolldown": "bin/cli.mjs" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "optionalDependencies": { + "@rolldown/binding-android-arm64": "1.0.0-rc.17", + "@rolldown/binding-darwin-arm64": "1.0.0-rc.17", + "@rolldown/binding-darwin-x64": "1.0.0-rc.17", + "@rolldown/binding-freebsd-x64": "1.0.0-rc.17", + "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.17", + "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.17", + "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.17", + "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.17", + "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.17", + "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.17", + "@rolldown/binding-linux-x64-musl": "1.0.0-rc.17", + "@rolldown/binding-openharmony-arm64": "1.0.0-rc.17", + "@rolldown/binding-wasm32-wasi": "1.0.0-rc.17", + "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.17", + "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.17" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": 
"https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/svelte": { + "version": "5.55.5", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.55.5.tgz", + "integrity": "sha512-2uCs/LZ9us+AktdzYJM8OcxQ8qnPS1kpaO7syGT/MgO+6Qr1Ybl+TqPq+97u7PHqmmMlye5ZkoyXONy5mjjAbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/remapping": "^2.3.4", + "@jridgewell/sourcemap-codec": "^1.5.0", + "@sveltejs/acorn-typescript": "^1.0.5", + "@types/estree": "^1.0.5", + "@types/trusted-types": "^2.0.7", + "acorn": "^8.12.1", + "aria-query": "5.3.1", + "axobject-query": "^4.1.0", + "clsx": "^2.1.1", + "devalue": "^5.6.4", + "esm-env": "^1.2.1", + "esrap": "^2.2.4", + "is-reference": "^3.0.3", + "locate-character": "^3.0.0", + "magic-string": "^0.30.11", + "zimmerframe": "^1.1.2" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/tinyglobby": { + "version": "0.2.16", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz", + "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.4" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "license": "0BSD", + "optional": true + }, + "node_modules/vite": { + "version": "8.0.10", + "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.10.tgz", + "integrity": 
"sha512-rZuUu9j6J5uotLDs+cAA4O5H4K1SfPliUlQwqa6YEwSrWDZzP4rhm00oJR5snMewjxF5V/K3D4kctsUTsIU9Mw==", + "dev": true, + "license": "MIT", + "dependencies": { + "lightningcss": "^1.32.0", + "picomatch": "^4.0.4", + "postcss": "^8.5.10", + "rolldown": "1.0.0-rc.17", + "tinyglobby": "^0.2.16" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "@vitejs/devtools": "^0.1.0", + "esbuild": "^0.27.0 || ^0.28.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "@vitejs/devtools": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vitefu": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/vitefu/-/vitefu-1.1.3.tgz", + "integrity": "sha512-ub4okH7Z5KLjb6hDyjqrGXqWtWvoYdU3IGm/NorpgHncKoLTCfRIbvlhBm7r0YstIaQRYlp4yEbFqDcKSzXSSg==", + "dev": true, + "license": "MIT", + "workspaces": [ + "tests/deps/*", + "tests/projects/*", + "tests/projects/workspace/packages/*" + ], + "peerDependencies": { + "vite": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" + }, + "peerDependenciesMeta": { + "vite": { + "optional": true + } + } + }, + "node_modules/zimmerframe": { + "version": "1.1.4", + "resolved": 
"https://registry.npmjs.org/zimmerframe/-/zimmerframe-1.1.4.tgz", + "integrity": "sha512-B58NGBEoc8Y9MWWCQGl/gq9xBCe4IiKM0a2x7GZdQKOW5Exr8S1W24J6OgM1njK8xCRGvAJIL/MxXHf6SkmQKQ==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..d31cbde --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,16 @@ +{ + "name": "sysadmin-chronicles-frontend", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "devDependencies": { + "@sveltejs/vite-plugin-svelte": "^7.0.0", + "svelte": "^5.55.5", + "vite": "^8.0.10" + } +} diff --git a/frontend/src/App.svelte b/frontend/src/App.svelte new file mode 100644 index 0000000..ad1cb13 --- /dev/null +++ b/frontend/src/App.svelte @@ -0,0 +1,367 @@ + + + + Sysadmin Chronicles HUD + + +{#if loading} +
+

Bringing the desk online…

+
+{:else} +
+ + + {#if error} +
{error}
+ {/if} + + {#if alerts.length > 0} +
+ {#each alerts as alert} +
+ {alert.subject} +

{alert.message}

+
+ {/each} +
+ {/if} + +
+ activeTab = event.detail} + /> + +
+ {#if activeTab === 'tickets'} + selectTicket(event.detail)} + on:complete={completeTicket} + /> + {:else if activeTab === 'mail'} + selectMail(event.detail)} + on:reply={replyToMail} + on:attachment={openAttachment} + /> + {:else if activeTab === 'docs'} + selectDoc(event.detail)} + /> + {:else if activeTab === 'vms'} + + {:else if activeTab === 'profile'} + savePortrait(event.detail)} + /> + {/if} +
+
+ {#if import.meta.env.DEV && showDebug}{/if} +
+{/if} + + diff --git a/frontend/src/app.css b/frontend/src/app.css new file mode 100644 index 0000000..7400cea --- /dev/null +++ b/frontend/src/app.css @@ -0,0 +1,35 @@ +:root { + font-family: "IBM Plex Sans", "Segoe UI", sans-serif; + color: #f2efe9; + background: + radial-gradient(circle at top left, rgba(214, 104, 56, 0.18), transparent 28%), + linear-gradient(180deg, #121315 0%, #171a1d 100%); + color-scheme: dark; + --panel: rgba(18, 20, 24, 0.82); + --panel-strong: rgba(24, 27, 31, 0.94); + --border: rgba(255, 255, 255, 0.1); + --text-muted: #bdb6ab; + --accent: #d66838; + --accent-soft: rgba(214, 104, 56, 0.18); + --good: #63b67a; + --warn: #d5a64b; + --bad: #c86565; + --shadow: 0 24px 80px rgba(0, 0, 0, 0.35); +} + +html, body, #app { + margin: 0; + min-height: 100%; +} + +body { + min-height: 100vh; +} + +button, input, textarea { + font: inherit; +} + +button { + cursor: pointer; +} diff --git a/frontend/src/assets/logo.png b/frontend/src/assets/logo.png new file mode 100644 index 0000000..ead0cc3 Binary files /dev/null and b/frontend/src/assets/logo.png differ diff --git a/frontend/src/components/DebugPanel.svelte b/frontend/src/components/DebugPanel.svelte new file mode 100644 index 0000000..611b963 --- /dev/null +++ b/frontend/src/components/DebugPanel.svelte @@ -0,0 +1,134 @@ + + + + + diff --git a/frontend/src/components/DocsPanel.svelte b/frontend/src/components/DocsPanel.svelte new file mode 100644 index 0000000..42c53b2 --- /dev/null +++ b/frontend/src/components/DocsPanel.svelte @@ -0,0 +1,100 @@ + + +
+ + +
+ {#if selectedDoc} +

{selectedDoc.title}

+
{selectedDoc.content}
+ {:else} +

Select an unlocked document to read it.

+ {/if} +
+
+ + diff --git a/frontend/src/components/HeaderBar.svelte b/frontend/src/components/HeaderBar.svelte new file mode 100644 index 0000000..28b89af --- /dev/null +++ b/frontend/src/components/HeaderBar.svelte @@ -0,0 +1,182 @@ + + +
+
+ +
+

Axiom Works Internal

+

Sysadmin Chronicles

+
+
+ +
+
+ Ops Standing + {trustLabel(state?.trust ?? 50)} +
+ +
+
+ +
+ Shift #{state?.shiftNumber ?? 1} + {formatRemaining(state?.shift?.remainingSeconds ?? 0)} + {connection} +
+ +
+ Certifications + {state?.certifications?.length ?? 0} + {state?.shiftHistory?.length ?? 0} reviews archived +
+ +
+ Your portrait { e.currentTarget.src = '/public/portraits/player-silhouette.png'; }} + > +
+
+
+ + diff --git a/frontend/src/components/MailPanel.svelte b/frontend/src/components/MailPanel.svelte new file mode 100644 index 0000000..09efaea --- /dev/null +++ b/frontend/src/components/MailPanel.svelte @@ -0,0 +1,192 @@ + + +
+ + +
+ {#if selectedMail} +
+
+

{selectedMail.subject}

+

{selectedMail.from}

+
+ + {selectedMail.read ? 'Read' : 'Unread'} + +
+ +
{selectedMail.body}
+ + {#if selectedMail.attachments?.length} +
+

Attachments

+
+ {#each selectedMail.attachments as attachment} + + {/each} +
+
+ {/if} + + {#if selectedMail.reply_options?.length && !selectedMail.replied} +
+

Reply

+
+ {#each selectedMail.reply_options as option, index} + + {/each} +
+
+ {/if} + {:else} +

Select a message to read it.

+ {/if} +
+
+ + diff --git a/frontend/src/components/ProfilePanel.svelte b/frontend/src/components/ProfilePanel.svelte new file mode 100644 index 0000000..6780c39 --- /dev/null +++ b/frontend/src/components/ProfilePanel.svelte @@ -0,0 +1,344 @@ + + +
+
+ Your portrait { e.currentTarget.src = '/public/portraits/player-silhouette.png'; }} + > +
+

Your Portrait

+

Select a portrait to use in the HUD header.

+
+
+ +
+ {#each PORTRAITS as id} + + {/each} +
+
+ +
+
+
+

Certifications

+ {certifications.length} earned +
+ + {#if certifications.length === 0} +

No internal certifications awarded yet.

+ {:else} +
+ {#each certifications as certification} +
+ {certification.title} +

{certification.description}

+ {new Date(certification.awarded_at).toLocaleString()} +
+ {/each} +
+ {/if} +
+ +
+
+

Shift Reviews

+ {shiftHistory.length} archived +
+ + {#if currentShiftStats} +
+

Current Shift

+
+
+ Assigned + {(currentShiftStats.assigned_ticket_ids ?? []).length} +
+
+ Resolved + {(currentShiftStats.resolved_tickets ?? []).length} +
+
+ Flagged + {(currentShiftStats.flagged_issues ?? []).length} +
+
+
+ {/if} + + {#if shiftHistory.length === 0} +

No reviews yet. Finish a shift to see Priya's assessment.

+ {:else} +
+ {#each [...shiftHistory].reverse() as review} +
+
+ Shift {review.shift_number} + {prettyTier(review.performance_tier)} +
+

Resolved {review.tickets_resolved}/{review.tickets_assigned}

+

Average resolution: {formatDuration(review.average_resolution_seconds)}

+

Flagged issues: {(review.flagged_issues ?? []).length}

+ {new Date(review.ended_at).toLocaleString()} +
+ {/each} +
+ {/if} +
+
+ + diff --git a/frontend/src/components/SagePanel.svelte b/frontend/src/components/SagePanel.svelte new file mode 100644 index 0000000..8b4e239 --- /dev/null +++ b/frontend/src/components/SagePanel.svelte @@ -0,0 +1,133 @@ + + +
+
+ {#if conversation.length === 0} +
+ Sage +

Ask for a hint, a task summary, the target VM, or relevant docs.

+
+ {:else} + {#each conversation as entry} +
+ {entry.role === 'user' ? 'You' : 'Sage'} +

{entry.body}

+ {#if entry.followUps?.length} +
+ {#each entry.followUps as followUp} + + {/each} +
+ {/if} +
+ {/each} + {/if} +
+ +
submit()}> + + +
+
+ + diff --git a/frontend/src/components/SidebarTabs.svelte b/frontend/src/components/SidebarTabs.svelte new file mode 100644 index 0000000..6863d94 --- /dev/null +++ b/frontend/src/components/SidebarTabs.svelte @@ -0,0 +1,73 @@ + + + + + diff --git a/frontend/src/components/TicketsPanel.svelte b/frontend/src/components/TicketsPanel.svelte new file mode 100644 index 0000000..9bdc641 --- /dev/null +++ b/frontend/src/components/TicketsPanel.svelte @@ -0,0 +1,201 @@ + + +
+ + +
+ {#if selectedTicket} +
+
+

{selectedTicket.id}

+

{selectedTicket.subject}

+
+ + +
+ +
+
+
Status
+
{selectedTicket.status}
+
+
+
Priority
+
{selectedTicket.current_priority ?? selectedTicket.priority}
+
+
+
Linked Quest
+
{selectedTicket.linked_quest}
+
+
+ + {#if selectedTicket.body} +
{selectedTicket.body}
+ {:else} +

Quest-linked ticket. Use the workstation and resolve the underlying system state.

+ {/if} + {:else} +

Select a ticket to inspect its details.

+ {/if} +
+
+ + diff --git a/frontend/src/components/VmsPanel.svelte b/frontend/src/components/VmsPanel.svelte new file mode 100644 index 0000000..4aa6425 --- /dev/null +++ b/frontend/src/components/VmsPanel.svelte @@ -0,0 +1,92 @@ + + +
+ {#each vms as vm} +
+
+
+

{vm.id}

+

{vm.hostname}

+
+ {vm.state} +
+ +
+
+
Domain
+
{vm.domain}
+
+
+
Access
+
{vm.unlocked ? 'available' : 'locked'}
+
+
+
+ {/each} +
+
+
diff --git a/frontend/src/lib/api.js b/frontend/src/lib/api.js
new file mode 100644
index 0000000..d352600
--- /dev/null
+++ b/frontend/src/lib/api.js
@@ -0,0 +1,120 @@
+const STORAGE_KEY = 'sc-session-token';
+
+/** Return the cached session token, or null when none is stored. */
+function getStoredToken() {
+  return window.localStorage.getItem(STORAGE_KEY);
+}
+
+/** Cache a session token in localStorage for later requests. */
+function storeToken(token) {
+  window.localStorage.setItem(STORAGE_KEY, token);
+}
+
+/**
+ * Fetch a fresh session token from /api/session and cache it.
+ * Throws if the endpoint responds with a non-OK status.
+ */
+async function requestSessionToken() {
+  const response = await fetch('/api/session');
+  if (!response.ok) {
+    throw new Error(`Failed to create session: ${response.status}`);
+  }
+
+  const payload = await response.json();
+  storeToken(payload.token);
+  return payload.token;
+}
+
+/** Resolve to the cached token, requesting a new one only when none is stored. */
+export async function ensureSession() {
+  return getStoredToken() ?? await requestSessionToken();
+}
+
+/**
+ * Call an API path with the session token attached as a Bearer header.
+ *
+ * A JSON Content-Type is added when a body is present and none was supplied.
+ * On a 401 the token is refreshed and the request is retried exactly once.
+ * Resolves to the parsed JSON body, or null for a 204 response; throws an
+ * Error carrying the server's `error` field (or the raw body) otherwise.
+ */
+async function authenticatedFetch(path, options = {}, retry = true) {
+  const token = await ensureSession();
+  const headers = new Headers(options.headers ?? {});
+  headers.set('Authorization', `Bearer ${token}`);
+  if (options.body && !headers.has('Content-Type')) {
+    headers.set('Content-Type', 'application/json');
+  }
+
+  const response = await fetch(path, {
+    ...options,
+    headers
+  });
+
+  if (response.status === 401 && retry) {
+    const nextToken = await requestSessionToken();
+    return await authenticatedFetch(path, {
+      ...options,
+      headers: {
+        ...(options.headers ?? {}),
+        Authorization: `Bearer ${nextToken}`
+      }
+    }, false);
+  }
+
+  if (!response.ok) {
+    // A response body can only be consumed once, so read it as text first
+    // and then attempt to parse it; calling text() after a failed json()
+    // would reject because the stream has already been read.
+    let detail = await response.text();
+    try {
+      const payload = JSON.parse(detail);
+      detail = payload.error ?? JSON.stringify(payload);
+    } catch {
+      // Body is not JSON (or not an object): keep the raw text as the detail.
+    }
+
+    throw new Error(detail || `Request failed: ${response.status}`);
+  }
+
+  if (response.status === 204) {
+    return null;
+  }
+
+  return await response.json();
+}
+
+/** Thin wrappers over authenticatedFetch, one per backend endpoint. */
+export const api = {
+  ensureSession,
+  getState: () => authenticatedFetch('/api/state'),
+  getTickets: () => authenticatedFetch('/api/tickets'),
+  getTicket: (id) => authenticatedFetch(`/api/tickets/${id}`),
+  completeTicket: (id, branchId = null) =>
+    authenticatedFetch(`/api/tickets/${id}/complete`, {
+      method: 'POST',
+      body: JSON.stringify(branchId ? { branchId } : {})
+    }),
+  getMail: () => authenticatedFetch('/api/mail'),
+  getMailById: (id) => authenticatedFetch(`/api/mail/${id}`),
+  markMailRead: (id) => authenticatedFetch(`/api/mail/${id}/read`, { method: 'POST' }),
+  replyMail: (id, choice) =>
+    authenticatedFetch(`/api/mail/${id}/reply`, {
+      method: 'POST',
+      body: JSON.stringify({ choice })
+    }),
+  getDocs: () => authenticatedFetch('/api/docs'),
+  getDoc: (id) => authenticatedFetch(`/api/docs/${id}`),
+  getVms: () => authenticatedFetch('/api/vms'),
+  askSage: (message) =>
+    authenticatedFetch('/api/sage/message', {
+      method: 'POST',
+      body: JSON.stringify({ message })
+    }),
+  getProfile: () => authenticatedFetch('/api/profile'),
+  setProfile: (portrait) =>
+    authenticatedFetch('/api/profile', {
+      method: 'PUT',
+      body: JSON.stringify({ portrait })
+    })
+};
diff --git a/frontend/src/main.js b/frontend/src/main.js
new file mode 100644
index 0000000..09f41e9
--- /dev/null
+++ b/frontend/src/main.js
@@ -0,0 +1,8 @@
+import './app.css';
+import App from './App.svelte';
+
+const app = new App({
+  target: document.getElementById('app')
+});
+
+export default app;
diff --git a/frontend/vite.config.js b/frontend/vite.config.js
new file mode 100644
index 0000000..db8c4a9
--- /dev/null
+++ b/frontend/vite.config.js
@@ -0,0 +1,15 @@
+import { defineConfig } from 'vite';
+import { svelte } from '@sveltejs/vite-plugin-svelte';
+
+export default defineConfig({ + plugins: [svelte({ + compilerOptions: { + compatibility: { + componentApi: 4 + } + } + })], + build: { + outDir: 'dist' + } +}); diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..5da5497 --- /dev/null +++ b/install.sh @@ -0,0 +1,385 @@ +#!/usr/bin/env bash +# Sysadmin Chronicles — Installer +# Usage: bash install.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$SCRIPT_DIR" + +source "$PROJECT_ROOT/tools/lib/ui.sh" +source "$PROJECT_ROOT/tools/lib/config.sh" +source "$PROJECT_ROOT/tools/lib/deps.sh" +source "$PROJECT_ROOT/tools/lib/libvirt.sh" +source "$PROJECT_ROOT/tools/lib/vm.sh" + +OWNER_USER="${SUDO_USER:-$USER}" +OWNER_HOME="$(getent passwd "$OWNER_USER" | cut -d: -f6)" +OWNER_HOME="${OWNER_HOME:-$HOME}" + +SC_LOG_DIR="$OWNER_HOME/.local/share/sysadmin-chronicles" +export SC_INSTALL_LOG="$SC_LOG_DIR/install.log" +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +# --------------------------------------------------------------------------- +# Phase 1 — Welcome +# --------------------------------------------------------------------------- + +sc_header "SYSADMIN CHRONICLES — SETUP" +cat << 'WELCOME' +Welcome! This installer will: + • Install a few system tools (KVM, QEMU, libvirt) + • Set up a private virtual network for the game + • Build three virtual machines (~30 minutes, once only) + +WELCOME + +DEFAULT_GAME_DIR="$OWNER_HOME/Games/sysadmin-chronicles" +[ "$PROJECT_ROOT" = "$DEFAULT_GAME_DIR" ] || DEFAULT_GAME_DIR="$PROJECT_ROOT" + +SC_GAME_DIR="$(sc_prompt "Where would you like to install the game?" 
"$DEFAULT_GAME_DIR")" +while [[ "$SC_GAME_DIR" == *//* ]]; do + SC_GAME_DIR="${SC_GAME_DIR//\/\//\/}" +done +while [ "$SC_GAME_DIR" != "/" ] && [ "${SC_GAME_DIR%/}" != "$SC_GAME_DIR" ]; do + SC_GAME_DIR="${SC_GAME_DIR%/}" +done +echo "" +SC_IMAGES_DIR="$SC_GAME_DIR/images" + +# Build scripts normally default to /var/lib/libvirt when using qemu:///system. +# The installer asks for a custom game directory, so force the VM tooling to use +# the libvirt storage pool path selected above instead of silently falling back +# to /var/lib/libvirt/images/sysadmin-chronicles. +export SC_HOME="$SC_LOG_DIR" +export SC_IMAGE_ROOT="$SC_IMAGES_DIR" +export SC_POOL_NAME="sc-images" +export SC_NETWORK_NAME="sc-internal" +export SC_OWNER_USER="$OWNER_USER" +export SC_OWNER_HOME="$OWNER_HOME" + +sc_log_append() { + mkdir -p "$SC_LOG_DIR" + { + printf '\n[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" + } >> "$SC_INSTALL_LOG" 2>/dev/null || true +} + +sc_log_cmd() { + mkdir -p "$SC_LOG_DIR" + { + printf '\n[%s] $' "$(date '+%Y-%m-%d %H:%M:%S')" + printf ' %q' "$@" + printf '\n' + "$@" + } >> "$SC_INSTALL_LOG" 2>&1 +} + + +# --------------------------------------------------------------------------- +# Phase 2 — Silent system check +# --------------------------------------------------------------------------- + +detect_distro + +if [ "$SC_DISTRO" = "unknown" ]; then + sc_warn "Could not detect your Linux distribution." + sc_warn "Dependency auto-install may not work — you may need to install packages manually." 
+ echo "" +fi + +mapfile -t _missing_deps < <(check_deps 2>/dev/null || true) + +# --------------------------------------------------------------------------- +# Phase 3 — Dependency install (only if needed) +# --------------------------------------------------------------------------- + +if [ "${#_missing_deps[@]}" -gt 0 ]; then + sc_section "System check" + echo "" + echo " Your system is missing the following tools:" + echo "" + _shown_labels="" + for _dep in "${_missing_deps[@]}"; do + [ -z "$_dep" ] && continue + _label="$(dep_label "$_dep")" + [ -z "$_label" ] && continue + case "|${_shown_labels}|" in + *"|${_label}|"*) ;; + *) echo " • $_label"; _shown_labels="${_shown_labels}|${_label}" ;; + esac + done + echo "" + if sc_confirm "Install them now? You'll be asked for your password." "Y"; then + echo "" + mkdir -p "$SC_LOG_DIR" + # Write log header + cat > "$SC_INSTALL_LOG" << EOF +# Sysadmin Chronicles — Install Log +# Created: $(date '+%Y-%m-%d %H:%M:%S') +# Distro: ${SC_DISTRO} ($(uname -r)) +# Game dir: $SC_GAME_DIR +# Images: $SC_IMAGES_DIR + +EOF + install_deps "${_missing_deps[@]}" + log_present_deps + echo "" + # Append manual removal hint + { + echo "" + echo "# To remove manually:" + printf '# sudo %s\n' "$(case "$SC_DISTRO" in + arch) echo 'pacman -Rns libvirt qemu-system-x86 virt-install virt-viewer' ;; + debian|ubuntu) echo 'apt-get remove libvirt-daemon-system qemu-kvm virt-manager' ;; + fedora) echo 'dnf remove libvirt qemu-kvm virt-install' ;; + *) echo 'remove the packages listed above' ;; + esac)" + } >> "$SC_INSTALL_LOG" + sc_ok "Dependencies installed." + echo "" + echo " Install log: $SC_INSTALL_LOG" + echo "" + else + echo "" + sc_warn "Skipping dependency install. Some features may not work." + echo "" + fi +else + mkdir -p "$SC_LOG_DIR" + if [ ! 
-f "$SC_INSTALL_LOG" ]; then + cat > "$SC_INSTALL_LOG" << EOF +# Sysadmin Chronicles — Install Log +# Created: $(date '+%Y-%m-%d %H:%M:%S') +# Distro: ${SC_DISTRO} ($(uname -r)) +# Game dir: $SC_GAME_DIR +# Images: $SC_IMAGES_DIR + +EOF + log_present_deps + fi +fi + +# --------------------------------------------------------------------------- +# Phase 4 — Network, storage, and SSH key setup +# --------------------------------------------------------------------------- + +# Ensure libvirtd is running before touching networks or pools. +# Pacman installs the socket unit but does not start/enable it. +_libvirtd_ready=false +_need_relogin=false + +# Ensure user is in the libvirt group (needed for virsh access without sudo) +if ! groups "$OWNER_USER" 2>/dev/null | grep -qw libvirt; then + sc_info "Adding $OWNER_USER to the libvirt group..." + sudo usermod -aG libvirt "$OWNER_USER" 2>/dev/null || true + _need_relogin=true +fi + +# Clear any failed unit state from a previous attempt +sudo systemctl reset-failed libvirtd.service libvirtd.socket 2>/dev/null || true + +if ! systemctl is-active --quiet libvirtd.service 2>/dev/null; then + sc_info "Starting virtual machine daemon..." + # Enable the socket for future boots, start the service directly now + sudo systemctl enable libvirtd.socket >/dev/null 2>&1 || true + if ! sudo systemctl start libvirtd.service >/dev/null 2>&1; then + echo "" + echo " libvirtd failed to start. Details:" + sudo systemctl status libvirtd.service --no-pager -l 2>&1 | tail -25 | sed 's/^/ /' + echo "" + sc_fail "Fix the error above, then run install.sh again." + fi +fi + +# Wait up to 30s for libvirtd.service to reach active state (pure systemd +# check — no virsh connection needed, works regardless of group membership +# in the current session). 
+for _i in $(seq 1 60); do + if systemctl is-active --quiet libvirtd.service 2>/dev/null; then + _libvirtd_ready=true + break + fi + sleep 0.5 +done + +if [ "$_libvirtd_ready" != true ]; then + echo "" + echo " libvirtd did not reach active state. Current status:" + sudo systemctl status libvirtd.service --no-pager -l 2>&1 | tail -25 | sed 's/^/ /' + echo "" + sc_fail "Fix the error above, then run install.sh again." +fi + +# If the group was just added, the current shell does not have it yet. The +# install can still proceed by using sudo for virsh operations. +if virsh -c "$LIBVIRT_DEFAULT_URI" list >/dev/null 2>&1; then + export SC_VIRSH_SUDO=false +else + export SC_VIRSH_SUDO=true + sc_log_append "virsh is not usable by $OWNER_USER in this session; using sudo for installer libvirt operations." +fi + +sc_log_append "Installer paths: SC_GAME_DIR=$SC_GAME_DIR SC_IMAGES_DIR=$SC_IMAGES_DIR SC_IMAGE_ROOT=$SC_IMAGE_ROOT LIBVIRT_DEFAULT_URI=$LIBVIRT_DEFAULT_URI SC_VIRSH_SUDO=${SC_VIRSH_SUDO:-false}" +sc_log_cmd id || true +sc_log_cmd groups "$OWNER_USER" || true +sc_log_cmd systemctl is-active libvirtd.service || true + +sc_section "Setting up game network" +echo "" + +NETWORK_XML="$PROJECT_ROOT/tools/vm/network-sc-internal.xml" + +sc_spinner "Creating private game network" +if ! ensure_network "sc-internal" "$NETWORK_XML"; then + sc_spinner_stop + echo "" + sc_fail "Could not create the game network. + + Make sure the virtual machine tools are installed and running: + sudo systemctl enable --now libvirtd.socket + + Then run install.sh again." +fi +sc_spinner_stop +sc_ok "Private game network created" + +sc_spinner "Configuring VM image storage" +if ! ensure_pool "sc-images" "$SC_IMAGES_DIR"; then + sc_spinner_stop + sc_fail "Could not configure image storage at $SC_IMAGES_DIR" +fi +sc_spinner_stop +sc_ok "VM image storage configured at $SC_IMAGES_DIR" + +# Pre-create subdirectories used by tools/vm/lib/common.sh. 
Since these live +# under the user-selected install path, they should be user-writable. This also +# fails early with a useful message instead of producing a one-line build log. +if ! mkdir -p "$SC_IMAGES_DIR/base" "$SC_IMAGES_DIR/seed"; then + sc_fail "Could not create VM image directories under $SC_IMAGES_DIR" +fi +chown -R "$OWNER_USER:$OWNER_USER" "$SC_IMAGES_DIR" 2>/dev/null || true +sc_log_cmd ls -lah "$SC_IMAGES_DIR" || true + +SC_SSH_KEY="$OWNER_HOME/.ssh/sc_host_key" +if [ ! -f "$SC_SSH_KEY" ]; then + sc_spinner "Generating game access keys" + mkdir -p "$(dirname "$SC_SSH_KEY")" + ssh-keygen -t ed25519 -N "" -C "sysadmin-chronicles-host" -f "$SC_SSH_KEY" >/dev/null 2>&1 + chmod 600 "$SC_SSH_KEY" + chmod 644 "${SC_SSH_KEY}.pub" + chown "$OWNER_USER:$OWNER_USER" "$SC_SSH_KEY" "${SC_SSH_KEY}.pub" 2>/dev/null || true + sc_spinner_stop + sc_ok "Game access keys generated" +else + sc_ok "Game access keys already present" +fi + +# Write config (survives game dir moves) +config_write SC_GAME_DIR "$SC_GAME_DIR" +config_write SC_IMAGES_DIR "$SC_IMAGES_DIR" +config_write SC_LIBVIRT_URI "$LIBVIRT_DEFAULT_URI" +config_write SC_INSTALL_DATE "$(date '+%Y-%m-%d')" +echo "" + +# --------------------------------------------------------------------------- +# Phase 5 — VM build +# --------------------------------------------------------------------------- + +sc_section "Building your game world" +echo "" +echo " This happens once and takes about 30 minutes." +echo " You can leave this running in the background." 
+echo "" + +_build_vm() { + local label="$1" + local n="$2" + local total="$3" + local profile="$4" + local logfile="$SC_LOG_DIR/build-${profile}.log" + local start_ts elapsed mins secs + start_ts="$(date +%s)" + printf " Building %-20s (%d/%d) " "$label" "$n" "$total" + { + echo "# Sysadmin Chronicles VM build log" + echo "# Created: $(date '+%Y-%m-%d %H:%M:%S')" + echo "# Profile: $profile" + echo "# Game dir: $SC_GAME_DIR" + echo "# Images: $SC_IMAGES_DIR" + echo "# SC_IMAGE_ROOT: $SC_IMAGE_ROOT" + echo "# LIBVIRT_DEFAULT_URI: $LIBVIRT_DEFAULT_URI" + echo "# SC_VIRSH_SUDO: ${SC_VIRSH_SUDO:-false}" + echo "" + } > "$logfile" + if vm_build "$profile" >> "$logfile" 2>&1; then + elapsed=$(( $(date +%s) - start_ts )) + mins=$(( elapsed / 60 )) + secs=$(( elapsed % 60 )) + printf "✓ %dm %02ds\n" "$mins" "$secs" + else + printf "✗\n" + sc_warn "Build failed — see $logfile" + return 1 + fi +} + +echo "Generating TLS certificates..." +bash "$PROJECT_ROOT/tools/setup/generate-certs.sh" +echo "" + +_build_vm "workstation" 1 3 "workstation" +_build_vm "web server" 2 3 "web-server" +_build_vm "build server" 3 3 "build-machine" + +printf " Setting up quest scenarios " +bash "$PROJECT_ROOT/tools/setup/seed-vms.sh" --skip-build \ + > "$SC_LOG_DIR/seed-vms.log" 2>&1 && printf "✓\n" || { printf "✗\n"; sc_warn "Quest setup had errors — see $SC_LOG_DIR/seed-vms.log"; } +echo "" + +# --------------------------------------------------------------------------- +# Phase 6 — Application menu entry +# --------------------------------------------------------------------------- + +sc_section "Application menu launcher" +echo "" +if sc_confirm "Create an application-menu launcher? This does not add a desktop icon." 
"Y"; then + DESKTOP_FILE="$OWNER_HOME/.local/share/applications/sysadmin-chronicles.desktop" + mkdir -p "$(dirname "$DESKTOP_FILE")" + cat > "$DESKTOP_FILE" << EOF +[Desktop Entry] +Name=Sysadmin Chronicles +Comment=A sysadmin simulation game +Exec=bash $SC_GAME_DIR/start-game.sh +Terminal=true +Type=Application +Categories=Game; +EOF + sc_ok "Application-menu launcher created" +fi +echo "" + +# --------------------------------------------------------------------------- +# Phase 7 — Done +# --------------------------------------------------------------------------- + +sc_done_banner +cat << EOF + Start the game: + bash $SC_GAME_DIR/start-game.sh + (or from your system application menu if you created the launcher; no desktop icon is installed) + + Rebuild the virtual machines at any time: + bash $SC_GAME_DIR/tools/vm/rebuild-vms.sh + + Install log: + $SC_INSTALL_LOG + +EOF + +if [ "$_need_relogin" = true ]; then + sc_warn "You were added to the 'libvirt' group during install." + sc_warn "Log out and back in before running the game, or run:" + echo " newgrp libvirt" + echo "" +fi diff --git a/package.json b/package.json new file mode 100644 index 0000000..4ddc984 --- /dev/null +++ b/package.json @@ -0,0 +1,16 @@ +{ + "name": "sysadmin-chronicles-tools", + "version": "0.1.0", + "description": "Content validation and tooling for Sysadmin Chronicles", + "private": true, + "scripts": { + "validate": "node tools/content/validate-content.js", + "validate:verbose": "node tools/content/validate-content.js --verbose", + "validate:quests": "node tools/content/validate-content.js --quests-only" + }, + "engines": { + "node": ">=18.0.0" + }, + "dependencies": {}, + "devDependencies": {} +} diff --git a/runtime/viewer/detect-viewer-backends.sh b/runtime/viewer/detect-viewer-backends.sh new file mode 100644 index 0000000..d96e5cf --- /dev/null +++ b/runtime/viewer/detect-viewer-backends.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# detect-viewer-backends.sh — Detect which VM viewer 
backends are available. +# +# Outputs a line for each detected backend: "vnc" or "spice" +# Used by DisplayAdapter.detect() to choose the right viewer at runtime. +# +# Exit code 0 always (even if nothing is found — caller handles empty output). + +set -euo pipefail + +# VNC viewers +if command -v virt-viewer &>/dev/null; then + echo "spice" + echo "vnc" +elif command -v remote-viewer &>/dev/null; then + echo "spice" + echo "vnc" +elif command -v xvncviewer &>/dev/null || command -v xtightvncviewer &>/dev/null || \ + command -v vinagre &>/dev/null || command -v vncviewer &>/dev/null; then + echo "vnc" +fi + +# SPICE standalone +if command -v spicy &>/dev/null; then + echo "spice" +fi + +exit 0 diff --git a/runtime/viewer/launch-spice-viewer.sh b/runtime/viewer/launch-spice-viewer.sh new file mode 100644 index 0000000..a74f723 --- /dev/null +++ b/runtime/viewer/launch-spice-viewer.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# launch-spice-viewer.sh — Launch a SPICE viewer for a libvirt domain. +# +# Usage: bash launch-spice-viewer.sh [vnc_display_fallback] + +DOMAIN="${1:-}" +LIBVIRT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +if command -v virt-viewer &>/dev/null; then + exec virt-viewer --connect "$LIBVIRT_URI" "$DOMAIN" +elif command -v remote-viewer &>/dev/null; then + # Get SPICE port from virsh + SPICE_URI=$(virsh --connect "$LIBVIRT_URI" domdisplay "$DOMAIN" 2>/dev/null | grep spice | head -1) + if [ -n "$SPICE_URI" ]; then + exec remote-viewer "$SPICE_URI" + else + echo "ERROR: No SPICE display found for $DOMAIN" >&2 + exit 1 + fi +elif command -v spicy &>/dev/null; then + SPICE_PORT=$(virsh --connect "$LIBVIRT_URI" domdisplay "$DOMAIN" 2>/dev/null | grep -oP '(?<=:)\d+' | head -1) + exec spicy -h 127.0.0.1 -p "${SPICE_PORT:-5900}" +else + echo "ERROR: No SPICE viewer found. Install virt-viewer." 
>&2
+    exit 1
+fi
diff --git a/runtime/viewer/launch-vnc-viewer.sh b/runtime/viewer/launch-vnc-viewer.sh
new file mode 100644
index 0000000..747933a
--- /dev/null
+++ b/runtime/viewer/launch-vnc-viewer.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# launch-vnc-viewer.sh — Launch a VNC viewer for a libvirt domain.
+#
+# Usage: bash launch-vnc-viewer.sh DOMAIN [VNC_DISPLAY]
+# Example: bash launch-vnc-viewer.sh sc-web-server :0
+#
+# Tries: virt-viewer → vncviewer → xtightvncviewer → vinagre (in order)
+# Exits 1 if none found or the display argument is malformed.
+
+DOMAIN="${1:-}"
+VNC_DISPLAY="${2:-:0}"
+# Use the same libvirt connection as the SPICE launcher so virt-viewer can
+# see the domain; honours LIBVIRT_DEFAULT_URI when it is set.
+LIBVIRT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}"
+
+# Convert :N display to port 5900+N. The number is validated first so that
+# arbitrary text never reaches shell arithmetic; 10# forces base 10 for "08".
+DISPLAY_NUM="${VNC_DISPLAY#:}"
+if ! [[ "$DISPLAY_NUM" =~ ^[0-9]+$ ]]; then
+    echo "ERROR: Invalid VNC display '$VNC_DISPLAY' (expected :N)" >&2
+    exit 1
+fi
+PORT=$((5900 + 10#$DISPLAY_NUM))
+HOST="127.0.0.1"
+
+if command -v virt-viewer &>/dev/null; then
+    exec virt-viewer --connect "$LIBVIRT_URI" "$DOMAIN"
+elif command -v vncviewer &>/dev/null; then
+    exec vncviewer "${HOST}:${PORT}"
+elif command -v xtightvncviewer &>/dev/null; then
+    exec xtightvncviewer "${HOST}:${PORT}"
+elif command -v vinagre &>/dev/null; then
+    exec vinagre "vnc://${HOST}:${PORT}"
+else
+    echo "ERROR: No VNC viewer found. Install virt-viewer or vncviewer." 
>&2 + exit 1 +fi diff --git a/sage/app.js b/sage/app.js new file mode 100644 index 0000000..6dc86a3 --- /dev/null +++ b/sage/app.js @@ -0,0 +1,317 @@ +/* Sage — knowledge base app */ + +let allArticles = []; +let navIndex = null; +let currentArticleId = null; + +async function loadData() { + const [indexRes, ...articleRes] = await Promise.all([ + fetch('/sage/api/_index.json'), + ...ARTICLE_IDS.map(id => fetch(`/sage/api/${id}.json`)) + ]); + navIndex = await indexRes.json(); + allArticles = await Promise.all(articleRes.map(r => r.json())); +} + +const ARTICLE_IDS = [ + 'ssh-keys', 'ssh-access-controls', 'nginx-config', + 'disk-logs', 'file-permissions', 'cron-jobs', + 'time-sync', 'package-management' +]; + +const CATEGORY_LABELS = { + access: 'Access & Authentication', + web: 'Web Services', + storage: 'Storage & Logs', + sysadmin: 'System Administration', + packages: 'Package Management' +}; + +// ── Sidebar ─────────────────────────────────────────────────────────────────── + +function buildNav() { + const sidebar = document.getElementById('sidebar'); + sidebar.innerHTML = ''; + + // Home link + const homeWrap = document.createElement('div'); + homeWrap.className = 'nav-section'; + const homeLink = document.createElement('a'); + homeLink.className = 'nav-link'; + homeLink.textContent = '⌂ Home'; + homeLink.dataset.home = '1'; + homeLink.onclick = (e) => { e.preventDefault(); showHome(); }; + homeWrap.appendChild(homeLink); + sidebar.appendChild(homeWrap); + + navIndex.categories.forEach(cat => { + const section = document.createElement('div'); + section.className = 'nav-section'; + + const label = document.createElement('div'); + label.className = 'nav-category'; + label.textContent = cat.label; + section.appendChild(label); + + cat.articles.forEach(id => { + const article = allArticles.find(a => a.id === id); + if (!article) return; + const link = document.createElement('a'); + link.className = 'nav-link'; + link.textContent = article.title; + 
link.dataset.articleId = id; + link.onclick = (e) => { e.preventDefault(); showArticle(id); }; + section.appendChild(link); + }); + + sidebar.appendChild(section); + }); +} + +function setActiveNav(articleId) { + document.querySelectorAll('.nav-link').forEach(el => { + el.classList.toggle('active', + articleId ? el.dataset.articleId === articleId : !!el.dataset.home + ); + }); +} + +// ── Home ────────────────────────────────────────────────────────────────────── + +function showHome() { + currentArticleId = null; + clearSearch(); + setActiveNav(null); + const main = document.getElementById('main'); + main.classList.remove('hidden'); + document.getElementById('search-results').classList.remove('visible'); + + main.innerHTML = ''; + const home = document.createElement('div'); + home.id = 'home-page'; + + const h1 = document.createElement('h1'); + h1.textContent = 'Sage — Internal Knowledge Base'; + const subtitle = document.createElement('p'); + subtitle.className = 'home-subtitle'; + subtitle.textContent = 'Runbooks, reference guides, and procedures for Axiom Works infrastructure.'; + + home.appendChild(h1); + home.appendChild(subtitle); + + const grid = document.createElement('div'); + grid.className = 'home-grid'; + + navIndex.categories.forEach(cat => { + const catLabel = document.createElement('div'); + catLabel.className = 'home-category-label'; + catLabel.textContent = cat.label; + home.appendChild(catLabel); + + const catGrid = document.createElement('div'); + catGrid.className = 'home-grid'; + + cat.articles.forEach(id => { + const article = allArticles.find(a => a.id === id); + if (!article) return; + const card = document.createElement('div'); + card.className = 'home-card'; + card.innerHTML = ` +
${esc(article.title)}
+
${esc(article.summary)}
+ `; + card.onclick = () => showArticle(id); + catGrid.appendChild(card); + }); + + home.appendChild(catGrid); + }); + + main.appendChild(home); + main.scrollTop = 0; +} + +// ── Article ─────────────────────────────────────────────────────────────────── + +function showArticle(id) { + const article = allArticles.find(a => a.id === id); + if (!article) return; + currentArticleId = id; + clearSearch(); + setActiveNav(id); + + const main = document.getElementById('main'); + main.classList.remove('hidden'); + document.getElementById('search-results').classList.remove('visible'); + + const catLabel = CATEGORY_LABELS[article.category] ?? article.category; + + let html = ` +

${esc(article.title)}

+ +

${esc(article.summary)}

+ `; + + article.sections.forEach(section => { + html += `
`; + if (section.heading) { + html += `

${esc(section.heading)}

`; + } + if (section.body) { + html += `
${section.body}
`; + } + if (section.code) { + html += `
${esc(section.code)}
`; + } + html += `
`; + }); + + if (article.tags?.length) { + html += ``; + } + + main.innerHTML = html; + main.scrollTop = 0; +} + +// ── Search ──────────────────────────────────────────────────────────────────── + +let searchTimer = null; + +function onSearchInput(e) { + const q = e.target.value.trim(); + clearTimeout(searchTimer); + if (!q) { clearSearch(); return; } + searchTimer = setTimeout(() => runSearch(q), 120); +} + +function clearSearch() { + document.getElementById('search').value = ''; + document.getElementById('search-results').classList.remove('visible'); + const main = document.getElementById('main'); + main.classList.remove('hidden'); +} + +function runSearch(q) { + const terms = q.toLowerCase().split(/\s+/).filter(Boolean); + const results = []; + + for (const article of allArticles) { + const haystack = [ + article.title, + article.summary, + ...(article.tags ?? []), + ...article.sections.flatMap(s => [s.heading ?? '', textFromHtml(s.body ?? ''), s.code ?? '']) + ].join(' ').toLowerCase(); + + const score = terms.filter(t => haystack.includes(t)).length; + if (score === 0) continue; + + // Find a snippet with the first matching term + let snippet = article.summary; + const firstTerm = terms[0]; + for (const section of article.sections) { + const text = textFromHtml(section.body ?? '') + ' ' + (section.code ?? ''); + const idx = text.toLowerCase().indexOf(firstTerm); + if (idx !== -1) { + const start = Math.max(0, idx - 60); + const end = Math.min(text.length, idx + 120); + snippet = (start > 0 ? '…' : '') + text.slice(start, end) + (end < text.length ? '…' : ''); + break; + } + } + + results.push({ article, score, snippet }); + } + + results.sort((a, b) => b.score - a.score); + + const container = document.getElementById('search-results'); + container.classList.add('visible'); + document.getElementById('main').classList.add('hidden'); + + if (results.length === 0) { + container.innerHTML = `
No articles matched "${esc(q)}".
`; + return; + } + + container.innerHTML = results.map(({ article, snippet }) => { + const highlighted = highlightTerms(esc(snippet), terms); + return ` +
+
${esc(article.title)}
+
${highlighted}
+
+ `; + }).join(''); + + container.querySelectorAll('.search-result').forEach(el => { + el.onclick = () => showArticle(el.dataset.id); + }); +} + +function highlightTerms(text, terms) { + let out = text; + terms.forEach(term => { + const re = new RegExp(`(${escapeRegex(term)})`, 'gi'); + out = out.replace(re, '$1'); + }); + return out; +} + +// ── Utilities ───────────────────────────────────────────────────────────────── + +function esc(str) { + return String(str ?? '') + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); +} + +function textFromHtml(html) { + const div = document.createElement('div'); + div.innerHTML = html; + return div.textContent ?? ''; +} + +function escapeRegex(str) { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +// ── Init ────────────────────────────────────────────────────────────────────── + +async function init() { + const main = document.getElementById('main'); + main.innerHTML = '

Loading…

'; + + try { + await loadData(); + } catch (err) { + main.innerHTML = `

Failed to load knowledge base: ${esc(err.message)}

`; + return; + } + + buildNav(); + showHome(); + + document.getElementById('search').addEventListener('input', onSearchInput); + + // Keyboard shortcut: / to focus search + document.addEventListener('keydown', e => { + if (e.key === '/' && document.activeElement !== document.getElementById('search')) { + e.preventDefault(); + document.getElementById('search').focus(); + } + if (e.key === 'Escape') { + clearSearch(); + if (currentArticleId) setActiveNav(currentArticleId); + else showHome(); + } + }); +} + +document.addEventListener('DOMContentLoaded', init); diff --git a/sage/index.html b/sage/index.html new file mode 100644 index 0000000..588266e --- /dev/null +++ b/sage/index.html @@ -0,0 +1,30 @@ + + + + + + Sage — Axiom Works KB + + + + + + +
+ + +
+
+
+ + + + diff --git a/sage/logo.png b/sage/logo.png new file mode 100644 index 0000000..ead0cc3 Binary files /dev/null and b/sage/logo.png differ diff --git a/sage/style.css b/sage/style.css new file mode 100644 index 0000000..d5bfaab --- /dev/null +++ b/sage/style.css @@ -0,0 +1,416 @@ +:root { + --bg: #0d1117; + --bg-sidebar: #161b22; + --bg-content: #0d1117; + --bg-code: #161b22; + --bg-hover: #1f2937; + --border: #30363d; + --text: #c9d1d9; + --text-muted: #6e7681; + --text-heading: #e6edf3; + --text-code: #79c0ff; + --accent: #388bfd; + --accent-soft: rgba(56, 139, 253, 0.12); + --accent-active: rgba(56, 139, 253, 0.2); + --tag-bg: #21262d; + --tag-text: #8b949e; + --good: #3fb950; + --font-sans: -apple-system, "Segoe UI", system-ui, sans-serif; + --font-mono: "JetBrains Mono", "Fira Code", "Cascadia Code", "Consolas", monospace; +} + +*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + +html, body { height: 100%; } + +body { + font-family: var(--font-sans); + font-size: 14px; + line-height: 1.6; + color: var(--text); + background: var(--bg); + display: flex; + flex-direction: column; +} + +/* ── Header ────────────────────────────────────────── */ +#header { + display: flex; + align-items: center; + gap: 16px; + padding: 0 20px; + height: 52px; + background: var(--bg-sidebar); + border-bottom: 1px solid var(--border); + flex-shrink: 0; + position: sticky; + top: 0; + z-index: 100; +} + +#header .header-logo { + height: 26px; + width: 26px; + border-radius: 5px; + flex-shrink: 0; +} + +#header .wordmark { + font-family: var(--font-mono); + font-size: 16px; + font-weight: 600; + color: var(--accent); + letter-spacing: -0.5px; + white-space: nowrap; +} + +#header .tagline { + font-size: 12px; + color: var(--text-muted); + border-left: 1px solid var(--border); + padding-left: 16px; +} + +#search-wrap { + margin-left: auto; + position: relative; +} + +#search { + background: var(--bg); + border: 1px solid var(--border); + border-radius: 6px; 
+ color: var(--text); + font-family: var(--font-sans); + font-size: 13px; + padding: 6px 12px 6px 32px; + width: 260px; + outline: none; + transition: border-color 0.15s, width 0.2s; +} + +#search::placeholder { color: var(--text-muted); } + +#search:focus { + border-color: var(--accent); + width: 320px; +} + +#search-icon { + position: absolute; + left: 10px; + top: 50%; + transform: translateY(-50%); + color: var(--text-muted); + font-size: 13px; + pointer-events: none; +} + +/* ── Layout ─────────────────────────────────────────── */ +#layout { + display: flex; + flex: 1; + min-height: 0; +} + +/* ── Sidebar ─────────────────────────────────────────── */ +#sidebar { + width: 220px; + flex-shrink: 0; + background: var(--bg-sidebar); + border-right: 1px solid var(--border); + overflow-y: auto; + padding: 16px 0; +} + +.nav-section { margin-bottom: 8px; } + +.nav-category { + font-size: 11px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.06em; + padding: 6px 16px 4px; +} + +.nav-link { + display: block; + padding: 5px 16px; + font-size: 13px; + color: var(--text); + text-decoration: none; + cursor: pointer; + border-left: 2px solid transparent; + transition: background 0.1s, border-color 0.1s; +} + +.nav-link:hover { + background: var(--bg-hover); + color: var(--text-heading); +} + +.nav-link.active { + background: var(--accent-active); + border-left-color: var(--accent); + color: var(--text-heading); +} + +/* ── Main content ─────────────────────────────────────── */ +#main { + flex: 1; + overflow-y: auto; + padding: 36px 48px; + max-width: 860px; +} + +/* ── Search results ──────────────────────────────────── */ +#search-results { + display: none; + flex: 1; + overflow-y: auto; + padding: 24px 48px; +} + +#search-results.visible { display: block; } +#main.hidden { display: none; } + +.search-result { + border: 1px solid var(--border); + border-radius: 6px; + padding: 16px 20px; + margin-bottom: 12px; + cursor: 
pointer; + transition: background 0.1s, border-color 0.1s; +} + +.search-result:hover { + background: var(--bg-hover); + border-color: var(--accent); +} + +.search-result-title { + font-size: 15px; + font-weight: 600; + color: var(--text-heading); + margin-bottom: 4px; +} + +.search-result-snippet { + font-size: 13px; + color: var(--text-muted); + line-height: 1.5; +} + +.search-result-snippet mark { + background: var(--accent-soft); + color: var(--text); + border-radius: 2px; + padding: 0 2px; +} + +.no-results { + color: var(--text-muted); + font-size: 14px; + padding: 24px 0; +} + +/* ── Article typography ──────────────────────────────── */ +.article-meta { + display: flex; + align-items: center; + gap: 12px; + margin-bottom: 24px; + font-size: 12px; + color: var(--text-muted); +} + +.article-updated::before { content: "Updated "; } + +.article-category-badge { + background: var(--tag-bg); + color: var(--tag-text); + border-radius: 20px; + padding: 2px 10px; + font-size: 11px; +} + +h1.article-title { + font-size: 24px; + font-weight: 700; + color: var(--text-heading); + line-height: 1.3; + margin-bottom: 6px; +} + +.article-summary { + font-size: 14px; + color: var(--text-muted); + margin-bottom: 32px; + padding-bottom: 24px; + border-bottom: 1px solid var(--border); +} + +.article-tags { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-top: 8px; +} + +.tag { + background: var(--tag-bg); + color: var(--tag-text); + border-radius: 4px; + padding: 2px 8px; + font-size: 11px; + font-family: var(--font-mono); +} + +.section { margin-bottom: 32px; } + +.section h2 { + font-size: 16px; + font-weight: 600; + color: var(--text-heading); + margin-bottom: 12px; + padding-bottom: 6px; + border-bottom: 1px solid var(--border); +} + +.section p { margin-bottom: 10px; } +.section p:last-child { margin-bottom: 0; } + +.section ul, .section ol { + padding-left: 20px; + margin-bottom: 10px; +} + +.section li { margin-bottom: 4px; } + +.section code { + font-family: 
var(--font-mono); + font-size: 12.5px; + color: var(--text-code); + background: var(--bg-code); + border: 1px solid var(--border); + border-radius: 4px; + padding: 1px 5px; +} + +.section pre { + background: var(--bg-code); + border: 1px solid var(--border); + border-radius: 6px; + padding: 16px; + overflow-x: auto; + margin: 12px 0; + font-family: var(--font-mono); + font-size: 12.5px; + line-height: 1.7; + color: var(--text-code); +} + +.section pre code { + background: none; + border: none; + padding: 0; + color: inherit; + font-size: inherit; +} + +.section table { + border-collapse: collapse; + width: 100%; + margin: 12px 0; + font-size: 13px; +} + +.section th { + background: var(--bg-sidebar); + color: var(--text-muted); + font-weight: 600; + text-align: left; + padding: 8px 12px; + border: 1px solid var(--border); +} + +.section td { + padding: 7px 12px; + border: 1px solid var(--border); +} + +.section tr:nth-child(even) td { background: rgba(255,255,255,0.02); } + +/* ── Home page ───────────────────────────────────────── */ +#home-page h1 { + font-size: 22px; + font-weight: 700; + color: var(--text-heading); + margin-bottom: 8px; +} + +#home-page .home-subtitle { + color: var(--text-muted); + margin-bottom: 32px; + font-size: 14px; +} + +.home-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); + gap: 12px; +} + +.home-card { + background: var(--bg-sidebar); + border: 1px solid var(--border); + border-radius: 6px; + padding: 16px; + cursor: pointer; + transition: background 0.1s, border-color 0.1s; +} + +.home-card:hover { + background: var(--bg-hover); + border-color: var(--accent); +} + +.home-card-title { + font-size: 14px; + font-weight: 600; + color: var(--text-heading); + margin-bottom: 4px; +} + +.home-card-summary { + font-size: 12px; + color: var(--text-muted); + line-height: 1.5; +} + +.home-category-label { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.06em; + color: 
var(--text-muted); + margin: 24px 0 10px; +} + +.home-category-label:first-of-type { margin-top: 0; } + +/* ── Scrollbars ──────────────────────────────────────── */ +::-webkit-scrollbar { width: 8px; height: 8px; } +::-webkit-scrollbar-track { background: transparent; } +::-webkit-scrollbar-thumb { background: #30363d; border-radius: 4px; } +::-webkit-scrollbar-thumb:hover { background: #484f58; } + +/* ── Responsive ──────────────────────────────────────── */ +@media (max-width: 700px) { + #sidebar { display: none; } + #main, #search-results { padding: 20px 20px; } + #search { width: 180px; } + #search:focus { width: 200px; } + #header .tagline { display: none; } +} diff --git a/scripts/start-game.sh b/scripts/start-game.sh new file mode 100755 index 0000000..ad5edf1 --- /dev/null +++ b/scripts/start-game.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +# start-game.sh — Start the redesign server and open the workstation display. + +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +SERVER_DIR="$PROJECT_ROOT/server" +FRONTEND_DIR="$PROJECT_ROOT/frontend" +DOMAIN="${SC_WORKSTATION_DOMAIN:-sc-workstation}" +PORT="${PORT:-3000}" +LIBVIRT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +export PORT +export LIBVIRT_DEFAULT_URI="$LIBVIRT_URI" + +source "$PROJECT_ROOT/tools/lib/internal-https.sh" +sc_ensure_internal_certs "$PROJECT_ROOT" +sc_export_internal_https_env + +if ! command -v virsh >/dev/null 2>&1; then + echo "ERROR: virsh is required." >&2 + exit 1 +fi + +if ! command -v node >/dev/null 2>&1; then + echo "ERROR: node is required. Install Node.js 18+." >&2 + exit 1 +fi + +# Ensure server dependencies are installed +if [ ! -d "$SERVER_DIR/node_modules" ]; then + echo "Installing server dependencies..." + (cd "$SERVER_DIR" && npm install --silent) +fi + +# Ensure frontend is built +if [ ! -f "$FRONTEND_DIR/dist/index.html" ]; then + echo "Building frontend..." + if [ ! 
-d "$FRONTEND_DIR/node_modules" ]; then + (cd "$FRONTEND_DIR" && npm install --silent) + fi + (cd "$FRONTEND_DIR" && npm run build --silent) +fi + +if ! virsh --connect "$LIBVIRT_URI" dominfo "$DOMAIN" >/dev/null 2>&1; then + echo "ERROR: missing workstation domain: $DOMAIN" >&2 + echo "Run: bash tools/vm/build-workstation.sh --force" >&2 + exit 1 +fi + +_ensure_server_port() { + local pids="" + pids="$(sc_listen_pids "$PORT" || true)" + if [ -z "$pids" ]; then + return 0 + fi + + local pid + for pid in $pids; do + if ! sc_pid_is_repo_server "$pid" "$PROJECT_ROOT"; then + echo "ERROR: port $PORT is already in use by an unrelated process (pid $pid)." >&2 + echo "Stop that process or set PORT to a free port before launching." >&2 + exit 1 + fi + if sc_pid_has_internal_tls "$pid"; then + return 1 + fi + done + + echo "Restarting existing Sysadmin Chronicles server on port $PORT with HTTPS enabled..." + for pid in $pids; do + sc_stop_pid "$pid" + done + return 0 +} + +if _ensure_server_port; then + ( + cd "$SERVER_DIR" + node src/index.js + ) & + SERVER_PID=$! 
+ trap 'kill "$SERVER_PID" >/dev/null 2>&1 || true' EXIT + sleep 1 +fi + +state="$(virsh --connect "$LIBVIRT_URI" domstate "$DOMAIN" 2>/dev/null || true)" +if [ "$state" != "running" ]; then + virsh --connect "$LIBVIRT_URI" start "$DOMAIN" >/dev/null +fi + +if command -v remote-viewer >/dev/null 2>&1; then + spice_uri="$(virsh --connect "$LIBVIRT_URI" domdisplay "$DOMAIN" 2>/dev/null | grep spice | head -n1 || true)" + if [ -n "$spice_uri" ]; then + exec remote-viewer "$spice_uri" + fi +fi + +exec bash "$PROJECT_ROOT/runtime/viewer/launch-spice-viewer.sh" "$DOMAIN" diff --git a/server/.env.example b/server/.env.example new file mode 100644 index 0000000..61fbef2 --- /dev/null +++ b/server/.env.example @@ -0,0 +1,7 @@ +PORT=3000 +HOST_BRIDGE_IP=192.168.100.1 +SSH_KEY_PATH=~/.ssh/sc_host_key +SAVE_DIR=~/.local/share/sysadmin-chronicles +CONTENT_DIR=../content +LIBVIRT_URI=qemu:///system +VM_PREFIX=sc- diff --git a/server/package-lock.json b/server/package-lock.json new file mode 100644 index 0000000..46995e5 --- /dev/null +++ b/server/package-lock.json @@ -0,0 +1,877 @@ +{ + "name": "sysadmin-chronicles-server", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "sysadmin-chronicles-server", + "version": "0.1.0", + "dependencies": { + "dotenv": "^16.0.0", + "express": "^4.18.0", + "ws": "^8.0.0" + } + }, + "node_modules/accepts": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", + "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", + "license": "MIT", + "dependencies": { + "mime-types": "~2.1.34", + "negotiator": "0.6.3" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": 
"sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", + "license": "MIT" + }, + "node_modules/body-parser": { + "version": "1.20.5", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.5.tgz", + "integrity": "sha512-3grm+/2tUOvu2cjJkvsIxrv/wVpfXQW4PsQHYm7yk4vfpu7Ekl6nEsYBoJUL6qDwZUx8wUhQ8tR2qz+ad9c9OA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "content-type": "~1.0.5", + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "~1.2.0", + "http-errors": "~2.0.1", + "iconv-lite": "~0.4.24", + "on-finished": "~2.4.1", + "qs": "~6.15.1", + "raw-body": "~2.5.3", + "type-is": "~1.6.18", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/body-parser/node_modules/qs": { + "version": "6.15.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.1.tgz", + "integrity": "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": 
"https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "license": "MIT", + "dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.7.tgz", + "integrity": "sha512-NXdYc3dLr47pBkpUCHtKSwIOQXLVn8dZEuywboCOJY/osA0wFSLlSawr3KN8qXJEyX66FcONTH8EIlVuK0yyFA==", + "license": "MIT" + }, + "node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/depd": 
{ + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/destroy": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", + "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==", + "license": "MIT", + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/dotenv": { + "version": "16.6.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", + "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": 
"1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express": { + "version": "4.22.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz", + "integrity": "sha512-F2X8g9P1X7uCPZMA3MVf9wcTqlyNp7IhH5qPCI0izhaOIYXaW9L535tGA3qmjRzpH+bZczqq7hVKxTR4NWnu+g==", + "license": "MIT", + "dependencies": { + "accepts": "~1.3.8", + "array-flatten": "1.1.1", + "body-parser": "~1.20.3", + "content-disposition": "~0.5.4", + "content-type": "~1.0.4", + "cookie": "~0.7.1", + "cookie-signature": "~1.0.6", + 
"debug": "2.6.9", + "depd": "2.0.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.3.1", + "fresh": "~0.5.2", + "http-errors": "~2.0.0", + "merge-descriptors": "1.0.3", + "methods": "~1.1.2", + "on-finished": "~2.4.1", + "parseurl": "~1.3.3", + "path-to-regexp": "~0.1.12", + "proxy-addr": "~2.0.7", + "qs": "~6.14.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.2.1", + "send": "~0.19.0", + "serve-static": "~1.16.2", + "setprototypeof": "1.2.0", + "statuses": "~2.0.1", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/finalhandler": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.2.tgz", + "integrity": "sha512-aA4RyPcd3badbdABGDuTXCMTtOneUCAYH/gxoYRTZlIJdF0YPWuGqiAsIrhNnnqdXGswYk6dGujem4w80UJFhg==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "on-finished": "~2.4.1", + "parseurl": "~1.3.3", + "statuses": "~2.0.2", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": 
"sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, 
+ "node_modules/hasown": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz", + "integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + 
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": 
"sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, + "node_modules/negotiator": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", + "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-to-regexp": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.13.tgz", + "integrity": 
"sha512-A/AGNMFN3c8bOlvV9RreMdrv7jsmF9XIfDeCd87+I8RNg6s78BhJxMu69NEMHBSJFxKidViTEdruRwEk/WIKqA==", + "license": "MIT" + }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/qs": { + "version": "6.14.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.2.tgz", + "integrity": "sha512-V/yCWTTF7VJ9hIh18Ugr2zhJMP01MY7c5kh4J870L7imm6/DIzBsNLTXzMwUA3yZ5b/KBqLx8Kp3uRvd7xSe3Q==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.3.tgz", + "integrity": "sha512-s4VSOf6yN0rvbRZGxs8Om5CWj6seneMwK3oDb4lWDH0UPhWcxwOWw5+qk24bxq87szX1ydrwylIOp2uG1ojUpA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.4.24", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + 
"type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/send": { + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.2.tgz", + "integrity": "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "~0.5.2", + "http-errors": "~2.0.1", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "~2.4.1", + "range-parser": "~1.2.1", + "statuses": "~2.0.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/serve-static": { + "version": "1.16.3", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.3.tgz", + "integrity": "sha512-x0RTqQel6g5SY7Lg6ZreMmsOzncHFU7nhnRWkKgWuMTu5NN0DR5oruckMqRvacAN9d5w6ARnRBXl9xhDCgfMeA==", + "license": "MIT", + "dependencies": { + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "~0.19.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": 
"sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz", + "integrity": "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": 
{ + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": 
"sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/ws": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + } + } +} diff --git a/server/package.json b/server/package.json new file mode 100644 index 0000000..fbc6310 --- /dev/null +++ b/server/package.json @@ -0,0 +1,16 @@ +{ + "name": "sysadmin-chronicles-server", + "version": "0.1.0", + "type": "module", + "main": "src/index.js", + "scripts": { + "start": "node src/index.js", + "dev": "node --watch src/index.js", + "test": "node --test --test-concurrency=1 src/*.test.js src/services/*.test.js" + }, + "dependencies": { + "dotenv": "^16.0.0", + "express": "^4.18.0", + "ws": "^8.0.0" + } +} diff --git a/server/public/portraits/player-01.png b/server/public/portraits/player-01.png new file mode 100644 index 0000000..fafc6e8 Binary files /dev/null and b/server/public/portraits/player-01.png differ diff --git a/server/public/portraits/player-02.png b/server/public/portraits/player-02.png new file mode 100644 index 0000000..2b72677 Binary files /dev/null and b/server/public/portraits/player-02.png differ diff --git a/server/public/portraits/player-03.png b/server/public/portraits/player-03.png new file mode 100644 index 0000000..5b636cf Binary files /dev/null and b/server/public/portraits/player-03.png differ diff --git a/server/public/portraits/player-04.png b/server/public/portraits/player-04.png new file mode 100644 index 0000000..9acc2c4 Binary files /dev/null and 
b/server/public/portraits/player-04.png differ diff --git a/server/public/portraits/player-05.png b/server/public/portraits/player-05.png new file mode 100644 index 0000000..2ae3622 Binary files /dev/null and b/server/public/portraits/player-05.png differ diff --git a/server/public/portraits/player-silhouette.png b/server/public/portraits/player-silhouette.png new file mode 100644 index 0000000..ad33f01 Binary files /dev/null and b/server/public/portraits/player-silhouette.png differ diff --git a/server/public/wallpaper.png b/server/public/wallpaper.png new file mode 100644 index 0000000..4ff78e0 Binary files /dev/null and b/server/public/wallpaper.png differ diff --git a/server/src/index.js b/server/src/index.js new file mode 100644 index 0000000..a27366a --- /dev/null +++ b/server/src/index.js @@ -0,0 +1,211 @@ +import 'dotenv/config'; + +import http from 'http'; +import path from 'path'; +import { existsSync } from 'fs'; +import express from 'express'; +import { WebSocket, WebSocketServer } from 'ws'; +import { fileURLToPath } from 'url'; + +import { eventBus } from './lib/eventBus.js'; +import { requireSession } from './lib/session.js'; +import sessionRouter from './routes/session.js'; +import stateRouter from './routes/state.js'; +import ticketsRouter from './routes/tickets.js'; +import mailRouter from './routes/mail.js'; +import docsRouter from './routes/docs.js'; +import vmsRouter from './routes/vms.js'; +import sageRouter from './routes/sage.js'; +import profileRouter from './routes/profile.js'; +import debugRouter from './routes/debug.js'; +import { contentLoader } from './services/ContentLoader.js'; +import { saveState } from './services/SaveState.js'; +import { progressionSystem } from './services/ProgressionSystem.js'; +import { trustSystem } from './services/TrustSystem.js'; +import { questEngine } from './services/QuestEngine.js'; +import { ticketService } from './services/TicketService.js'; +import { emailService } from './services/EmailService.js'; 
+import { vmManager } from './services/VMManager.js'; +import { shiftTimer } from './services/ShiftTimer.js'; +import { incidentScheduler } from './services/IncidentScheduler.js'; +import { shiftReviewService } from './services/ShiftReviewService.js'; +import { certificationService } from './services/CertificationService.js'; +import { behaviorTracker } from './services/BehaviorTracker.js'; +import { narrativePhaseTracker } from './services/NarrativePhaseTracker.js'; +import { hiddenHookTracker } from './services/HiddenHookTracker.js'; +import { endingEvaluator } from './services/EndingEvaluator.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +export async function initializeServices() { + try { + await contentLoader.load(); + } catch (error) { + throw new Error(`Failed to load content: ${error.message}`); + } + + try { + await saveState.load(); + } catch (error) { + throw new Error(`Failed to load save state: ${error.message}`); + } + + const state = saveState.get(); + + progressionSystem.initialize(state); + trustSystem.initialize(state); + behaviorTracker.initialize(state); + questEngine.initialize(state); + narrativePhaseTracker.initialize(state); + hiddenHookTracker.initialize(state); + ticketService.initialize(state); + emailService.initialize(state); + shiftReviewService.initialize(state); + certificationService.initialize(state); + shiftTimer.start(state); + incidentScheduler.start(); + eventBus.on('quest:completed', () => { endingEvaluator.checkTrigger(); }); + + try { + const workstationStatus = await vmManager.ensureWorkstationLive(); + if (!workstationStatus.ok) { + console.warn('Workstation VM not ready at bootstrap:', workstationStatus.reason ?? 
workstationStatus); + } + } catch (error) { + console.warn('Failed to ensure workstation VM is live:', error.message); + } +} + +export function createApp() { + const app = express(); + app.use(express.json()); + + // Sage KB — static site + article API, no session required + const sageStatic = path.resolve(__dirname, '../../sage'); + const sageArticles = path.resolve(__dirname, '../../content/sage-articles'); + const sendSageIndex = (_req, res) => res.sendFile(path.join(sageStatic, 'index.html')); + app.use('/sage/api', express.static(sageArticles, { index: false })); + app.use('/sage', express.static(sageStatic)); + app.get(['/sage', '/sage/'], sendSageIndex); + + // Company website — publicly accessible, no session required + const companyWebsite = path.resolve(__dirname, '../../company-website'); + app.use('/company', express.static(companyWebsite)); + app.get(['/company', '/company/'], (_req, res) => res.sendFile(path.join(companyWebsite, 'index.html'))); + + // Public assets — portraits, wallpaper + const publicDir = path.resolve(__dirname, '../public'); + app.use('/public', express.static(publicDir)); + + app.use('/api/session', sessionRouter); + app.use('/api', requireSession); + app.use('/api/state', stateRouter); + app.use('/api/tickets', ticketsRouter); + app.use('/api/mail', mailRouter); + app.use('/api/docs', docsRouter); + app.use('/api/vms', vmsRouter); + app.use('/api/sage', sageRouter); + app.use('/api/profile', profileRouter); + if (process.env.SC_DEBUG === '1') { + app.use('/api/debug', debugRouter); + } + + const frontendDist = path.resolve(__dirname, '../../frontend/dist'); + const hasFrontendDist = existsSync(frontendDist); + + if (hasFrontendDist) { + app.use(express.static(frontendDist)); + } else { + app.get('/', (_req, res) => { + res.status(200).json({ status: 'game server running', version: '0.1.0' }); + }); + } + + return app; +} + +export function attachWebSocket(server) { + const wss = new WebSocketServer({ server }); + + 
wss.on('connection', (socket) => { + socket.send( + JSON.stringify({ + type: 'connected', + payload: { trust: trustSystem.getScore() } + }) + ); + }); + + const forwardEvent = (eventName) => { + eventBus.on(eventName, (payload) => { + const message = JSON.stringify({ type: eventName, payload }); + + for (const client of wss.clients) { + if (client.readyState === WebSocket.OPEN) { + client.send(message); + } + } + }); + }; + + for (const eventName of [ + 'trust:changed', + 'mail:new', + 'progression:changed', + 'quest:activated', + 'quest:completed', + 'ticket:activated', + 'ticket:completed', + 'shift:tick', + 'shift:ended', + 'incident:alert', + 'certification:awarded' + ]) { + forwardEvent(eventName); + } + + return { server, wss }; +} + +export async function startServer({ port = Number(process.env.PORT ?? 3000), host = '0.0.0.0' } = {}) { + await initializeServices(); + const app = createApp(); + + let server; + const tlsCert = process.env.SC_TLS_CERT; + const tlsKey = process.env.SC_TLS_KEY; + + if (tlsCert && tlsKey && existsSync(tlsCert) && existsSync(tlsKey)) { + const { createServer: createHttpsServer } = await import('https'); + const { readFileSync } = await import('fs'); + server = createHttpsServer( + { cert: readFileSync(tlsCert), key: readFileSync(tlsKey) }, + app + ); + } else { + server = http.createServer(app); + } + + const { wss } = attachWebSocket(server); + + await new Promise((resolve) => { + server.listen(port, host, resolve); + }); + + const proto = (tlsCert && tlsKey && existsSync(tlsCert) && existsSync(tlsKey)) ? 'https' : 'http'; + return { app, server, wss, proto }; +} + +if (process.argv[1] && path.resolve(process.argv[1]) === __filename) { + startServer() + .then(({ server }) => { + const address = server.address(); + const actualPort = typeof address === 'object' && address ? address.port : process.env.PORT ?? 
3000; + console.log(`Game server running on port ${actualPort}`); + }) + .catch((error) => { + console.error(error.message); + process.exit(1); + }); +} diff --git a/server/src/index.test.js b/server/src/index.test.js new file mode 100644 index 0000000..1a5e082 --- /dev/null +++ b/server/src/index.test.js @@ -0,0 +1,172 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'os'; +import path from 'path'; +import { rm } from 'fs/promises'; + +import { startServer } from './index.js'; +import { saveState } from './services/SaveState.js'; +import { shiftTimer } from './services/ShiftTimer.js'; +import { incidentScheduler } from './services/IncidentScheduler.js'; +import { vmManager } from './services/VMManager.js'; + +async function bootServer(testId) { + process.env.CONTENT_DIR = path.resolve(process.cwd(), '../content'); + process.env.SAVE_DIR = path.join(os.tmpdir(), `sc-server-route-test-${testId}-${Date.now()}`); + + await saveState._writeQueue.catch(() => {}); + saveState._savePath = null; + saveState._state = null; + saveState._writeQueue = Promise.resolve(); + await rm(process.env.SAVE_DIR, { recursive: true, force: true }); + + const originalEnsureWorkstationLive = vmManager.ensureWorkstationLive.bind(vmManager); + const originalGetState = vmManager.getState.bind(vmManager); + + vmManager.ensureWorkstationLive = async () => ({ ok: true, started: false }); + vmManager.getState = async (vmId) => (vmId === 'workstation' ? 'running' : 'shut off'); + + const { server, wss } = await startServer({ port: 0, host: '127.0.0.1' }); + const address = server.address(); + const port = typeof address === 'object' && address ? 
address.port : 0; + const baseUrl = `http://127.0.0.1:${port}`; + + async function sessionToken() { + const response = await fetch(`${baseUrl}/api/session`); + const payload = await response.json(); + return payload.token; + } + + async function authedFetch(pathname, init = {}) { + const token = await sessionToken(); + const headers = new Headers(init.headers ?? {}); + headers.set('Authorization', `Bearer ${token}`); + if (init.body && !headers.has('Content-Type')) { + headers.set('Content-Type', 'application/json'); + } + + return fetch(`${baseUrl}${pathname}`, { + ...init, + headers + }); + } + + async function close() { + shiftTimer.stop(); + incidentScheduler.stop(); + await Promise.all([ + new Promise((resolve, reject) => { + server.close((error) => { + if (error) reject(error); + else resolve(); + }); + }), + new Promise((resolve) => wss.close(resolve)) + ]); + + vmManager.ensureWorkstationLive = originalEnsureWorkstationLive; + vmManager.getState = originalGetState; + } + + return { baseUrl, authedFetch, sessionToken, close }; +} + +test('session endpoint issues a token and protected state route requires auth', async () => { + const ctx = await bootServer('session-auth'); + + try { + const sessionResponse = await fetch(`${ctx.baseUrl}/api/session`); + assert.equal(sessionResponse.status, 200); + const sessionPayload = await sessionResponse.json(); + assert.ok(typeof sessionPayload.token === 'string'); + + const unauthResponse = await fetch(`${ctx.baseUrl}/api/state`); + assert.equal(unauthResponse.status, 401); + + const authResponse = await ctx.authedFetch('/api/state'); + assert.equal(authResponse.status, 200); + const state = await authResponse.json(); + assert.equal(state.trust, 50); + assert.ok(state.shift); + assert.ok(Array.isArray(state.certifications)); + assert.ok(Array.isArray(state.shiftHistory)); + } finally { + await ctx.close(); + } +}); + +test('docs route enforces id validation and unlock gating', async () => { + const ctx = await 
bootServer('docs'); + + try { + const invalidResponse = await ctx.authedFetch('/api/docs/bad$id'); + assert.equal(invalidResponse.status, 400); + + const unlockedResponse = await ctx.authedFetch('/api/docs/onboarding'); + assert.equal(unlockedResponse.status, 200); + const unlockedDoc = await unlockedResponse.json(); + assert.equal(unlockedDoc.id, 'onboarding'); + + const lockedResponse = await ctx.authedFetch('/api/docs/server-admin-guide'); + assert.equal(lockedResponse.status, 403); + } finally { + await ctx.close(); + } +}); + +test('sage route validates input and returns authored quest help', async () => { + const ctx = await bootServer('sage'); + + try { + const badResponse = await ctx.authedFetch('/api/sage/message', { + method: 'POST', + body: JSON.stringify({ message: '' }) + }); + assert.equal(badResponse.status, 400); + + const goodResponse = await ctx.authedFetch('/api/sage/message', { + method: 'POST', + body: JSON.stringify({ message: 'give me a hint' }) + }); + assert.equal(goodResponse.status, 200); + const payload = await goodResponse.json(); + assert.match(payload.response, /\.ssh/i); + assert.ok(Array.isArray(payload.followUps)); + } finally { + await ctx.close(); + } +}); + +test('vms route returns current VM states under auth', async () => { + const ctx = await bootServer('vms'); + + try { + const response = await ctx.authedFetch('/api/vms'); + assert.equal(response.status, 200); + const vms = await response.json(); + assert.equal(vms.length, 3); + assert.equal(vms.find((vm) => vm.id === 'workstation')?.state, 'running'); + } finally { + await ctx.close(); + } +}); + +test('ticket and mail routes validate request payloads', async () => { + const ctx = await bootServer('payload-validation'); + + try { + const ticketResponse = await ctx.authedFetch('/api/tickets/T001/complete', { + method: 'POST', + body: JSON.stringify({ branchId: 123 }) + }); + assert.equal(ticketResponse.status, 400); + + const mailResponse = await 
ctx.authedFetch('/api/mail/mail-T001-initial/reply', { + method: 'POST', + body: JSON.stringify({ choice: '0' }) + }); + assert.equal(mailResponse.status, 400); + } finally { + await ctx.close(); + } +}); diff --git a/server/src/lib/command.js b/server/src/lib/command.js new file mode 100644 index 0000000..88dbcaf --- /dev/null +++ b/server/src/lib/command.js @@ -0,0 +1,72 @@ +import { spawn } from 'child_process'; + +export async function runCommand(binary, args = [], options = {}) { + const { + cwd = process.cwd(), + env = {}, + timeoutMs = 15000 + } = options; + + return await new Promise((resolve) => { + let stdout = ''; + let stderr = ''; + let timedOut = false; + let settled = false; + + const child = spawn(binary, args, { + cwd, + env: { ...process.env, ...env }, + stdio: ['ignore', 'pipe', 'pipe'] + }); + + const finalize = (result) => { + if (settled) { + return; + } + + settled = true; + clearTimeout(timer); + resolve({ + ...result, + stdout: result.stdout ?? stdout, + stderr: result.stderr ?? stderr, + command: [binary, ...args].join(' ') + }); + }; + + const timer = setTimeout(() => { + timedOut = true; + child.kill('SIGKILL'); + }, timeoutMs); + + child.stdout.on('data', (chunk) => { + stdout += chunk.toString(); + }); + + child.stderr.on('data', (chunk) => { + stderr += chunk.toString(); + }); + + child.on('error', (error) => { + finalize({ + ok: false, + code: 127, + stdout, + stderr: stderr || error.message, + timedOut: false, + signal: null + }); + }); + + child.on('close', (code, signal) => { + finalize({ + ok: code === 0 && !timedOut, + code: timedOut ? 124 : (code ?? 
1), + stdout, + stderr, + timedOut, + signal + }); + }); + }); +} diff --git a/server/src/lib/eventBus.js b/server/src/lib/eventBus.js new file mode 100644 index 0000000..8eb8e2d --- /dev/null +++ b/server/src/lib/eventBus.js @@ -0,0 +1,3 @@ +import { EventEmitter } from 'events'; + +export const eventBus = new EventEmitter(); diff --git a/server/src/lib/session.js b/server/src/lib/session.js new file mode 100644 index 0000000..13dbddf --- /dev/null +++ b/server/src/lib/session.js @@ -0,0 +1,84 @@ +import crypto from 'crypto'; + +const SESSION_TTL_SECONDS = Number(process.env.SESSION_TTL_SECONDS ?? 60 * 60 * 12); + +function getSessionSecret() { + return process.env.SESSION_SECRET ?? 'sysadmin-chronicles-dev-session-secret'; +} + +function base64urlEncode(value) { + return Buffer.from(value).toString('base64url'); +} + +function base64urlDecode(value) { + return Buffer.from(value, 'base64url').toString('utf8'); +} + +function sign(value) { + return crypto.createHmac('sha256', getSessionSecret()).update(value).digest('base64url'); +} + +export function issueSessionToken() { + const issuedAt = Math.floor(Date.now() / 1000); + const payload = { + sid: crypto.randomUUID(), + iat: issuedAt, + exp: issuedAt + SESSION_TTL_SECONDS + }; + + const encodedPayload = base64urlEncode(JSON.stringify(payload)); + const signature = sign(encodedPayload); + return { + token: `${encodedPayload}.${signature}`, + expiresAt: new Date(payload.exp * 1000).toISOString() + }; +} + +export function verifySessionToken(token) { + if (typeof token !== 'string' || !token.includes('.')) { + return { ok: false, reason: 'invalid-token' }; + } + + const [encodedPayload, signature] = token.split('.', 2); + const expectedSignature = sign(encodedPayload); + + if (!signature) { + return { ok: false, reason: 'bad-signature' }; + } + + const provided = Buffer.from(signature); + const expected = Buffer.from(expectedSignature); + if (provided.length !== expected.length || !crypto.timingSafeEqual(provided, 
expected)) { + return { ok: false, reason: 'bad-signature' }; + } + + try { + const payload = JSON.parse(base64urlDecode(encodedPayload)); + const now = Math.floor(Date.now() / 1000); + if (Number(payload.exp ?? 0) <= now) { + return { ok: false, reason: 'expired' }; + } + + return { + ok: true, + payload + }; + } catch { + return { ok: false, reason: 'invalid-payload' }; + } +} + +export function requireSession(req, res, next) { + const header = req.headers.authorization ?? ''; + const match = header.match(/^Bearer\s+(.+)$/i); + const token = match?.[1] ?? ''; + const verification = verifySessionToken(token); + + if (!verification.ok) { + res.status(401).json({ error: 'Unauthorized' }); + return; + } + + req.session = verification.payload; + next(); +} diff --git a/server/src/lib/ssh.js b/server/src/lib/ssh.js new file mode 100644 index 0000000..7f57b62 --- /dev/null +++ b/server/src/lib/ssh.js @@ -0,0 +1,49 @@ +import os from 'os'; +import path from 'path'; + +import { runCommand } from './command.js'; + +const DEFAULT_SSH_OPTIONS = [ + '-o', 'StrictHostKeyChecking=no', + '-o', 'UserKnownHostsFile=/dev/null', + '-o', 'BatchMode=yes', + '-o', 'LogLevel=ERROR' +]; + +export function expandHomePath(value) { + if (!value) { + return value; + } + + if (value === '~') { + return os.homedir(); + } + + if (value.startsWith('~/')) { + return path.join(os.homedir(), value.slice(2)); + } + + return value; +} + +export async function runSSH({ + host, + user, + command, + keyPath = process.env.SSH_KEY_PATH ?? 
'~/.ssh/sc_host_key', + timeoutSec = 15, + connectTimeoutSec = 5, + extraArgs = [] +}) { + const resolvedKeyPath = expandHomePath(keyPath); + const args = [ + ...DEFAULT_SSH_OPTIONS, + '-o', `ConnectTimeout=${connectTimeoutSec}`, + '-i', resolvedKeyPath, + ...extraArgs, + `${user}@${host}`, + command + ]; + + return await runCommand('ssh', args, { timeoutMs: timeoutSec * 1000 }); +} diff --git a/server/src/lib/utils.js b/server/src/lib/utils.js new file mode 100644 index 0000000..e1d267f --- /dev/null +++ b/server/src/lib/utils.js @@ -0,0 +1,15 @@ +export function toArray(value) { + return Array.isArray(value) ? value : []; +} + +export function normalizeWorldFlag(value) { + return typeof value === 'string' && value.startsWith('world_flag:') + ? value.slice('world_flag:'.length) + : value; +} + +export function createError(message, statusCode) { + const error = new Error(message); + error.statusCode = statusCode; + return error; +} diff --git a/server/src/lib/virsh.js b/server/src/lib/virsh.js new file mode 100644 index 0000000..bb2d016 --- /dev/null +++ b/server/src/lib/virsh.js @@ -0,0 +1,7 @@ +import { runCommand } from './command.js'; + +export async function runVirsh(args = [], options = {}) { + const uri = options.uri ?? process.env.LIBVIRT_URI ?? 'qemu:///system'; + const timeoutMs = (options.timeoutSec ?? 
10) * 1000; + return await runCommand('virsh', ['--connect', uri, ...args], { timeoutMs }); +} diff --git a/server/src/routes/debug.js b/server/src/routes/debug.js new file mode 100644 index 0000000..b0fd32f --- /dev/null +++ b/server/src/routes/debug.js @@ -0,0 +1,43 @@ +import { Router } from 'express'; +import { saveState } from '../services/SaveState.js'; +import { behaviorTracker } from '../services/BehaviorTracker.js'; +import { narrativePhaseTracker } from '../services/NarrativePhaseTracker.js'; +import { endingEvaluator } from '../services/EndingEvaluator.js'; + +const VALID_PHASES = ['normal_work', 'unease', 'suspicion', 'investigation', 'conflict', 'resolution']; + +const router = Router(); + +router.get('/state', (_req, res) => { + res.json(saveState.get()); +}); + +router.get('/ending', (_req, res) => { + res.json(endingEvaluator.evaluate()); +}); + +router.post('/behavior', (req, res) => { + const { curiosity, obedience, risk, suspicion } = req.body ?? {}; + const isNum = (v) => typeof v === 'number' && v >= 0 && v <= 100; + const override = {}; + if (isNum(curiosity)) override.curiosity = curiosity; + if (isNum(obedience)) override.obedience = obedience; + if (isNum(risk)) override.risk = risk; + if (isNum(suspicion)) override.suspicion = suspicion; + if (Object.keys(override).length === 0) { + return res.status(400).json({ error: 'No valid fields. Each must be a number 0–100.' }); + } + behaviorTracker.setSnapshot(override); + res.json(behaviorTracker.getSnapshot()); +}); + +router.post('/phase', (req, res) => { + const { phase } = req.body ?? 
{}; + if (!VALID_PHASES.includes(phase)) { + return res.status(400).json({ error: `phase must be one of: ${VALID_PHASES.join(', ')}` }); + } + narrativePhaseTracker.forcePhase(phase); + res.json({ phase: narrativePhaseTracker.getPhase() }); +}); + +export default router; diff --git a/server/src/routes/docs.js b/server/src/routes/docs.js new file mode 100644 index 0000000..b1a8706 --- /dev/null +++ b/server/src/routes/docs.js @@ -0,0 +1,44 @@ +import { Router } from 'express'; +import { contentLoader } from '../services/ContentLoader.js'; +import { progressionSystem } from '../services/ProgressionSystem.js'; + +const router = Router(); +const DOC_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9-]*$/; + +router.get('/', (_req, res) => { + const docs = [...contentLoader.docs.values()] + .sort((left, right) => left.title.localeCompare(right.title)) + .map((doc) => ({ + id: doc.id, + title: doc.title, + locked: !progressionSystem.hasDoc(doc.id) + })); + + res.json(docs); +}); + +router.get('/:id', (req, res) => { + if (!DOC_ID_PATTERN.test(req.params.id)) { + res.status(400).json({ error: 'Invalid document id' }); + return; + } + + const doc = contentLoader.get('docs', req.params.id); + if (!doc) { + res.status(404).json({ error: 'Document not found' }); + return; + } + + if (!progressionSystem.hasDoc(doc.id)) { + res.status(403).json({ error: 'Document locked' }); + return; + } + + res.json({ + id: doc.id, + title: doc.title, + content: doc.body + }); +}); + +export default router; diff --git a/server/src/routes/mail.js b/server/src/routes/mail.js new file mode 100644 index 0000000..812d8ba --- /dev/null +++ b/server/src/routes/mail.js @@ -0,0 +1,48 @@ +import { Router } from 'express'; +import { emailService } from '../services/EmailService.js'; + +const router = Router(); + +router.get('/', (_req, res) => { + res.json(emailService.getAll()); +}); + +router.get('/:id', (req, res) => { + const mail = emailService.getById(req.params.id); + if (!mail) { + res.status(404).json({ 
error: 'Mail not found' }); + return; + } + + res.json(mail); +}); + +router.post('/:id/read', (req, res) => { + try { + emailService.markRead(req.params.id); + res.json({ ok: true }); + } catch (error) { + const statusCode = error.statusCode ?? 500; + res.status(statusCode).json({ error: error.message }); + } +}); + +router.post('/:id/reply', (req, res) => { + const rawChoice = req.body?.choice; + if (!Number.isInteger(rawChoice)) { + res.status(400).json({ error: 'choice must be an integer' }); + return; + } + + const choice = Number(rawChoice); + + try { + const result = emailService.reply(req.params.id, choice); + res.json(result); + } catch (error) { + const statusCode = error.statusCode ?? 500; + res.status(statusCode).json({ error: error.message }); + } +}); + +export default router; diff --git a/server/src/routes/profile.js b/server/src/routes/profile.js new file mode 100644 index 0000000..34dfad3 --- /dev/null +++ b/server/src/routes/profile.js @@ -0,0 +1,31 @@ +import { Router } from 'express'; +import { saveState } from '../services/SaveState.js'; + +const router = Router(); + +const VALID_PORTRAITS = [ + 'player-silhouette', + 'player-01', + 'player-02', + 'player-03', + 'player-04', + 'player-05' +]; + +router.get('/', (_req, res) => { + const state = saveState.get(); + res.json({ portrait: state.player_portrait ?? 'player-silhouette' }); +}); + +router.put('/', (req, res) => { + const { portrait } = req.body ?? 
{}; + + if (!portrait || !VALID_PORTRAITS.includes(portrait)) { + return res.status(400).json({ error: 'Invalid portrait', valid: VALID_PORTRAITS }); + } + + saveState.set({ player_portrait: portrait }); + res.json({ portrait }); +}); + +export default router; diff --git a/server/src/routes/sage.js b/server/src/routes/sage.js new file mode 100644 index 0000000..30008fa --- /dev/null +++ b/server/src/routes/sage.js @@ -0,0 +1,27 @@ +import { Router } from 'express'; +import { sageService } from '../services/SageService.js'; + +const router = Router(); + +router.post('/message', (req, res) => { + const message = req.body?.message; + if (typeof message !== 'string') { + res.status(400).json({ error: 'Message must be a string' }); + return; + } + + const trimmed = message.trim(); + if (!trimmed) { + res.status(400).json({ error: 'Message cannot be empty' }); + return; + } + + if (trimmed.length > 500) { + res.status(400).json({ error: 'Message too long' }); + return; + } + + res.json(sageService.reply(trimmed)); +}); + +export default router; diff --git a/server/src/routes/session.js b/server/src/routes/session.js new file mode 100644 index 0000000..b7f236a --- /dev/null +++ b/server/src/routes/session.js @@ -0,0 +1,10 @@ +import { Router } from 'express'; +import { issueSessionToken } from '../lib/session.js'; + +const router = Router(); + +router.get('/', (_req, res) => { + res.json(issueSessionToken()); +}); + +export default router; diff --git a/server/src/routes/state.js b/server/src/routes/state.js new file mode 100644 index 0000000..694ce03 --- /dev/null +++ b/server/src/routes/state.js @@ -0,0 +1,31 @@ +import { Router } from 'express'; +import { saveState } from '../services/SaveState.js'; +import { trustSystem } from '../services/TrustSystem.js'; +import { progressionSystem } from '../services/ProgressionSystem.js'; +import { shiftTimer } from '../services/ShiftTimer.js'; +import { narrativePhaseTracker } from '../services/NarrativePhaseTracker.js'; +import { 
hiddenHookTracker } from '../services/HiddenHookTracker.js'; +import { endingEvaluator } from '../services/EndingEvaluator.js'; + +const router = Router(); + +router.get('/', (_req, res) => { + const state = saveState.get(); + res.json({ + trust: trustSystem.getScore(), + shiftNumber: state.shift_number, + shiftStartedAt: state.shift_started_at, + shift: shiftTimer.getSnapshot(state), + worldFlags: state.world_flags, + progression: progressionSystem._snapshot(), + certifications: state.certifications, + currentShiftStats: state.current_shift_stats ?? null, + shiftHistory: state.shift_history ?? [], + narrativePhase: narrativePhaseTracker.getPhase(), + hiddenHooksDiscovered: hiddenHookTracker.getDiscovered(), + accessLevel: progressionSystem.getAccessLevel(), + endingTrajectory: endingEvaluator.evaluate() + }); +}); + +export default router; diff --git a/server/src/routes/tickets.js b/server/src/routes/tickets.js new file mode 100644 index 0000000..a66c88a --- /dev/null +++ b/server/src/routes/tickets.js @@ -0,0 +1,33 @@ +import { Router } from 'express'; +import { ticketService } from '../services/TicketService.js'; + +const router = Router(); + +router.get('/', (_req, res) => { + res.json(ticketService.getAll()); +}); + +router.get('/:id', (req, res) => { + const ticket = ticketService.getDetail(req.params.id); + if (!ticket) { + res.status(404).json({ error: 'Ticket not found' }); + return; + } + + res.json(ticket); +}); + +router.post('/:id/complete', async (req, res) => { + const branchId = req.body?.branchId ?? 
null; + if (branchId !== null && typeof branchId !== 'string') { + res.status(400).json({ error: 'branchId must be a string when provided' }); + return; + } + + const result = await ticketService.markComplete(req.params.id, { + branchId + }); + res.json(result); +}); + +export default router; diff --git a/server/src/routes/vms.js b/server/src/routes/vms.js new file mode 100644 index 0000000..5dbaf31 --- /dev/null +++ b/server/src/routes/vms.js @@ -0,0 +1,24 @@ +import { Router } from 'express'; +import { contentLoader } from '../services/ContentLoader.js'; +import { progressionSystem } from '../services/ProgressionSystem.js'; +import { vmManager } from '../services/VMManager.js'; + +const router = Router(); + +router.get('/', async (_req, res) => { + const vms = await Promise.all( + [...contentLoader.vmProfiles.values()] + .sort((left, right) => left.id.localeCompare(right.id)) + .map(async (profile) => ({ + id: profile.id, + domain: vmManager.getDomainName(profile.id), + hostname: profile.hostname ?? profile.id, + state: await vmManager.getState(profile.id), + unlocked: profile.id === 'workstation' ? true : progressionSystem.hasVM(profile.id) + })) + ); + + res.json(vms); +}); + +export default router; diff --git a/server/src/services/BehaviorTracker.js b/server/src/services/BehaviorTracker.js new file mode 100644 index 0000000..e352d51 --- /dev/null +++ b/server/src/services/BehaviorTracker.js @@ -0,0 +1,54 @@ +import { eventBus } from '../lib/eventBus.js'; +import { saveState } from './SaveState.js'; + +class BehaviorTracker { + constructor() { + this._curiosity = 50; + this._obedience = 50; + this._risk = 50; + this._suspicion = 0; + } + + initialize(state) { + const b = state?.behavior ?? {}; + this._curiosity = typeof b.curiosity === 'number' ? b.curiosity : 50; + this._obedience = typeof b.obedience === 'number' ? b.obedience : 50; + this._risk = typeof b.risk === 'number' ? b.risk : 50; + this._suspicion = typeof b.suspicion === 'number' ? 
b.suspicion : 0; + } + + apply(impact) { + if (!impact || typeof impact !== 'object') return; + const clamp = (v) => Math.max(0, Math.min(100, v)); + if (typeof impact.curiosity_delta === 'number') this._curiosity = clamp(this._curiosity + impact.curiosity_delta); + if (typeof impact.obedience_delta === 'number') this._obedience = clamp(this._obedience + impact.obedience_delta); + if (typeof impact.risk_delta === 'number') this._risk = clamp(this._risk + impact.risk_delta); + if (typeof impact.suspicion_delta === 'number') this._suspicion = clamp(this._suspicion + impact.suspicion_delta); + this._persist(); + eventBus.emit('behavior:changed', this.getSnapshot()); + } + + getSnapshot() { + return { + curiosity: this._curiosity, + obedience: this._obedience, + risk: this._risk, + suspicion: this._suspicion + }; + } + + setSnapshot(override) { + const clamp = (v) => Math.max(0, Math.min(100, v)); + if (typeof override.curiosity === 'number') this._curiosity = clamp(override.curiosity); + if (typeof override.obedience === 'number') this._obedience = clamp(override.obedience); + if (typeof override.risk === 'number') this._risk = clamp(override.risk); + if (typeof override.suspicion === 'number') this._suspicion = clamp(override.suspicion); + this._persist(); + } + + _persist() { + saveState.set({ behavior: this.getSnapshot() }); + } +} + +export const behaviorTracker = new BehaviorTracker(); diff --git a/server/src/services/BehaviorTracker.test.js b/server/src/services/BehaviorTracker.test.js new file mode 100644 index 0000000..92df71e --- /dev/null +++ b/server/src/services/BehaviorTracker.test.js @@ -0,0 +1,95 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'os'; +import path from 'path'; + +import { behaviorTracker } from './BehaviorTracker.js'; +import { saveState } from './SaveState.js'; + +async function isolateSaveState(testId) { + await saveState._writeQueue.catch(() => {}); + process.env.SAVE_DIR = 
path.join(os.tmpdir(), `sc-test-${testId}-${Date.now()}`); + saveState._savePath = null; + saveState._state = null; + saveState._writeQueue = Promise.resolve(); +} + +test('initialize with explicit values', async () => { + await isolateSaveState('behavior-explicit'); + behaviorTracker.initialize({ behavior: { curiosity: 30, obedience: 70, risk: 40, suspicion: 10 } }); + + assert.deepEqual(behaviorTracker.getSnapshot(), { + curiosity: 30, + obedience: 70, + risk: 40, + suspicion: 10 + }); +}); + +test('initialize with no behavior key uses defaults', async () => { + await isolateSaveState('behavior-defaults'); + behaviorTracker.initialize({}); + + assert.deepEqual(behaviorTracker.getSnapshot(), { + curiosity: 50, + obedience: 50, + risk: 50, + suspicion: 0 + }); +}); + +test('apply adds positive delta', async () => { + await isolateSaveState('behavior-positive-delta'); + behaviorTracker.initialize({ behavior: { curiosity: 60, obedience: 50, risk: 50, suspicion: 0 } }); + + behaviorTracker.apply({ curiosity_delta: 5 }); + + assert.equal(behaviorTracker.getSnapshot().curiosity, 65); +}); + +test('apply subtracts negative delta', async () => { + await isolateSaveState('behavior-negative-delta'); + behaviorTracker.initialize({ behavior: { curiosity: 50, obedience: 50, risk: 50, suspicion: 0 } }); + + behaviorTracker.apply({ curiosity_delta: -15 }); + + assert.equal(behaviorTracker.getSnapshot().curiosity, 35); +}); + +test('apply clamps at 0', async () => { + await isolateSaveState('behavior-clamp-low'); + behaviorTracker.initialize({ behavior: { curiosity: 5, obedience: 50, risk: 50, suspicion: 0 } }); + + behaviorTracker.apply({ curiosity_delta: -20 }); + + assert.equal(behaviorTracker.getSnapshot().curiosity, 0); +}); + +test('apply clamps at 100', async () => { + await isolateSaveState('behavior-clamp-high'); + behaviorTracker.initialize({ behavior: { curiosity: 50, obedience: 50, risk: 95, suspicion: 0 } }); + + behaviorTracker.apply({ risk_delta: 20 }); + + 
assert.equal(behaviorTracker.getSnapshot().risk, 100); +}); + +test('setSnapshot overrides values', async () => { + await isolateSaveState('behavior-set-snapshot'); + behaviorTracker.initialize({ behavior: { curiosity: 50, obedience: 50, risk: 50, suspicion: 0 } }); + + behaviorTracker.setSnapshot({ curiosity: 80, obedience: 20 }); + const snapshot = behaviorTracker.getSnapshot(); + + assert.equal(snapshot.curiosity, 80); + assert.equal(snapshot.obedience, 20); +}); + +test('apply ignores non-numeric delta', async () => { + await isolateSaveState('behavior-ignore-nonnumeric'); + behaviorTracker.initialize({ behavior: { curiosity: 50, obedience: 50, risk: 50, suspicion: 0 } }); + + behaviorTracker.apply({ curiosity_delta: 'bad' }); + + assert.equal(behaviorTracker.getSnapshot().curiosity, 50); +}); diff --git a/server/src/services/CertificationService.js b/server/src/services/CertificationService.js new file mode 100644 index 0000000..42b3f96 --- /dev/null +++ b/server/src/services/CertificationService.js @@ -0,0 +1,136 @@ +import { eventBus } from '../lib/eventBus.js'; +import { toArray } from '../lib/utils.js'; +import { emailService } from './EmailService.js'; +import { questEngine } from './QuestEngine.js'; +import { saveState } from './SaveState.js'; + +const CERTIFICATIONS = [ + { + id: 'workstation-foundations', + title: 'Axiom Works: Workstation Foundations', + description: 'Basic Linux file system navigation, permissions, and SSH troubleshooting.', + quest_ids: ['Q001', 'Q002'] + }, + { + id: 'service-administration', + title: 'Axiom Works: Service Administration', + description: 'systemd, service debugging, and safe operational recovery.', + quest_ids: ['Q002', 'Q003'] + }, + { + id: 'log-analysis', + title: 'Axiom Works: Log Analysis', + description: 'Reading auth and service logs, finding recurrence, and tracing root cause.', + quest_ids: ['Q003', 'Q006'] + }, + { + id: 'network-basics', + title: 'Axiom Works: Network Basics', + description: 'Service 
exposure, deployment checks, and basic network validation.', + quest_ids: ['Q004', 'Q005'] + }, + { + id: 'security-awareness', + title: 'Axiom Works: Security Awareness', + description: 'Ownership, access controls, and corrective SSH hardening.', + quest_ids: ['Q007'] + } +]; + +export class CertificationService { + constructor({ + bus = eventBus, + email = emailService, + quests = questEngine, + save = saveState + } = {}) { + this.bus = bus; + this.email = email; + this.quests = quests; + this.save = save; + this._handlersBound = false; + this._onQuestCompleted = this._handleQuestCompleted.bind(this); + } + + initialize(state = this.save.get()) { + if (!Array.isArray(state?.certifications)) { + this.save.set({ certifications: [] }); + } + + this._bindHandlers(); + this._evaluateAll(); + } + + dispose() { + if (!this._handlersBound) { + return; + } + + this.bus.off('quest:completed', this._onQuestCompleted); + this._handlersBound = false; + } + + _bindHandlers() { + this.dispose(); + this.bus.on('quest:completed', this._onQuestCompleted); + this._handlersBound = true; + } + + _handleQuestCompleted() { + this._evaluateAll(); + } + + _evaluateAll() { + for (const certification of CERTIFICATIONS) { + if (this._alreadyAwarded(certification.id)) { + continue; + } + + const complete = certification.quest_ids.every((questId) => this.quests.isCompleted(questId)); + if (!complete) { + continue; + } + + this._award(certification); + } + } + + _alreadyAwarded(certificationId) { + return toArray(this.save.get()?.certifications).some((entry) => entry.id === certificationId); + } + + _award(certification) { + const awardedAt = new Date().toISOString(); + const record = { + ...certification, + awarded_at: awardedAt + }; + + this.save.set({ + certifications: [...toArray(this.save.get()?.certifications), record] + }); + + this.email.send({ + id: `mail-cert-${certification.id}-${Date.now()}`, + from: 'HR Bot ', + subject: `Certification awarded: ${certification.title}`, + body: [ + 
`Internal certification granted: ${certification.title}`, + '', + certification.description, + '', + `Completed quest chain: ${certification.quest_ids.join(', ')}` + ].join('\n'), + attachments: [], + replyOptions: [] + }); + + this.bus.emit('certification:awarded', { + id: certification.id, + title: certification.title, + awarded_at: awardedAt + }); + } +} + +export const certificationService = new CertificationService(); diff --git a/server/src/services/CertificationService.test.js b/server/src/services/CertificationService.test.js new file mode 100644 index 0000000..5218109 --- /dev/null +++ b/server/src/services/CertificationService.test.js @@ -0,0 +1,62 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { EventEmitter } from 'node:events'; + +import { CertificationService } from './CertificationService.js'; + +function createSave(initialState) { + let state = structuredClone(initialState); + return { + get() { + return state; + }, + set(partial) { + state = { + ...state, + ...partial + }; + return state; + } + }; +} + +test('CertificationService awards workstation foundations once prerequisite quests are complete', () => { + const bus = new EventEmitter(); + const sent = []; + const save = createSave({ certifications: [] }); + const completed = new Set(); + + const service = new CertificationService({ + bus, + save, + quests: { + isCompleted(questId) { + return completed.has(questId); + } + }, + email: { + send(payload) { + sent.push(payload); + return payload; + } + } + }); + + service.initialize(save.get()); + assert.equal(save.get().certifications.length, 0); + + completed.add('Q001'); + bus.emit('quest:completed', { questId: 'Q001' }); + assert.equal(save.get().certifications.length, 0); + + completed.add('Q002'); + bus.emit('quest:completed', { questId: 'Q002' }); + + assert.equal(save.get().certifications.length, 1); + assert.equal(save.get().certifications[0].id, 'workstation-foundations'); + assert.equal(sent.length, 
1); + + bus.emit('quest:completed', { questId: 'Q002' }); + assert.equal(save.get().certifications.length, 1); + assert.equal(sent.length, 1); +}); diff --git a/server/src/services/ContentLoader.js b/server/src/services/ContentLoader.js new file mode 100644 index 0000000..fd9797d --- /dev/null +++ b/server/src/services/ContentLoader.js @@ -0,0 +1,69 @@ +import path from 'path'; +import { readdir, readFile } from 'fs/promises'; + +class ContentLoader { + constructor() { + this.contentDir = null; + this.tickets = new Map(); + this.quests = new Map(); + this.docs = new Map(); + this.dialogue = new Map(); + this.incidents = new Map(); + this.pressureProfiles = new Map(); + this.vmProfiles = new Map(); + this.trustUnlocks = []; + this.worldFlagsRegistry = {}; + } + + async load() { + this.contentDir = path.resolve(process.cwd(), process.env.CONTENT_DIR ?? '../content'); + this.tickets = await this._loadCollection('tickets'); + this.quests = await this._loadCollection('quests'); + this.docs = await this._loadCollection('docs'); + this.dialogue = await this._loadCollection('dialogue'); + this.incidents = await this._loadCollection('incidents'); + this.pressureProfiles = await this._loadCollection('pressure_profiles'); + this.vmProfiles = await this._loadCollection('vm_profiles'); + this.trustUnlocks = await this._loadJsonFile(path.join(this.contentDir, 'progression', 'trust_unlocks.json')); + this.worldFlagsRegistry = await this._loadJsonFile(path.join(this.contentDir, 'world_flags', 'world_flags.json')); + return this; + } + + get(type, id) { + const collection = this[type]; + if (!(collection instanceof Map)) { + return undefined; + } + return collection.get(id); + } + + async _loadCollection(subdirectory) { + const directory = path.join(this.contentDir, subdirectory); + const entries = await readdir(directory, { withFileTypes: true }); + const map = new Map(); + + for (const entry of entries) { + if (!entry.isFile()) { + continue; + } + + if 
(!entry.name.endsWith('.json') || entry.name.endsWith('.bak') || entry.name.includes('SPLIT_DONE')) { + continue; + } + + const payload = await this._loadJsonFile(path.join(directory, entry.name)); + if (payload?.id) { + map.set(payload.id, payload); + } + } + + return map; + } + + async _loadJsonFile(filePath) { + const raw = await readFile(filePath, 'utf8'); + return JSON.parse(raw); + } +} + +export const contentLoader = new ContentLoader(); diff --git a/server/src/services/EmailService.js b/server/src/services/EmailService.js new file mode 100644 index 0000000..1cbdff4 --- /dev/null +++ b/server/src/services/EmailService.js @@ -0,0 +1,254 @@ +import { eventBus } from '../lib/eventBus.js'; +import { createError } from '../lib/utils.js'; +import { contentLoader } from './ContentLoader.js'; +import { saveState } from './SaveState.js'; + +const CHARACTER_EMAILS = { + marcus: 'Marcus Webb ', + sarah: 'Sarah Chen ', + priya: 'Priya Nair ', + alex: 'Alex Mercer ', + dave: 'Dave Okonkwo ', + monitoring: 'Monitoring ' +}; + +class EmailService { + constructor() { + this._mail = []; + } + + initialize(state) { + this._mail = Array.isArray(state.mail) ? state.mail.map((mail) => this._normalizeMail(mail)) : []; + + if (this._mail.length === 0) { + this.send({ + id: 'mail-T001-initial', + from: 'Marcus Webb ', + subject: 'Your workstation access', + body: 'Hey, welcome to the team. HR said you started today so I got you set up with an account on ares. The provisioning script runs automatically but it does not handle SSH keys \u2014 you will need to add yours manually. Your public key should be in the onboarding doc. 
Let me know if you get stuck.\n\n\u2014 Marcus', + attachments: ['docs/onboarding.json'], + replyOptions: [ + { label: 'Got it, I\'ll get that sorted.', dialogue_node: 'marcus-Q001-reply-a' }, + { label: 'Where do I find the onboarding doc?', dialogue_node: 'marcus-Q001-reply-b' } + ] + }); + } + } + + getAll() { + return this._mail.map((mail) => ({ + id: mail.id, + from: mail.from, + subject: mail.subject, + timestamp: mail.timestamp, + read: mail.read, + replied: mail.replied + })); + } + + getById(id) { + return this._mail.find((mail) => mail.id === id) ?? null; + } + + markRead(id) { + const mail = this.getById(id); + if (!mail) { + throw createError(`Unknown mail: ${id}`, 404); + } + + mail.read = true; + this._persist(); + } + + send({ id, from, subject, body, attachments = [], replyOptions = [] }) { + const record = this._normalizeMail({ + id, + from, + subject, + body, + attachments, + reply_options: replyOptions, + read: false, + replied: false, + timestamp: new Date().toISOString() + }); + + this._mail.push(record); + this._persist(); + eventBus.emit('mail:new', { id, from, subject }); + return record; + } + + reply(mailId, choiceIndex) { + const mail = this.getById(mailId); + if (!mail) { + throw createError(`Unknown mail: ${mailId}`, 404); + } + + const choices = mail.reply_options ?? 
[]; + if (!Number.isInteger(choiceIndex) || choiceIndex < 0 || choiceIndex >= choices.length) { + throw createError('Invalid reply choice', 400); + } + + mail.replied = true; + const selectedChoice = choices[choiceIndex]; + const responseBody = this._resolveDialogueBody(selectedChoice.dialogue_node, mail, selectedChoice); + this._persist(); + + this.send({ + id: `${mailId}-reply-${choiceIndex}`, + from: mail.from, + subject: `Re: ${mail.subject}`, + body: responseBody, + attachments: [], + replyOptions: [] + }); + + return { ok: true }; + } + + sendDialogueFollowUp(dialogueNodeId, options = {}) { + const resolved = this._resolveDialogueMessage(dialogueNodeId); + if (!resolved?.body) { + return null; + } + + const { + questId = resolved.questId, + ticketId = resolved.questId ? contentLoader.get('quests', resolved.questId)?.ticket_id : null, + subjectPrefix = 'Follow-up', + idPrefix = 'mail-followup' + } = options; + + const from = CHARACTER_EMAILS[resolved.character] ?? CHARACTER_EMAILS.monitoring; + const subject = ticketId + ? `${subjectPrefix}: ${ticketId}` + : `${subjectPrefix}: ${dialogueNodeId}`; + + return this.send({ + id: `${idPrefix}-${dialogueNodeId}-${Date.now()}`, + from, + subject, + body: resolved.body, + attachments: [], + replyOptions: [] + }); + } + + _persist() { + saveState.set({ mail: this._mail }); + } + + _normalizeMail(mail) { + return { + ...mail, + attachments: [...(mail.attachments ?? [])], + reply_options: [...(mail.reply_options ?? mail.replyOptions ?? [])], + read: Boolean(mail.read), + replied: Boolean(mail.replied), + timestamp: mail.timestamp ?? new Date().toISOString() + }; + } + + _resolveDialogueBody(dialogueNode, mail, choice) { + const resolvedMessage = this._resolveDialogueMessage(dialogueNode, mail); + if (resolvedMessage?.body) { + return resolvedMessage.body; + } + + return choice?.label + ? 
`Noted.\n\n${choice.label}` + : 'Noted.'; + } + + _resolveDialogueMessage(dialogueNode, mail = null) { + const directMatch = contentLoader.get('dialogue', dialogueNode); + if (directMatch?.body) { + return { + body: directMatch.body, + character: directMatch.character ?? null, + questId: directMatch.quest_id ?? null + }; + } + + const parsed = this._parseDialogueNodeReference(dialogueNode); + if (parsed) { + const dialogue = contentLoader.get('dialogue', parsed.baseId); + const message = dialogue?.messages?.find((entry) => entry.stage === parsed.stage); + if (message?.body) { + return { + body: message.body, + character: dialogue.character ?? null, + questId: dialogue.quest_id ?? null + }; + } + } + + const fallbackMatch = this._findDialogueFallback(dialogueNode, mail); + if (fallbackMatch?.dialogue?.body) { + return { + body: fallbackMatch.dialogue.body, + character: fallbackMatch.dialogue.character ?? null, + questId: fallbackMatch.dialogue.quest_id ?? null + }; + } + + if (Array.isArray(fallbackMatch?.dialogue?.messages) && fallbackMatch.dialogue.messages.length > 0) { + return { + body: fallbackMatch.dialogue.messages[0].body, + character: fallbackMatch.dialogue.character ?? null, + questId: fallbackMatch.dialogue.quest_id ?? null + }; + } + + return null; + } + + _findDialogueFallback(dialogueNode, mail) { + const parsed = this._parseDialogueNodeReference(dialogueNode); + const baseId = parsed?.baseId ?? dialogueNode.replace(/-reply-[a-z0-9]+$/i, ''); + const candidates = [baseId]; + const ticketMatch = baseId.match(/^([^-]+)-T(\d{3})$/i); + + if (ticketMatch) { + const [, character, ticketNumber] = ticketMatch; + const ticket = contentLoader.get('tickets', `T${ticketNumber}`); + if (ticket?.linked_quest) { + candidates.push(`${character}-${ticket.linked_quest}`); + } + } + + candidates.push(mail?.from?.toLowerCase().includes('marcus') ? 
'marcus-Q001' : null); + + for (const candidate of candidates) { + if (!candidate) { + continue; + } + + const dialogue = contentLoader.get('dialogue', candidate); + if (dialogue) { + return { dialogue, baseId: candidate }; + } + } + + return null; + } + + _parseDialogueNodeReference(dialogueNode) { + if (typeof dialogueNode !== 'string') { + return null; + } + + const match = dialogueNode.match(/^([^-]+-Q\d{3})-(.+)$/i); + if (!match) { + return null; + } + + return { + baseId: match[1], + stage: match[2] + }; + } +} + +export const emailService = new EmailService(); diff --git a/server/src/services/EndingEvaluator.js b/server/src/services/EndingEvaluator.js new file mode 100644 index 0000000..2640cd7 --- /dev/null +++ b/server/src/services/EndingEvaluator.js @@ -0,0 +1,55 @@ +import { eventBus } from '../lib/eventBus.js'; +import { saveState } from './SaveState.js'; +import { behaviorTracker } from './BehaviorTracker.js'; +import { hiddenHookTracker } from './HiddenHookTracker.js'; +import { narrativePhaseTracker } from './NarrativePhaseTracker.js'; + +const PHASE_RANK = { + normal_work: 0, unease: 1, suspicion: 2, + investigation: 3, conflict: 4, resolution: 5 +}; + +class EndingEvaluator { + evaluate() { + const state = saveState.get() ?? {}; + const trust = Number(state.trust ?? 50); + const { curiosity, obedience, risk } = behaviorTracker.getSnapshot(); + const hooksDiscovered = hiddenHookTracker.getDiscovered().length; + const phaseRank = PHASE_RANK[narrativePhaseTracker.getPhase()] ?? 
0; + + const candidates = []; + + // exposure: investigative player who found hidden hooks + if (curiosity >= 65 && hooksDiscovered >= 2 && phaseRank >= PHASE_RANK.investigation) { + candidates.push('exposure'); + } + // chaos: high risk, low trust + if (risk >= 65 && trust <= 40) { + candidates.push('chaos'); + } + // corporate_loop: compliant, trusted, incurious + if (obedience >= 65 && curiosity <= 40 && trust >= 65) { + candidates.push('corporate_loop'); + } + // burnout: passive disengagement + if (curiosity <= 35 && obedience <= 40) { + candidates.push('burnout'); + } + + // Priority: exposure > chaos > corporate_loop > burnout + const priority = ['exposure', 'chaos', 'corporate_loop', 'burnout']; + const active = priority.find((e) => candidates.includes(e)) ?? 'undetermined'; + + return { active, candidates }; + } + + checkTrigger() { + const result = this.evaluate(); + if (narrativePhaseTracker.getPhase() === 'resolution' && result.active !== 'undetermined') { + eventBus.emit('ending:triggered', { ending: result.active }); + } + return result; + } +} + +export const endingEvaluator = new EndingEvaluator(); diff --git a/server/src/services/EndingEvaluator.test.js b/server/src/services/EndingEvaluator.test.js new file mode 100644 index 0000000..0ec9ea1 --- /dev/null +++ b/server/src/services/EndingEvaluator.test.js @@ -0,0 +1,116 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'os'; +import path from 'path'; + +import { behaviorTracker } from './BehaviorTracker.js'; +import { narrativePhaseTracker } from './NarrativePhaseTracker.js'; +import { hiddenHookTracker } from './HiddenHookTracker.js'; +import { saveState } from './SaveState.js'; +import { endingEvaluator } from './EndingEvaluator.js'; +import { eventBus } from '../lib/eventBus.js'; + +async function isolateSaveState(testId) { + await saveState._writeQueue.catch(() => {}); + process.env.SAVE_DIR = path.join(os.tmpdir(), `sc-ee-test-${testId}-${Date.now()}`); 
+ saveState._savePath = null; + saveState._state = null; + saveState._writeQueue = Promise.resolve(); +} + +function resetAll({ + curiosity = 50, + obedience = 50, + risk = 50, + suspicion = 0, + trust = 50, + phase = 'normal_work', + hooks = [] +} = {}) { + behaviorTracker.initialize({ behavior: { curiosity, obedience, risk, suspicion } }); + narrativePhaseTracker.initialize({ narrative_phase: phase }); + hiddenHookTracker.initialize({ hidden_hooks_discovered: hooks }); + saveState.set({ trust }); +} + +test('default state returns undetermined', async () => { + await isolateSaveState('default'); + resetAll(); + + assert.deepEqual(endingEvaluator.evaluate(), { active: 'undetermined', candidates: [] }); +}); + +test('exposure ending', async () => { + await isolateSaveState('exposure'); + resetAll({ curiosity: 70, phase: 'investigation', hooks: ['h1', 'h2'] }); + + const result = endingEvaluator.evaluate(); + + assert.equal(result.active, 'exposure'); + assert.ok(result.candidates.includes('exposure')); +}); + +test('chaos ending', async () => { + await isolateSaveState('chaos'); + resetAll({ risk: 70, trust: 30 }); + + assert.equal(endingEvaluator.evaluate().active, 'chaos'); +}); + +test('corporate_loop ending', async () => { + await isolateSaveState('corporate-loop'); + resetAll({ obedience: 70, curiosity: 35, trust: 70 }); + + assert.equal(endingEvaluator.evaluate().active, 'corporate_loop'); +}); + +test('burnout ending', async () => { + await isolateSaveState('burnout'); + resetAll({ curiosity: 30, obedience: 35 }); + + assert.equal(endingEvaluator.evaluate().active, 'burnout'); +}); + +test('exposure takes priority over chaos', async () => { + await isolateSaveState('exposure-priority'); + resetAll({ curiosity: 70, risk: 70, trust: 30, phase: 'investigation', hooks: ['h1', 'h2'] }); + + const result = endingEvaluator.evaluate(); + + assert.equal(result.active, 'exposure'); + assert.ok(result.candidates.includes('exposure')); + 
assert.ok(result.candidates.includes('chaos')); +}); + +test('checkTrigger at resolution emits ending:triggered', async () => { + await isolateSaveState('trigger-resolution'); + resetAll({ curiosity: 70, phase: 'resolution', hooks: ['h1', 'h2'] }); + + let event; + eventBus.once('ending:triggered', (payload) => { + event = payload; + }); + + endingEvaluator.checkTrigger(); + + assert.deepEqual(event, { ending: 'exposure' }); +}); + +test('checkTrigger NOT at resolution does NOT emit', async () => { + await isolateSaveState('trigger-not-resolution'); + resetAll({ curiosity: 70, phase: 'investigation', hooks: ['h1', 'h2'] }); + + let called = false; + function fn() { + called = true; + } + + eventBus.on('ending:triggered', fn); + try { + endingEvaluator.checkTrigger(); + } finally { + eventBus.removeListener('ending:triggered', fn); + } + + assert.equal(called, false); +}); diff --git a/server/src/services/HiddenHookTracker.js b/server/src/services/HiddenHookTracker.js new file mode 100644 index 0000000..b4562a1 --- /dev/null +++ b/server/src/services/HiddenHookTracker.js @@ -0,0 +1,34 @@ +import { eventBus } from '../lib/eventBus.js'; +import { saveState } from './SaveState.js'; + +class HiddenHookTracker { + constructor() { + this._discovered = new Set(); + } + + initialize(state) { + const saved = state?.hidden_hooks_discovered; + this._discovered = new Set(Array.isArray(saved) ? 
saved : []); + } + + discover(hookId) { + if (this._discovered.has(hookId)) return; + this._discovered.add(hookId); + this._persist(); + eventBus.emit('hidden_hook:discovered', { hookId }); + } + + isDiscovered(hookId) { + return this._discovered.has(hookId); + } + + getDiscovered() { + return [...this._discovered].sort(); + } + + _persist() { + saveState.set({ hidden_hooks_discovered: this.getDiscovered() }); + } +} + +export const hiddenHookTracker = new HiddenHookTracker(); diff --git a/server/src/services/HiddenHookTracker.test.js b/server/src/services/HiddenHookTracker.test.js new file mode 100644 index 0000000..750e3e5 --- /dev/null +++ b/server/src/services/HiddenHookTracker.test.js @@ -0,0 +1,56 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'os'; +import path from 'path'; + +import { hiddenHookTracker } from './HiddenHookTracker.js'; +import { saveState } from './SaveState.js'; + +async function isolateSaveState(testId) { + await saveState._writeQueue.catch(() => {}); + process.env.SAVE_DIR = path.join(os.tmpdir(), `sc-test-${testId}-${Date.now()}`); + saveState._savePath = null; + saveState._state = null; + saveState._writeQueue = Promise.resolve(); +} + +test('initialize restores discovered hooks', async () => { + await isolateSaveState('hooks-restore'); + hiddenHookTracker.initialize({ hidden_hooks_discovered: ['hook_a', 'hook_b'] }); + + assert.equal(hiddenHookTracker.isDiscovered('hook_a'), true); + assert.equal(hiddenHookTracker.isDiscovered('hook_c'), false); +}); + +test('initialize with no saved hooks starts empty', async () => { + await isolateSaveState('hooks-empty'); + hiddenHookTracker.initialize({}); + + assert.deepEqual(hiddenHookTracker.getDiscovered(), []); +}); + +test('discover adds a new hook', async () => { + await isolateSaveState('hooks-discover'); + hiddenHookTracker.initialize({}); + + hiddenHookTracker.discover('hook_x'); + + assert.equal(hiddenHookTracker.isDiscovered('hook_x'), true); 
+}); + +test('discover is idempotent', async () => { + await isolateSaveState('hooks-idempotent'); + hiddenHookTracker.initialize({}); + + hiddenHookTracker.discover('hook_y'); + hiddenHookTracker.discover('hook_y'); + + assert.equal(hiddenHookTracker.getDiscovered().length, 1); +}); + +test('getDiscovered returns sorted array', async () => { + await isolateSaveState('hooks-sorted'); + hiddenHookTracker.initialize({ hidden_hooks_discovered: ['zz', 'aa', 'mm'] }); + + assert.deepEqual(hiddenHookTracker.getDiscovered(), ['aa', 'mm', 'zz']); +}); diff --git a/server/src/services/IncidentScheduler.js b/server/src/services/IncidentScheduler.js new file mode 100644 index 0000000..da99e49 --- /dev/null +++ b/server/src/services/IncidentScheduler.js @@ -0,0 +1,368 @@ +import { eventBus } from '../lib/eventBus.js'; +import { toArray, normalizeWorldFlag } from '../lib/utils.js'; +import { contentLoader } from './ContentLoader.js'; +import { emailService } from './EmailService.js'; +import { questEngine } from './QuestEngine.js'; +import { saveState } from './SaveState.js'; +import { ticketService } from './TicketService.js'; +import { validationEngine } from './ValidationEngine.js'; +import { narrativePhaseTracker } from './NarrativePhaseTracker.js'; + +const DEFAULT_TICK_SECONDS = Number(process.env.INCIDENT_TICK_SECONDS ?? 
30); +const ALERT_SENDER = 'Monitoring '; + +export class IncidentScheduler { + constructor({ + loader = contentLoader, + email = emailService, + quests = questEngine, + save = saveState, + tickets = ticketService, + validator = validationEngine, + phaseTracker = narrativePhaseTracker, + now = () => Date.now(), + tickSeconds = DEFAULT_TICK_SECONDS + } = {}) { + this.loader = loader; + this.email = email; + this.quests = quests; + this.save = save; + this.tickets = tickets; + this.validator = validator; + this.phaseTracker = phaseTracker; + this.now = now; + this.tickSeconds = tickSeconds; + this._interval = null; + } + + start() { + this.stop(); + this.tick().catch((error) => { + console.error('Incident scheduler tick failed:', error); + }); + this._interval = setInterval(() => { + this.tick().catch((error) => { + console.error('Incident scheduler tick failed:', error); + }); + }, this.tickSeconds * 1000).unref(); + } + + stop() { + if (this._interval) { + clearInterval(this._interval); + this._interval = null; + } + } + + async tick() { + this._ensureStateContainers(); + await this._processPressureProfiles(); + await this._processGlobalPressureProfiles(); + await this._processRecurringIncidents(); + } + + _ensureStateContainers() { + const state = this.save.get(); + if (!state.pressure || !state.incidents) { + this.save.set({ + pressure: state.pressure ?? {}, + incidents: state.incidents ?? {} + }); + } + } + + async _processPressureProfiles() { + const state = this.save.get(); + const trackers = { ...(state.pressure ?? {}) }; + let changed = false; + + for (const [questId, entry] of this.quests.getAllEntries()) { + if (entry?.state !== 'active') { + continue; + } + + const quest = this.loader.get('quests', questId); + const profileId = quest?.pressure_profile; + if (!profileId) { + continue; + } + + const profile = this.loader.get('pressureProfiles', profileId); + if (!profile) { + continue; + } + + const tracker = trackers[questId] ?? 
{ + profile_id: profileId, + started_at: entry.started_at ?? new Date(this.now()).toISOString(), + fired_step_indexes: [] + }; + // Persist a newly created tracker even if no step fires on this tick; otherwise a fallback started_at is regenerated on every tick and elapsed time stays at 0. + if (!trackers[questId]) changed = true; + + const elapsedSeconds = this._elapsedSeconds(tracker.started_at); + for (const [index, step] of toArray(profile.escalation_steps).entries()) { + const threshold = Number(step.trigger_after_seconds ?? step.after_seconds ?? -1); + if (threshold < 0 || tracker.fired_step_indexes.includes(index) || elapsedSeconds < threshold) { + continue; + } + + await this._applyPressureStep({ + quest, + profile, + questId, + step, + stepIndex: index + }); + tracker.fired_step_indexes = [...tracker.fired_step_indexes, index]; + changed = true; + } + + trackers[questId] = tracker; + } + + if (changed) { + this.save.set({ pressure: trackers }); + } + } + + async _processGlobalPressureProfiles() { + if (!(this.loader.pressureProfiles instanceof Map)) return; + + const currentPhase = this.phaseTracker.getPhase(); + const state = this.save.get(); + const trackers = { ...(state.pressure ?? {}) }; + let changed = false; + + for (const profile of this.loader.pressureProfiles.values()) { + if (!profile.trigger_phase || profile.trigger_phase !== currentPhase) continue; + + const profileId = profile.id; + const tracker = trackers[profileId] ?? { + profile_id: profileId, + started_at: new Date(this.now()).toISOString(), + fired_step_indexes: [] + }; + // Persist a newly created tracker even if no step fires on this tick; its started_at is "now", so without saving it every tick would restart the clock and steps with a positive threshold would never fire. + if (!trackers[profileId]) changed = true; + + const elapsedSeconds = this._elapsedSeconds(tracker.started_at); + for (const [index, step] of toArray(profile.escalation_steps).entries()) { + const threshold = Number(step.trigger_after_seconds ?? step.after_seconds ?? 
-1); + if (threshold < 0 || tracker.fired_step_indexes.includes(index) || elapsedSeconds < threshold) { + continue; + } + + await this._applyGlobalPressureStep({ profile, profileId, step, stepIndex: index }); + tracker.fired_step_indexes = [...tracker.fired_step_indexes, index]; + changed = true; + } + + trackers[profileId] = tracker; + } + + if (changed) { + this.save.set({ pressure: trackers }); + } + } + + async _processRecurringIncidents() { + const state = this.save.get(); + const activeIncidents = { ...(state.incidents ?? {}) }; + let changed = false; + + for (const incident of this.loader.incidents.values()) { + const entry = activeIncidents[incident.id]; + const triggered = this._incidentTriggered(incident, state.world_flags ?? []); + + if (!entry && triggered) { + activeIncidents[incident.id] = { + status: 'active', + started_at: new Date(this.now()).toISOString(), + fired_step_indexes: [] + }; + changed = true; + + if (incident.notification) { + this._sendAlert({ + idPrefix: `incident-${incident.id}`, + subject: incident.title, + message: incident.notification, + severity: incident.notification_severity ?? 'warning' + }); + } + } + + const nextEntry = activeIncidents[incident.id]; + if (!nextEntry || nextEntry.status !== 'active') { + continue; + } + + const elapsedSeconds = this._elapsedSeconds(nextEntry.started_at); + for (const [index, step] of toArray(incident.escalation_steps).entries()) { + const threshold = Number(step.trigger_after_seconds ?? step.after_seconds ?? -1); + if (threshold < 0 || nextEntry.fired_step_indexes.includes(index) || elapsedSeconds < threshold) { + continue; + } + + await this._applyIncidentStep({ + incident, + step, + stepIndex: index + }); + nextEntry.fired_step_indexes = [...nextEntry.fired_step_indexes, index]; + changed = true; + } + + if (await this._incidentResolved(incident)) { + const resolution = incident.resolution_requirements ?? 
{}; + activeIncidents[incident.id] = { + ...nextEntry, + status: 'resolved', + resolved_at: new Date(this.now()).toISOString() + }; + this._applyWorldFlagMutation({ + clearFlag: resolution.clear_flag, + setFlag: resolution.set_flag + }); + changed = true; + } + } + + if (changed) { + this.save.set({ incidents: activeIncidents }); + } + } + + async _applyPressureStep({ quest, profile, questId, step, stepIndex }) { + if (step.notification) { + this._sendAlert({ + idPrefix: `pressure-${questId}-${stepIndex}`, + subject: `${quest.ticket_id ?? questId} escalation`, + message: step.notification, + severity: step.notification_severity ?? 'warning' + }); + } + + if (step.escalate_linked_ticket && quest.ticket_id) { + this.tickets.setPriority(quest.ticket_id, step.escalate_linked_ticket); + } + } + + async _applyGlobalPressureStep({ profile, profileId, step, stepIndex }) { + if (!step.notification) return; + + const from = step.sender ?? ALERT_SENDER; + const subject = step.subject ?? profile.label ?? profileId; + + const mail = this.email.send({ + id: `global-pressure-${profileId}-${stepIndex}-${this.now()}`, + from, + subject, + body: step.notification, + attachments: [], + replyOptions: [] + }); + + eventBus.emit('incident:alert', { + id: mail.id, + severity: step.notification_severity ?? 'info', + message: step.notification, + subject + }); + } + + async _applyIncidentStep({ incident, step, stepIndex }) { + if (step.notification) { + this._sendAlert({ + idPrefix: `incident-${incident.id}-${stepIndex}`, + subject: incident.title, + message: step.notification, + severity: step.notification_severity ?? 
'warning' + }); + } + + if (Array.isArray(step.world_flags) && step.world_flags.length > 0) { + this._applyWorldFlagMutation({ setFlags: step.world_flags }); + } + + for (const escalation of toArray(step.escalates_tickets)) { + if (escalation.ticket_id && escalation.new_priority) { + this.tickets.setPriority(escalation.ticket_id, escalation.new_priority); + } + } + + if (step.action === 'raise_ticket_priority' && step.ticket_id && step.value) { + this.tickets.setPriority(step.ticket_id, step.value); + } + + if (step.action === 'trigger_new_ticket' && step.ticket_id) { + try { + this.tickets.activateTicket(step.ticket_id); + } catch { + // Ignore missing authored recurrence tickets for now. + } + } + } + + async _incidentResolved(incident) { + const rule = incident.resolution_requirements?.validation; + if (!rule) { + return false; + } + + const result = await this.validator.evaluateRule(rule); + return result.passed; + } + + _incidentTriggered(incident, worldFlags) { + const flags = new Set(worldFlags ?? []); + const conditions = [ + ...toArray(incident.trigger_flags), + ...toArray(incident.trigger_conditions) + ]; + + if (conditions.length === 0) { + return false; + } + + return conditions.every((condition) => flags.has(normalizeWorldFlag(condition))); + } + + _applyWorldFlagMutation({ clearFlag = null, setFlag = null, setFlags = [] } = {}) { + const nextFlags = new Set(this.save.get()?.world_flags ?? 
[]); + if (clearFlag) { + nextFlags.delete(clearFlag); + } + + if (setFlag) { + nextFlags.add(setFlag); + } + + for (const flag of setFlags) { + nextFlags.add(flag); + } + + this.save.set({ world_flags: [...nextFlags] }); + } + + // Sends an alert mail from ALERT_SENDER and emits 'incident:alert' with the mail id. The id suffix comes from the injected clock (this.now) so ids are deterministic when a test clock is supplied. + _sendAlert({ idPrefix, subject, message, severity }) { + const mail = this.email.send({ + id: `${idPrefix}-${this.now()}`, + from: ALERT_SENDER, + subject: `[${String(severity).toUpperCase()}] ${subject}`, + body: message, + attachments: [], + replyOptions: [] + }); + + eventBus.emit('incident:alert', { + id: mail.id, + severity, + message, + subject + }); + } + + _elapsedSeconds(startedAt) { + return Math.max(0, Math.floor((this.now() - new Date(startedAt).getTime()) / 1000)); + } +} + +export const incidentScheduler = new IncidentScheduler(); diff --git a/server/src/services/IncidentScheduler.test.js b/server/src/services/IncidentScheduler.test.js new file mode 100644 index 0000000..576594f --- /dev/null +++ b/server/src/services/IncidentScheduler.test.js @@ -0,0 +1,96 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { IncidentScheduler } from './IncidentScheduler.js'; + +function createMemorySave(state) { + return { + state: structuredClone(state), + get() { + return this.state; + }, + set(partial) { + const nextState = { ...this.state }; + for (const [key, value] of Object.entries(partial)) { + if (value && typeof value === 'object' && !Array.isArray(value) && nextState[key] && typeof nextState[key] === 'object' && !Array.isArray(nextState[key])) { + nextState[key] = { ...nextState[key], ...structuredClone(value) }; + } else { + nextState[key] = structuredClone(value); + } + } + this.state = nextState; + return this.state; + } + }; +} + +test('IncidentScheduler escalates active quest pressure and raises linked ticket priority', async () => { + const sentMail = []; + const priorityUpdates = []; + const save = createMemorySave({ + shift_started_at: '2026-04-25T11:00:00Z', + world_flags: [], + 
pressure: {}, + incidents: {} + }); + + const scheduler = new IncidentScheduler({ + loader: { + incidents: new Map(), + get(type, id) { + if (type === 'quests') { + return { + id, + ticket_id: 'T002', + pressure_profile: 'web_outage_escalation' + }; + } + + if (type === 'pressureProfiles') { + return { + id: 'web_outage_escalation', + escalation_steps: [ + { trigger_after_seconds: 900, notification: 'Hermes is still showing errors.', notification_severity: 'warning' }, + { trigger_after_seconds: 1800, notification: 'Priority is going up.', notification_severity: 'warning', escalate_linked_ticket: 'high' } + ] + }; + } + + return null; + } + }, + email: { + send(payload) { + sentMail.push(payload); + return payload; + } + }, + quests: { + getAllEntries() { + return [[ + 'Q002', + { state: 'active', started_at: '2026-04-25T11:30:00Z' } + ]]; + } + }, + save, + tickets: { + setPriority(ticketId, value) { + priorityUpdates.push({ ticketId, value }); + } + }, + validator: { + async evaluateRule() { + return { passed: false, failures: ['not-applicable'] }; + } + }, + now: () => new Date('2026-04-25T12:05:00Z').getTime(), + tickSeconds: 30 + }); + + await scheduler.tick(); + + assert.equal(sentMail.length, 2); + assert.deepEqual(priorityUpdates, [{ ticketId: 'T002', value: 'high' }]); + assert.deepEqual(save.get().pressure.Q002.fired_step_indexes, [0, 1]); +}); diff --git a/server/src/services/IncidentSchedulerPhasePressure.test.js b/server/src/services/IncidentSchedulerPhasePressure.test.js new file mode 100644 index 0000000..1cb1b99 --- /dev/null +++ b/server/src/services/IncidentSchedulerPhasePressure.test.js @@ -0,0 +1,190 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { IncidentScheduler } from './IncidentScheduler.js'; + +function createMemorySave(state) { + return { + state: structuredClone(state), + get() { + return this.state; + }, + set(partial) { + const nextState = { ...this.state }; + for (const [key, value] of 
Object.entries(partial)) { + if (value && typeof value === 'object' && !Array.isArray(value) && nextState[key] && typeof nextState[key] === 'object' && !Array.isArray(nextState[key])) { + nextState[key] = { ...nextState[key], ...structuredClone(value) }; + } else { + nextState[key] = structuredClone(value); + } + } + this.state = nextState; + return this.state; + } + }; +} + +function createScheduler({ phase = 'unease', quests = [] } = {}) { + const sentMail = []; + const save = createMemorySave({ + shift_started_at: '2026-04-25T11:00:00Z', + world_flags: [], + pressure: {}, + incidents: {} + }); + + const scheduler = new IncidentScheduler({ + loader: { + pressureProfiles: new Map([ + ['kowalski_phase_2', { + id: 'kowalski_phase_2', + trigger_phase: 'unease', + label: 'Dave Kowalski — Phase 2', + escalation_steps: [ + { + trigger_after_seconds: 0, + notification: 'Phase pressure test message.', + notification_severity: 'info', + sender: 'Dave Kowalski ', + subject: 'Phase pressure test' + } + ] + }] + ]), + incidents: new Map(), + get(type, id) { + if (type === 'quests') { + return { + id, + ticket_id: 'T099' + }; + } + + return null; + } + }, + email: { + send(payload) { + sentMail.push(payload); + return payload; + } + }, + quests: { + getAllEntries() { + return quests; + } + }, + save, + phaseTracker: { + getPhase() { + return phase; + } + }, + tickets: { + setPriority() {} + }, + validator: { + async evaluateRule() { + return { passed: false, failures: ['not-applicable'] }; + } + }, + now: () => new Date('2026-04-25T12:05:00Z').getTime(), + tickSeconds: 30 + }); + + return { scheduler, save, sentMail }; +} + +test('IncidentScheduler fires trigger_phase pressure profile when narrative phase matches', async () => { + const { scheduler, save, sentMail } = createScheduler(); + + await scheduler.tick(); + + assert.equal(sentMail.length, 1); + assert.equal(sentMail[0].from, 'Dave Kowalski '); + assert.equal(sentMail[0].subject, 'Phase pressure test'); + 
assert.equal(sentMail[0].body, 'Phase pressure test message.'); + assert.deepEqual(save.get().pressure.kowalski_phase_2.fired_step_indexes, [0]); +}); + +test('IncidentScheduler does not fire trigger_phase pressure profile when narrative phase differs', async () => { + const { scheduler, save, sentMail } = createScheduler({ phase: 'stability' }); + + await scheduler.tick(); + + assert.equal(sentMail.length, 0); + assert.deepEqual(save.get().pressure, {}); +}); + +test('IncidentScheduler trigger_phase pressure profile is independent of active quest pressure_profile fields', async () => { + const { scheduler, save, sentMail } = createScheduler({ + quests: [[ + 'Q099', + { state: 'active', started_at: '2026-04-25T11:30:00Z' } + ]] + }); + + await scheduler.tick(); + + assert.equal(sentMail.length, 1); + assert.equal(sentMail[0].subject, 'Phase pressure test'); + assert.deepEqual(save.get().pressure.kowalski_phase_2.fired_step_indexes, [0]); + assert.equal(save.get().pressure.Q099, undefined); +}); + +test('IncidentScheduler handles missing pressureProfiles map without firing global pressure', async () => { + const sentMail = []; + const scheduler = new IncidentScheduler({ + loader: { + incidents: new Map(), + get() { + return null; + } + }, + email: { + send(payload) { + sentMail.push(payload); + return payload; + } + }, + quests: { + getAllEntries() { + return []; + } + }, + save: createMemorySave({ + shift_started_at: '2026-04-25T11:00:00Z', + world_flags: [], + pressure: {}, + incidents: {} + }), + phaseTracker: { + getPhase() { + return 'unease'; + } + }, + tickets: { + setPriority() {} + }, + validator: { + async evaluateRule() { + return { passed: false, failures: ['not-applicable'] }; + } + }, + now: () => new Date('2026-04-25T12:05:00Z').getTime(), + tickSeconds: 30 + }); + + await assert.doesNotReject(async () => scheduler.tick()); + assert.equal(sentMail.length, 0); +}); + +test('IncidentScheduler only fires a trigger_phase pressure step once', async () => { 
+ const { scheduler, save, sentMail } = createScheduler(); + + await scheduler.tick(); + await scheduler.tick(); + + assert.equal(sentMail.length, 1); + assert.deepEqual(save.get().pressure.kowalski_phase_2.fired_step_indexes, [0]); +}); diff --git a/server/src/services/NarrativePhaseTracker.js b/server/src/services/NarrativePhaseTracker.js new file mode 100644 index 0000000..d6f7295 --- /dev/null +++ b/server/src/services/NarrativePhaseTracker.js @@ -0,0 +1,51 @@ +import { eventBus } from '../lib/eventBus.js'; +import { contentLoader } from './ContentLoader.js'; +import { saveState } from './SaveState.js'; + +const PHASE_ORDER = [ + 'normal_work', 'unease', 'suspicion', 'investigation', 'conflict', 'resolution' +]; + +class NarrativePhaseTracker { + constructor() { + this._phase = 'normal_work'; + } + + initialize(state) { + const saved = state?.narrative_phase; + this._phase = PHASE_ORDER.includes(saved) ? saved : 'normal_work'; + } + + advance(questId) { + const quest = contentLoader.get('quests', questId); + if (!quest?.narrative_phase) return; + + const questRank = PHASE_ORDER.indexOf(quest.narrative_phase); + const currentRank = PHASE_ORDER.indexOf(this._phase); + + if (questRank <= currentRank) return; + + const from = this._phase; + this._phase = quest.narrative_phase; + this._persist(); + eventBus.emit('narrative:phase_changed', { from, to: this._phase }); + } + + getPhase() { + return this._phase; + } + + forcePhase(phase) { + if (!PHASE_ORDER.includes(phase)) return; + const from = this._phase; + this._phase = phase; + this._persist(); + eventBus.emit('narrative:phase_changed', { from, to: this._phase }); + } + + _persist() { + saveState.set({ narrative_phase: this._phase }); + } +} + +export const narrativePhaseTracker = new NarrativePhaseTracker(); diff --git a/server/src/services/NarrativePhaseTracker.test.js b/server/src/services/NarrativePhaseTracker.test.js new file mode 100644 index 0000000..c2dd456 --- /dev/null +++ 
b/server/src/services/NarrativePhaseTracker.test.js @@ -0,0 +1,64 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'os'; +import path from 'path'; + +import { narrativePhaseTracker } from './NarrativePhaseTracker.js'; +import { saveState } from './SaveState.js'; + +async function isolateSaveState(testId) { + await saveState._writeQueue.catch(() => {}); + process.env.SAVE_DIR = path.join(os.tmpdir(), `sc-test-${testId}-${Date.now()}`); + saveState._savePath = null; + saveState._state = null; + saveState._writeQueue = Promise.resolve(); +} + +test('initialize restores saved phase', async () => { + await isolateSaveState('phase-restore'); + narrativePhaseTracker.initialize({ narrative_phase: 'suspicion' }); + + assert.equal(narrativePhaseTracker.getPhase(), 'suspicion'); +}); + +test('initialize rejects unknown phase', async () => { + await isolateSaveState('phase-unknown'); + narrativePhaseTracker.initialize({ narrative_phase: 'not_a_phase' }); + + assert.equal(narrativePhaseTracker.getPhase(), 'normal_work'); +}); + +test('initialize with no key defaults to normal_work', async () => { + await isolateSaveState('phase-default'); + narrativePhaseTracker.initialize({}); + + assert.equal(narrativePhaseTracker.getPhase(), 'normal_work'); +}); + +test('forcePhase sets a valid phase', async () => { + await isolateSaveState('phase-force-valid'); + narrativePhaseTracker.initialize({ narrative_phase: 'normal_work' }); + + narrativePhaseTracker.forcePhase('investigation'); + + assert.equal(narrativePhaseTracker.getPhase(), 'investigation'); +}); + +test('forcePhase ignores an invalid phase', async () => { + await isolateSaveState('phase-force-invalid'); + narrativePhaseTracker.initialize({ narrative_phase: 'unease' }); + + narrativePhaseTracker.forcePhase('not_valid'); + + assert.equal(narrativePhaseTracker.getPhase(), 'unease'); +}); + +test('advance with unknown quest is a no-op', async () => { + await 
isolateSaveState('phase-advance-unknown'); + narrativePhaseTracker.initialize({ narrative_phase: 'unease' }); + + // ContentLoader.quests is empty in this test context because load() is never called. + narrativePhaseTracker.advance('nonexistent-quest-id'); + + assert.equal(narrativePhaseTracker.getPhase(), 'unease'); +}); diff --git a/server/src/services/ProgressionSystem.js b/server/src/services/ProgressionSystem.js new file mode 100644 index 0000000..6822119 --- /dev/null +++ b/server/src/services/ProgressionSystem.js @@ -0,0 +1,113 @@ +import { eventBus } from '../lib/eventBus.js'; +import { saveState } from './SaveState.js'; + +class ProgressionSystem { + constructor() { + this._access = new Set(); + this._vms = new Set(); + this._docs = new Set(); + } + + initialize(state) { + const progression = state.progression ?? {}; + this._access = new Set(progression.unlocked_access ?? []); + this._vms = new Set(progression.unlocked_vms ?? []); + this._docs = new Set(progression.unlocked_docs ?? 
[]); + } + + grantUnlock(unlock) { + let changed = false; + + changed = this._addAll(this._access, unlock.grants_access) || changed; + changed = this._addAll(this._vms, unlock.grants_vms) || changed; + changed = this._addAll(this._docs, unlock.grants_docs) || changed; + + if (changed) { + this._persist(); + eventBus.emit('progression:changed', this._snapshot()); + } + } + + revokeUnlock(unlock) { + let changed = false; + + changed = this._removeAll(this._access, unlock.revokes) || changed; + changed = this._removeAll(this._vms, unlock.revokes_vms) || changed; + + if (changed) { + this._persist(); + eventBus.emit('progression:changed', this._snapshot()); + } + } + + hasDoc(id) { + return this._docs.has(id); + } + + hasVM(id) { + return this._vms.has(id); + } + + hasAccess(key) { + return this._access.has(key); + } + + getAccessLevel() { + if ( + this._access.has('sudo:workstation:full') || + this._access.has('sudo:web_server:full') || + this._access.has('sudo:build_machine:full') + ) { + return 'root'; + } + if ( + this._access.has('sudo:workstation:systemctl') || + this._access.has('ssh:web_server') || + this._access.has('ssh:build_machine') + ) { + return 'sudo'; + } + return 'basic_user'; + } + + _snapshot() { + return { + unlockedDocs: [...this._docs], + unlockedVMs: [...this._vms], + unlockedAccess: [...this._access] + }; + } + + _persist() { + saveState.set({ + progression: { + unlocked_access: [...this._access], + unlocked_vms: [...this._vms], + unlocked_docs: [...this._docs] + } + }); + } + + _addAll(target, values = []) { + let changed = false; + for (const value of values) { + if (!target.has(value)) { + target.add(value); + changed = true; + } + } + return changed; + } + + _removeAll(target, values = []) { + let changed = false; + for (const value of values) { + if (target.delete(value)) { + changed = true; + } + } + return changed; + } +} + +export const progressionSystem = new ProgressionSystem(); diff --git a/server/src/services/QuestEngine.js 
b/server/src/services/QuestEngine.js new file mode 100644 index 0000000..2b9a3af --- /dev/null +++ b/server/src/services/QuestEngine.js @@ -0,0 +1,118 @@ +import { eventBus } from '../lib/eventBus.js'; +import { normalizeWorldFlag } from '../lib/utils.js'; +import { contentLoader } from './ContentLoader.js'; +import { saveState } from './SaveState.js'; +import { narrativePhaseTracker } from './NarrativePhaseTracker.js'; + +class QuestEngine { + constructor() { + this._quests = new Map(); + } + + initialize(state) { + this._quests = new Map( + Object.entries(state.quests ?? {}).map(([questId, entry]) => [questId, this._normalizeEntry(entry)]) + ); + + if (this._quests.size === 0) { + this._activateInitialQuests(); + } + } + + getState(questId) { + return this.getEntry(questId)?.state ?? 'locked'; + } + + getEntry(questId) { + const entry = this._quests.get(questId); + if (!entry) { + return null; + } + + return { ...entry }; + } + + getAllEntries() { + return [...this._quests.entries()].map(([questId, entry]) => [questId, { ...entry }]); + } + + isActive(questId) { + return this.getState(questId) === 'active'; + } + + isCompleted(questId) { + return this.getState(questId) === 'completed'; + } + + canActivate(questId, state = saveState.get()) { + const quest = contentLoader.get('quests', questId); + if (!quest) { + return false; + } + + const requirements = Array.isArray(quest.unlock_requirements) ? quest.unlock_requirements : []; + const worldFlags = new Set(state?.world_flags ?? []); + return requirements.every((requirement) => worldFlags.has(normalizeWorldFlag(requirement))); + } + + activate(questId) { + const currentState = this.getState(questId); + if (currentState === 'active' || currentState === 'completed' || currentState === 'failed') { + return currentState; + } + + const now = new Date().toISOString(); + const existing = this._quests.get(questId); + this._quests.set(questId, { + state: 'active', + started_at: existing?.started_at ?? 
now + }); + this._persist(); + eventBus.emit('quest:activated', { questId }); + return 'active'; + } + + complete(questId, metadata = {}) { + const now = new Date().toISOString(); + const existing = this._quests.get(questId); + + this._quests.set(questId, { + state: 'completed', + started_at: existing?.started_at ?? now, + completed_at: now, + branch_id: metadata.branchId ?? existing?.branch_id ?? null + }); + this._persist(); + narrativePhaseTracker.advance(questId); + eventBus.emit('quest:completed', { questId, branchId: metadata.branchId ?? null }); + return 'completed'; + } + + _activateInitialQuests() { + for (const [questId, quest] of contentLoader.quests) { + const requirements = Array.isArray(quest.unlock_requirements) ? quest.unlock_requirements : []; + if (requirements.length === 0) { + this.activate(questId); + } + } + } + + _persist() { + saveState.set({ quests: Object.fromEntries(this._quests) }); + } + + _normalizeEntry(entry) { + if (typeof entry === 'string') { + return { state: entry }; + } + + return { + state: entry?.state ?? 'locked', + started_at: entry?.started_at ?? null, + completed_at: entry?.completed_at ?? null, + branch_id: entry?.branch_id ?? null + }; + } +} + +export const questEngine = new QuestEngine(); diff --git a/server/src/services/SageService.js b/server/src/services/SageService.js new file mode 100644 index 0000000..1407a82 --- /dev/null +++ b/server/src/services/SageService.js @@ -0,0 +1,196 @@ +import { contentLoader } from './ContentLoader.js'; +import { questEngine } from './QuestEngine.js'; +import { saveState } from './SaveState.js'; +import { ticketService } from './TicketService.js'; + +function normalizeText(value) { + return String(value ?? 
'').trim().toLowerCase(); +} + +function includesAny(text, needles) { + return needles.some((needle) => text.includes(needle)); +} + +export class SageService { + constructor({ + loader = contentLoader, + quests = questEngine, + save = saveState, + tickets = ticketService + } = {}) { + this.loader = loader; + this.quests = quests; + this.save = save; + this.tickets = tickets; + } + + reply(message) { + const text = normalizeText(message); + const activeQuest = this._getPrimaryActiveQuest(); + + if (!activeQuest) { + return { + response: "Nothing urgent is active right now. Check your tickets or mail and ask again once you've got something assigned.", + followUps: ['Show my open tickets', 'What docs do I have access to?'] + }; + } + + if (!text) { + return this._buildQuestIntro(activeQuest); + } + + if (includesAny(text, ['ticket', 'task', 'what am i doing', 'what should i do'])) { + return this._buildTicketSummary(activeQuest); + } + + if (includesAny(text, ['vm', 'server', 'host', 'machine'])) { + return this._buildVmSummary(activeQuest); + } + + if (includesAny(text, ['doc', 'runbook', 'guide', 'manual'])) { + return this._buildDocSummary(activeQuest); + } + + if (includesAny(text, ['help', 'hint', 'stuck', 'clue', 'what now'])) { + return this._buildHint(activeQuest); + } + + if (includesAny(text, ['summary', 'recap', 'remind'])) { + return this._buildQuestIntro(activeQuest); + } + + return { + response: "I can help with the active quest, but I'm not improvising answers yet. Ask for a hint, a summary, the target VM, or which docs are relevant.", + followUps: ['Give me a hint', 'Summarize the task', 'Which VM am I working on?'] + }; + } + + _buildQuestIntro(quest) { + const intro = this._findDialogueMessage(quest.id, ['intro', 'welcome', 'setup']); + return { + response: intro ?? quest.summary ?? quest.description ?? 
`You're working on ${quest.id}.`, + followUps: ['Give me a hint', 'Summarize the task', 'Which VM am I working on?'] + }; + } + + _buildTicketSummary(quest) { + const ticket = quest.ticket_id ? this.loader.get('tickets', quest.ticket_id) : null; + if (!ticket) { + return this._buildQuestIntro(quest); + } + + return { + response: `${ticket.id}: ${ticket.subject}\n\n${ticket.body}`, + followUps: ['Give me a hint', 'Which VM am I working on?', 'Which docs are relevant?'] + }; + } + + _buildVmSummary(quest) { + const ticket = quest.ticket_id ? this.loader.get('tickets', quest.ticket_id) : null; + const targetVm = ticket?.target_vm ?? 'workstation'; + const profile = this.loader.get('vmProfiles', targetVm); + const hostname = profile?.hostname ?? targetVm; + const distro = profile?.distro ?? 'unknown distro'; + + return { + response: `The current target is ${targetVm} (${hostname}). It is authored as ${distro}. Start there unless the ticket explicitly says otherwise.`, + followUps: ['Give me a hint', 'Summarize the task', 'Which docs are relevant?'] + }; + } + + _buildDocSummary(quest) { + const unlockedDocs = new Set(this.save.get()?.progression?.unlocked_docs ?? []); + const docs = [...this.loader.docs.values()] + .filter((doc) => unlockedDocs.has(doc.id)) + .filter((doc) => this._docLooksRelevant(doc, quest)) + .map((doc) => doc.title); + + return { + response: docs.length > 0 + ? `Relevant unlocked docs:\n- ${docs.join('\n- ')}` + : 'You do not currently have an obviously relevant unlocked doc for this quest. Ask for a hint instead.', + followUps: ['Give me a hint', 'Which VM am I working on?', 'Summarize the task'] + }; + } + + _buildHint(quest) { + const sageState = this.save.get()?.sage ?? {}; + const nextHintIndex = Number(sageState.hint_counts?.[quest.id] ?? 
0) + 1; + const stageCandidates = [`hint_${nextHintIndex}`, `hint_${nextHintIndex - 1}`, 'hint_1']; + let hint = null; + let usedIndex = nextHintIndex; + + for (const stage of stageCandidates) { + hint = this._findDialogueMessage(quest.id, [stage]); + if (hint) { + const match = stage.match(/^hint_(\d+)$/); + usedIndex = Number(match?.[1] ?? 1); + break; + } + } + + if (!hint) { + hint = "There isn't another authored hint for this quest yet. Check the ticket body, the target VM, and any unlocked runbooks."; + usedIndex = Math.max(1, nextHintIndex); + } + + const nextHints = { + ...(sageState.hint_counts ?? {}), + [quest.id]: usedIndex + }; + this.save.set({ + sage: { + ...(sageState ?? {}), + hint_counts: nextHints + } + }); + + return { + response: hint, + followUps: ['Another hint', 'Summarize the task', 'Which docs are relevant?'] + }; + } + + _getPrimaryActiveQuest() { + const active = this.quests.getAllEntries() + .filter(([, entry]) => entry?.state === 'active') + .sort(([, left], [, right]) => { + const leftTs = Date.parse(left?.started_at ?? '') || 0; + const rightTs = Date.parse(right?.started_at ?? '') || 0; + return rightTs - leftTs; + }); + + if (active.length === 0) { + return null; + } + + return this.loader.get('quests', active[0][0]) ?? null; + } + + _findDialogueMessage(questId, preferredStages) { + const dialogues = [...this.loader.dialogue.values()] + .filter((dialogue) => dialogue.quest_id === questId); + + for (const stage of preferredStages) { + for (const dialogue of dialogues) { + const message = dialogue.messages?.find((entry) => entry.stage === stage); + if (message?.body) { + return message.body; + } + } + } + + return null; + } + + _docLooksRelevant(doc, quest) { + const text = `${doc.id} ${doc.title} ${doc.body}`.toLowerCase(); + const ticket = quest.ticket_id ? this.loader.get('tickets', quest.ticket_id) : null; + const vm = ticket?.target_vm ?? ''; + const tags = ticket?.tags ?? 
[]; + + return [quest.id.toLowerCase(), vm, ...tags].some((needle) => needle && text.includes(String(needle).toLowerCase())); + } +} + +export const sageService = new SageService(); diff --git a/server/src/services/SageService.test.js b/server/src/services/SageService.test.js new file mode 100644 index 0000000..c3759d5 --- /dev/null +++ b/server/src/services/SageService.test.js @@ -0,0 +1,51 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { contentLoader } from './ContentLoader.js'; +import { questEngine } from './QuestEngine.js'; +import { saveState } from './SaveState.js'; +import { sageService } from './SageService.js'; + +async function bootstrapActiveQ001() { + process.env.CONTENT_DIR = '../content'; + await contentLoader.load(); + saveState._state = { + world_flags: [], + progression: { + unlocked_access: [], + unlocked_vms: [], + unlocked_docs: ['onboarding'] + }, + quests: { + Q001: { + state: 'active', + started_at: '2026-04-25T12:00:00.000Z' + } + }, + tickets: {}, + mail: [], + certifications: [], + pressure: {}, + incidents: {}, + sage: { + hint_counts: {} + } + }; + questEngine.initialize(saveState.get()); +} + +test('SageService returns intro context for active quest', async () => { + await bootstrapActiveQ001(); + const result = sageService.reply('summary'); + assert.match(result.response, /onboarding doc/i); + assert.ok(result.followUps.length > 0); +}); + +test('SageService advances through authored hints for active quest', async () => { + await bootstrapActiveQ001(); + const first = sageService.reply('help'); + const second = sageService.reply('another hint'); + + assert.match(first.response, /\.ssh folder/i); + assert.match(second.response, /permissions matter/i); +}); diff --git a/server/src/services/SaveState.js b/server/src/services/SaveState.js new file mode 100644 index 0000000..110f946 --- /dev/null +++ b/server/src/services/SaveState.js @@ -0,0 +1,183 @@ +import os from 'os'; +import path from 'path'; 
+import { mkdir, readFile, writeFile } from 'fs/promises'; + +class SaveState { + constructor() { + this._state = null; + this._savePath = null; + this._writeQueue = Promise.resolve(); + } + + async load() { + const savePath = this._getSavePath(); + + try { + const raw = await readFile(savePath, 'utf8'); + this._state = this._applyDefaults(JSON.parse(raw)); + return this._state; + } catch (error) { + if (error?.code !== 'ENOENT') { + throw error; + } + + const initialState = this._defaultState(); + this._state = initialState; + await this.write(initialState); + return this._state; + } + } + + async write(state) { + const savePath = this._getSavePath(); + const nextState = this._clone(state ?? this._state ?? this._defaultState()); + + nextState.last_saved = new Date().toISOString(); + if (!nextState.created_at) { + nextState.created_at = nextState.last_saved; + } + + await mkdir(path.dirname(savePath), { recursive: true }); + await writeFile(savePath, `${JSON.stringify(nextState, null, 2)}\n`, 'utf8'); + this._state = nextState; + return this._state; + } + + get() { + return this._state; + } + + set(partial) { + if (!this._state) { + this._state = this._defaultState(); + } + + const nextState = { ...this._state }; + + for (const [key, value] of Object.entries(partial)) { + if (key === 'mail' || key === 'certifications') { + nextState[key] = Array.isArray(value) ? this._clone(value) : value; + continue; + } + + if (this._isPlainObject(value) && this._isPlainObject(nextState[key])) { + nextState[key] = { ...nextState[key], ...value }; + continue; + } + + nextState[key] = Array.isArray(value) ? 
this._clone(value) : value; + } + + this._state = nextState; + this._queueWrite(); + return this._state; + } + + _queueWrite() { + const snapshot = this._clone(this._state); + this._writeQueue = this._writeQueue + .then(() => this.write(snapshot)) + .catch((error) => { + console.error('Failed to persist save state:', error); + }); + return this._writeQueue; + } + + _getSavePath() { + if (!this._savePath) { + const configuredDir = process.env.SAVE_DIR ?? '~/.local/share/sysadmin-chronicles'; + const expandedDir = this._expandHome(configuredDir); + this._savePath = path.join(expandedDir, 'save.json'); + } + + return this._savePath; + } + + _expandHome(value) { + if (value === '~') { + return os.homedir(); + } + + if (value.startsWith('~/')) { + return path.join(os.homedir(), value.slice(2)); + } + + return value; + } + + _defaultState() { + const now = new Date().toISOString(); + return { + schema_version: 3, + created_at: now, + last_saved: now, + trust: 50.0, + shift_number: 1, + shift_started_at: now, + world_flags: [], + progression: { + unlocked_access: [], + unlocked_vms: [], + unlocked_docs: [] + }, + quests: {}, + tickets: {}, + mail: [], + certifications: [], + current_shift_stats: { + assigned_ticket_ids: [], + resolved_tickets: [], + flagged_issues: [] + }, + shift_history: [], + pressure: {}, + incidents: {}, + sage: { + hint_counts: {} + }, + behavior: { curiosity: 50, obedience: 50, risk: 50, suspicion: 0 }, + narrative_phase: 'normal_work', + hidden_hooks_discovered: [], + player_portrait: 'player-silhouette' + }; + } + + _isPlainObject(value) { + return Boolean(value) && typeof value === 'object' && !Array.isArray(value); + } + + _applyDefaults(state) { + return this._mergeWithDefaults(this._defaultState(), state ?? {}); + } + + _mergeWithDefaults(defaults, value) { + if (Array.isArray(defaults)) { + return Array.isArray(value) ? this._clone(value) : this._clone(defaults); + } + + if (!this._isPlainObject(defaults)) { + return value ?? 
defaults; + } + + const merged = {}; + const source = this._isPlainObject(value) ? value : {}; + + for (const [key, defaultValue] of Object.entries(defaults)) { + merged[key] = this._mergeWithDefaults(defaultValue, source[key]); + } + + for (const [key, sourceValue] of Object.entries(source)) { + if (!(key in merged)) { + merged[key] = Array.isArray(sourceValue) ? this._clone(sourceValue) : sourceValue; + } + } + + return merged; + } + + _clone(value) { + return JSON.parse(JSON.stringify(value)); + } +} + +export const saveState = new SaveState(); diff --git a/server/src/services/ShiftReviewService.js b/server/src/services/ShiftReviewService.js new file mode 100644 index 0000000..493eb49 --- /dev/null +++ b/server/src/services/ShiftReviewService.js @@ -0,0 +1,264 @@ +import { eventBus } from '../lib/eventBus.js'; +import { toArray } from '../lib/utils.js'; +import { contentLoader } from './ContentLoader.js'; +import { emailService } from './EmailService.js'; +import { saveState } from './SaveState.js'; +import { ticketService } from './TicketService.js'; + +function clone(value) { + return JSON.parse(JSON.stringify(value)); +} + +function safeDurationSeconds(startedAt, endedAt) { + const start = new Date(startedAt ?? 0).getTime(); + const end = new Date(endedAt ?? 
0).getTime(); + if (!Number.isFinite(start) || !Number.isFinite(end) || end < start) { + return null; + } + + return Math.floor((end - start) / 1000); +} + +export class ShiftReviewService { + constructor({ + bus = eventBus, + loader = contentLoader, + email = emailService, + save = saveState, + tickets = ticketService, + now = () => Date.now() + } = {}) { + this.bus = bus; + this.loader = loader; + this.email = email; + this.save = save; + this.tickets = tickets; + this.now = now; + this._handlersBound = false; + this._onTicketActivated = this._handleTicketActivated.bind(this); + this._onTicketCompleted = this._handleTicketCompleted.bind(this); + this._onShiftEnded = this._handleShiftEnded.bind(this); + } + + initialize(state = this.save.get()) { + this._ensureContainers(state); + this._seedAssignedTickets(state); + this._bindHandlers(); + } + + dispose() { + if (!this._handlersBound) { + return; + } + + this.bus.off('ticket:activated', this._onTicketActivated); + this.bus.off('ticket:completed', this._onTicketCompleted); + this.bus.off('shift:ended', this._onShiftEnded); + this._handlersBound = false; + } + + _bindHandlers() { + this.dispose(); + this.bus.on('ticket:activated', this._onTicketActivated); + this.bus.on('ticket:completed', this._onTicketCompleted); + this.bus.on('shift:ended', this._onShiftEnded); + this._handlersBound = true; + } + + _ensureContainers(state = this.save.get()) { + const nextState = {}; + + if (!state?.current_shift_stats) { + nextState.current_shift_stats = { + assigned_ticket_ids: [], + resolved_tickets: [], + flagged_issues: [] + }; + } + + if (!Array.isArray(state?.shift_history)) { + nextState.shift_history = []; + } + + if (Object.keys(nextState).length > 0) { + this.save.set(nextState); + } + } + + _seedAssignedTickets(state = this.save.get()) { + const stats = state?.current_shift_stats ?? 
{}; + if (toArray(stats.assigned_ticket_ids).length > 0) { + return; + } + + const assignedTicketIds = this.tickets.getAll() + .filter((ticket) => ticket.status !== 'resolved') + .map((ticket) => ticket.id); + + if (assignedTicketIds.length > 0) { + this.save.set({ + current_shift_stats: { + ...stats, + assigned_ticket_ids: assignedTicketIds + } + }); + } + } + + _handleTicketActivated({ ticketId }) { + const state = this.save.get(); + const stats = clone(state?.current_shift_stats ?? {}); + const assigned = new Set(toArray(stats.assigned_ticket_ids)); + assigned.add(ticketId); + + this.save.set({ + current_shift_stats: { + ...stats, + assigned_ticket_ids: [...assigned] + } + }); + } + + _handleTicketCompleted(payload) { + const state = this.save.get(); + const stats = clone(state?.current_shift_stats ?? {}); + const resolved = toArray(stats.resolved_tickets); + const existing = resolved.find((entry) => entry.ticket_id === payload.ticketId); + const nextResolved = existing + ? resolved.map((entry) => entry.ticket_id === payload.ticketId ? { ...entry, ...payload } : entry) + : [...resolved, { + ticket_id: payload.ticketId, + branch_id: payload.branchId ?? null, + trust_delta: Number(payload.trustDelta) || 0, + activated_at: payload.activatedAt ?? null, + resolved_at: payload.resolvedAt ?? new Date(this.now()).toISOString() + }]; + + const flaggedIssues = [...toArray(stats.flagged_issues)]; + if (Number(payload.trustDelta) <= 0) { + flaggedIssues.push({ + ticket_id: payload.ticketId, + type: 'wrong_approach', + detail: payload.branchId + ? `${payload.ticketId} resolved with ${payload.branchId}` + : `${payload.ticketId} resolved with a risky branch` + }); + } + + this.save.set({ + current_shift_stats: { + ...stats, + resolved_tickets: nextResolved, + flagged_issues: flaggedIssues + } + }); + } + + _handleShiftEnded(snapshot) { + const state = this.save.get(); + const stats = clone(state?.current_shift_stats ?? 
{}); + const assigned = toArray(stats.assigned_ticket_ids); + const resolved = toArray(stats.resolved_tickets); + const resolvedIds = new Set(resolved.map((entry) => entry.ticket_id)); + const timedOutIds = assigned.filter((ticketId) => !resolvedIds.has(ticketId)); + + const flaggedIssues = [ + ...toArray(stats.flagged_issues), + ...timedOutIds.map((ticketId) => ({ + ticket_id: ticketId, + type: 'timed_out', + detail: `${ticketId} rolled into the next shift unresolved` + })) + ]; + + const durations = resolved + .map((entry) => safeDurationSeconds(entry.activated_at, entry.resolved_at)) + .filter((value) => Number.isInteger(value)); + + const averageResolutionSeconds = durations.length > 0 + ? Math.round(durations.reduce((sum, value) => sum + value, 0) / durations.length) + : null; + + const review = { + shift_number: state?.shift_number ?? 1, + started_at: state?.shift_started_at ?? snapshot?.startedAt ?? new Date(this.now()).toISOString(), + ended_at: new Date(this.now()).toISOString(), + tickets_assigned: assigned.length, + tickets_resolved: resolved.length, + average_resolution_seconds: averageResolutionSeconds, + flagged_issues: flaggedIssues, + performance_tier: this._performanceTier({ + assignedCount: assigned.length, + resolvedCount: resolved.length, + flaggedIssueCount: flaggedIssues.length + }), + reviewer: 'Priya Nair' + }; + + const reviewSentence = this._reviewSentence(review.performance_tier); + const reviewBody = [ + `Shift ${review.shift_number} performance review`, + '', + `Tickets resolved: ${review.tickets_resolved}/${review.tickets_assigned}`, + `Average resolution time: ${this._formatDuration(review.average_resolution_seconds)}`, + `Flagged issues: ${flaggedIssues.length === 0 ? 
'none' : flaggedIssues.map((issue) => issue.detail).join('; ')}`, + '', + reviewSentence + ].join('\n'); + + const nextShiftNumber = review.shift_number + 1; + const nextShiftStartedAt = new Date(this.now()).toISOString(); + + this.save.set({ + shift_number: nextShiftNumber, + shift_started_at: nextShiftStartedAt, + shift_history: [...toArray(state?.shift_history), review], + current_shift_stats: { + assigned_ticket_ids: this.tickets.getAll() + .filter((ticket) => ticket.status !== 'resolved') + .map((ticket) => ticket.id), + resolved_tickets: [], + flagged_issues: [] + } + }); + + this.email.send({ + id: `mail-shift-review-${review.shift_number}-${Date.now()}`, + from: 'Priya Nair ', + subject: `Shift ${review.shift_number} review`, + body: reviewBody, + attachments: [], + replyOptions: [] + }); + } + + _performanceTier({ assignedCount, resolvedCount, flaggedIssueCount }) { + if (assignedCount > 0 && resolvedCount === assignedCount && flaggedIssueCount === 0) { + return 'excellent'; + } + + if (resolvedCount > 0 && flaggedIssueCount <= 1) { + return 'ok'; + } + + return 'poor'; + } + + _reviewSentence(tier) { + const dialogue = this.loader.get('dialogue', 'priya-shift-review'); + const message = toArray(dialogue?.messages).find((entry) => entry.stage === tier); + return message?.body ?? 
'Shift closed.'; + } + + _formatDuration(value) { + if (!Number.isInteger(value) || value < 0) { + return 'n/a'; + } + + const minutes = Math.floor(value / 60); + const seconds = value % 60; + return `${minutes}m ${String(seconds).padStart(2, '0')}s`; + } +} + +export const shiftReviewService = new ShiftReviewService(); diff --git a/server/src/services/ShiftReviewService.test.js b/server/src/services/ShiftReviewService.test.js new file mode 100644 index 0000000..aaab212 --- /dev/null +++ b/server/src/services/ShiftReviewService.test.js @@ -0,0 +1,92 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { EventEmitter } from 'node:events'; + +import { ShiftReviewService } from './ShiftReviewService.js'; + +function createSave(initialState) { + let state = structuredClone(initialState); + return { + get() { + return state; + }, + set(partial) { + state = { + ...state, + ...partial, + current_shift_stats: partial.current_shift_stats + ? { ...(state.current_shift_stats ?? {}), ...partial.current_shift_stats } + : state.current_shift_stats + }; + return state; + } + }; +} + +test('ShiftReviewService tracks ticket activity and generates a review email on shift end', () => { + const bus = new EventEmitter(); + const sent = []; + const save = createSave({ + shift_number: 1, + shift_started_at: '2026-04-25T12:00:00.000Z', + current_shift_stats: { + assigned_ticket_ids: [], + resolved_tickets: [], + flagged_issues: [] + }, + shift_history: [] + }); + + const service = new ShiftReviewService({ + bus, + save, + email: { + send(payload) { + sent.push(payload); + return payload; + } + }, + tickets: { + getAll() { + return []; + } + }, + loader: { + get(type, id) { + if (type === 'dialogue' && id === 'priya-shift-review') { + return { + messages: [ + { stage: 'excellent', body: 'Strong shift.' }, + { stage: 'ok', body: 'Acceptable shift.' }, + { stage: 'poor', body: 'This shift needs review.' 
} + ] + }; + } + + return null; + } + }, + now: () => new Date('2026-04-25T12:30:00.000Z').getTime() + }); + + service.initialize(save.get()); + + bus.emit('ticket:activated', { ticketId: 'T001' }); + bus.emit('ticket:completed', { + ticketId: 'T001', + branchId: 'correct-setup', + trustDelta: 1, + activatedAt: '2026-04-25T12:05:00.000Z', + resolvedAt: '2026-04-25T12:15:00.000Z' + }); + bus.emit('shift:ended', { startedAt: '2026-04-25T12:00:00.000Z', remainingSeconds: 0 }); + + const state = save.get(); + assert.equal(state.shift_number, 2); + assert.equal(state.shift_history.length, 1); + assert.equal(state.shift_history[0].tickets_resolved, 1); + assert.equal(state.shift_history[0].tickets_assigned, 1); + assert.equal(state.shift_history[0].performance_tier, 'excellent'); + assert.equal(sent.length, 1); + assert.match(sent[0].subject, /Shift 1 review/); +}); diff --git a/server/src/services/ShiftTimer.js b/server/src/services/ShiftTimer.js new file mode 100644 index 0000000..5a86f76 --- /dev/null +++ b/server/src/services/ShiftTimer.js @@ -0,0 +1,71 @@ +import { eventBus } from '../lib/eventBus.js'; +import { saveState } from './SaveState.js'; + +const DEFAULT_DURATION_SECONDS = Number(process.env.SHIFT_DURATION_SECONDS ?? 2400); +const DEFAULT_TICK_SECONDS = Number(process.env.SHIFT_TICK_SECONDS ?? 
30); + +export class ShiftTimer { + constructor({ + durationSeconds = DEFAULT_DURATION_SECONDS, + tickSeconds = DEFAULT_TICK_SECONDS, + save = saveState, + now = () => Date.now() + } = {}) { + this.durationSeconds = durationSeconds; + this.tickSeconds = tickSeconds; + this.save = save; + this.now = now; + this._interval = null; + this._endedShiftStartedAt = null; + } + + start(state = this.save.get()) { + this.stop(); + + if (!state?.shift_started_at) { + this.save.set({ shift_started_at: new Date(this.now()).toISOString() }); + } + + this.tick(); + this._interval = setInterval(() => { + this.tick(); + }, this.tickSeconds * 1000).unref(); + } + + stop() { + if (this._interval) { + clearInterval(this._interval); + this._interval = null; + } + } + + tick() { + const snapshot = this.getSnapshot(); + eventBus.emit('shift:tick', snapshot); + if (snapshot.remainingSeconds === 0 && snapshot.startedAt !== this._endedShiftStartedAt) { + this._endedShiftStartedAt = snapshot.startedAt; + eventBus.emit('shift:ended', snapshot); + } else if (snapshot.remainingSeconds > 0 && snapshot.startedAt !== this._endedShiftStartedAt) { + this._endedShiftStartedAt = null; + } + return snapshot; + } + + getSnapshot(state = this.save.get()) { + const shiftStartedAt = state?.shift_started_at + ? new Date(state.shift_started_at).getTime() + : this.now(); + + const elapsedSeconds = Math.max(0, Math.floor((this.now() - shiftStartedAt) / 1000)); + const remainingSeconds = Math.max(0, this.durationSeconds - elapsedSeconds); + + return { + durationSeconds: this.durationSeconds, + elapsedSeconds, + remainingSeconds, + startedAt: state?.shift_started_at ?? 
new Date(shiftStartedAt).toISOString() + }; + } +} + +export const shiftTimer = new ShiftTimer(); diff --git a/server/src/services/ShiftTimer.test.js b/server/src/services/ShiftTimer.test.js new file mode 100644 index 0000000..b7c740f --- /dev/null +++ b/server/src/services/ShiftTimer.test.js @@ -0,0 +1,23 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { ShiftTimer } from './ShiftTimer.js'; + +test('ShiftTimer computes elapsed and remaining seconds from shift start', () => { + const now = new Date('2026-04-25T12:00:00Z').getTime(); + const timer = new ShiftTimer({ + durationSeconds: 2400, + tickSeconds: 30, + save: { + get() { + return { shift_started_at: '2026-04-25T11:30:00Z' }; + } + }, + now: () => now + }); + + const snapshot = timer.getSnapshot(); + assert.equal(snapshot.elapsedSeconds, 1800); + assert.equal(snapshot.remainingSeconds, 600); + assert.equal(snapshot.durationSeconds, 2400); +}); diff --git a/server/src/services/TicketService.js b/server/src/services/TicketService.js new file mode 100644 index 0000000..96e3768 --- /dev/null +++ b/server/src/services/TicketService.js @@ -0,0 +1,375 @@ +import { eventBus } from '../lib/eventBus.js'; +import { createError } from '../lib/utils.js'; +import { contentLoader } from './ContentLoader.js'; +import { emailService } from './EmailService.js'; +import { questEngine } from './QuestEngine.js'; +import { saveState } from './SaveState.js'; +import { trustSystem } from './TrustSystem.js'; +import { validationEngine } from './ValidationEngine.js'; +import { behaviorTracker } from './BehaviorTracker.js'; + +class TicketService { + constructor() { + this._tickets = new Map(); + } + + initialize(state) { + this._tickets = new Map(); + + for (const [ticketId, entry] of Object.entries(state.tickets ?? {})) { + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + continue; + } + + const questState = ticket.linked_quest ? 
questEngine.getState(ticket.linked_quest) : 'locked'; + if (!['active', 'completed', 'failed'].includes(questState)) { + continue; + } + + this._tickets.set(ticketId, this._normalizeEntry(entry)); + } + + for (const [questId, entry] of questEngine.getAllEntries()) { + if (entry?.state === 'active') { + const quest = contentLoader.get('quests', questId); + if (quest?.ticket_id) { + this.activateTicket(quest.ticket_id); + } + } + + if (entry?.state === 'completed') { + const quest = contentLoader.get('quests', questId); + if (quest?.ticket_id) { + const ticketEntry = this._tickets.get(quest.ticket_id); + if (!ticketEntry || ticketEntry.status !== 'resolved') { + this._tickets.set(quest.ticket_id, { + status: 'resolved', + activated_at: ticketEntry?.activated_at ?? entry.started_at ?? null, + resolved_at: ticketEntry?.resolved_at ?? entry.completed_at ?? null + }); + } + } + } + } + + this._persist(); + } + + getAll() { + return [...this._tickets.entries()] + .filter(([ticketId]) => this._isPrimaryTicketId(ticketId)) + .sort(([leftId], [rightId]) => leftId.localeCompare(rightId)) + .map(([ticketId, entry]) => { + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + return null; + } + + return { + id: ticket.id, + subject: ticket.subject, + priority: entry.current_priority ?? ticket.current_priority, + status: entry.status ?? 
'open', + linked_quest: ticket.linked_quest + }; + }) + .filter(Boolean); + } + + getEntry(ticketId) { + const entry = this._tickets.get(ticketId); + if (!entry) { + return null; + } + + return { ...entry }; + } + + activateTicket(ticketId) { + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + throw createError(`Unknown ticket: ${ticketId}`, 404); + } + + const existing = this._tickets.get(ticketId); + if (existing) { + return existing.status; + } + + this._tickets.set(ticketId, { + status: 'open', + current_priority: ticket.current_priority, + activated_at: new Date().toISOString(), + resolved_at: null + }); + this._persist(); + eventBus.emit('ticket:activated', { ticketId }); + return 'open'; + } + + getDetail(ticketId) { + const ticket = contentLoader.get('tickets', ticketId); + const entry = this._tickets.get(ticketId); + + if (!ticket || !entry) { + return null; + } + + return { + ...ticket, + status: entry.status ?? 'open', + activated_at: entry.activated_at ?? null, + resolved_at: entry.resolved_at ?? null + }; + } + + setStatus(ticketId, status) { + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + throw createError(`Unknown ticket: ${ticketId}`, 404); + } + + const existing = this._tickets.get(ticketId) ?? this._defaultEntry(ticket); + const nextEntry = { + ...existing, + status + }; + + if (status === 'resolved' && !nextEntry.resolved_at) { + nextEntry.resolved_at = new Date().toISOString(); + } + + this._tickets.set(ticketId, nextEntry); + this._persist(); + } + + setPriority(ticketId, priority) { + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + throw createError(`Unknown ticket: ${ticketId}`, 404); + } + + const existing = this._tickets.get(ticketId) ?? 
this._defaultEntry(ticket); + this._tickets.set(ticketId, { + ...existing, + current_priority: priority + }); + this._persist(); + } + + async markComplete(ticketId, options = {}) { + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + return { passed: false, reason: 'ticket_not_found' }; + } + + const ticketEntry = this._tickets.get(ticketId); + if (!ticketEntry) { + return { passed: false, reason: 'ticket_not_active' }; + } + + if (ticketEntry.status === 'resolved') { + return { + passed: true, + branch: ticketEntry.branch_id ?? null, + trust_delta: 0, + failures: [], + ticket_status: 'resolved', + already_resolved: true + }; + } + + const quest = ticket.linked_quest ? contentLoader.get('quests', ticket.linked_quest) : null; + if (!quest) { + return { passed: false, reason: 'quest_not_found' }; + } + + if (!questEngine.isActive(quest.id)) { + if (!questEngine.canActivate(quest.id)) { + return { passed: false, reason: 'quest_locked' }; + } + + questEngine.activate(quest.id); + } + + let branch = null; + let failures = []; + + if (options.branchId) { + branch = this._selectBranch(quest, options.branchId); + if (!branch) { + return { passed: false, reason: 'branch_not_found' }; + } + } else { + const validationResult = await validationEngine.resolveBranch(quest); + branch = validationResult.branch; + failures = validationResult.failures; + if (!branch) { + return { + passed: false, + reason: 'validation_failed', + failures + }; + } + } + + const appliedFlags = this._appendWorldFlags(branch.world_flags ?? []); + const trustDelta = Number(branch.trust_delta) || 0; + + if (trustDelta !== 0) { + trustSystem.adjust(trustDelta); + } + + questEngine.complete(quest.id, { branchId: branch.id }); + + const behaviorImpact = branch.behavior_impact + ?? quest.behavior_impact?.[branch.id] + ?? quest.behavior_impact?.default + ?? 
null; + if (behaviorImpact && typeof behaviorImpact === 'object') { + behaviorTracker.apply(behaviorImpact); + } + + const nextEntry = { + ...ticketEntry, + status: 'resolved', + resolved_at: new Date().toISOString(), + branch_id: branch.id + }; + this._tickets.set(ticketId, nextEntry); + + const followUpTicketId = this._activateFollowUpTicket(branch.follow_up_ticket ?? null); + const followUpMailIds = []; + + if (trustDelta <= 0) { + for (const dialogueId of this._collectFollowUpDialogues(branch)) { + const mail = emailService.sendDialogueFollowUp(dialogueId, { + questId: quest.id, + ticketId: ticket.id, + subjectPrefix: `Follow-up on ${ticket.id}`, + idPrefix: `mail-${ticket.id}` + }); + if (mail?.id) { + followUpMailIds.push(mail.id); + } + } + } + + this._persist(); + eventBus.emit('ticket:completed', { + ticketId, + questId: quest.id, + branchId: branch.id, + trustDelta, + activatedAt: nextEntry.activated_at ?? ticketEntry.activated_at ?? null, + resolvedAt: nextEntry.resolved_at + }); + + return { + passed: true, + branch: branch.id, + trust_delta: trustDelta, + failures: [], + ticket_status: 'resolved', + world_flags: appliedFlags, + follow_up_ticket: followUpTicketId, + follow_up_mail_ids: followUpMailIds + }; + } + + _persist() { + saveState.set({ tickets: Object.fromEntries(this._tickets) }); + } + + _selectBranch(quest, branchId) { + const branches = [...(quest.solution_branches ?? [])].sort((left, right) => (right.priority ?? 0) - (left.priority ?? 0)); + if (branchId) { + return branches.find((branch) => branch.id === branchId) ?? null; + } + + return branches[0] ?? null; + } + + _appendWorldFlags(flags) { + const state = saveState.get(); + const nextFlags = new Set(state?.world_flags ?? 
[]); + + for (const flag of flags) { + nextFlags.add(flag); + } + + const snapshot = [...nextFlags]; + saveState.set({ world_flags: snapshot }); + return snapshot; + } + + _activateFollowUpTicket(ticketId) { + if (!ticketId) { + return null; + } + + const ticket = contentLoader.get('tickets', ticketId); + if (!ticket) { + return null; + } + + const questId = ticket.linked_quest; + if (questId && !questEngine.isCompleted(questId) && questEngine.canActivate(questId)) { + questEngine.activate(questId); + } + + this.activateTicket(ticketId); + return ticketId; + } + + _collectFollowUpDialogues(branch) { + const ids = []; + + if (branch.follow_up_dialogue) { + ids.push(branch.follow_up_dialogue); + } + + for (const id of branch.follow_up_dialogues ?? []) { + ids.push(id); + } + + return ids; + } + + _normalizeEntry(entry) { + if (typeof entry === 'string') { + return { + status: entry, + current_priority: null, + activated_at: null, + resolved_at: null, + branch_id: null + }; + } + + return { + status: entry?.status ?? 'open', + current_priority: entry?.current_priority ?? null, + activated_at: entry?.activated_at ?? null, + resolved_at: entry?.resolved_at ?? null, + branch_id: entry?.branch_id ?? 
null + }; + } + + _defaultEntry(ticket) { + return { + status: 'open', + current_priority: ticket.current_priority, + activated_at: new Date().toISOString(), + resolved_at: null, + branch_id: null + }; + } + + _isPrimaryTicketId(ticketId) { + return /^T\d{3}$/.test(ticketId); + } +} + +export const ticketService = new TicketService(); diff --git a/server/src/services/TicketService.test.js b/server/src/services/TicketService.test.js new file mode 100644 index 0000000..d3c4d91 --- /dev/null +++ b/server/src/services/TicketService.test.js @@ -0,0 +1,86 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import os from 'os'; +import path from 'path'; +import { rm } from 'fs/promises'; + +import { contentLoader } from './ContentLoader.js'; +import { emailService } from './EmailService.js'; +import { progressionSystem } from './ProgressionSystem.js'; +import { questEngine } from './QuestEngine.js'; +import { saveState } from './SaveState.js'; +import { ticketService } from './TicketService.js'; +import { trustSystem } from './TrustSystem.js'; +import { validationEngine } from './ValidationEngine.js'; + +async function resetState(testId) { + await saveState._writeQueue.catch(() => {}); + process.env.CONTENT_DIR = path.resolve(process.cwd(), '../content'); + process.env.SAVE_DIR = path.join(os.tmpdir(), `sc-server-test-${testId}-${Date.now()}`); + + saveState._savePath = null; + saveState._state = null; + saveState._writeQueue = Promise.resolve(); + + await rm(process.env.SAVE_DIR, { recursive: true, force: true }); + await contentLoader.load(); + await saveState.load(); + + const state = saveState.get(); + progressionSystem.initialize(state); + trustSystem.initialize(state); + questEngine.initialize(state); + ticketService.initialize(state); + emailService.initialize(state); + await saveState._writeQueue.catch(() => {}); +} + +test('fresh state only exposes T001 as the active ticket', async () => { + await resetState('fresh'); + + const tickets = 
ticketService.getAll(); + assert.deepEqual(tickets.map((ticket) => ticket.id), ['T001']); + assert.equal(questEngine.getState('Q001'), 'active'); + assert.equal(questEngine.getState('Q002'), 'locked'); +}); + +test('markComplete resolves the active ticket, unlocks world flags, and activates the next ticket', async () => { + await resetState('complete-clean'); + + const originalResolveBranch = validationEngine.resolveBranch.bind(validationEngine); + validationEngine.resolveBranch = async (quest) => ({ + branch: quest.solution_branches.find((branch) => branch.id === 'correct-setup'), + failures: [] + }); + + try { + const result = await ticketService.markComplete('T001'); + assert.equal(result.passed, true); + assert.equal(result.branch, 'correct-setup'); + assert.equal(result.trust_delta, 1); + assert.equal(ticketService.getEntry('T001').status, 'resolved'); + assert.equal(ticketService.getEntry('T002').status, 'open'); + assert.equal(questEngine.getState('Q001'), 'completed'); + assert.equal(questEngine.getState('Q002'), 'active'); + assert.equal(trustSystem.getScore(), 51); + + const state = saveState.get(); + assert.ok(state.world_flags.includes('player_ssh_configured')); + } finally { + validationEngine.resolveBranch = originalResolveBranch; + } +}); + +test('non-positive branch outcomes send follow-up mail and still open the next ticket', async () => { + await resetState('complete-permissive'); + + const result = await ticketService.markComplete('T001', { branchId: 'permissive-setup' }); + assert.equal(result.passed, true); + assert.equal(result.branch, 'permissive-setup'); + assert.equal(result.trust_delta, 0); + assert.equal(ticketService.getEntry('T002').status, 'open'); + assert.ok(result.follow_up_mail_ids.length > 0); + + const followUp = emailService.getAll().find((mail) => result.follow_up_mail_ids.includes(mail.id)); + assert.ok(followUp); +}); diff --git a/server/src/services/TrustSystem.js b/server/src/services/TrustSystem.js new file mode 100644 
index 0000000..c44c6b0 --- /dev/null +++ b/server/src/services/TrustSystem.js @@ -0,0 +1,43 @@ +import { eventBus } from '../lib/eventBus.js'; +import { contentLoader } from './ContentLoader.js'; +import { progressionSystem } from './ProgressionSystem.js'; +import { saveState } from './SaveState.js'; + +class TrustSystem { + constructor() { + this._score = 50; + } + + initialize(state) { + this._score = Number(state.trust ?? 50); + this._evaluateUnlocks(); + } + + getScore() { + return this._score; + } + + adjust(delta) { + const numericDelta = Number(delta) || 0; + const previousScore = this._score; + this._score = Math.max(0, Math.min(100, previousScore + numericDelta)); + + this._evaluateUnlocks(); + eventBus.emit('trust:changed', { score: this._score, delta: numericDelta }); + saveState.set({ trust: this._score }); + } + + _evaluateUnlocks() { + for (const unlock of contentLoader.trustUnlocks ?? []) { + if (this._score >= unlock.trust_threshold) { + progressionSystem.grantUnlock(unlock); + } + + if (unlock.revokes_below_trust >= 0 && this._score < unlock.revokes_below_trust) { + progressionSystem.revokeUnlock(unlock); + } + } + } +} + +export const trustSystem = new TrustSystem(); diff --git a/server/src/services/VMManager.js b/server/src/services/VMManager.js new file mode 100644 index 0000000..992ddc9 --- /dev/null +++ b/server/src/services/VMManager.js @@ -0,0 +1,165 @@ +import { contentLoader } from './ContentLoader.js'; +import { runVirsh } from '../lib/virsh.js'; + +const DEFAULT_VM_PREFIX = process.env.VM_PREFIX ?? 'sc-'; +const IP_CACHE_TTL_MS = 60_000; + +export function extractIpv4Address(text) { + const match = String(text ?? '').match(/\b(\d{1,3}(?:\.\d{1,3}){3})(?:\/\d+)?\b/); + return match ? match[1] : ''; +} + +export function extractMacAddress(domainXml) { + const match = String(domainXml ?? 
'').match(/ Date.now()) { + return cached.ip; + } + } + + if (!await this.domainExists(vmId)) { + return ''; + } + + const domainName = this.getDomainName(vmId); + const ipFromAgent = await this._ipFromDomifaddr(domainName); + if (ipFromAgent) { + this._cacheIp(cacheKey, ipFromAgent); + return ipFromAgent; + } + + const ipFromDhcp = await this._ipFromDhcp(vmId, domainName); + if (ipFromDhcp) { + this._cacheIp(cacheKey, ipFromDhcp); + return ipFromDhcp; + } + + return ''; + } + + _cacheIp(cacheKey, ip) { + this.ipCache.set(cacheKey, { + ip, + expiresAt: Date.now() + IP_CACHE_TTL_MS + }); + } + + async _ipFromDomifaddr(domainName) { + const result = await this.virshRunner(['domifaddr', domainName, '--source', 'agent']); + if (!result.ok) { + return ''; + } + + return extractIpv4Address(result.stdout); + } + + async _ipFromDhcp(vmId, domainName) { + const profile = this.getProfile(vmId); + const networkName = profile.network?.libvirt_network ?? 'sc-internal'; + const xmlResult = await this.virshRunner(['dumpxml', domainName]); + if (!xmlResult.ok) { + return ''; + } + + const macAddress = extractMacAddress(xmlResult.stdout); + if (!macAddress) { + return ''; + } + + const leasesResult = await this.virshRunner(['net-dhcp-leases', networkName]); + if (!leasesResult.ok) { + return ''; + } + + return findLeaseIpByMac(leasesResult.stdout, macAddress); + } +} + +export const vmManager = new VMManager(); diff --git a/server/src/services/ValidationEngine.js b/server/src/services/ValidationEngine.js new file mode 100644 index 0000000..1572ea9 --- /dev/null +++ b/server/src/services/ValidationEngine.js @@ -0,0 +1,334 @@ +import { runSSH } from '../lib/ssh.js'; +import { vmManager } from './VMManager.js'; + +function shellQuote(value) { + return `'${String(value).replace(/'/g, `'\\''`)}'`; +} + +function normalizeMode(value) { + return String(value ?? '').replace(/^0+/, '').padStart(4, '0'); +} + +function buildFailure(rule, detail = '') { + const vm = rule.vm ? 
`${rule.vm}:` : ''; + const path = rule.path ?? rule.service ?? rule.package ?? rule.command ?? rule.port ?? ''; + const suffix = detail ? ` (${detail})` : ''; + return `${vm}${rule.type}:${path}${suffix}`; +} + +export class ValidationEngine { + constructor({ vmManager: vmManagerInstance = vmManager, sshRunner = runSSH } = {}) { + this.vmManager = vmManagerInstance; + this.sshRunner = sshRunner; + } + + async resolveBranch(quest) { + const branches = [...(quest.solution_branches ?? [])].sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0)); + const failures = []; + + for (const branch of branches) { + const result = await this.evaluateBranch(branch); + if (result.passed) { + return { + branch, + failures: [] + }; + } + + failures.push(...result.failures.map((failure) => `${branch.id}:${failure}`)); + } + + return { + branch: null, + failures + }; + } + + async evaluateBranch(branch) { + return await this.evaluateRule(branch.validation ?? {}, ''); + } + + async evaluateRule(rule, defaultVmId = '') { + const type = rule?.type ?? ''; + const targetVm = rule?.vm ?? defaultVmId; + + switch (type) { + case 'and': + return await this._evalAnd(rule, targetVm); + case 'or': + return await this._evalOr(rule, targetVm); + case 'not': + return await this._evalNot(rule, targetVm); + case 'file_exists': + return await this._remoteAssert(targetVm, `test -e ${shellQuote(rule.path)}`, rule); + case 'file_absent': + return await this._remoteAssert(targetVm, `test ! -e ${shellQuote(rule.path)}`, rule); + case 'directory_exists': + return await this._remoteAssert(targetVm, `test -d ${shellQuote(rule.path)}`, rule); + case 'file_contains': + case 'log_contains': + return await this._remoteAssert( + targetVm, + `grep -qF ${shellQuote(rule.contains ?? 
'')} ${shellQuote(rule.path)}`, + rule + ); + case 'file_mode': + case 'file_mode_matches': + return await this._checkFileMode(targetVm, rule); + case 'file_owner': + case 'file_owner_matches': + return await this._checkFileOwner(targetVm, rule, false); + case 'file_owner_is_not': + return await this._checkFileOwner(targetVm, { + ...rule, + user: rule.expected_user ?? rule.user ?? '', + group: rule.expected_group ?? rule.group ?? '' + }, true); + case 'service_state': + case 'service_state_is': + case 'service_state_matches': + return await this._checkServiceState(targetVm, rule); + case 'service_enabled': + case 'service_enabled_is': + return await this._checkServiceEnabled(targetVm, rule); + case 'process_running': + return await this._remoteAssert(targetVm, `pgrep -x ${shellQuote(rule.process)}`, rule); + case 'process_user': + return await this._remoteAssert(targetVm, `pgrep -x -u ${shellQuote(rule.user)} ${shellQuote(rule.process)}`, rule); + case 'port_listening': + return await this._checkPortListening(targetVm, rule); + case 'package_installed': + return await this._checkPackageInstalled(targetVm, rule); + case 'mount_present': + return await this._remoteAssert(targetVm, `findmnt -M ${shellQuote(rule.path)}`, rule); + case 'disk_usage_below': + return await this._checkDiskUsage(targetVm, rule, 'below'); + case 'disk_usage_above': + return await this._checkDiskUsage(targetVm, rule, 'above'); + case 'command_assert': + return await this._checkCommandAssert(targetVm, rule); + default: + return { + passed: false, + failures: [buildFailure(rule, 'unsupported-rule')] + }; + } + } + + async _evalAnd(rule, targetVm) { + const failures = []; + for (const subRule of rule.rules ?? 
[]) { + const result = await this.evaluateRule(subRule, targetVm); + if (!result.passed) { + failures.push(...result.failures); + } + } + + return { + passed: failures.length === 0, + failures + }; + } + + async _evalOr(rule, targetVm) { + const failures = []; + for (const subRule of rule.rules ?? []) { + const result = await this.evaluateRule(subRule, targetVm); + if (result.passed) { + return { passed: true, failures: [] }; + } + + failures.push(...result.failures); + } + + return { + passed: false, + failures: failures.length > 0 ? failures : [buildFailure(rule, 'no-branches-matched')] + }; + } + + async _evalNot(rule, targetVm) { + const result = await this.evaluateRule(rule.rule ?? {}, targetVm); + return { + passed: !result.passed, + failures: result.passed ? [buildFailure(rule.rule ?? rule, 'negated-rule-matched')] : [] + }; + } + + async _checkFileMode(targetVm, rule) { + const result = await this._remoteExec( + targetVm, + `stat -c %a ${shellQuote(rule.path)}` + ); + + if (!result.ok) { + return { passed: false, failures: [buildFailure(rule)] }; + } + + const actual = normalizeMode(result.stdout.trim()); + const expected = normalizeMode(rule.mode); + return { + passed: actual === expected, + failures: actual === expected ? [] : [buildFailure(rule, `expected ${expected}, got ${actual}`)] + }; + } + + async _checkFileOwner(targetVm, rule, negate = false) { + const result = await this._remoteExec( + targetVm, + `stat -c %U:%G ${shellQuote(rule.path)}` + ); + + if (!result.ok) { + return { passed: false, failures: [buildFailure(rule)] }; + } + + const [actualUser = '', actualGroup = ''] = result.stdout.trim().split(':'); + const expectedUser = rule.user ?? ''; + const expectedGroup = rule.group ?? ''; + const matches = (!expectedUser || actualUser === expectedUser) && (!expectedGroup || actualGroup === expectedGroup); + const passed = negate ? !matches : matches; + + return { + passed, + failures: passed ? 
[] : [buildFailure(rule, `got ${actualUser}:${actualGroup}`)] + }; + } + + async _checkServiceState(targetVm, rule) { + const result = await this._remoteExec( + targetVm, + `systemctl is-active ${shellQuote(rule.service)}` + ); + + const actual = result.ok ? result.stdout.trim() : result.stdout.trim() || result.stderr.trim() || 'unknown'; + const expected = rule.state ?? 'active'; + return { + passed: actual === expected, + failures: actual === expected ? [] : [buildFailure(rule, `expected ${expected}, got ${actual}`)] + }; + } + + async _checkServiceEnabled(targetVm, rule) { + const result = await this._remoteExec( + targetVm, + `systemctl is-enabled ${shellQuote(rule.service)}` + ); + + const expected = rule.enabled ?? true; + const actualEnabled = result.ok && result.stdout.trim() === 'enabled'; + return { + passed: actualEnabled === expected, + failures: actualEnabled === expected ? [] : [buildFailure(rule, `expected enabled=${expected}`)] + }; + } + + async _checkPortListening(targetVm, rule) { + const port = Number(rule.port); + const protocol = String(rule.protocol ?? 'tcp').toLowerCase(); + const listening = rule.listening ?? true; + const ssFlags = protocol === 'udp' ? '-lun' : '-ltn'; + const result = await this._remoteExec( + targetVm, + `ss ${ssFlags} | grep -Eq '[:.]${port}(\\s|$)'` + ); + + return { + passed: result.ok === listening, + failures: result.ok === listening ? [] : [buildFailure(rule, `expected listening=${listening}`)] + }; + } + + async _checkPackageInstalled(targetVm, rule) { + const packageSpec = String(rule.package ?? ''); + const shouldBeInstalled = rule.installed ?? true; + const distro = this.vmManager.getDistro(targetVm); + const [packageName, version] = packageSpec.split('='); + + let command = ''; + if (distro === 'arch') { + command = version + ? `pacman -Q ${shellQuote(packageName)} | grep -F ${shellQuote(version)}` + : `pacman -Q ${shellQuote(packageName)}`; + } else { + command = version + ? 
`dpkg-query -W -f='\\${Status} \\${Version}\\n' ${shellQuote(packageName)} | grep -F ${shellQuote(`install ok installed ${version}`)}` + : `dpkg-query -W -f='\\${Status}\\n' ${shellQuote(packageName)} | grep -F 'install ok installed'`; + } + + const result = await this._remoteExec(targetVm, command); + return { + passed: result.ok === shouldBeInstalled, + failures: result.ok === shouldBeInstalled ? [] : [buildFailure(rule, `expected installed=${shouldBeInstalled}`)] + }; + } + + async _checkDiskUsage(targetVm, rule, mode) { + const path = rule.path ?? '/'; + const threshold = Number(rule.threshold_percent ?? rule.percent ?? 0); + const result = await this._remoteExec( + targetVm, + `df -P ${shellQuote(path)} | tail -1 | awk '{print $5}' | tr -d '% '` + ); + + if (!result.ok) { + return { passed: false, failures: [buildFailure(rule)] }; + } + + const actual = Number(result.stdout.trim()); + const passed = mode === 'below' ? actual < threshold : actual > threshold; + return { + passed, + failures: passed ? [] : [buildFailure(rule, `expected ${mode} ${threshold}, got ${actual}`)] + }; + } + + async _checkCommandAssert(targetVm, rule) { + const result = await this._remoteExec(targetVm, rule.command ?? '', { rawShell: true }); + const expectedExitCode = Number(rule.exit_code ?? 0); + const stdoutContains = rule.stdout_contains ?? rule.contains ?? ''; + const passedExit = result.code === expectedExitCode; + const passedOutput = !stdoutContains || result.stdout.includes(stdoutContains); + const passed = passedExit && passedOutput; + + return { + passed, + failures: passed ? [] : [buildFailure(rule, `expected exit=${expectedExitCode}`)] + }; + } + + async _remoteAssert(targetVm, shellCommand, rule) { + const result = await this._remoteExec(targetVm, shellCommand); + return { + passed: result.ok, + failures: result.ok ? 
[] : [buildFailure(rule)] + }; + } + + async _remoteExec(targetVm, shellCommand, options = {}) { + const host = await this.vmManager.getIP(targetVm); + if (!host) { + return { + ok: false, + code: 255, + stdout: '', + stderr: `Unable to resolve host for ${targetVm}`, + command: shellCommand + }; + } + + const managementUser = this.vmManager.getManagementUser(targetVm); + const keyPath = this.vmManager.getSshKeyPath(targetVm); + const wrappedCommand = `sudo bash -lc ${shellQuote(shellCommand)}`; + + return await this.sshRunner({ + host, + user: managementUser, + keyPath, + command: wrappedCommand, + timeoutSec: options.timeoutSec ?? 15 + }); + } +} + +export const validationEngine = new ValidationEngine(); diff --git a/server/src/services/ValidationEngine.test.js b/server/src/services/ValidationEngine.test.js new file mode 100644 index 0000000..6bc821b --- /dev/null +++ b/server/src/services/ValidationEngine.test.js @@ -0,0 +1,85 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { contentLoader } from './ContentLoader.js'; +import { ValidationEngine } from './ValidationEngine.js'; + +function createFakeEngine(overrides = {}) { + const fakeVmManager = { + async getIP() { + return '10.42.0.41'; + }, + getManagementUser() { + return 'opsbridge'; + }, + getSshKeyPath() { + return '~/.ssh/sc_host_key'; + }, + getDistro(vmId) { + return vmId === 'build_machine' ? 'arch' : 'debian'; + } + }; + + const sshRunner = async ({ command }) => { + for (const [matcher, result] of overrides) { + const matched = + typeof matcher === 'string' + ? command.includes(matcher) + : matcher instanceof RegExp + ? matcher.test(command) + : typeof matcher === 'function' + ? matcher(command) + : false; + + if (matched) { + return { + ok: result.ok, + code: result.code ?? (result.ok ? 0 : 1), + stdout: result.stdout ?? '', + stderr: result.stderr ?? 
'' + }; + } + } + + return { ok: false, code: 1, stdout: '', stderr: `unmatched:${command}` }; + }; + + return new ValidationEngine({ + vmManager: fakeVmManager, + sshRunner + }); +} + +test('ValidationEngine selects permissive Q001 branch when owner matches but modes do not', async () => { + process.env.CONTENT_DIR = '../content'; + await contentLoader.load(); + + const engine = createFakeEngine(new Map([ + [(command) => command.includes('test -e') && command.includes('/home/player/.ssh/authorized_keys'), { ok: true, stdout: '' }], + [(command) => command.includes('stat -c %a') && command.includes('/home/player/.ssh/authorized_keys'), { ok: true, stdout: '644\n' }], + [(command) => command.includes('stat -c %a') && command.includes('/home/player/.ssh') && !command.includes('authorized_keys'), { ok: true, stdout: '755\n' }], + [(command) => command.includes('stat -c %U:%G') && command.includes('/home/player/.ssh/authorized_keys'), { ok: true, stdout: 'player:player\n' }] + ])); + + const quest = contentLoader.get('quests', 'Q001'); + const result = await engine.resolveBranch(quest); + + assert.equal(result.branch?.id, 'permissive-setup'); +}); + +test('ValidationEngine selects highest-priority passing branch for Q002', async () => { + process.env.CONTENT_DIR = '../content'; + await contentLoader.load(); + + const engine = createFakeEngine(new Map([ + [(command) => command.includes('systemctl is-active') && command.includes('nginx'), { ok: true, stdout: 'active\n' }], + [(command) => command.includes('systemctl is-enabled') && command.includes('nginx'), { ok: true, stdout: 'enabled\n' }], + [(command) => command.includes('ss -ltn') && command.includes('80'), { ok: true, stdout: '' }], + [(command) => command.includes('grep -qF') && command.includes('listen 80;') && command.includes('axiomworks.conf'), { ok: true, stdout: '' }] + ])); + + const quest = contentLoader.get('quests', 'Q002'); + const result = await engine.resolveBranch(quest); + + 
assert.equal(result.branch?.id, 'config-fixed-enabled'); +}); diff --git a/start-game.sh b/start-game.sh new file mode 100755 index 0000000..c40bd7f --- /dev/null +++ b/start-game.sh @@ -0,0 +1,258 @@ +#!/usr/bin/env bash +# Sysadmin Chronicles — Launcher +# Usage: bash start-game.sh [--stop] [--manage-saves] [--reset-save] + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$SCRIPT_DIR" +SERVER_DIR="$PROJECT_ROOT/server" +FRONTEND_DIR="$PROJECT_ROOT/frontend" +DOMAIN="${SC_WORKSTATION_DOMAIN:-sc-workstation}" +NETWORK="${SC_NETWORK:-sc-internal}" +PORT="${PORT:-3000}" + +export PORT +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +source "$PROJECT_ROOT/tools/lib/ui.sh" +source "$PROJECT_ROOT/tools/lib/config.sh" +source "$PROJECT_ROOT/tools/lib/save.sh" +source "$PROJECT_ROOT/tools/lib/internal-https.sh" + +config_read || true +sc_ensure_internal_certs "$PROJECT_ROOT" +sc_export_internal_https_env + +SERVER_PID="" +VIEWER_PID="" + +_cleanup() { + [ -n "$SERVER_PID" ] && kill "$SERVER_PID" 2>/dev/null || true + [ -n "$VIEWER_PID" ] && kill "$VIEWER_PID" 2>/dev/null || true +} +trap '_cleanup' EXIT INT TERM + +# --------------------------------------------------------------------------- +# Flag handling +# --------------------------------------------------------------------------- + +for arg in "$@"; do + case "$arg" in + --stop) + stopped=false + for pid in $(sc_listen_pids "$PORT" || true); do + if sc_pid_is_repo_server "$pid" "$PROJECT_ROOT"; then + sc_stop_pid "$pid" + stopped=true + fi + done + if [ "$stopped" = true ]; then + echo " ✓ Game server stopped." + else + echo " (no running game server found)" + fi + exit 0 + ;; + --manage-saves) + exec bash "$PROJECT_ROOT/tools/save/manage-saves.sh" + ;; + --reset-save) + echo "" + echo " This will reset your current save to a new game." 
+ printf " Type RESET to confirm: " >/dev/tty + read -r _c /dev/null 2>&1; then + echo "" + echo " Your system is missing the virtual machine tools." + echo " Run install.sh to set up the game." + echo "" + exit 1 +fi + +if ! command -v node >/dev/null 2>&1; then + echo "" + echo " Node.js is required but wasn't found." + echo " Run install.sh to set up the game." + echo "" + exit 1 +fi + +# Check images directory is accessible +SC_IMAGES_DIR="${SC_IMAGES_DIR:-}" +if [ -n "$SC_IMAGES_DIR" ] && [ ! -d "$SC_IMAGES_DIR" ]; then + echo "" + echo " Can't find your game world." + echo " The VM images are stored at $SC_IMAGES_DIR" + echo " but that location isn't available right now." + echo "" + echo " Is your game drive plugged in and mounted?" + echo " Once it's mounted, run start-game.sh again." + echo "" + exit 1 +fi + +# Check workstation domain exists +if ! virsh dominfo "$DOMAIN" >/dev/null 2>&1; then + echo "" + echo " Your game world hasn't been built yet." + echo " Run install.sh to finish setup." + echo "" + exit 1 +fi + +# Check frontend has been built +if [ ! -f "$FRONTEND_DIR/dist/index.html" ]; then + echo "" + echo " The game interface hasn't been built yet." + echo " Run install.sh to finish setup." + echo "" + exit 1 +fi + +# --------------------------------------------------------------------------- +# Ensure server dependencies are installed +# --------------------------------------------------------------------------- + +if [ ! -d "$SERVER_DIR/node_modules" ]; then + sc_info "Installing server dependencies..." 
+ (cd "$SERVER_DIR" && npm install --silent) +fi + +# --------------------------------------------------------------------------- +# Start server if not already running +# --------------------------------------------------------------------------- + +_server_ready() { + ss -tlnp 2>/dev/null | grep -q ":${PORT} " +} + +_ensure_server_port() { + local pids="" + pids="$(sc_listen_pids "$PORT" || true)" + if [ -z "$pids" ]; then + return 0 + fi + + local pid + for pid in $pids; do + if ! sc_pid_is_repo_server "$pid" "$PROJECT_ROOT"; then + echo "" + echo " Port $PORT is already in use by another process (pid $pid)." + echo " Stop that process or set PORT to a free port before launching." + echo "" + exit 1 + fi + if sc_pid_has_internal_tls "$pid"; then + return 1 + fi + done + + sc_info "Restarting game server with HTTPS enabled..." + for pid in $pids; do + sc_stop_pid "$pid" + done + return 0 +} + +_wait_for_server() { + local timeout=15 + local i=0 + while ! _server_ready; do + sleep 0.3 + (( i++ )) || true + if [ "$i" -ge $(( timeout * 10 / 3 )) ]; then + return 1 + fi + done +} + +if _ensure_server_port; then + ( + cd "$SERVER_DIR" + exec node src/index.js + ) & + SERVER_PID=$! +fi + +# --------------------------------------------------------------------------- +# Ensure network is active +# --------------------------------------------------------------------------- + +if virsh net-list --all 2>/dev/null | grep -q "\\b${NETWORK}\\b"; then + if ! virsh net-info "$NETWORK" 2>/dev/null | grep -q "Active:.*yes"; then + sc_info "Starting game network..." 
+ virsh net-start "$NETWORK" >/dev/null 2>&1 || true + fi +fi + +# --------------------------------------------------------------------------- +# Start workstation VM if not running +# --------------------------------------------------------------------------- + +if [ "$(virsh domstate "$DOMAIN" 2>/dev/null | tr -d ' \n')" != "running" ]; then + virsh start "$DOMAIN" >/dev/null 2>&1 || true +fi + +# --------------------------------------------------------------------------- +# Wait for server, show startup status +# --------------------------------------------------------------------------- + +if [ -n "$SERVER_PID" ] && ! _wait_for_server; then + echo "" + echo " The game server didn't start in time." + echo " Check that port $PORT is available and try again." + echo "" + exit 1 +fi + +echo "" +sc_ok "Game server running" +sc_ok "Workstation online" +echo "" +echo " Opening your desk..." +echo "" + +# --------------------------------------------------------------------------- +# Open SPICE viewer (do not exec — we need to wait and then clean up) +# --------------------------------------------------------------------------- + +_launch_viewer() { + local spice_uri + spice_uri="$(virsh domdisplay "$DOMAIN" 2>/dev/null | grep -i spice | head -n1 || true)" + + if [ -n "$spice_uri" ] && command -v remote-viewer >/dev/null 2>&1; then + remote-viewer "$spice_uri" & + VIEWER_PID=$! + elif command -v virt-viewer >/dev/null 2>&1; then + virt-viewer --connect "$LIBVIRT_DEFAULT_URI" "$DOMAIN" & + VIEWER_PID=$! + elif [ -n "$spice_uri" ] && command -v spicy >/dev/null 2>&1; then + local port + port="$(printf '%s' "$spice_uri" | grep -oE ':[0-9]+' | head -1 | tr -d ':')" + spicy -h 127.0.0.1 -p "${port:-5900}" & + VIEWER_PID=$! + else + echo " No SPICE viewer found (virt-viewer or remote-viewer)." + echo " Run install.sh to set up the game." 
+ exit 1 + fi +} + +_launch_viewer +wait "$VIEWER_PID" 2>/dev/null || true diff --git a/tools/content/validate-content.js b/tools/content/validate-content.js new file mode 100644 index 0000000..3f4d717 --- /dev/null +++ b/tools/content/validate-content.js @@ -0,0 +1,474 @@ +#!/usr/bin/env node +/** + * validate-content.js + * Sysadmin Chronicles content validation tool. + * + * Run from project root: + * node tools/content/validate-content.js + * node tools/content/validate-content.js --quests-only + * node tools/content/validate-content.js --verbose + * + * Exit code 0 = no errors. Exit code 1 = validation errors found. + * + * Checks performed: + * - All JSON files parse correctly + * - No duplicate IDs across content domains + * - Every world flag referenced anywhere exists in world_flags.json + * - Every required_vms entry maps to a known VM profile + * - Every blast_radius entry maps to a known incident ID + * - Every ticket_id in a quest maps to an existing ticket + * - Every linked_quest in a ticket maps to an existing quest + * - Branch priorities within a quest are unique (no ties) + * - Every follow_up_incident maps to an existing incident file + * - Every series_id has at least 2 members + * - clue_fingerprint evidence uses valid rule types + * - file_absent and file_owner_is_not are accepted rule types (OI-010 resolved) + * - Package-manager-specific paths/commands match the authored VM distro + */ + +const fs = require("fs"); +const path = require("path"); + +// --------------------------------------------------------------------------- +// CONFIG +// --------------------------------------------------------------------------- + +const CONTENT_ROOT = path.resolve(__dirname, "../../content"); +const QUESTS_DIR = path.join(CONTENT_ROOT, "quests"); +const TICKETS_DIR = path.join(CONTENT_ROOT, "tickets"); +const INCIDENTS_DIR= path.join(CONTENT_ROOT, "incidents"); +const DIALOGUE_DIR = path.join(CONTENT_ROOT, "dialogue"); +const FLAGS_FILE = 
path.join(CONTENT_ROOT, "world_flags", "world_flags.json"); +const VM_PROFILES_DIR = path.join(CONTENT_ROOT, "vm_profiles"); +const PRESSURE_PROFILES_DIR = path.join(CONTENT_ROOT, "pressure_profiles"); + +const VALID_RULE_TYPES = new Set([ + // Standard validation rule types (used in objectives and solution branches) + "file_exists", "file_absent", "file_contains", "file_mode", "file_owner", + "file_owner_is_not", "directory_exists", "service_state", "service_enabled", + "process_running", "process_user", "port_listening", "package_installed", + "mount_present", "disk_usage_below", "disk_usage_above", "command_assert", + "log_contains", "and", "or", "not", + // Advisory clue_fingerprint-only types (descriptive evidence, not used in branch validation) + // These document what evidence EXISTS in the VM baseline — not evaluated at runtime. + "service_state_is", "service_enabled_is", "file_size_above", "file_size_below", +]); + +const VALID_NARRATIVE_PHASES = new Set([ + "normal_work", "unease", "suspicion", "investigation", "conflict", "resolution" +]); + +const args = process.argv.slice(2); +const verbose = args.includes("--verbose"); +const questsOnly = args.includes("--quests-only"); + +// --------------------------------------------------------------------------- +// LOAD HELPERS +// --------------------------------------------------------------------------- + +let errors = 0; +let warnings = 0; + +function err(msg) { + console.error(` ❌ ERROR: ${msg}`); + errors++; +} + +function warn(msg) { + console.warn(` ⚠ WARN: ${msg}`); + warnings++; +} + +function ok(msg) { + if (verbose) console.log(` ✓ ${msg}`); +} + +function loadJson(filePath) { + try { + const text = fs.readFileSync(filePath, "utf8"); + return JSON.parse(text); + } catch (e) { + err(`JSON parse error in ${path.relative(CONTENT_ROOT, filePath)}: ${e.message}`); + return null; + } +} + +function loadDir(dirPath, idField = "id") { + const result = {}; + if (!fs.existsSync(dirPath)) { + warn(`Directory 
not found: ${dirPath}`); + return result; + } + for (const fname of fs.readdirSync(dirPath)) { + if (!fname.endsWith(".json") || fname.startsWith(".")) continue; + // Skip split artifacts. Pending splits still deserve a warning; completed + // split archives and backups are intentional and should stay quiet. + if (fname.includes("SPLIT_PENDING")) { + warn(`Skipping non-standard file: ${fname}`); + continue; + } + if (fname.includes("SPLIT_DONE") || fname.includes(".bak")) { + continue; + } + const data = loadJson(path.join(dirPath, fname)); + if (!data) continue; + if (Array.isArray(data)) { + warn(`${fname} is an array — expected single object with '${idField}' field. Split into individual files.`); + continue; + } + const id = data[idField]; + if (!id) { + err(`Missing '${idField}' field in ${fname}`); + continue; + } + if (result[id]) { + err(`Duplicate ID '${id}' in ${fname} (already seen)`); + } + result[id] = { data, fname }; + } + return result; +} + +// --------------------------------------------------------------------------- +// RULE TYPE VALIDATOR (recursive) +// --------------------------------------------------------------------------- + +function validateRuleTypes(rule, context) { + if (!rule || typeof rule !== "object") return; + const type = rule.type; + if (!type) { + err(`${context}: rule missing 'type' field`); + return; + } + if (!VALID_RULE_TYPES.has(type)) { + err(`${context}: unknown rule type '${type}'`); + return; + } + // Recurse into composite rules + if (type === "and" || type === "or") { + for (const sub of (rule.rules || [])) { + validateRuleTypes(sub, context); + } + } + if (type === "not") { + validateRuleTypes(rule.rule, context); + } +} + +// --------------------------------------------------------------------------- +// COLLECT ALL FLAG REFERENCES IN A RULE (recursive) +// --------------------------------------------------------------------------- + +function collectFlagRefs(obj, refs = new Set()) { + if (!obj || typeof obj !== 
"object") return refs; + if (Array.isArray(obj)) { + for (const item of obj) collectFlagRefs(item, refs); + return refs; + } + for (const [k, v] of Object.entries(obj)) { + if (k === "world_flags" && Array.isArray(v)) { + for (const f of v) refs.add(f); + } else if (k === "trigger" && typeof v === "string" && v.startsWith("world_flag:")) { + refs.add(v.slice("world_flag:".length)); + } else if (typeof v === "object") { + collectFlagRefs(v, refs); + } + } + return refs; +} + +function distroForVm(vmId, vmProfiles) { + return vmProfiles[vmId]?.data?.distro || ""; +} + +function validateVmSpecificSemantics(rule, context, vmProfiles) { + if (!rule || typeof rule !== "object") return; + if (Array.isArray(rule)) { + for (const item of rule) validateVmSpecificSemantics(item, context, vmProfiles); + return; + } + + const vmId = typeof rule.vm === "string" ? rule.vm : ""; + const distro = distroForVm(vmId, vmProfiles); + const pathValue = typeof rule.path === "string" ? rule.path : ""; + const commandValue = typeof rule.command === "string" ? 
rule.command : ""; + + if (vmId && distro) { + const pathCtx = `${context} (${vmId}/${distro})`; + if (pathValue === "/etc/pacman.conf" && distro !== "arch") { + err(`${pathCtx}: /etc/pacman.conf is Arch-specific but VM distro is '${distro}'`); + } + if (pathValue.startsWith("/etc/apt/") && distro === "arch") { + err(`${pathCtx}: /etc/apt/* is Debian/Ubuntu-specific but VM distro is 'arch'`); + } + if (commandValue.includes("pacman") && distro !== "arch") { + err(`${pathCtx}: command references pacman but VM distro is '${distro}'`); + } + if ((commandValue.includes("apt ") || commandValue.includes("apt-get") || commandValue.includes("dpkg ")) && distro === "arch") { + err(`${pathCtx}: command references apt/dpkg but VM distro is 'arch'`); + } + } + + for (const value of Object.values(rule)) { + if (value && typeof value === "object") { + validateVmSpecificSemantics(value, context, vmProfiles); + } + } +} + +// --------------------------------------------------------------------------- +// MAIN VALIDATION +// --------------------------------------------------------------------------- + +console.log("═══════════════════════════════════════"); +console.log(" Sysadmin Chronicles Content Validator"); +console.log("═══════════════════════════════════════\n"); + +// Load all content +console.log("Loading content..."); +const quests = loadDir(QUESTS_DIR); +const tickets = loadDir(TICKETS_DIR); +const incidents = loadDir(INCIDENTS_DIR); +const dialogue = loadDir(DIALOGUE_DIR); +const vmProfiles= loadDir(VM_PROFILES_DIR); +const pressureProfiles = loadDir(PRESSURE_PROFILES_DIR); +const flagsRaw = loadJson(FLAGS_FILE); +// flags field may be an Array (list of {id, ...}) or a Dict (id -> {...}) +// Normalize to Dict keyed by id for validation lookups. 
// Build the flag lookup used by the dialogue and cross-reference checks below.
// The registry source may be a list of { id, ... } entries or already a map
// of id -> entry; both are normalized to a map keyed by id.
let flagRegistry = {};
if (flagsRaw) {
  const raw = flagsRaw.flags || flagsRaw;
  if (Array.isArray(raw)) {
    for (const f of raw) {
      // Skip null entries instead of throwing on `f.id`.
      if (f && f.id) flagRegistry[f.id] = f;
    }
  } else if (typeof raw === "object") {
    flagRegistry = raw;
  }
}

console.log(` Quests: ${Object.keys(quests).length}, Tickets: ${Object.keys(tickets).length}, Incidents: ${Object.keys(incidents).length}, Dialogue: ${Object.keys(dialogue).length}, Pressure Profiles: ${Object.keys(pressureProfiles).length}\n`);

// ---------------------------------------------------------------------------
// QUEST VALIDATION
// ---------------------------------------------------------------------------
// Each quest is checked for: known VM profiles, a non-empty baseline snapshot,
// resolvable ticket / pressure-profile / incident references, unique branch
// priorities, valid rule types, distro-consistent rules, and well-formed
// optional fields (narrative_phase, behavior_impact, hidden_hook,
// access_requirements).

console.log("Validating quests...");
for (const [qid, { data: quest, fname }] of Object.entries(quests)) {
  const ctx = `Quest ${qid} (${fname})`;

  // required_vms
  for (const vmId of (quest.required_vms || [])) {
    if (!vmProfiles[vmId]) err(`${ctx}: required_vms references unknown VM profile '${vmId}'`);
  }

  // baseline_snapshot
  if (!quest.baseline_snapshot || typeof quest.baseline_snapshot !== 'string' || !quest.baseline_snapshot.trim()) {
    err(`${ctx}: missing or empty 'baseline_snapshot' field`);
  }

  // ticket_id
  if (quest.ticket_id && !tickets[quest.ticket_id]) {
    err(`${ctx}: ticket_id '${quest.ticket_id}' not found in tickets/`);
  }

  // pressure_profile
  if (quest.pressure_profile && !pressureProfiles[quest.pressure_profile]) {
    err(`${ctx}: pressure_profile '${quest.pressure_profile}' not found in pressure_profiles/`);
  }

  // blast_radius (only a warning: the referenced incident may not be authored yet)
  for (const incId of (quest.blast_radius || [])) {
    if (!incidents[incId]) warn(`${ctx}: blast_radius references '${incId}' which doesn't exist yet`);
  }

  // Branch priority uniqueness
  const branches = quest.solution_branches || [];
  const priorities = new Set();
  for (const branch of branches) {
    const p = branch.priority;
    if (priorities.has(p)) {
      err(`${ctx}: duplicate branch priority ${p} — each branch must have a unique priority`);
    }
    priorities.add(p);

    // follow_up_incident
    if (branch.follow_up_incident && !incidents[branch.follow_up_incident]) {
      err(`${ctx}: branch '${branch.id}' follow_up_incident '${branch.follow_up_incident}' not found`);
    }

    // Validate branch validation rules
    validateRuleTypes(branch.validation, `${ctx} branch '${branch.id}'`);
  }

  // clue_fingerprint evidence rule types
  const evidence = (quest.clue_fingerprint || {}).evidence || [];
  for (const ev of evidence) {
    if (!VALID_RULE_TYPES.has(ev.type)) {
      err(`${ctx}: clue_fingerprint evidence uses unknown rule type '${ev.type}'`);
    }
    validateVmSpecificSemantics(ev, `${ctx} clue_fingerprint`, vmProfiles);
  }

  // Objective rule types
  for (const obj of (quest.objectives || [])) {
    validateRuleTypes(obj.validation, `${ctx} objective '${obj.id}'`);
    validateVmSpecificSemantics(obj.validation, `${ctx} objective '${obj.id}'`, vmProfiles);
  }

  // Distro-specific checks for branches run in a second pass so their
  // messages keep appearing after the objective messages.
  for (const branch of branches) {
    validateVmSpecificSemantics(branch.validation, `${ctx} branch '${branch.id}'`, vmProfiles);
  }

  // narrative_phase
  if (!quest.narrative_phase) {
    warn(`${ctx}: missing 'narrative_phase' field`);
  } else if (!VALID_NARRATIVE_PHASES.has(quest.narrative_phase)) {
    err(`${ctx}: unknown narrative_phase '${quest.narrative_phase}'`);
  }

  // behavior_impact
  // A null map or a null/non-object entry is reported as a content error;
  // previously these made Object.entries() / property access throw and
  // aborted the whole validation run.
  if (quest.behavior_impact !== undefined) {
    const impactMap = quest.behavior_impact;
    if (impactMap === null || typeof impactMap !== 'object') {
      err(`${ctx}: behavior_impact must be an object`);
    } else {
      for (const [branchKey, impact] of Object.entries(impactMap)) {
        if (impact === null || typeof impact !== 'object') {
          err(`${ctx}: behavior_impact[${branchKey}] must be an object`);
          continue;
        }
        for (const field of ['curiosity_delta', 'obedience_delta', 'risk_delta', 'suspicion_delta']) {
          if (impact[field] !== undefined && typeof impact[field] !== 'number') {
            err(`${ctx}: behavior_impact[${branchKey}].${field} must be a number`);
          }
        }
      }
    }
  }

  // hidden_hook shape (null is treated the same as absent)
  if (quest.hidden_hook !== undefined && quest.hidden_hook !== null) {
    if (typeof quest.hidden_hook.id !== 'string') {
      err(`${ctx}: hidden_hook.id must be a string`);
    }
  }

  // access_requirements
  if (quest.access_requirements?.minimum_access) {
    for (const [vmId] of Object.entries(quest.access_requirements.minimum_access)) {
      if (!vmProfiles[vmId]) {
        err(`${ctx}: access_requirements.minimum_access references unknown VM '${vmId}'`);
      }
    }
  }

  ok(`${ctx}: OK`);
}

if (!questsOnly) {
  // ---------------------------------------------------------------------------
  // TICKET VALIDATION
  // ---------------------------------------------------------------------------

  console.log("\nValidating tickets...");
  for (const [tid, { data: ticket, fname }] of Object.entries(tickets)) {
    const ctx = `Ticket ${tid} (${fname})`;

    if (ticket.linked_quest && !quests[ticket.linked_quest]) {
      warn(`${ctx}: linked_quest '${ticket.linked_quest}' not found`);
    }

    const hasInitial = ticket.initial_priority !== undefined;
    const hasCurrent = ticket.current_priority !== undefined;
    if (!hasInitial) {
      err(`${ctx}: missing 'initial_priority'`);
    }
    if (!hasCurrent) {
      err(`${ctx}: missing 'current_priority'`);
    }
    // Compare only when both are present: a missing field is already an
    // error above and should not also produce a mismatch warning.
    if (hasInitial && hasCurrent && ticket.initial_priority !== ticket.current_priority) {
      warn(`${ctx}: initial_priority != current_priority at authoring time`);
    }
    ok(`${ctx}: OK`);
  }

  // ---------------------------------------------------------------------------
  // INCIDENT VALIDATION
  // ---------------------------------------------------------------------------

  console.log("\nValidating incidents...");
  for (const [iid, { data: incident, fname }] of Object.entries(incidents)) {
    const ctx = `Incident ${iid} (${fname})`;

    if (!Array.isArray(incident.blast_radius_quests)) {
      err(`${ctx}: missing 'blast_radius_quests' (must be present, can be [])`);
    }
    if (!Array.isArray(incident.blast_radius_incidents)) {
      err(`${ctx}: missing 'blast_radius_incidents' (must be present, can be [])`);
    }
    if (incident.resolution_requirements?.validation) {
      validateRuleTypes(incident.resolution_requirements.validation, `${ctx} resolution_requirements`);
      validateVmSpecificSemantics(incident.resolution_requirements.validation, `${ctx} resolution_requirements`, vmProfiles);
    }
    ok(`${ctx}: OK`);
  }

  // ---------------------------------------------------------------------------
  // DIALOGUE VALIDATION
  // ---------------------------------------------------------------------------

  console.log("\nValidating dialogue...");
  // series_id -> list of dialogue ids that declare it
  const seriesMembers = {};
  for (const [did, { data: dlg, fname }] of Object.entries(dialogue)) {
    const ctx = `Dialogue ${did} (${fname})`;

    if (dlg.series_id) {
      if (dlg.series_position === undefined) {
        err(`${ctx}: has series_id '${dlg.series_id}' but missing series_position`);
      }
      if (!seriesMembers[dlg.series_id]) seriesMembers[dlg.series_id] = [];
      seriesMembers[dlg.series_id].push(did);
    }

    // world_flag trigger
    // The typeof guard keeps a non-string trigger from throwing on
    // .startsWith(); it matches the guard collectFlagRefs() applies.
    if (typeof dlg.trigger === "string" && dlg.trigger.startsWith("world_flag:")) {
      const flagId = dlg.trigger.slice("world_flag:".length);
      if (!flagRegistry[flagId]) {
        err(`${ctx}: trigger flag '${flagId}' not in world_flags registry`);
      }
    }
    ok(`${ctx}: OK`);
  }

  // series membership count check
  for (const [sid, members] of Object.entries(seriesMembers)) {
    if (members.length < 2) {
      warn(`Series '${sid}' has only 1 member (${members[0]}) — needs 2+ for series tracking`);
    }
  }

  // ---------------------------------------------------------------------------
  // WORLD FLAG CROSS-REFERENCE
  // ---------------------------------------------------------------------------
  // Every flag id referenced anywhere in quests, tickets, incidents or
  // dialogue must be declared in the registry.

  console.log("\nValidating world flag references...");
  const allContent = [
    ...Object.values(quests).map(v => v.data),
    ...Object.values(tickets).map(v => v.data),
    ...Object.values(incidents).map(v => v.data),
    ...Object.values(dialogue).map(v => v.data),
  ];

  const referencedFlags = new Set();
  for (const item of allContent) {
    collectFlagRefs(item, referencedFlags);
  }

  for (const flagId of referencedFlags) {
    if (!flagRegistry[flagId]) {
      err(`World flag '${flagId}' is referenced in content but not declared in world_flags.json`);
    }
  }
}
+ +// --------------------------------------------------------------------------- +// SUMMARY +// --------------------------------------------------------------------------- + +console.log("\n═══════════════════════════════════════"); +if (errors === 0 && warnings === 0) { + console.log(" ✅ All content valid. Zero errors, zero warnings."); +} else { + if (errors > 0) console.log(` ❌ ${errors} error(s) found.`); + if (warnings > 0) console.log(` ⚠ ${warnings} warning(s) found.`); +} +console.log("═══════════════════════════════════════\n"); +process.exit(errors > 0 ? 1 : 0); diff --git a/tools/content/verify-clue-fingerprints.js b/tools/content/verify-clue-fingerprints.js new file mode 100644 index 0000000..42a27d7 --- /dev/null +++ b/tools/content/verify-clue-fingerprints.js @@ -0,0 +1,497 @@ +#!/usr/bin/env node +'use strict'; +/** + * verify-clue-fingerprints.js + * Verifies that clue_fingerprint evidence in quest files is detectable + * in the actual VM state. + * + * Usage (run from project root): + * node tools/content/verify-clue-fingerprints.js + * node tools/content/verify-clue-fingerprints.js --quest Q002 + * node tools/content/verify-clue-fingerprints.js --revert + * node tools/content/verify-clue-fingerprints.js --start-vms + * node tools/content/verify-clue-fingerprints.js --dry-run + * + * Flags: + * --quest Check only the specified quest (e.g. Q002) + * --revert Revert each VM to its quest baseline snapshot before checking. + * Leaves VMs running at baseline afterwards (ready for play). + * --start-vms Start VMs that are not running (without reverting snapshot). + * Use after a fresh boot if VMs are just stopped, not reverted. + * --dry-run Print the SSH commands that would run without executing them. + * --help Show this help text. 
+ * + * Exit codes: + * 0 All evidence confirmed (or dry-run) + * 1 One or more failures, or no checks ran + * + * Prerequisites: + * - virsh available and pointing at qemu:///system + * - ~/.ssh/sc_host_key present + * - VMs running (or --revert / --start-vms provided) + */ + +const fs = require('fs'); +const path = require('path'); +const { spawnSync } = require('child_process'); +const os = require('os'); + +// --------------------------------------------------------------------------- +// CONFIG +// --------------------------------------------------------------------------- + +const PROJECT_ROOT = path.resolve(__dirname, '../..'); +const CONTENT_ROOT = path.join(PROJECT_ROOT, 'content'); +const QUESTS_DIR = path.join(CONTENT_ROOT, 'quests'); +const VM_PROFILES_DIR = path.join(CONTENT_ROOT, 'vm_profiles'); +const SSH_KEY = path.join(os.homedir(), '.ssh/sc_host_key'); +const LIBVIRT_URI = 'qemu:///system'; +const VM_PREFIX = 'sc-'; +const SSH_WAIT_SECS = 90; // max seconds to wait for SSH after revert/start +const SSH_POLL_SECS = 5; + +const SSH_OPTS = [ + '-o', 'StrictHostKeyChecking=no', + '-o', 'UserKnownHostsFile=/dev/null', + '-o', 'BatchMode=yes', + '-o', 'ConnectTimeout=8', + '-o', 'LogLevel=ERROR', + '-i', SSH_KEY, +]; + +// --------------------------------------------------------------------------- +// ARGS +// --------------------------------------------------------------------------- + +const rawArgs = process.argv.slice(2); +const dryRun = rawArgs.includes('--dry-run'); +const doRevert = rawArgs.includes('--revert'); +const startVms = rawArgs.includes('--start-vms'); +const showHelp = rawArgs.includes('--help') || rawArgs.includes('-h'); + +const questIdx = rawArgs.indexOf('--quest'); +const questFilter = questIdx >= 0 ? 
rawArgs[questIdx + 1] : null; + +if (showHelp) { + const usage = fs.readFileSync(__filename, 'utf8').split('\n') + .slice(1, 20).map(l => l.replace(/^ \* ?/, '')).join('\n'); + console.log(usage); + process.exit(0); +} + +// --------------------------------------------------------------------------- +// VIRSH HELPERS +// --------------------------------------------------------------------------- + +function virsh(...args) { + const r = spawnSync('virsh', ['--connect', LIBVIRT_URI, ...args], { + encoding: 'utf8', timeout: 20000, + }); + return { + ok: r.status === 0, + stdout: (r.stdout || '').trim(), + stderr: (r.stderr || '').trim(), + }; +} + +function domainName(vmId) { + return VM_PREFIX + vmId.replace(/_/g, '-'); +} + +function vmIsRunning(vmId) { + const r = virsh('domstate', domainName(vmId)); + return r.ok && r.stdout === 'running'; +} + +function getVmIp(vmId) { + const domain = domainName(vmId); + // Primary: guest agent + const r = virsh('domifaddr', domain, '--source', 'agent'); + if (r.ok && r.stdout) { + const m = r.stdout.match(/(\d+\.\d+\.\d+\.\d+)/); + if (m) return m[1]; + } + // Fallback: DHCP leases table + const leases = virsh('net-dhcp-leases', 'sc-internal'); + if (leases.ok && leases.stdout) { + const hostname = domain.replace(VM_PREFIX, ''); + for (const line of leases.stdout.split('\n')) { + if (line.includes(hostname)) { + const m = line.match(/(\d+\.\d+\.\d+\.\d+)/); + if (m) return m[1]; + } + } + } + return null; +} + +function sleep(secs) { + spawnSync('sleep', [String(secs)]); +} + +function waitForSsh(vmId, user) { + process.stdout.write(` Waiting for SSH on ${domainName(vmId)}`); + const deadline = Date.now() + SSH_WAIT_SECS * 1000; + while (Date.now() < deadline) { + const ip = getVmIp(vmId); + if (ip) { + const r = spawnSync('ssh', [...SSH_OPTS, '-o', 'ConnectTimeout=5', `${user}@${ip}`, 'true'], { + encoding: 'utf8', timeout: 8000, + }); + if (r.status === 0) { + console.log(` ready (${ip})`); + return ip; + } + } + 
process.stdout.write('.'); + sleep(SSH_POLL_SECS); + } + console.log(' TIMEOUT'); + return null; +} + +function revertVmToBaseline(vmId, snapshot) { + const domain = domainName(vmId); + console.log(` Reverting ${domain} → ${snapshot}...`); + // Destroy (force-stop) if running — state will be discarded + if (vmIsRunning(vmId)) { + virsh('destroy', domain); + sleep(1); + } + const r = virsh('snapshot-revert', domain, snapshot, '--running'); + if (!r.ok) { + console.error(` ERROR reverting ${domain}: ${r.stderr}`); + return false; + } + return true; +} + +// --------------------------------------------------------------------------- +// CONTENT LOADING +// --------------------------------------------------------------------------- + +function loadVmProfiles() { + const profiles = {}; + for (const f of fs.readdirSync(VM_PROFILES_DIR)) { + if (!f.endsWith('.json')) continue; + const d = JSON.parse(fs.readFileSync(path.join(VM_PROFILES_DIR, f), 'utf8')); + profiles[d.id] = d; + } + return profiles; +} + +function loadQuests() { + return fs.readdirSync(QUESTS_DIR) + .filter(f => f.endsWith('.json') && !f.includes('SPLIT')) + .map(f => { + try { return JSON.parse(fs.readFileSync(path.join(QUESTS_DIR, f), 'utf8')); } + catch { return null; } + }) + .filter(Boolean) + .sort((a, b) => (a.id < b.id ? -1 : 1)); +} + +// --------------------------------------------------------------------------- +// EVIDENCE CHECK COMMAND BUILDER +// --------------------------------------------------------------------------- + +function sq(s) { + // Shell-quote a value for embedding in a bash command string + return "'" + String(s).replace(/'/g, "'\"'\"'") + "'"; +} + +function buildCmd(ev) { + // Returns a bash one-liner that prints "PASS" or "FAIL [detail]" + // All commands run via sudo to handle restricted paths. + switch (ev.type) { + + case 'file_absent': + return `sudo test ! 
-e ${sq(ev.path)} && echo PASS || echo 'FAIL (file exists)'`; + + case 'file_exists': + return `sudo test -e ${sq(ev.path)} && echo PASS || echo 'FAIL (file missing)'`; + + case 'file_contains': + case 'log_contains': { + const pat = sq(ev.contains); + return `sudo grep -qF ${pat} ${sq(ev.path)} 2>/dev/null && echo PASS || echo 'FAIL (pattern not found)'`; + } + + case 'file_owner_is_not': { + const notExpected = ev.expected_user || ev.expected_owner || ''; + return ( + `owner=$(sudo stat -c '%U' ${sq(ev.path)} 2>/dev/null || echo '?'); ` + + `[ "\${owner}" != ${sq(notExpected)} ] && echo PASS || ` + + `echo "FAIL (owner=\${owner}, expected NOT ${notExpected})"` + ); + } + + case 'file_size_above': { + const threshold = ev.threshold_bytes; + return ( + `bytes=$(sudo stat -c '%s' ${sq(ev.path)} 2>/dev/null || echo 0); ` + + `[ "\${bytes}" -gt ${threshold} ] && echo PASS || ` + + `echo "FAIL (size=\${bytes} bytes, threshold=${threshold})"` + ); + } + + case 'disk_usage_above': { + const threshold = ev.threshold_percent; + return ( + `pct=$(sudo df --output=pcent ${sq(ev.path)} 2>/dev/null | tail -1 | tr -d ' %'); ` + + `[ "\${pct}" -gt ${threshold} ] && echo PASS || ` + + `echo "FAIL (usage=\${pct}%, threshold=${threshold}%)"` + ); + } + + case 'service_state_is': { + const expected = ev.state; + return ( + `state=$(sudo systemctl is-active ${sq(ev.service)} 2>/dev/null || echo inactive); ` + + `[ "\${state}" = ${sq(expected)} ] && echo PASS || ` + + `echo "FAIL (state=\${state}, expected=${expected})"` + ); + } + + case 'service_enabled_is': { + const wantEnabled = ev.enabled === true || ev.enabled === 'true'; + return ( + `estate=$(sudo systemctl is-enabled ${sq(ev.service)} 2>/dev/null || echo disabled); ` + + (wantEnabled + ? 
`[ "\${estate}" = "enabled" ] && echo PASS || echo "FAIL (is-enabled=\${estate}, expected=enabled)"` + : `[ "\${estate}" != "enabled" ] && echo PASS || echo "FAIL (is-enabled=\${estate}, expected=disabled)"`) + ); + } + + default: + return null; + } +} + +function describeEvidence(ev) { + switch (ev.type) { + case 'file_absent': return `file absent: ${ev.path}`; + case 'file_exists': return `file exists: ${ev.path}`; + case 'file_contains': + case 'log_contains': return `${ev.path} ∋ ${JSON.stringify(ev.contains)}`; + case 'file_owner_is_not': return `${ev.path} owner ≠ ${ev.expected_user || ev.expected_owner}`; + case 'file_size_above': return `${ev.path} > ${(ev.threshold_bytes / 1e9).toFixed(1)} GB`; + case 'disk_usage_above': return `${ev.path} disk > ${ev.threshold_percent}%`; + case 'service_state_is': return `${ev.service} state = ${ev.state}`; + case 'service_enabled_is': return `${ev.service} enabled = ${ev.enabled}`; + default: return `${ev.type}: ${JSON.stringify(ev)}`; + } +} + +// --------------------------------------------------------------------------- +// MAIN +// --------------------------------------------------------------------------- + +function main() { + console.log('\n Sysadmin Chronicles — Clue Fingerprint Verifier'); + console.log('═══════════════════════════════════════\n'); + + if (dryRun) console.log(' Mode: DRY-RUN (no SSH commands executed)\n'); + if (doRevert) console.log(' Mode: REVERT — VMs will be reverted to baseline snapshots\n'); + + const vmProfiles = loadVmProfiles(); + let quests = loadQuests(); + + if (questFilter) { + quests = quests.filter(q => q.id === questFilter); + if (quests.length === 0) { + console.error(`No quest found with id: ${questFilter}`); + process.exit(1); + } + } + + // Collect VMs needed (evidence items might reference multiple VMs, e.g. 
Q008) + const vmsNeeded = new Set(); + for (const q of quests) { + for (const ev of (q.clue_fingerprint?.evidence || [])) { + if (ev.vm) vmsNeeded.add(ev.vm); + } + } + + // Resolve IPs for non-revert mode upfront (revert mode resolves per-quest) + const vmIps = {}; + if (!doRevert) { + for (const vmId of vmsNeeded) { + const domain = domainName(vmId); + const profile = vmProfiles[vmId]; + const user = profile?.management_user || profile?.ssh_user || 'player'; + + if (!vmIsRunning(vmId)) { + if (startVms) { + console.log(`Starting ${domain}...`); + virsh('start', domain); + vmIps[vmId] = waitForSsh(vmId, user); + } else { + console.warn(` ⚠ ${domain} is not running. Use --start-vms or --revert.`); + vmIps[vmId] = null; + } + } else { + process.stdout.write(` Resolving IP for ${domain}...`); + let ip = null; + const deadline = Date.now() + 30000; + while (!ip && Date.now() < deadline) { + ip = getVmIp(vmId); + if (!ip) { process.stdout.write('.'); sleep(3); } + } + console.log(ip ? ` ${ip}` : ' (could not resolve)'); + vmIps[vmId] = ip; + } + } + } + + // --------------------------------------------------------------------------- + // Per-quest checks + // --------------------------------------------------------------------------- + + let totalPass = 0, totalFail = 0, totalSkip = 0; + const failures = []; + + for (const quest of quests) { + const evidence = quest.clue_fingerprint?.evidence || []; + if (evidence.length === 0) { + console.log(`\n[${quest.id}] ${quest.title} — no evidence entries, skipping`); + continue; + } + + const baseline = quest.baseline_snapshot || '(unknown)'; + const primaryVm = quest.primary_vm || ''; + console.log(`\n[${quest.id}] ${quest.title}`); + console.log(` Baseline: ${primaryVm} @ ${baseline}`); + + // Revert primary VM if --revert + if (doRevert && primaryVm && !dryRun) { + const revOk = revertVmToBaseline(primaryVm, baseline); + if (!revOk) { + console.log(` ✗ Cannot revert — skipping ${evidence.length} checks`); + totalSkip += 
evidence.length; + continue; + } + const profile = vmProfiles[primaryVm]; + const user = profile?.management_user || profile?.ssh_user || 'player'; + const ip = waitForSsh(primaryVm, user); + vmIps[primaryVm] = ip; + if (!ip) { + console.log(` ✗ SSH timeout after revert — skipping ${evidence.length} checks`); + totalSkip += evidence.length; + continue; + } + } + + // Also handle multi-VM quests (Q008 references both web_server and build_machine) + const questVms = new Set(evidence.map(ev => ev.vm).filter(Boolean)); + if (doRevert) { + for (const extraVm of questVms) { + if (extraVm === primaryVm) continue; + if (!vmIps[extraVm]) { + // Extra VM: just ensure it's running (don't revert — only primary gets reverted) + if (!vmIsRunning(extraVm)) { + virsh('start', domainName(extraVm)); + } + const profile = vmProfiles[extraVm]; + const user = profile?.management_user || profile?.ssh_user || 'player'; + vmIps[extraVm] = waitForSsh(extraVm, user); + } + } + } + + // Run each evidence check + for (const ev of evidence) { + const vmId = ev.vm || primaryVm; + const ip = vmIps[vmId]; + const profile = vmProfiles[vmId]; + const user = profile?.management_user || profile?.ssh_user || 'player'; + const desc = describeEvidence(ev); + + if (!ip && !dryRun) { + console.log(` ⚠ SKIP ${desc}`); + console.log(` → VM ${vmId} not reachable`); + totalSkip++; + continue; + } + + const cmd = buildCmd(ev); + if (!cmd) { + console.log(` ⚠ SKIP ${desc}`); + console.log(` → evidence type '${ev.type}' not supported by this tool`); + totalSkip++; + continue; + } + + let passed; + let detail = ''; + + if (dryRun) { + console.log(` ○ DRY ${desc}`); + console.log(` → ssh ${user}@<${vmId}-ip> '${cmd.slice(0, 80)}${cmd.length > 80 ? '...' 
: ''}'`); + totalPass++; + continue; + } + + const r = spawnSync('ssh', [...SSH_OPTS, `${user}@${ip}`, cmd], { + encoding: 'utf8', timeout: 25000, + }); + const stdout = (r.stdout || '').trim(); + const stderr = (r.stderr || '').trim(); + + if (r.error || r.status === null) { + passed = false; + detail = r.error ? r.error.message : 'SSH process failed'; + } else { + passed = stdout.startsWith('PASS'); + detail = stdout.startsWith('FAIL') ? stdout.replace(/^FAIL\s*/, '') : (stderr || `exit ${r.status}`); + } + + if (passed) { + console.log(` ✓ PASS ${desc}`); + totalPass++; + } else { + console.log(` ✗ FAIL ${desc}`); + if (detail) console.log(` → ${detail}`); + totalFail++; + failures.push({ quest: quest.id, desc, detail }); + } + } + } + + // --------------------------------------------------------------------------- + // Summary + // --------------------------------------------------------------------------- + + console.log('\n═══════════════════════════════════════'); + console.log(` Results: ${totalPass} passed ${totalFail} failed ${totalSkip} skipped`); + + if (totalFail > 0) { + console.log('\n Failed checks:'); + for (const f of failures) { + console.log(` ✗ [${f.quest}] ${f.desc}`); + if (f.detail) console.log(` → ${f.detail}`); + } + console.log('\n ✗ Clue fingerprint verification FAILED'); + console.log('═══════════════════════════════════════\n'); + process.exit(1); + } + + if (!dryRun && totalPass === 0 && totalSkip > 0) { + console.log('\n ⚠ No checks ran. Are the VMs running?'); + console.log(' Tips:'); + console.log(' --start-vms start VMs then check on current state'); + console.log(' --revert revert to baseline snapshot then check'); + console.log('═══════════════════════════════════════\n'); + process.exit(1); + } + + if (dryRun) { + console.log('\n Dry-run complete. 
No checks were executed.'); + } else { + console.log('\n ✓ All evidence confirmed in VM state'); + } + console.log('═══════════════════════════════════════\n'); + process.exit(0); +} + +main(); diff --git a/tools/dev/test-content.sh b/tools/dev/test-content.sh new file mode 100644 index 0000000..a56f0d5 --- /dev/null +++ b/tools/dev/test-content.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# test-content.sh — Run content validation from CLI +# +# Usage: +# bash tools/dev/test-content.sh +# bash tools/dev/test-content.sh --verbose +# bash tools/dev/test-content.sh --quests-only + +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +if ! command -v node &>/dev/null; then + echo "ERROR: node not found. Install Node.js to run content validation." + exit 1 +fi + +exec node "$PROJECT_ROOT/tools/content/validate-content.js" "$@" diff --git a/tools/lib/config.sh b/tools/lib/config.sh new file mode 100755 index 0000000..7a598c7 --- /dev/null +++ b/tools/lib/config.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Install config management for Sysadmin Chronicles. +# Config lives at ~/.config/sysadmin-chronicles/config (survives game dir moves). +# Source this file; do not execute directly. + +SC_CONFIG_DIR="${SC_CONFIG_DIR:-$HOME/.config/sysadmin-chronicles}" +SC_CONFIG_FILE="$SC_CONFIG_DIR/config" + +config_read() { + [ -f "$SC_CONFIG_FILE" ] && source "$SC_CONFIG_FILE" || true +} + +config_write() { + local key="$1" + local value="$2" + mkdir -p "$SC_CONFIG_DIR" + local tmp + tmp="$(mktemp "$SC_CONFIG_DIR/config.XXXXXX")" + if [ -f "$SC_CONFIG_FILE" ]; then + awk -v key="$key" -v value="$value" ' + BEGIN { found = 0 } + index($0, key "=") == 1 { + print key "=" value + found = 1 + next + } + { print } + END { + if (!found) { + print key "=" value + } + } + ' "$SC_CONFIG_FILE" > "$tmp" + else + printf '%s=%s\n' "$key" "$value" > "$tmp" + fi + mv "$tmp" "$SC_CONFIG_FILE" +} + +config_show() { + if [ ! 
-f "$SC_CONFIG_FILE" ]; then + echo " (no config file at $SC_CONFIG_FILE)" + return + fi + echo " Config: $SC_CONFIG_FILE" + local line key value + while IFS= read -r line; do + [[ "$line" =~ ^# ]] && continue + [[ -z "$line" ]] && continue + key="${line%%=*}" + value="${line#*=}" + printf ' %-28s %s\n' "$key" "$value" + done < "$SC_CONFIG_FILE" +} diff --git a/tools/lib/deps.sh b/tools/lib/deps.sh new file mode 100755 index 0000000..ee8f6f7 --- /dev/null +++ b/tools/lib/deps.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# Dependency detection and installation for Sysadmin Chronicles. +# Source this file; do not execute directly. + +SC_DISTRO="" + +detect_distro() { + if [ -f /etc/arch-release ]; then + SC_DISTRO=arch + elif grep -qi ubuntu /etc/os-release 2>/dev/null; then + SC_DISTRO=ubuntu + elif [ -f /etc/debian_version ]; then + SC_DISTRO=debian + elif [ -f /etc/fedora-release ]; then + SC_DISTRO=fedora + elif grep -qi opensuse /etc/os-release 2>/dev/null; then + SC_DISTRO=opensuse + else + SC_DISTRO=unknown + fi +} + +# Per-distro package names for canonical dep names. +# Format: canonical:arch:debian:ubuntu:fedora:opensuse +# Empty field = not applicable / same as above / handled specially. 
+_SC_DEP_MAP=( + "libvirt:libvirt:libvirt-daemon-system:libvirt-daemon-system:libvirt:libvirt" + "qemu-system:qemu-system-x86:qemu-system-x86:qemu-kvm:qemu-kvm:qemu-kvm" + "qemu-img:qemu-img:qemu-utils:qemu-utils:qemu-img:qemu-tools" + "virt-install:virt-install:virtinst:virtinst:virt-install:virt-install" + "virt-viewer:virt-viewer:virt-viewer:virt-viewer:virt-viewer:virt-viewer" + "cloud-localds:cloud-image-utils:cloud-image-utils:cloud-image-utils:cloud-utils:cloud-utils" + "genisoimage:cdrtools:genisoimage:genisoimage:genisoimage:genisoimage" + "xorriso:libisoburn:xorriso:xorriso:xorriso:xorriso" + "nodejs:nodejs:nodejs:nodejs:nodejs:nodejs" + "openssh:openssh:openssh-client:openssh-client:openssh-clients:openssh-clients" +) + +# Arch-only QEMU SPICE/display extras installed alongside qemu-system +_SC_ARCH_QEMU_EXTRAS=( + qemu-hw-display-qxl + qemu-hw-display-virtio-gpu + qemu-ui-spice-core + qemu-chardev-spice + qemu-audio-spice +) + +map_package() { + local dep="$1" + local distro="${2:-$SC_DISTRO}" + local entry + for entry in "${_SC_DEP_MAP[@]}"; do + IFS=':' read -r name arch debian ubuntu fedora opensuse <<< "$entry" + if [ "$name" = "$dep" ]; then + case "$distro" in + arch) printf '%s' "$arch" ;; + debian) printf '%s' "$debian" ;; + ubuntu) printf '%s' "$ubuntu" ;; + fedora) printf '%s' "$fedora" ;; + opensuse) printf '%s' "$opensuse" ;; + esac + return + fi + done +} + +# Outputs canonical dep names that are not yet installed (one per line) +check_deps() { + local missing=() + command -v virsh >/dev/null 2>&1 || missing+=(libvirt) + command -v qemu-system-x86_64 >/dev/null 2>&1 || missing+=(qemu-system) + command -v qemu-img >/dev/null 2>&1 || missing+=(qemu-img) + command -v virt-install >/dev/null 2>&1 || missing+=(virt-install) + command -v remote-viewer >/dev/null 2>&1 || missing+=(virt-viewer) + command -v node >/dev/null 2>&1 || missing+=(nodejs) + command -v ssh >/dev/null 2>&1 || missing+=(openssh) + # Need at least one cloud-init ISO tool + if 
! command -v cloud-localds >/dev/null 2>&1 \ + && ! command -v genisoimage >/dev/null 2>&1 \ + && ! command -v mkisofs >/dev/null 2>&1 \ + && ! command -v xorriso >/dev/null 2>&1; then + missing+=(cloud-localds genisoimage xorriso) + fi + [ "${#missing[@]}" -gt 0 ] && printf '%s\n' "${missing[@]}" || true +} + +# Install a list of canonical dep names. Logs to SC_INSTALL_LOG if set. +install_deps() { + local deps=("$@") + local pkgs=() + + for dep in "${deps[@]}"; do + local pkg + pkg="$(map_package "$dep")" + [ -n "$pkg" ] && pkgs+=("$pkg") + done + + if [ "$SC_DISTRO" = "arch" ]; then + pkgs+=("${_SC_ARCH_QEMU_EXTRAS[@]}") + fi + + [ "${#pkgs[@]}" -eq 0 ] && return 0 + + local -a pm_cmd + case "$SC_DISTRO" in + arch) pm_cmd=(pacman -S --noconfirm --needed) ;; + debian|ubuntu) pm_cmd=(apt-get install -y) ;; + fedora) pm_cmd=(dnf install -y) ;; + opensuse) pm_cmd=(zypper install -y) ;; + *) + echo " Unsupported distro '$SC_DISTRO' — install these manually:" + printf ' %s\n' "${pkgs[@]}" + return 1 + ;; + esac + + sudo "${pm_cmd[@]}" "${pkgs[@]}" + + if [ -n "${SC_INSTALL_LOG:-}" ] && [ "$SC_INSTALL_LOG" != "/dev/null" ]; then + mkdir -p "$(dirname "$SC_INSTALL_LOG")" + local distro_label="$SC_DISTRO" + for pkg in "${pkgs[@]}"; do + local ver="" + case "$SC_DISTRO" in + arch) ver="$(pacman -Q "$pkg" 2>/dev/null | awk '{print $2}' || true)" ;; + debian|ubuntu) ver="$(dpkg -l "$pkg" 2>/dev/null | awk '/^ii/{print $3}' | head -1 || true)" ;; + fedora) ver="$(rpm -q --queryformat '%{VERSION}' "$pkg" 2>/dev/null || true)" ;; + esac + printf '[INSTALLED] %-36s %-14s via %s\n' "$pkg" "${ver:-}" "$distro_label" \ + >> "$SC_INSTALL_LOG" + done + fi +} + +# Outputs already-installed canonical deps (for log completeness) +log_present_deps() { + local log_file="${SC_INSTALL_LOG:-}" + [ -z "$log_file" ] || [ "$log_file" = "/dev/null" ] && return + local distro_label="$SC_DISTRO" + for dep in libvirt qemu-system qemu-img virt-install virt-viewer nodejs openssh; do + local bin + 
case "$dep" in + libvirt) bin=virsh ;; + qemu-system) bin=qemu-system-x86_64 ;; + qemu-img) bin=qemu-img ;; + virt-install) bin=virt-install ;; + virt-viewer) bin=remote-viewer ;; + nodejs) bin=node ;; + openssh) bin=ssh ;; + esac + if command -v "$bin" >/dev/null 2>&1; then + printf '[SKIPPED] %-36s already installed\n' "$dep" >> "$log_file" + fi + done +} + +dep_label() { + case "$1" in + libvirt) echo "Virtual machine manager (libvirt)" ;; + qemu-system) echo "KVM virtualization support (QEMU)" ;; + qemu-img) echo "VM disk image tools (qemu-img)" ;; + virt-install) echo "VM installer (virt-install)" ;; + virt-viewer) echo "SPICE display viewer (virt-viewer)" ;; + cloud-localds|genisoimage|xorriso) echo "Cloud image tools" ;; + nodejs) echo "Node.js runtime" ;; + openssh) echo "SSH client" ;; + *) echo "$1" ;; + esac +} diff --git a/tools/lib/internal-https.sh b/tools/lib/internal-https.sh new file mode 100644 index 0000000..2464317 --- /dev/null +++ b/tools/lib/internal-https.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Shared internal HTTPS/URL helpers for Sysadmin Chronicles launch and VM build scripts. +# Source this file; do not execute directly. 
+
+sc_internal_port() {  # port of the game server; PORT overrides the default 3000
+  printf '%s\n' "${PORT:-3000}"
+}
+
+sc_cert_dir() {  # directory holding ca.crt, server.crt and server.key
+  printf '%s\n' "${SC_CERT_DIR:-$HOME/.local/share/sysadmin-chronicles/certs}"
+}
+
+sc_tls_cert() {
+  printf '%s/server.crt\n' "$(sc_cert_dir)"
+}
+
+sc_tls_key() {
+  printf '%s/server.key\n' "$(sc_cert_dir)"
+}
+
+sc_ca_cert() {
+  printf '%s/ca.crt\n' "$(sc_cert_dir)"
+}
+
+sc_hud_url() {  # SC_HUD_URL if set, else the portal host on the internal port
+  printf '%s\n' "${SC_HUD_URL:-https://portal.axiomworks.internal:$(sc_internal_port)}"
+}
+
+sc_sage_url() {  # SC_SAGE_URL if set, else the sage host on the internal port
+  printf '%s\n' "${SC_SAGE_URL:-https://sage.axiomworks.internal:$(sc_internal_port)/sage/}"
+}
+
+sc_company_url() {
+  printf '%s\n' "${SC_COMPANY_URL:-https://www.axiomworks.corp/}"
+}
+
+sc_have_internal_certs() {  # true only when server cert, server key and CA cert all exist
+  [[ -f "$(sc_tls_cert)" && -f "$(sc_tls_key)" && -f "$(sc_ca_cert)" ]]
+}
+
+sc_ensure_internal_certs() {  # $1 = project root; runs generate-certs.sh unless the certs exist
+  local project_root="$1"
+  if sc_have_internal_certs; then
+    return 0
+  fi
+  bash "$project_root/tools/setup/generate-certs.sh"
+}
+
+sc_export_internal_https_env() {  # export the resolved cert paths and URLs for child processes
+  export SC_CERT_DIR="$(sc_cert_dir)"
+  export SC_TLS_CERT="$(sc_tls_cert)"
+  export SC_TLS_KEY="$(sc_tls_key)"
+  export SC_HUD_URL="$(sc_hud_url)"
+  export SC_SAGE_URL="$(sc_sage_url)"
+  export SC_COMPANY_URL="$(sc_company_url)"
+}
+
+sc_listen_pids() {  # $1 = TCP port; prints unique listening PIDs (lsof when available, else ss)
+  local port="$1"
+  if command -v lsof >/dev/null 2>&1; then
+    lsof -tiTCP:"$port" -sTCP:LISTEN 2>/dev/null | sort -u
+    return 0
+  fi
+  ss -H -ltnp "sport = :$port" 2>/dev/null \
+    | sed -n 's/.*pid=\([0-9][0-9]*\).*/\1/p' \
+    | sort -u
+}
+
+sc_pid_is_repo_server() {  # $1 = pid, $2 = project root; true for node src/index.js run from $2/server
+  local pid="$1"
+  local project_root="$2"
+  local server_dir="$project_root/server"
+  local cwd=""
+  local cmdline=""
+
+  [[ -r "/proc/$pid/cmdline" ]] || return 1
+  cwd="$(readlink -f "/proc/$pid/cwd" 2>/dev/null || true)"
+  cmdline="$(tr '\0' ' ' < "/proc/$pid/cmdline" 2>/dev/null || true)"
+
+  [[ "$cwd" == "$server_dir" ]] || return 1
+  [[ "$cmdline" == *"node"* && "$cmdline" == *"src/index.js"* ]]
+}
+
+sc_pid_has_internal_tls() {  # $1 = pid; true when its environment names a server.crt and a server.key
+  local pid="$1"
+  [[ -r "/proc/$pid/environ" ]] || return 1
+  tr '\0' '\n' < "/proc/$pid/environ" 2>/dev/null \
+    | grep -q '^SC_TLS_CERT=.*server\.crt$' \
+    && tr '\0' '\n' < "/proc/$pid/environ" 2>/dev/null \
+    | grep -q '^SC_TLS_KEY=.*server\.key$'
+}
+# Stop PID $1: send SIGTERM, poll up to 10 x 0.2 s for it to exit, then force it with SIGKILL.
+sc_stop_pid() {
+  local pid="$1"
+  kill "$pid" 2>/dev/null || true  # plain kill already sends SIGTERM
+  for _ in 1 2 3 4 5 6 7 8 9 10; do
+    kill -0 "$pid" 2>/dev/null || return 0
+    sleep 0.2
+  done
+  kill -KILL "$pid" 2>/dev/null || true  # escalate; resending TERM here could never stop a stuck process
+}
diff --git a/tools/lib/libvirt.sh b/tools/lib/libvirt.sh
new file mode 100755
index 0000000..7dfcad4
--- /dev/null
+++ b/tools/lib/libvirt.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+# libvirt wrappers for Sysadmin Chronicles.
+# Source this file; do not execute directly.
+# Expects LIBVIRT_DEFAULT_URI to be set by the caller.
+#
+# SC_VIRSH_SUDO=true — prefix all virsh calls with sudo.
+# Set this in install.sh when the current session doesn't yet have
+# the libvirt group active (e.g., right after usermod -aG libvirt).
+
+_virsh() {  # virsh, or sudo virsh when SC_VIRSH_SUDO=true
+  if [ "${SC_VIRSH_SUDO:-false}" = true ]; then
+    sudo virsh "$@"
+  else
+    virsh "$@"
+  fi
+}
+
+ensure_network() {  # $1 = name, $2 = XML file; start it if defined, else define + autostart + start
+  local name="$1"
+  local xml_path="$2"
+  if _virsh net-list --all 2>/dev/null | grep -q "\\b${name}\\b"; then
+    if ! _virsh net-info "$name" 2>/dev/null | grep -q "Active:.*yes"; then
+      _virsh net-start "$name" >/dev/null 2>&1
+    fi
+    return 0
+  fi
+  _virsh net-define "$xml_path" >/dev/null 2>&1
+  _virsh net-autostart "$name" >/dev/null 2>&1
+  _virsh net-start "$name" >/dev/null 2>&1
+}
+
+ensure_pool() {  # $1 = name, $2 = directory; start it if defined, else create a dir pool at $2
+  local name="$1"
+  local path="$2"
+  if _virsh pool-list --all 2>/dev/null | grep -q "\\b${name}\\b"; then
+    if ! _virsh pool-info "$name" 2>/dev/null | grep -q "State:.*running"; then
+      _virsh pool-start "$name" >/dev/null 2>&1
+    fi
+    return 0
+  fi
+  mkdir -p "$path"
+  _virsh pool-define-as "$name" dir --target "$path" >/dev/null 2>&1
+  _virsh pool-autostart "$name" >/dev/null 2>&1
+  _virsh pool-start "$name" >/dev/null 2>&1
+}
+# Print the target directory of pool $1, read from the <path> element of its XML definition.
+pool_path() {
+  local name="$1"
+  _virsh pool-dumpxml "$name" 2>/dev/null \
+    | sed -n 's:.*<path>\(.*\)</path>.*:\1:p' \
+    | head -n1
+}
+
+domain_exists() {
+  _virsh dominfo "$1" >/dev/null 2>&1
+}
+
+domain_state() {  # virsh domstate output with spaces and newlines removed
+  _virsh domstate "$1" 2>/dev/null | tr -d ' \n'
+}
+
+network_active() {
+  _virsh net-info "$1" 2>/dev/null | grep -q "Active:.*yes"
+}
+
+ensure_network_active() {  # fails if the network is not defined; otherwise starts it when inactive
+  local name="$1"
+  _virsh net-list --all 2>/dev/null | grep -q "\\b${name}\\b" || return 1
+  network_active "$name" || _virsh net-start "$name" >/dev/null 2>&1
+}
+
+snapshot_exists() {  # $1 = domain, $2 = snapshot name
+  _virsh snapshot-info "$1" "$2" >/dev/null 2>&1
+}
+
+snapshot_create() {  # $1 = domain, $2 = name, $3 = description; replaces a same-named snapshot
+  local domain="$1"
+  local name="$2"
+  local desc="${3:-}"
+  _virsh snapshot-delete "$domain" "$name" >/dev/null 2>&1 || true
+  _virsh snapshot-create-as "$domain" "$name" --description "$desc" --atomic
+}
+
+snapshot_revert() {  # reverts and leaves the domain running
+  _virsh snapshot-revert "$1" "$2" --running
+}
+
+snapshot_delete() {
+  _virsh snapshot-delete "$1" "$2"
+}
+
+snapshot_list_names() {
+  _virsh snapshot-list "$1" --name 2>/dev/null || true
+}
+
+# Prints the summed `du -sb` size of a domain's disk image files in IEC units (plain bytes if numfmt fails).
+domain_disk_usage() {
+  local domain="$1"
+  local total=0
+  local disk
+  for disk in $(_virsh domblklist "$domain" --details 2>/dev/null | awk '/disk/ && $4 != "-" {print $4}' || true); do
+    [ -f "$disk" ] || continue
+    local sz
+    sz="$(du -sb "$disk" 2>/dev/null | awk '{print $1}' || echo 0)"
+    total=$(( total + ${sz:-0} ))  # du can print nothing (unreadable file); an empty operand is a syntax error
+  done
+  if [ "$total" -gt 0 ]; then
+    numfmt --to=iec-i --suffix=B "$total" 2>/dev/null || echo "${total}B"
+  else
+    echo "0B"
+  fi
+}
diff --git a/tools/lib/save.sh b/tools/lib/save.sh
new file mode 100755
index 0000000..d133284
--- /dev/null
+++ b/tools/lib/save.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# Save slot management for Sysadmin Chronicles.
+# Source this file; do not execute directly.
+
+SC_SAVE_DIR="${SC_SAVE_DIR:-$HOME/.local/share/sysadmin-chronicles/saves}"
+
+_save_path() { printf '%s/%s.json' "$SC_SAVE_DIR" "$1"; }
+
+_save_active_slot() {  # contents of $SC_SAVE_DIR/.active, or "autosave" when missing or empty
+  local slot=""
+  if [ -f "$SC_SAVE_DIR/.active" ]; then
+    slot="$(cat "$SC_SAVE_DIR/.active")"
+  fi
+  printf '%s' "${slot:-autosave}"
+}
+
+_save_valid_slot() {  # accepts only the four fixed slot names; prints an error otherwise
+  case "$1" in
+    autosave|slot-1|slot-2|slot-3) return 0 ;;
+    *) echo " ✗ Invalid slot name: $1 (use autosave, slot-1, slot-2, or slot-3)"; return 1 ;;
+  esac
+}
+
+_new_game_json() {  # prints the initial save document for slot $1
+  local slot="$1"
+  printf '{"slot":"%s","day":1,"trust":50,"questsCompleted":0,"quests":{},"flags":{},"inbox":[],"clock":{"shift":1,"day":1}}\n' \
+    "$slot"
+}
+
+save_list() {  # prints a table of the four slots; the active one is tagged [active]
+  mkdir -p "$SC_SAVE_DIR"
+  local active
+  active="$(_save_active_slot)"
+  printf ' %-14s %-10s %-10s %-10s\n' "Slot" "Day" "Trust" "Quests"
+  printf ' %-14s %-10s %-10s %-10s\n' "──────────────" "──────────" "──────────" "──────────"
+  local slot
+  for slot in autosave slot-1 slot-2 slot-3; do
+    local path
+    path="$(_save_path "$slot")"
+    if [ -f "$path" ]; then
+      local day trust quests marker=""  # "|| true" keeps errexit+pipefail callers alive; blanks print as "?"
+      day="$(grep -o '"day":[0-9]*' "$path" 2>/dev/null | head -1 | cut -d: -f2 || true)"
+      trust="$(grep -o '"trust":[0-9]*' "$path" 2>/dev/null | head -1 | cut -d: -f2 || true)"
+      quests="$(grep -o '"questsCompleted":[0-9]*' "$path" 2>/dev/null | head -1 | cut -d: -f2 || true)"
+      [ "$slot" = "$active" ] && marker=" [active]"
+      printf ' %-14s %-10s %-10s %-10s%s\n' "$slot" "Day ${day:-?}" "T:${trust:-?}" "Q:${quests:-?}" "$marker"
+    else
+      printf ' %-14s %s\n' "$slot" "—empty—"
+    fi
+  done
+}
+
+save_switch() {  # make existing slot $1 the active one
+  local slot="$1"
+  _save_valid_slot "$slot" || return 1
+  [ -f "$(_save_path "$slot")" ] || { echo " ✗ No save in slot: $slot"; return 1; }
+  printf '%s' "$slot" > "$SC_SAVE_DIR/.active"
+  echo " ✓ Switched to $slot"
+}
+
+save_new() {  # write a fresh new-game document into slot $1 (overwrites)
+  local slot="$1"
+  _save_valid_slot "$slot" || return 1
+  mkdir -p "$SC_SAVE_DIR"
+  _new_game_json "$slot" > "$(_save_path "$slot")"
+  echo " ✓ Created new save: $slot"
+}
+
+save_reset() {  # like save_new, but defaults to the active slot
+  local slot="${1:-$(_save_active_slot)}"
+  _save_valid_slot "$slot" || return 1
+  mkdir -p "$SC_SAVE_DIR"
+  _new_game_json "$slot" > "$(_save_path "$slot")"
+  echo " ✓ Reset $slot to new game state"
+}
+
+save_export() {  # copy slot $1 to file $2
+  local slot="$1"
+  local dest="$2"
+  _save_valid_slot "$slot" || return 1
+  [ -f "$(_save_path "$slot")" ] || { echo " ✗ No save in slot: $slot"; return 1; }
+  [ -n "$dest" ] || { echo " ✗ No destination path given"; return 1; }
+  cp "$(_save_path "$slot")" "$dest"
+  echo " ✓ Exported $slot → $dest"
+}
+
+save_import() {  # copy file $1 into slot $2 after a minimal shape check
+  local src="$1"
+  local slot="$2"
+  _save_valid_slot "$slot" || return 1
+  [ -f "$src" ] || { echo " ✗ File not found: $src"; return 1; }
+  # Basic JSON sanity check: some line must start with "{"
+  grep -q '^{' "$src" 2>/dev/null || { echo " ✗ File does not look like a save: $src"; return 1; }
+  mkdir -p "$SC_SAVE_DIR"
+  cp "$src" "$(_save_path "$slot")"
+  echo " ✓ Imported $src → $slot"
+}
diff --git a/tools/lib/ui.sh b/tools/lib/ui.sh
new file mode 100755
index 0000000..78c878c
--- /dev/null
+++ b/tools/lib/ui.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# Shared UI helpers for Sysadmin Chronicles scripts.
+# Source this file; do not execute directly.
+ +_SC_STEP_N=0 + +# Colors — disabled if stdout is not a terminal or NO_COLOR is set +if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then + _C_RESET='\033[0m' + _C_BOLD='\033[1m' + _C_GREEN='\033[0;32m' + _C_YELLOW='\033[0;33m' + _C_RED='\033[0;31m' + _C_CYAN='\033[0;36m' + _C_DIM='\033[2m' +else + _C_RESET='' _C_BOLD='' _C_GREEN='' _C_YELLOW='' _C_RED='' _C_CYAN='' _C_DIM='' +fi + +sc_header() { + local title="${1:-Sysadmin Chronicles}" + echo "" + printf "${_C_CYAN}${_C_BOLD}" + echo "╔══════════════════════════════════════════╗" + printf "║ %-40s║\n" "$title" + echo "╚══════════════════════════════════════════╝" + printf "${_C_RESET}" + echo "" +} + +sc_done_banner() { + echo "" + printf "${_C_GREEN}${_C_BOLD}" + echo "╔══════════════════════════════════════════╗" + printf "║ %-40s║\n" "SETUP COMPLETE!" + echo "╚══════════════════════════════════════════╝" + printf "${_C_RESET}" + echo "" +} + +sc_section() { + echo "" + printf "${_C_BOLD}── %s ${_C_DIM}─────────────────────────────────${_C_RESET}\n" "$*" +} + +sc_step() { + (( _SC_STEP_N++ )) || true + echo "" + printf "${_C_BOLD}── Step %d: %s${_C_RESET}\n" "$_SC_STEP_N" "$*" +} + +sc_ok() { printf " ${_C_GREEN}✓${_C_RESET} %s\n" "$*"; } +sc_warn() { printf " ${_C_YELLOW}⚠${_C_RESET} %s\n" "$*"; } +sc_err() { printf " ${_C_RED}✗${_C_RESET} %s\n" "$*" >&2; } +sc_fail() { sc_err "$*"; exit 1; } +sc_info() { printf " ${_C_DIM}→${_C_RESET} %s\n" "$*"; } + +# Prompt — writes question to /dev/tty, returns answer on stdout +sc_prompt() { + local question="$1" + local default="${2:-}" + if [ -n "$default" ]; then + printf " %s [%s] > " "$question" "$default" >/dev/tty + else + printf " %s > " "$question" >/dev/tty + fi + local answer + read -r answer /dev/tty + local answer + read -r answer /dev/tty + ( + local frames=('⠋' '⠙' '⠹' '⠸' '⠼' '⠴' '⠦' '⠧' '⠇' '⠏') + local i=0 + while true; do + printf "\r %s %s " "$label" "${frames[$((i % ${#frames[@]}))]}" >/dev/tty + sleep 0.12 + (( i++ )) || true + done + ) & + 
_SC_SPINNER_PID=$! + disown "$_SC_SPINNER_PID" 2>/dev/null || true +} + +sc_spinner_stop() { + if [ -n "${_SC_SPINNER_PID:-}" ]; then + kill "$_SC_SPINNER_PID" 2>/dev/null || true + wait "$_SC_SPINNER_PID" 2>/dev/null || true + _SC_SPINNER_PID="" + printf "\r%-60s\r" "" >/dev/tty + fi +} diff --git a/tools/lib/vm.sh b/tools/lib/vm.sh new file mode 100755 index 0000000..c5b179b --- /dev/null +++ b/tools/lib/vm.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# VM operations for Sysadmin Chronicles. +# Source this file; requires lib/libvirt.sh and PROJECT_ROOT set. + +SC_VM_TOOLS="${SC_VM_TOOLS:-${PROJECT_ROOT:-}/tools/vm}" + +_sc_validate_snapshot_name() { + [[ "$1" =~ ^[a-zA-Z0-9][a-zA-Z0-9-]*$ ]] +} + +_sc_protected_snapshot() { + [[ "$1" == baseline.* ]] || [[ "$1" == checkpoint.* ]] +} + +vm_build() { + local profile="$1" + shift + local dry_run=false force=false + for arg in "$@"; do + case "$arg" in + --dry-run) dry_run=true ;; + --force) force=true ;; + esac + done + local script="$SC_VM_TOOLS/build-${profile}.sh" + [ -f "$script" ] || { echo " ✗ No build script for profile: $profile"; return 1; } + local args=() + [ "$dry_run" = true ] && args+=(--dry-run) + [ "$force" = true ] && args+=(--force) + bash "$script" "${args[@]}" +} + +vm_rebuild() { + local profile="$1" + shift + local dry_run=false + for arg in "$@"; do [ "$arg" = "--dry-run" ] && dry_run=true; done + + local domain="sc-${profile}" + if domain_exists "$domain" && [ "$dry_run" = false ]; then + virsh destroy "$domain" >/dev/null 2>&1 || true + virsh undefine "$domain" --nvram --snapshots-metadata >/dev/null 2>&1 \ + || virsh undefine "$domain" --snapshots-metadata >/dev/null 2>&1 \ + || virsh undefine "$domain" >/dev/null 2>&1 || true + fi + local extra_args=() + [ "$dry_run" = true ] && extra_args+=(--dry-run) + vm_build "$profile" "${extra_args[@]}" +} + +vm_revert() { + snapshot_revert "$1" "$2" +} + +vm_status() { + local vm_id="$1" + domain_exists "$vm_id" && domain_state "$vm_id" || printf 
'missing' +} + +vm_start() { + virsh start "$1" >/dev/null 2>&1 +} + +vm_stop() { + virsh shutdown "$1" >/dev/null 2>&1 || virsh destroy "$1" >/dev/null 2>&1 || true +} + +vm_snapshot_create() { + local vm_id="$1" + local name="$2" + _sc_validate_snapshot_name "$name" \ + || { echo " ✗ Invalid name (letters, numbers, hyphens only): $name"; return 1; } + snapshot_create "$vm_id" "$name" "User snapshot — $(date '+%Y-%m-%d %H:%M')" +} + +vm_snapshot_list() { + local vm_id="$1" + virsh snapshot-list "$vm_id" 2>/dev/null || true +} + +vm_snapshot_revert() { + local vm_id="$1" + local name="$2" + snapshot_exists "$vm_id" "$name" \ + || { echo " ✗ Snapshot not found: $name"; return 1; } + snapshot_revert "$vm_id" "$name" +} + +vm_snapshot_delete() { + local vm_id="$1" + local name="$2" + if _sc_protected_snapshot "$name"; then + echo " ✗ Cannot delete protected snapshot: $name" + return 1 + fi + snapshot_delete "$vm_id" "$name" +} diff --git a/tools/save/manage-saves.sh b/tools/save/manage-saves.sh new file mode 100755 index 0000000..ce1252f --- /dev/null +++ b/tools/save/manage-saves.sh @@ -0,0 +1,238 @@ +#!/usr/bin/env bash +# Save slot and VM snapshot management for Sysadmin Chronicles. +# +# Usage: +# manage-saves.sh Interactive menu +# manage-saves.sh --list List save slots (non-interactive) +# manage-saves.sh --reset Reset current save +# manage-saves.sh --reset slot-1 Reset a specific slot + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" + +source "$PROJECT_ROOT/tools/lib/ui.sh" +source "$PROJECT_ROOT/tools/lib/config.sh" +source "$PROJECT_ROOT/tools/lib/save.sh" +source "$PROJECT_ROOT/tools/lib/libvirt.sh" +source "$PROJECT_ROOT/tools/lib/vm.sh" + +config_read || true +export LIBVIRT_DEFAULT_URI="${SC_LIBVIRT_URI:-${LIBVIRT_DEFAULT_URI:-qemu:///system}}" + +# Non-interactive flags +for arg in "$@"; do + case "$arg" in + --list) + save_list + exit 0 + ;; + esac +done + +if [[ "${1:-}" == "--reset" ]]; then + slot="${2:-}" + if [ -n "$slot" ]; then + save_reset "$slot" + else + save_reset + fi + exit 0 +fi + +# --------------------------------------------------------------------------- +# Interactive menu +# --------------------------------------------------------------------------- + +declare -A VM_LABEL=( + [sc-workstation]="workstation (ares)" + [sc-web-server]="web server (hermes)" + [sc-build-machine]="build server (vulcan)" +) +ALL_VMS=(sc-workstation sc-web-server sc-build-machine) + +sc_header "SYSADMIN CHRONICLES — SAVE MANAGEMENT" + +_main_menu() { + while true; do + echo "" + save_list + echo "" + echo " ── Save Actions ─────────────────────────────" + echo " s) Switch active save slot" + echo " n) New save slot" + echo " r) Reset a save slot" + echo " e) Export save to file" + echo " i) Import save from file" + echo "" + echo " ── VM Snapshots ─────────────────────────────" + echo " v) View and manage VM snapshots" + echo "" + echo " q) Quit" + echo "" + printf " > " >/dev/tty + read -r choice " >/dev/tty + read -r slot /dev/null | head -1 | cut -d: -f2 || echo '')" + if [ -n "$active_day" ] && [ "$active_day" -gt 1 ] 2>/dev/null; then + echo "" + sc_warn "This save is on Day $active_day. Your VMs may not match this slot's expected state." + sc_info "If things look wrong, use 'tools/vm/rebuild-vms.sh' to revert VMs." + fi +} + +_new_slot() { + echo " Create in which slot? 
(slot-1 / slot-2 / slot-3)" + printf " > " >/dev/tty + read -r slot " >/dev/tty + read -r slot " >/dev/tty + read -r slot " >/dev/tty + read -r dest " >/dev/tty + read -r src " >/dev/tty + read -r slot /dev/null; then + snaps="$(virsh snapshot-list "$vm" --name 2>/dev/null | grep -v '^$' || true)" + if [ -n "$snaps" ]; then + while IFS= read -r snap; do + ts="$(virsh snapshot-info "$vm" "$snap" 2>/dev/null | grep 'Creation Time' | awk '{print $3}' || true)" + if [[ "$snap" == baseline.* ]] || [[ "$snap" == checkpoint.* ]]; then + prot=" [protected]" + else + prot="" + fi + printf " %-36s %-12s%s\n" "$snap" "$ts" "$prot" + done <<< "$snaps" + else + echo " (no snapshots)" + fi + else + echo " (VM not found)" + fi + echo "" + done + + echo " Actions: [t]ake snapshot [r]evert [d]elete [q]uit" + echo "" + printf " > " >/dev/tty + local snap_action + read -r snap_action " >/dev/tty + read -r snap_name " >/dev/tty + read -r snap_name " >/dev/tty + read -r snap_name " >/dev/tty + read -r _result_ref /dev/null 2>&1 +} + +check_cmd() { + local cmd="$1" + local label="${2:-$cmd}" + if command -v "$cmd" &>/dev/null; then + echo " $PASS $label" + else + echo " $FAIL $label — NOT FOUND" + ((errors+=1)) + fi +} + +check_file() { + local path="$1" + local label="$2" + local required="${3:-true}" + if [ -e "$path" ]; then + echo " $PASS $label ($path)" + else + if [ "$required" = "true" ]; then + echo " $FAIL $label — NOT FOUND ($path)" + ((errors+=1)) + else + echo " $WARN $label — not found ($path) [optional]" + ((warnings+=1)) + fi + fi +} + +check_group() { + local group="$1" + if id -nG "$OWNER_USER" | grep -qw "$group"; then + echo " $PASS User is in group: $group" + else + echo " $WARN Not in group '$group' — libvirt access may require sudo or group add" + ((warnings+=1)) + fi +} + +check_kvm_access() { + if id -nG "$OWNER_USER" | grep -qw kvm; then + echo " $PASS User is in group: kvm" + elif [ -r /dev/kvm ] && [ -w /dev/kvm ]; then + echo " $PASS /dev/kvm is accessible to 
this session" + else + echo " $WARN Not in group 'kvm' — KVM acceleration may require sudo or group add" + ((warnings+=1)) + fi +} + +libvirt_ready=false +socket_ready=false + +if run_virsh_quick -q list --all; then + libvirt_ready=true +fi + +if systemctl is-active --quiet libvirtd.socket 2>/dev/null || \ + systemctl is-active --quiet virtqemud.socket 2>/dev/null; then + socket_ready=true +fi + +# --------------------------------------------------------------------------- + +echo "" +echo "══════════════════════════════════════════════════" +echo " Sysadmin Chronicles — Host Prerequisite Check" +echo "══════════════════════════════════════════════════" +echo "" + +echo "── Virtualization ─────────────────────────────────" +check_file "/dev/kvm" "KVM device node" +check_cmd "virsh" "virsh (libvirt CLI)" +check_cmd "qemu-system-x86_64" "QEMU (x86_64)" + +# libvirt runtime +if [ "$libvirt_ready" = "true" ]; then + echo " $PASS libvirt responds to virsh" +elif [ "$socket_ready" = "true" ]; then + echo " $WARN libvirt socket activation is available, but this user cannot reach $LIBVIRT_DEFAULT_URI" + echo " Add yourself to the libvirt group or use sudo for setup." 
+ ((warnings+=1)) +else + echo " $FAIL libvirt is not reachable — start socket activation or the daemon" + echo " Example: sudo systemctl enable --now libvirtd.socket" + ((errors+=1)) +fi + +check_group "libvirt" +check_kvm_access + +echo "" +echo "── Storage ────────────────────────────────────────" +check_cmd "qemu-img" "qemu-img (disk image tool)" +# Storage pool exists check +if run_virsh_quick pool-info sc-images; then + echo " $PASS sc-images storage pool exists" +else + echo " $WARN sc-images storage pool not yet created (run first-run-setup.sh)" + ((warnings+=1)) +fi + +echo "" +echo "── Networking ─────────────────────────────────────" +if run_virsh_quick net-info sc-internal; then + echo " $PASS sc-internal network exists" + if timeout 5 virsh net-dumpxml sc-internal 2>/dev/null | grep -q "/dev/null 2>&1 +} + +libvirt_socket_available() { + systemctl is-active --quiet libvirtd.socket 2>/dev/null || \ + systemctl is-active --quiet virtqemud.socket 2>/dev/null +} + +echo "" +echo "══════════════════════════════════════════════════" +echo " Sysadmin Chronicles — First-Run Setup" +echo "══════════════════════════════════════════════════" + +# Sanity: check libvirt access +step "Checking libvirt access" +if libvirt_reachable; then + ok "libvirt responds to virsh" +elif libvirt_socket_available; then + ok "libvirt socket activation is available" +else + echo " ERROR: libvirt is not reachable." 
+ echo " Run: sudo systemctl enable --now libvirtd.socket" + exit 1 +fi + +# --------------------------------------------------------------------------- +step "Creating sc-internal libvirt network" +# --------------------------------------------------------------------------- + +NETWORK_XML="$SCRIPT_DIR/../vm/network-sc-internal.xml" + +write_network_xml() { + local target="$1" + cat > "$target" << 'EOF' + + sc-internal + + + + + + + + +EOF +} + +network_has_nat() { + virsh net-dumpxml sc-internal 2>/dev/null | grep -q "/dev/null 2>&1 || true + run virsh net-undefine sc-internal + define_network "sc-internal network recreated and started" + fi +else + info "Creating sc-internal (private NAT, game-scoped network)..." + define_network "sc-internal network created and started" +fi + +# --------------------------------------------------------------------------- +step "Creating sc-images storage pool" +# --------------------------------------------------------------------------- + +if [ "${LIBVIRT_DEFAULT_URI}" = "qemu:///system" ]; then + IMAGES_DIR="/var/lib/libvirt/images/sysadmin-chronicles" +else + IMAGES_DIR="$OWNER_HOME/.local/share/sysadmin-chronicles/images" +fi + +if virsh pool-list --all | grep -q "sc-images"; then + CURRENT_POOL_PATH="$(virsh pool-dumpxml sc-images 2>/dev/null | sed -n 's:.*\(.*\).*:\1:p' | head -n1)" + if [ "$CURRENT_POOL_PATH" = "$IMAGES_DIR" ]; then + ok "sc-images pool already exists" + else + info "Recreating sc-images pool at $IMAGES_DIR (was $CURRENT_POOL_PATH)..." + run mkdir -p "$IMAGES_DIR" + run virsh pool-destroy sc-images >/dev/null 2>&1 || true + run virsh pool-undefine sc-images + run virsh pool-define-as sc-images dir --target "$IMAGES_DIR" + run virsh pool-autostart sc-images + run virsh pool-start sc-images + ok "sc-images pool recreated" + fi +else + info "Creating sc-images pool at $IMAGES_DIR..." 
+ run mkdir -p "$IMAGES_DIR" + run virsh pool-define-as sc-images dir --target "$IMAGES_DIR" + run virsh pool-autostart sc-images + run virsh pool-start sc-images + ok "sc-images pool created" +fi + +# --------------------------------------------------------------------------- +step "Generating SSH key pair" +# --------------------------------------------------------------------------- + +KEY_PATH="$OWNER_HOME/.ssh/sc_host_key" +if [ -f "$KEY_PATH" ]; then + run chown "$OWNER_USER:$OWNER_USER" "$KEY_PATH" "${KEY_PATH}.pub" >/dev/null 2>&1 || true + run chmod 600 "$KEY_PATH" >/dev/null 2>&1 || true + run chmod 644 "${KEY_PATH}.pub" >/dev/null 2>&1 || true + ok "SSH key already exists: $KEY_PATH" +else + info "Generating ed25519 key: $KEY_PATH" + run mkdir -p "$(dirname "$KEY_PATH")" + run ssh-keygen -t ed25519 -N "" -C "sysadmin-chronicles-host" -f "$KEY_PATH" + run chmod 600 "$KEY_PATH" + run chmod 644 "${KEY_PATH}.pub" + run chown "$OWNER_USER:$OWNER_USER" "$KEY_PATH" "${KEY_PATH}.pub" + ok "SSH key generated" + info "Public key (add to VM authorized_keys during build):" + if [ "$DRY_RUN" = "false" ]; then + cat "${KEY_PATH}.pub" + fi +fi + +# --------------------------------------------------------------------------- +step "Verifying group membership" +# --------------------------------------------------------------------------- + +if id -nG | grep -qw "libvirt"; then + ok "User is in libvirt group" +else + info "Consider adding yourself to the libvirt group for passwordless VM control:" + info " sudo usermod -aG libvirt \$USER && newgrp libvirt" +fi + +# --------------------------------------------------------------------------- + +echo "" +echo "══════════════════════════════════════════════════" +echo " Setup complete." 
+echo " Next step: bash tools/setup/seed-vms.sh"
+echo "══════════════════════════════════════════════════"
+echo ""
diff --git a/tools/setup/generate-certs.sh b/tools/setup/generate-certs.sh
new file mode 100755
index 0000000..dfa9cab
--- /dev/null
+++ b/tools/setup/generate-certs.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Generates a self-signed CA and a server certificate signed by it for Sysadmin Chronicles TLS.
+# Idempotent — skips if certs already exist.
+# Run this before building VMs. Called by install.sh automatically.
+set -euo pipefail
+trap 'echo "generate-certs.sh: command failed near line $LINENO (openssl diagnostics are suppressed)" >&2' ERR
+SC_CERT_DIR="${SC_CERT_DIR:-$HOME/.local/share/sysadmin-chronicles/certs}"
+mkdir -p "$SC_CERT_DIR"
+chmod 700 "$SC_CERT_DIR"
+umask 077  # set after mkdir so parent dirs keep default modes; key files are never created group/world readable
+if [[ -f "$SC_CERT_DIR/server.crt" && -f "$SC_CERT_DIR/server.key" && -f "$SC_CERT_DIR/ca.crt" ]]; then
+  echo "TLS certs already exist at $SC_CERT_DIR — skipping."
+  exit 0
+fi
+command -v openssl >/dev/null 2>&1 || { echo "generate-certs.sh: openssl not found in PATH" >&2; exit 1; }
+echo "Generating Axiom Works internal CA..."
+openssl genrsa -out "$SC_CERT_DIR/ca.key" 4096 2>/dev/null
+openssl req -new -x509 -days 3650 \
+  -key "$SC_CERT_DIR/ca.key" \
+  -out "$SC_CERT_DIR/ca.crt" \
+  -subj "/CN=Axiom Works Internal CA/O=Axiom Works" 2>/dev/null
+
+echo "Generating server certificate..."
+openssl genrsa -out "$SC_CERT_DIR/server.key" 4096 2>/dev/null
+openssl req -new -subj "/CN=portal.axiomworks.internal/O=Axiom Works" \
+  -key "$SC_CERT_DIR/server.key" \
+  -out "$SC_CERT_DIR/server.csr" 2>/dev/null
+
+# Extension file for signing: the subjectAltName entries the server certificate will carry.
+printf '%s\n' 'subjectAltName=DNS:portal.axiomworks.internal,DNS:sage.axiomworks.internal,DNS:axiomworks.corp,DNS:www.axiomworks.corp,DNS:*.axiomworks.internal,DNS:*.axiomworks.corp' \
+  > "$SC_CERT_DIR/server.ext"
+
+# Sign the CSR with the CA key for 3650 days.
+openssl x509 -req -days 3650 \
+  -extfile "$SC_CERT_DIR/server.ext" \
+  -in "$SC_CERT_DIR/server.csr" \
+  -out "$SC_CERT_DIR/server.crt" \
+  -CA "$SC_CERT_DIR/ca.crt" \
+  -CAkey "$SC_CERT_DIR/ca.key" \
+  -CAcreateserial 2>/dev/null
+
+# Lock down both private keys, then drop the signing intermediates.
+chmod 600 "$SC_CERT_DIR/ca.key" "$SC_CERT_DIR/server.key"
+rm -f "$SC_CERT_DIR/server.csr" "$SC_CERT_DIR/server.ext"
+printf '%s\n' "TLS certs generated at $SC_CERT_DIR" \
+  " CA cert: $SC_CERT_DIR/ca.crt" \
+  " Server cert: $SC_CERT_DIR/server.crt" \
+  " Server key: $SC_CERT_DIR/server.key"
diff --git a/tools/setup/seed-vms.sh b/tools/setup/seed-vms.sh
new file mode 100644
index 0000000..bbe2a57
--- /dev/null
+++ b/tools/setup/seed-vms.sh
@@ -0,0 +1,327 @@
+#!/usr/bin/env bash
+# seed-vms.sh — Build all game VMs and create baseline snapshots.
+#
+# This script orchestrates the full VM provisioning pipeline:
+# 1. Build base VM images (cloud-init or manual install)
+# 2. Install guest helper binaries
+# 3. Run quest-prep scripts for each Tier 1 quest
+# 4. Take named baseline snapshots
+#
+# Prerequisites: Run first-run-setup.sh first (creates networks + pool).
+#
+# Usage:
+# bash tools/setup/seed-vms.sh # Build all VMs
+# bash tools/setup/seed-vms.sh --dry-run # Preview only
+# bash tools/setup/seed-vms.sh --vm workstation # One VM only
+# bash tools/setup/seed-vms.sh --skip-build # Prep scripts + snapshots only
+#
+# AGENT RULES:
+# - Never run quest-prep scripts against live player VMs.
+# - All prep scripts must be idempotent (safe to run twice).
+# - Snapshots are only taken after prep scripts complete successfully.
+
+set -euo pipefail
+OWNER_USER="${SUDO_USER:-${USER:-$(id -un)}}"  # under sudo, act for the invoking user
+OWNER_HOME="$(getent passwd "$OWNER_USER" | cut -d: -f6 || true)"  # "|| true": a failed lookup must reach the fallback below
+OWNER_HOME="${OWNER_HOME:-$HOME}"
+export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}"
+export SC_OWNER_USER="$OWNER_USER"
+export SC_OWNER_HOME="$OWNER_HOME"
+export SC_SSH_KEY="${SC_SSH_KEY:-$OWNER_HOME/.ssh/sc_host_key}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+VM_TOOLS="$PROJECT_ROOT/tools/vm"
+QUEST_PREP="$VM_TOOLS/quest-prep"
+
+source "$PROJECT_ROOT/tools/lib/config.sh"
+config_read || true
+# Collapse runs of "/" into one and strip trailing slashes; a bare "/" is left alone.
+normalize_dir_path() {
+  local path="${1:-}"
+  while [[ "$path" == *//* ]]; do
+    path="${path//\/\//\/}"
+  done
+  while [ "$path" != "/" ] && [ "${path%/}" != "$path" ]; do
+    path="${path%/}"
+  done
+  printf '%s\n' "$path"
+}
+
+if [ -n "${SC_IMAGES_DIR:-}" ]; then
+  SC_IMAGES_DIR="$(normalize_dir_path "$SC_IMAGES_DIR")"
+  export SC_IMAGE_ROOT="$SC_IMAGES_DIR"
+fi
+export SC_POOL_NAME="${SC_POOL_NAME:-sc-images}"
+export SC_NETWORK_NAME="${SC_NETWORK_NAME:-sc-internal}"
+
+DRY_RUN=false
+SKIP_BUILD=false
+SINGLE_VM=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --dry-run) DRY_RUN=true; shift ;;
+    --skip-build) SKIP_BUILD=true; shift ;;
+    --vm)
+      [ $# -ge 2 ] || { echo "--vm requires a value: workstation, web_server, or build_machine" >&2; exit 1; }
+      SINGLE_VM="${2//-/_}"; shift 2 ;;  # accept web-server as well as web_server
+    *) echo "Unknown argument: $1"; exit 1 ;;
+  esac
+done
+case "$SINGLE_VM" in ""|workstation|web_server|build_machine) ;; *) echo "Unknown --vm value: $SINGLE_VM (expected workstation, web_server, or build_machine)" >&2; exit 1 ;; esac
+# Run a command, or only print it when --dry-run is in effect.
+run() {
+  if [ "$DRY_RUN" = "true" ]; then
+    echo " [DRY-RUN] $*"
+  else
+    "$@"
+  fi
+}
+step() { echo ""; echo "── $* ───────────────────────────────────────"; }
+ok() { echo " ✓ $*"; }
+info() { echo " → $*"; }
+fail() { echo " ✗ $*"; exit 1; }
+vm_selected() {  # true when no --vm filter is set or the filter equals key $1
+  local key="$1"
+  [ -z "$SINGLE_VM" ] || [ "$SINGLE_VM" = "$key" ]
+}
+domain_selected() {  # maps a libvirt domain name to its --vm key; unknown domains are never selected
+  local domain="$1"
+  case "$domain" in
+    sc-workstation) vm_selected "workstation" ;;
+    sc-web-server) vm_selected "web_server" ;;
+    sc-build-machine) vm_selected "build_machine" ;;
+    *) return 1 ;;
+  esac
+}
+require_file() { + local path="$1" + local label="$2" + if [ ! -f "$path" ]; then + fail "$label is missing: $path" + fi +} + +echo "" +echo "══════════════════════════════════════════════════" +echo " Sysadmin Chronicles — VM Seed Pipeline" +echo "══════════════════════════════════════════════════" +[ "$DRY_RUN" = "true" ] && echo " [DRY-RUN mode]" +echo "" + +step "Validating provisioning toolchain" +require_file "$QUEST_PREP/Q001-prep.sh" "Q001 prep script" +require_file "$QUEST_PREP/Q002-prep.sh" "Q002 prep script" +require_file "$QUEST_PREP/Q003-prep.sh" "Q003 prep script" +require_file "$QUEST_PREP/Q004-prep.sh" "Q004 prep script" +require_file "$QUEST_PREP/Q006-prep.sh" "Q006 prep script" +require_file "$QUEST_PREP/Q006-post-clean.sh" "Q006 post-clean script" + +if [ "$SKIP_BUILD" = "false" ]; then + missing_scripts=() + for script in \ + "$VM_TOOLS/build-workstation.sh" \ + "$VM_TOOLS/build-web-server.sh" \ + "$VM_TOOLS/build-build-machine.sh" \ + "$VM_TOOLS/install-guest-helper.sh" \ + "$VM_TOOLS/suppress-maintenance-noise.sh" + do + if [ ! -f "$script" ]; then + missing_scripts+=("$script") + fi + done + + if [ "${#missing_scripts[@]}" -gt 0 ]; then + echo " ✗ VM provisioning pipeline is incomplete in this repo checkout." + echo " Missing files:" + for script in "${missing_scripts[@]}"; do + echo " - $script" + done + echo "" + echo " Current state:" + echo " - The Godot game and authored content are present." + echo " - The VM image build/provision helper scripts are not." + echo "" + echo " Real VM seeding cannot complete until those scripts are added." + exit 1 + fi +fi + +# --------------------------------------------------------------------------- +# STEP 1 — Build base images +# --------------------------------------------------------------------------- + +if [ "$SKIP_BUILD" = "false" ]; then + step "Building VM base images" + info "NOTE: VM image builds require cloud-init ISO or manual install." 
+ info " See docs/ARCHITECTURE.md §5.3.1 for workstation profile guidance." + info " See tools/vm/build-*.sh scripts for per-VM build details." + echo "" + + if vm_selected "workstation"; then + info "Building workstation (ares) — Debian XFCE desktop..." + run bash "$VM_TOOLS/build-workstation.sh" $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + fi + + if vm_selected "web_server"; then + info "Building web_server (hermes) — headless Debian..." + run bash "$VM_TOOLS/build-web-server.sh" $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + fi + + if vm_selected "build_machine"; then + info "Building build_machine (vulcan) — headless Arch..." + run bash "$VM_TOOLS/build-build-machine.sh" $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + fi +fi + +# --------------------------------------------------------------------------- +# STEP 1b — Verify baseline connectivity +# --------------------------------------------------------------------------- + +if [ "$SKIP_BUILD" = "false" ] && [ "$DRY_RUN" = "false" ]; then + step "Verifying baseline connectivity" + for dom_host in "sc-web-server:hermes" "sc-build-machine:vulcan"; do + dom="${dom_host%%:*}" + host="${dom_host##*:}" + addr="$(virsh domifaddr "$dom" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | head -n1 || true)" + if [ -z "$addr" ]; then + info "$dom: no IP yet — skipping connectivity check" + continue + fi + result="$(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=10 -i "$SC_SSH_KEY" "player@$addr" hostname 2>/dev/null || echo FAIL)" + if [ "$result" = "FAIL" ] || [ -z "$result" ]; then + fail "$dom ($host): 'hostname' failed — check inetutils and shell PATH provisioning" + fi + ok "$dom ($host): hostname=$result" + done +fi + +# --------------------------------------------------------------------------- +# STEP 2 — Suppress guest maintenance noise +# --------------------------------------------------------------------------- + +step "Suppressing guest maintenance noise" +info 
"Tuning base images to suppress package manager notices..." +for dom in sc-workstation sc-web-server sc-build-machine; do + domain_selected "$dom" || continue + if virsh dominfo "$dom" &>/dev/null 2>&1 || [ "$DRY_RUN" = "true" ]; then + run bash "$VM_TOOLS/suppress-maintenance-noise.sh" "$dom" \ + $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + ok "$dom: maintenance noise suppressed" + fi +done + +# --------------------------------------------------------------------------- +# STEP 3 — Install guest helpers +# --------------------------------------------------------------------------- + +step "Installing guest helpers" +info "Guest helpers are non-authoritative — advisory signals only." +for dom in sc-workstation sc-web-server sc-build-machine; do + domain_selected "$dom" || continue + if virsh dominfo "$dom" &>/dev/null 2>&1 || [ "$DRY_RUN" = "true" ]; then + run bash "$VM_TOOLS/install-guest-helper.sh" "$dom" \ + $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + ok "$dom: guest helper installed" + fi +done + +# --------------------------------------------------------------------------- +# STEP 4 — Run quest-prep scripts and snapshot +# --------------------------------------------------------------------------- + +step "Running quest-prep scripts and snapshotting" + +run_prep_and_snapshot() { + local quest_id="$1" + local domain="$2" + local snapshot_name="$3" + local prep_script="$QUEST_PREP/${quest_id}-prep.sh" + + if [ ! -f "$prep_script" ]; then + echo " ⚠ No prep script found for $quest_id — skipping" + return + fi + + info "Running $quest_id prep on $domain..." + run bash "$prep_script" "$domain" $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + + info "Taking snapshot '$snapshot_name' on $domain..." 
+ run virsh snapshot-delete "$domain" "$snapshot_name" >/dev/null 2>&1 || true + run virsh snapshot-create-as "$domain" "$snapshot_name" \ + --description "${quest_id} baseline — created by seed-vms.sh" \ + --atomic + ok "$domain → $snapshot_name" +} + +run_post_clean_and_snapshot() { + local quest_id="$1" + local domain="$2" + local snapshot_name="$3" + local clean_script="$QUEST_PREP/${quest_id}-post-clean.sh" + + if [ ! -f "$clean_script" ]; then + echo " ⚠ No post-clean script found for $quest_id — skipping" + return + fi + + info "Applying ${quest_id} clean branch state on $domain..." + run bash "$clean_script" "$domain" $([ "$DRY_RUN" = "true" ] && echo "--dry-run") + + info "Taking snapshot '$snapshot_name' on $domain..." + run virsh snapshot-delete "$domain" "$snapshot_name" >/dev/null 2>&1 || true + run virsh snapshot-create-as "$domain" "$snapshot_name" \ + --description "${quest_id} clean branch baseline — created by seed-vms.sh" \ + --atomic + ok "$domain → $snapshot_name" +} + +# Q001: workstation day-one state +if vm_selected "workstation"; then + run_prep_and_snapshot "Q001" "sc-workstation" "baseline.day-one" +fi + +# Q002–Q004 share hermes clean baseline; prep scripts layer on top +if vm_selected "web_server"; then + run_prep_and_snapshot "Q002" "sc-web-server" "baseline.clean" + run_prep_and_snapshot "Q003" "sc-web-server" "baseline.post-q002" + run_prep_and_snapshot "Q004" "sc-web-server" "baseline.post-q003" +fi + +# Q005 and Q007 use post-q004 baseline +if vm_selected "web_server"; then + info "Creating baseline.post-q004 snapshot (used by Q005, Q007)..." 
+ run virsh snapshot-delete "sc-web-server" "baseline.post-q004" >/dev/null 2>&1 || true + run virsh snapshot-create-as "sc-web-server" "baseline.post-q004" \ + --description "Post-Q004 baseline" --atomic || true +fi + +# Q006: build machine broken baseline, then authored clean handoff for later quests +if vm_selected "build_machine"; then + run_prep_and_snapshot "Q006" "sc-build-machine" "baseline.clean" + run_post_clean_and_snapshot "Q006" "sc-build-machine" "baseline.post-q006" +fi + +info "Q008 remains a multi-VM authored-state gap and is not provisioned by seed-vms.sh yet." + +# Take recovery snapshots (always available as fallback) +step "Creating recovery snapshots" +for dom in sc-workstation sc-web-server sc-build-machine; do + domain_selected "$dom" || continue + info "Creating baseline.recovery on $dom..." + run virsh snapshot-delete "$dom" "baseline.recovery" >/dev/null 2>&1 || true + run virsh snapshot-create-as "$dom" "baseline.recovery" \ + --description "Recovery fallback — created by seed-vms.sh" \ + --atomic || true + ok "$dom → baseline.recovery" +done + +# --------------------------------------------------------------------------- + +echo "" +echo "══════════════════════════════════════════════════" +echo " Seed pipeline complete." +echo " Verify with: bash tools/setup/check-host.sh" +echo " Run content validation: bash tools/dev/test-content.sh" +echo "══════════════════════════════════════════════════" +echo "" diff --git a/tools/setup/uninstall.sh b/tools/setup/uninstall.sh new file mode 100644 index 0000000..d3fa0a9 --- /dev/null +++ b/tools/setup/uninstall.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# uninstall.sh — Remove all Sysadmin Chronicles game-owned host resources. 
+#
+# Removes:
+#   - sc- prefixed libvirt VM domains (after confirmation)
+#   - sc- prefixed libvirt networks
+#   - sc-images storage pool and its directory
+#   - SSH key pair (~/.ssh/sc_host_key)
+#
+# Does NOT remove:
+#   - The game directory itself (remove manually if desired)
+#   - Any system-wide libvirt config
+#   - Any resources not prefixed with sc-
+#
+# Usage:
+#   bash tools/setup/uninstall.sh
+#   bash tools/setup/uninstall.sh --dry-run
+#   bash tools/setup/uninstall.sh --yes (skip confirmation)
+
+set -euo pipefail
+
+DRY_RUN=false
+ASSUME_YES=false
+
+# Reject anything unrecognised: a mistyped --dry-run must never fall through
+# to a real, destructive run.
+for arg in "$@"; do
+  case "$arg" in
+    --dry-run) DRY_RUN=true ;;
+    --yes) ASSUME_YES=true ;;
+    *)
+      echo "Unknown argument: $arg" >&2
+      echo "Usage: bash tools/setup/uninstall.sh [--dry-run] [--yes]" >&2
+      exit 2
+      ;;
+  esac
+done
+
+# run CMD [ARGS...]
+# Execute the command, or only print it when --dry-run is in effect.
+run() {
+  if [ "$DRY_RUN" = "true" ]; then echo " [DRY-RUN] $*"; else "$@"; fi
+}
+
+echo ""
+echo "══════════════════════════════════════════════════"
+echo " Sysadmin Chronicles — Uninstall"
+echo "══════════════════════════════════════════════════"
+[ "$DRY_RUN" = "true" ] && echo " [DRY-RUN mode — no changes]"
+echo ""
+echo " This will PERMANENTLY remove all sc- prefixed VMs,"
+echo " networks, storage, and SSH keys."
+echo ""
+
+if [ "$ASSUME_YES" = "false" ] && [ "$DRY_RUN" = "false" ]; then
+  confirm=""
+  # read returns non-zero at EOF (e.g. stdin is not a terminal); treat that
+  # as "not confirmed" rather than letting set -e end the script silently.
+  read -rp " Type YES to confirm uninstall: " confirm || true
+  if [ "$confirm" != "YES" ]; then
+    echo " Aborted."
+    exit 0
+  fi
+fi
+
+# ---------------------------------------------------------------------------
+# Remove VMs
+# ---------------------------------------------------------------------------
+echo ""
+echo "── Removing VM domains ──────────────────────────"
+for domain in $(virsh list --all --name 2>/dev/null | grep "^sc-" || true); do
+  echo " Removing $domain..."
+  # Stop if running
+  if virsh domstate "$domain" 2>/dev/null | grep -q "running"; then
+    run virsh destroy "$domain"
+  fi
+  # Remove all snapshots
+  for snap in $(virsh snapshot-list "$domain" --name 2>/dev/null || true); do
+    run virsh snapshot-delete "$domain" "$snap"
+  done
+  run virsh undefine "$domain" --remove-all-storage
+  echo " ✓ $domain removed"
+done
+
+# ---------------------------------------------------------------------------
+# Remove networks
+# ---------------------------------------------------------------------------
+echo ""
+echo "── Removing sc- networks ─────────────────────────"
+for net in $(virsh net-list --all --name 2>/dev/null | grep "^sc-" || true); do
+  echo " Removing network $net..."
+  if virsh net-info "$net" 2>/dev/null | grep -q "Active:.*yes"; then
+    run virsh net-destroy "$net"
+  fi
+  run virsh net-undefine "$net"
+  echo " ✓ $net removed"
+done
+
+# ---------------------------------------------------------------------------
+# Remove storage pool
+# ---------------------------------------------------------------------------
+echo ""
+echo "── Removing sc-images storage pool ──────────────"
+# pool-info matches the exact pool name; grepping pool-list would also match
+# unrelated pools whose name merely contains "sc-images".
+if virsh pool-info sc-images >/dev/null 2>&1; then
+  # The pool directory is the <path> element under <target> in the pool XML.
+  POOL_PATH="$(virsh pool-dumpxml sc-images 2>/dev/null | grep -oP '(?<=<path>)[^<]+' | head -n1 || true)"
+  if virsh pool-info sc-images 2>/dev/null | grep -q "State:.*running"; then
+    run virsh pool-destroy sc-images
+  fi
+  run virsh pool-undefine sc-images
+  # Never recurse into an empty or root path, whatever the XML claimed.
+  if [ -n "$POOL_PATH" ] && [ "$POOL_PATH" != "/" ] && [ -d "$POOL_PATH" ]; then
+    run rm -rf "$POOL_PATH"
+  fi
+  echo " ✓ sc-images pool removed"
+else
+  echo " (sc-images pool not found — skipping)"
+fi
+
+# ---------------------------------------------------------------------------
+# Remove SSH keys
+# ---------------------------------------------------------------------------
+echo ""
+echo "── Removing SSH keys ─────────────────────────────"
+KEY="$HOME/.ssh/sc_host_key"
+# Check both halves so an orphaned public key is cleaned up too.
+if [ -f "$KEY" ] || [ -f "${KEY}.pub" ]; then
+  run rm -f "$KEY" "${KEY}.pub"
+  echo " ✓ SSH keys removed"
+else
+  echo " (No sc_host_key found — skipping)"
+fi
+
+echo ""
+echo "══════════════════════════════════════════════════"
+echo " Uninstall complete."
+echo " Game files (this directory) were not removed."
+echo "══════════════════════════════════════════════════"
+echo ""
diff --git a/tools/vm/build-build-machine.sh b/tools/vm/build-build-machine.sh
new file mode 100755
index 0000000..cbd4047
--- /dev/null
+++ b/tools/vm/build-build-machine.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Wrapper — delegates to the modular build-vm.sh driver.
+exec "$(dirname "$0")/build-vm.sh" "$(dirname "$0")/profiles/build-machine.sh" "$@"
diff --git a/tools/vm/build-vm.sh b/tools/vm/build-vm.sh
new file mode 100755
index 0000000..74e40e0
--- /dev/null
+++ b/tools/vm/build-vm.sh
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+# build-vm.sh — Modular VM builder. Sources a profile file that declares VM
+# variables and a generate_user_data() function, then runs the common build
+# pipeline against it.
+#
+# Usage:
+#   ./build-vm.sh <profile> [--dry-run] [--force]
+#
+# Example:
+#   ./build-vm.sh profiles/web-server.sh --force
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <profile> [--dry-run] [--force]"
+  exit 1
+fi
+
+PROFILE_ARG="$1"; shift
+
+DRY_RUN=false
+FORCE=false
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --dry-run) DRY_RUN=true; shift ;;
+    --force) FORCE=true; shift ;;
+    *) echo "Unknown argument: $1"; exit 1 ;;
+  esac
+done
+
+source "$PROJECT_ROOT/tools/lib/config.sh"
+config_read || true
+if [ -n "${SC_IMAGES_DIR:-}" ]; then
+  SC_IMAGE_ROOT="${SC_IMAGE_ROOT:-$SC_IMAGES_DIR}"
+  export SC_IMAGE_ROOT
+fi
+if [ -n "${SC_LIBVIRT_URI:-}" ]; then
+  LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-$SC_LIBVIRT_URI}"
+  export LIBVIRT_DEFAULT_URI
+fi
+
+source "$SCRIPT_DIR/lib/common.sh"
+
+# Resolve profile path: bare name (e.g. "web-server") or explicit path.
+if [[ -f "$PROFILE_ARG" ]]; then + PROFILE="$PROFILE_ARG" +elif [[ -f "$SCRIPT_DIR/profiles/${PROFILE_ARG}.sh" ]]; then + PROFILE="$SCRIPT_DIR/profiles/${PROFILE_ARG}.sh" +elif [[ -f "$SCRIPT_DIR/profiles/${PROFILE_ARG}" ]]; then + PROFILE="$SCRIPT_DIR/profiles/${PROFILE_ARG}" +else + echo "Profile not found: $PROFILE_ARG" + echo "Available profiles:" + ls "$SCRIPT_DIR/profiles/" + exit 1 +fi + +source "$PROFILE" + +# Validate required profile variables. +for var in DOMAIN HOSTNAME RAM_MB VCPUS DISK_SIZE GRAPHICS BASE_URL BASE_IMAGE; do + [[ -n "${!var:-}" ]] || { echo "Profile must set $var"; exit 1; } +done +declare -f generate_user_data >/dev/null || { echo "Profile must define generate_user_data()"; exit 1; } + +GAME_HOST_IP="${SC_GAME_HOST_IP:-10.42.0.1}" +POOL_DIR="$(pool_path)" +DISK_PATH="$POOL_DIR/${DOMAIN}.qcow2" +SEED_ISO="$SC_SEED_DIR/${DOMAIN}-seed.iso" +PUBKEY="$(<"${SC_SSH_KEY}.pub")" + +export DOMAIN HOSTNAME RAM_MB VCPUS DISK_SIZE GRAPHICS BASE_URL BASE_IMAGE +export GAME_HOST_IP POOL_DIR DISK_PATH SEED_ISO PUBKEY + +ensure_vm_tooling + +echo "" +echo "══════════════════════════════════════════════════" +echo " Building VM: $DOMAIN ($HOSTNAME)" +echo " Profile: $(basename "$PROFILE")" +echo " RAM: ${RAM_MB} MB vCPUs: ${VCPUS} Disk: ${DISK_SIZE}" +echo "══════════════════════════════════════════════════" + +if domain_exists "$DOMAIN" && [ "$FORCE" = "false" ]; then + ok "$DOMAIN already exists. Use --force to rebuild it." 
+ exit 0 +fi + +step "Preparing base image" +download_if_missing "$BASE_URL" "$BASE_IMAGE" + +step "Preparing cloud-init seed" +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir"' EXIT + +generate_user_data > "$tmpdir/user-data" + +cat > "$tmpdir/meta-data" < [--dry-run]" + exit 1 +fi + +if [[ "${2:-}" == "--dry-run" ]]; then + DRY_RUN=true +fi + +source "$SCRIPT_DIR/lib/common.sh" + +helper_name="" +case "$DOMAIN" in + sc-workstation) helper_name="atlas-index" ;; + sc-web-server) helper_name="yardd" ;; + sc-build-machine) helper_name="ops-telemetry-cache" ;; + *) echo "Unknown domain: $DOMAIN"; exit 1 ;; +esac + +ensure_vm_tooling + +tmp_script="$(mktemp)" +cat > "$tmp_script" < /usr/local/bin/${helper_name} <<'HELPER' +#!/usr/bin/env bash +set -euo pipefail +printf '{"helper":"%s","hostname":"%s","timestamp":"%s"}\n' \ + "${helper_name}" \ + "\$(hostname)" \ + "\$(date -Iseconds)" +HELPER +chmod 755 /usr/local/bin/${helper_name} +EOF + +info "Installing guest helper ${helper_name} on ${DOMAIN}" +guest_run_sudo_script "$DOMAIN" "$tmp_script" +rm -f "$tmp_script" +ok "${DOMAIN}: helper ${helper_name} installed" diff --git a/tools/vm/lib/common.sh b/tools/vm/lib/common.sh new file mode 100644 index 0000000..bbd0fad --- /dev/null +++ b/tools/vm/lib/common.sh @@ -0,0 +1,427 @@ +#!/usr/bin/env bash + +set -euo pipefail + +COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VM_TOOLS_DIR="$(cd "$COMMON_DIR/.." && pwd)" +PROJECT_ROOT="$(cd "$VM_TOOLS_DIR/../.." 
&& pwd)" +SC_OWNER_USER="${SC_OWNER_USER:-${SUDO_USER:-$USER}}" +SC_OWNER_HOME="${SC_OWNER_HOME:-$(getent passwd "$SC_OWNER_USER" | cut -d: -f6)}" +SC_OWNER_HOME="${SC_OWNER_HOME:-$HOME}" +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" +if [ "${LIBVIRT_DEFAULT_URI}" = "qemu:///system" ]; then + SC_HOME="${SC_HOME:-/var/lib/libvirt/sysadmin-chronicles}" + SC_IMAGE_ROOT="${SC_IMAGE_ROOT:-/var/lib/libvirt/images/sysadmin-chronicles}" +else + SC_HOME="${SC_HOME:-$SC_OWNER_HOME/.local/share/sysadmin-chronicles}" + SC_IMAGE_ROOT="${SC_IMAGE_ROOT:-$SC_HOME/images}" +fi +SC_BASE_DIR="$SC_IMAGE_ROOT/base" +SC_SEED_DIR="$SC_IMAGE_ROOT/seed" +SC_POOL_NAME="${SC_POOL_NAME:-sc-images}" +SC_NETWORK_NAME="${SC_NETWORK_NAME:-sc-internal}" +SC_SSH_KEY="${SC_SSH_KEY:-$SC_OWNER_HOME/.ssh/sc_host_key}" +DRY_RUN="${DRY_RUN:-false}" + +_virsh() { + if [ "${SC_VIRSH_SUDO:-false}" = true ]; then + sudo virsh "$@" + else + virsh "$@" + fi +} + +_virt_install() { + if [ "${SC_VIRSH_SUDO:-false}" = true ]; then + sudo virt-install "$@" + else + virt-install "$@" + fi +} + +step() { echo ""; echo "── $* ───────────────────────────────────────"; } +ok() { echo " ✓ $*"; } +info() { echo " → $*"; } +fail() { echo " ✗ $*"; exit 1; } + +ssh_login_user() { + local domain="${1:-}" + case "$domain" in + sc-workstation) printf '%s\n' "opsbridge" ;; + *) printf '%s\n' "player" ;; + esac +} + +run() { + if [ "$DRY_RUN" = "true" ]; then + echo " [DRY-RUN] $*" + else + "$@" + fi +} + +require_cmd() { + local cmd="$1" + command -v "$cmd" >/dev/null 2>&1 || fail "Required command not found: $cmd" +} + +ensure_vm_tooling() { + require_cmd virsh + require_cmd qemu-img + require_cmd curl + require_cmd virt-install + + if ! command -v cloud-localds >/dev/null 2>&1 \ + && ! command -v genisoimage >/dev/null 2>&1 \ + && ! command -v mkisofs >/dev/null 2>&1 \ + && ! 
command -v xorriso >/dev/null 2>&1; then + fail "Need cloud-localds, genisoimage, mkisofs, or xorriso to build NoCloud seed images" + fi + + [ -f "$SC_SSH_KEY" ] || fail "Missing SSH private key: $SC_SSH_KEY" + [ -f "${SC_SSH_KEY}.pub" ] || fail "Missing SSH public key: ${SC_SSH_KEY}.pub" + + run mkdir -p "$SC_BASE_DIR" "$SC_SEED_DIR" + + _virsh pool-info "$SC_POOL_NAME" >/dev/null 2>&1 || fail "Missing libvirt pool: $SC_POOL_NAME" + _virsh net-info "$SC_NETWORK_NAME" >/dev/null 2>&1 || fail "Missing libvirt network: $SC_NETWORK_NAME" +} + +pool_path() { + local path + path="$(_virsh pool-dumpxml "$SC_POOL_NAME" | sed -n 's:.*\(.*\).*:\1:p' | head -n1)" + [ -n "$path" ] || fail "Could not determine pool path for $SC_POOL_NAME" + printf '%s\n' "$path" +} + +domain_exists() { + local domain="$1" + _virsh dominfo "$domain" >/dev/null 2>&1 +} + +download_if_missing() { + local url="$1" + local dest="$2" + if [ -f "$dest" ]; then + ok "Using cached base image: $(basename "$dest")" + return + fi + info "Downloading $(basename "$dest")" + run curl -L --fail --output "$dest" "$url" +} + +create_backing_disk() { + local base_image="$1" + local target_disk="$2" + local disk_size="${3:-}" + run mkdir -p "$(dirname "$target_disk")" + run rm -f "$target_disk" + if [ -n "$disk_size" ]; then + run qemu-img create -f qcow2 -F qcow2 -b "$base_image" "$target_disk" "$disk_size" + else + run qemu-img create -f qcow2 -F qcow2 -b "$base_image" "$target_disk" + fi +} + +create_seed_iso() { + local user_data="$1" + local meta_data="$2" + local output_iso="$3" + local seed_dir + seed_dir="$(mktemp -d)" + cp "$user_data" "$seed_dir/user-data" + cp "$meta_data" "$seed_dir/meta-data" + run rm -f "$output_iso" + + if command -v cloud-localds >/dev/null 2>&1; then + run cloud-localds "$output_iso" "$seed_dir/user-data" "$seed_dir/meta-data" + elif command -v genisoimage >/dev/null 2>&1; then + run genisoimage -quiet -output "$output_iso" -volid cidata -joliet -rock "$seed_dir/user-data" 
"$seed_dir/meta-data" + elif command -v mkisofs >/dev/null 2>&1; then + run mkisofs -quiet -output "$output_iso" -volid cidata -joliet -rock "$seed_dir/user-data" "$seed_dir/meta-data" + else + run xorriso -as mkisofs -quiet -output "$output_iso" -volid cidata -joliet -rock "$seed_dir/user-data" "$seed_dir/meta-data" + fi + + rm -rf "$seed_dir" +} + +destroy_domain() { + local domain="$1" + if ! domain_exists "$domain"; then + return + fi + info "Removing existing domain definition: $domain" + run _virsh destroy "$domain" >/dev/null 2>&1 || true + run _virsh undefine "$domain" --nvram --snapshots-metadata >/dev/null 2>&1 \ + || run _virsh undefine "$domain" --snapshots-metadata >/dev/null 2>&1 \ + || run _virsh undefine "$domain" --nvram >/dev/null 2>&1 \ + || run _virsh undefine "$domain" >/dev/null 2>&1 \ + || true +} + +build_import_domain() { + local domain="$1" + local disk_path="$2" + local seed_iso="$3" + local ram_mb="$4" + local vcpus="$5" + local graphics_mode="$6" + + local args=( + --name "$domain" + --memory "$ram_mb" + --vcpus "$vcpus" + --import + --disk "path=$disk_path,format=qcow2,bus=virtio" + --disk "path=$seed_iso,device=cdrom" + --network "network=$SC_NETWORK_NAME,model=virtio" + --channel "unix,target_type=virtio,name=org.qemu.guest_agent.0" + --rng /dev/urandom + --osinfo detect=on,require=off + --noautoconsole + ) + + case "$graphics_mode" in + none) + args+=(--graphics none --console pty,target_type=serial) + ;; + vnc) + args+=(--graphics vnc,listen=127.0.0.1) + ;; + spice) + args+=( + --graphics spice,listen=127.0.0.1 + --video virtio + --channel "spicevmc,target_type=virtio,name=com.redhat.spice.0" + ) + ;; + spice-qxl) + args+=( + --graphics spice,listen=127.0.0.1 + --video qxl + --channel "spicevmc,target_type=virtio,name=com.redhat.spice.0" + ) + ;; + *) + fail "Unknown graphics mode: $graphics_mode" + ;; + esac + + run _virt_install "${args[@]}" + run _virsh autostart "$domain" +} + +seed_cdrom_target() { + local domain="$1" + local 
seed_iso="$2" + _virsh dumpxml "$domain" 2>/dev/null \ + | awk -v seed="$seed_iso" ' + // { + if (matched && target != "") { + print target + exit + } + in_disk=0 + matched=0 + target="" + } + ' +} + +detach_seed_iso() { + local domain="$1" + local seed_iso="$2" + local target + + target="$(seed_cdrom_target "$domain" "$seed_iso" || true)" + if [ -z "$target" ]; then + info "No cloud-init seed ISO attached to $domain" + return 0 + fi + + info "Detaching cloud-init seed ISO from $domain ($target)" + if _virsh domstate "$domain" 2>/dev/null | grep -qi running; then + run _virsh detach-disk "$domain" "$target" --config >/dev/null 2>&1 || true + run _virsh detach-disk "$domain" "$target" --live >/dev/null 2>&1 || true + if seed_cdrom_target "$domain" "$seed_iso" >/dev/null 2>&1; then + info "Restarting $domain to apply cloud-init seed ISO detach" + run _virsh shutdown "$domain" >/dev/null 2>&1 || true + local waited=0 + while [ "$waited" -lt 60 ] && _virsh domstate "$domain" 2>/dev/null | grep -qi running; do + sleep 2 + waited=$((waited + 2)) + done + if _virsh domstate "$domain" 2>/dev/null | grep -qi running; then + run _virsh destroy "$domain" >/dev/null 2>&1 || true + fi + run _virsh start "$domain" >/dev/null 2>&1 + wait_for_agent_ip "$domain" 180 >/dev/null || true + fi + else + run _virsh detach-disk "$domain" "$target" --config >/dev/null 2>&1 || true + fi +} + +domain_mac() { + local domain="$1" + _virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.*/dev/null \ + | awk -v mac="$mac" '$0 ~ mac {print $5}' \ + | cut -d/ -f1 \ + | head -n1 +} + +valid_guest_ip() { + local addr="${1:-}" + [[ -n "$addr" ]] || return 1 + [[ "$addr" != 127.* ]] || return 1 + [[ "$addr" != "0.0.0.0" ]] || return 1 + return 0 +} + +wait_for_agent_ip() { + local domain="$1" + local timeout_sec="${2:-300}" + local waited=0 + + while [ "$waited" -lt "$timeout_sec" ]; do + local addr + addr="$(_virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | 
head -n1 || true)" + if ! valid_guest_ip "$addr"; then + addr="" + fi + if [ -z "$addr" ]; then + addr="$(dhcp_lease_ip "$domain" || true)" + fi + if valid_guest_ip "$addr"; then + printf '%s\n' "$addr" + return 0 + fi + sleep 5 + waited=$((waited + 5)) + done + + return 1 +} + +wait_for_ssh() { + local domain="$1" + local timeout_sec="${2:-180}" + local waited=0 + local login_user + login_user="$(ssh_login_user "$domain")" + + while [ "$waited" -lt "$timeout_sec" ]; do + local addr + addr="$(wait_for_agent_ip "$domain" 10 || true)" + if [ -n "$addr" ]; then + if ssh_base_args "$addr" "$login_user" true >/dev/null 2>&1; then + return 0 + fi + fi + sleep 5 + waited=$((waited + 5)) + done + + return 1 +} + +wait_for_guest_command() { + local domain="$1" + local timeout_sec="$2" + local command="$3" + local progress_command="${4:-}" + local progress_every_sec="${5:-30}" + local waited=0 + local last_progress=-9999 + local login_user + login_user="$(ssh_login_user "$domain")" + + while [ "$waited" -lt "$timeout_sec" ]; do + local addr + addr="$(wait_for_agent_ip "$domain" 10 || true)" + if [ -n "$addr" ]; then + if ssh_base_args "$addr" "$login_user" "$command" >/dev/null 2>&1; then + return 0 + fi + if [ -n "$progress_command" ] && [ $((waited - last_progress)) -ge "$progress_every_sec" ]; then + last_progress="$waited" + info "Guest progress for $domain:" + ssh_base_args "$addr" "$login_user" "$progress_command" 2>/dev/null || true + fi + fi + sleep 5 + waited=$((waited + 5)) + done + + return 1 +} + +ssh_base_args() { + local host="$1" + local login_user="${2:-player}" + shift + shift || true + ssh \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o BatchMode=yes \ + -o ConnectTimeout=5 \ + -o LogLevel=ERROR \ + -i "$SC_SSH_KEY" \ + "${login_user}@${host}" \ + "$@" +} + +guest_run() { + local domain="$1" + shift + wait_for_ssh "$domain" 180 >/dev/null || fail "SSH did not become ready for $domain" + local addr + addr="$(wait_for_agent_ip 
"$domain" 120)" || fail "Could not resolve IP for $domain" + local login_user + login_user="$(ssh_login_user "$domain")" + if [ "$DRY_RUN" = "true" ]; then + echo " [DRY-RUN][SSH $domain@$addr] $*" + return 0 + fi + ssh_base_args "$addr" "$login_user" "$@" +} + +guest_run_sudo_script() { + local domain="$1" + local script_file="$2" + wait_for_ssh "$domain" 180 >/dev/null || fail "SSH did not become ready for $domain" + local addr + addr="$(wait_for_agent_ip "$domain" 120)" || fail "Could not resolve IP for $domain" + local login_user + login_user="$(ssh_login_user "$domain")" + if [ "$DRY_RUN" = "true" ]; then + echo " [DRY-RUN][SSH $domain@$addr] sudo bash -s < $script_file" + return 0 + fi + ssh \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o BatchMode=yes \ + -o ConnectTimeout=5 \ + -o LogLevel=ERROR \ + -i "$SC_SSH_KEY" \ + "${login_user}@${addr}" "sudo bash -s" < "$script_file" +} diff --git a/tools/vm/network-sc-internal.xml b/tools/vm/network-sc-internal.xml new file mode 100644 index 0000000..b00d44d --- /dev/null +++ b/tools/vm/network-sc-internal.xml @@ -0,0 +1,14 @@ + + sc-internal + + + + + + + + + + + + diff --git a/tools/vm/profiles/build-machine.sh b/tools/vm/profiles/build-machine.sh new file mode 100755 index 0000000..86a684b --- /dev/null +++ b/tools/vm/profiles/build-machine.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# Profile: sc-build-machine (vulcan) +# Role: Arch Linux build machine — compiles AxiomFlow artifacts, runs scheduled +# jobs, deploys to hermes. Intentionally different distro from Debian servers. 
+# Distro: Arch Linux cloud image + +DOMAIN="sc-build-machine" +HOSTNAME="vulcan" +RAM_MB=768 +VCPUS=2 +DISK_SIZE="10G" +GRAPHICS="vnc" +BASE_URL="https://geo.mirror.pkgbuild.com/images/latest/Arch-Linux-x86_64-cloudimg.qcow2" +BASE_IMAGE="$SC_BASE_DIR/Arch-Linux-x86_64-cloudimg.qcow2" + +generate_user_data() { +cat < /srv/repo/README.txt + - dd if=/dev/zero of=/swapfile bs=1M count=1024 status=progress + - chmod 600 /swapfile + - mkswap /swapfile + - swapon /swapfile + - echo '/swapfile none swap sw 0 0' >> /etc/fstab + - sysctl -p /etc/sysctl.d/99-sc-vulcan.conf + - chown -R player:player /home/player /srv/repo /srv/builds + - systemctl disable ModemManager || true + - systemctl mask sleep.target suspend.target hibernate.target hybrid-sleep.target +final_message: "Vulcan build machine is ready." +EOF +} diff --git a/tools/vm/profiles/web-server.sh b/tools/vm/profiles/web-server.sh new file mode 100755 index 0000000..01454dd --- /dev/null +++ b/tools/vm/profiles/web-server.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash +# Profile: sc-web-server (hermes) +# Role: nginx web/app server — staging and demo environment for AxiomFlow. +# Distro: Debian 12 (bookworm) cloud image + +DOMAIN="sc-web-server" +HOSTNAME="hermes" +RAM_MB=512 +VCPUS=1 +DISK_SIZE="8G" +GRAPHICS="vnc" +BASE_URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2" +BASE_IMAGE="$SC_BASE_DIR/debian-12-genericcloud-amd64.qcow2" + +generate_user_data() { +cat < + AxiomFlow +

AxiomFlow Staging

Build not yet deployed.

+ + - path: /opt/deploy/deploy.sh + owner: root:root + permissions: '0755' + content: | + #!/usr/bin/env bash + set -euo pipefail + SRC="\${1:-/home/player/build/dist}" + rsync -av --delete "\$SRC/" /var/www/axiomworks/ + echo "\$(date) Deploy from \$SRC complete." >> /var/log/axiomworks/deploy.log + - path: /home/player/.bashrc + owner: root:root + permissions: '0644' + content: | + [ -z "\$PS1" ] && return + export TERM=xterm-256color + export EDITOR=vim + PS1='\[\e[0;33m\]\u@\h\[\e[0m\]:\[\e[0;34m\]\w\[\e[0m\]\$ ' + HISTSIZE=5000 + HISTFILESIZE=10000 + HISTCONTROL=ignoredups:erasedups + shopt -s histappend + alias ll='ls -lh --color=auto' + alias la='ls -lha --color=auto' + alias grep='grep --color=auto' + alias ..='cd ..' + alias nginx-test='nginx -t' + alias nginx-reload='systemctl reload nginx' + alias logs='journalctl -f' + if [ -f /usr/share/bash-completion/bash_completion ]; then + . /usr/share/bash-completion/bash_completion + fi + - path: /etc/sysctl.d/99-sc-hermes.conf + owner: root:root + permissions: '0644' + content: | + vm.swappiness=10 + vm.vfs_cache_pressure=50 + vm.dirty_ratio=15 + vm.dirty_background_ratio=3 + net.ipv6.conf.all.disable_ipv6=1 + net.ipv6.conf.default.disable_ipv6=1 +runcmd: + - ln -sf /etc/nginx/sites-available/axiomworks.conf /etc/nginx/sites-enabled/axiomworks.conf + - rm -f /etc/nginx/sites-enabled/default + - mkdir -p /var/www/axiomworks /var/log/axiomworks /opt/deploy + - chown -R www-data:www-data /var/www/axiomworks + - touch /var/log/axiomworks/deploy.log + - chown www-data:www-data /var/log/axiomworks/deploy.log + - chown -R player:player /home/player + - fallocate -l 512M /swapfile && chmod 600 /swapfile && mkswap /swapfile && swapon /swapfile && echo '/swapfile none swap sw 0 0' >> /etc/fstab + - sysctl -p /etc/sysctl.d/99-sc-hermes.conf + - systemctl enable --now qemu-guest-agent ssh nginx + - systemctl disable --now unattended-upgrades || true + - systemctl disable --now apt-daily.timer apt-daily-upgrade.timer || 
true + - systemctl disable --now ModemManager || true + - systemctl mask sleep.target suspend.target hibernate.target hybrid-sleep.target +final_message: "Hermes web server is ready." +EOF +} diff --git a/tools/vm/profiles/workstation.sh b/tools/vm/profiles/workstation.sh new file mode 100755 index 0000000..ce4c7cb --- /dev/null +++ b/tools/vm/profiles/workstation.sh @@ -0,0 +1,734 @@ +#!/usr/bin/env bash +# Profile: sc-workstation (ares) +# Role: XFCE desktop workstation — where the player works. +# Distro: Debian 12 (bookworm) cloud image + +DOMAIN="sc-workstation" +HOSTNAME="ares" +RAM_MB=2048 +VCPUS=2 +DISK_SIZE="20G" +GRAPHICS="${SC_WORKSTATION_GRAPHICS:-spice}" +BASE_URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2" +BASE_IMAGE="$SC_BASE_DIR/debian-12-genericcloud-amd64.qcow2" +READY_TIMEOUT=1200 +READY_COMMAND='cloud-init status 2>/dev/null | grep -q "status: done" && ! uname -r | grep -q cloud && test -e /dev/dri/card0 && systemctl is-active --quiet lightdm' +READY_PROGRESS_COMMAND='cloud-init status --long; echo "---"; tail -n 12 /var/log/cloud-init-output.log' +READY_WATCH_TEMPLATE='ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o ConnectTimeout=5 -o LogLevel=ERROR -i ~/.ssh/sc_host_key opsbridge@{ADDR} "sudo tail -f /var/log/cloud-init-output.log"' + +# Extra variables used in user-data +WALLPAPER_PATH="${SC_WALLPAPER_PATH:-$PROJECT_ROOT/server/public/wallpaper.png}" +WALLPAPER_B64="" +if [ -f "$WALLPAPER_PATH" ]; then + WALLPAPER_B64="$(base64 -w 0 "$WALLPAPER_PATH")" +fi +WALLPAPER_B64_INDENT="$(printf '%s\n' "$WALLPAPER_B64" | fold -w 76 | sed 's/^/ /')" + +PRIVKEY_INDENT="$(sed 's/^/ /' "$SC_SSH_KEY")" + +source "$PROJECT_ROOT/tools/lib/internal-https.sh" +sc_ensure_internal_certs "$PROJECT_ROOT" +sc_export_internal_https_env + +SC_CERT_DIR="$(sc_cert_dir)" +HUD_URL="$(sc_hud_url)" +SAGE_URL="$(sc_sage_url)" +COMPANY_URL="$(sc_company_url)" +_SC_CA_CERT_PEM="" 
+_SC_SERVER_CERT_PEM="" +_SC_SERVER_KEY_PEM="" +if [[ -f "$(sc_ca_cert)" && -f "$(sc_tls_cert)" && -f "$(sc_tls_key)" ]]; then + _SC_CA_CERT_PEM="$(cat "$SC_CERT_DIR/ca.crt")" + _SC_SERVER_CERT_PEM="$(cat "$SC_CERT_DIR/server.crt")" + _SC_SERVER_KEY_PEM="$(cat "$SC_CERT_DIR/server.key")" +fi +_SC_CA_CERT_INDENT="$(printf '%s\n' "$_SC_CA_CERT_PEM" | sed 's/^/ /')" +_SC_SERVER_CERT_INDENT="$(printf '%s\n' "$_SC_SERVER_CERT_PEM" | sed 's/^/ /')" +_SC_SERVER_KEY_INDENT="$(printf '%s\n' "$_SC_SERVER_KEY_PEM" | sed 's/^/ /')" +_SC_CA_CERT_JSON="$(printf '%s' "$_SC_CA_CERT_PEM" | tr '\n' '|' | sed 's/|/\\n/g')" + +PLAYER_SSH_CONFIG="$(cat <<'EOF' +Host hermes + HostName 10.42.0.40 + User player + IdentityFile ~/.ssh/sc_host_key + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + BatchMode yes + ConnectTimeout 5 + LogLevel ERROR + +Host vulcan + HostName 10.42.0.24 + User player + IdentityFile ~/.ssh/sc_host_key + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + BatchMode yes + ConnectTimeout 5 + LogLevel ERROR + +Host 10.42.0.* + User player + IdentityFile ~/.ssh/sc_host_key + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + BatchMode yes + ConnectTimeout 5 + LogLevel ERROR +EOF +)" +PLAYER_SSH_CONFIG_INDENT="$(printf '%s\n' "$PLAYER_SSH_CONFIG" | sed 's/^/ /')" + +_nginx_config() { + printf '%s\n' \ + ' server {' \ + ' listen 443 ssl;' \ + ' server_name axiomworks.corp www.axiomworks.corp;' \ + ' ssl_certificate /etc/nginx/certs/server.crt;' \ + ' ssl_certificate_key /etc/nginx/certs/server.key;' \ + ' location / {' \ + " proxy_pass https://${GAME_HOST_IP}:3000/company/;" \ + ' proxy_ssl_verify off;' \ + ' proxy_set_header Host $host;' \ + ' proxy_set_header X-Real-IP $remote_addr;' \ + ' }' \ + ' }' \ + ' server {' \ + ' listen 80;' \ + ' server_name axiomworks.corp www.axiomworks.corp;' \ + ' return 301 https://$host$request_uri;' \ + ' }' +} + +_cert_write_files() { + if [[ -z "$_SC_CA_CERT_PEM" ]]; then return; fi + printf '%s\n' ' - path: 
/usr/local/share/ca-certificates/axiomworks-ca.crt' + printf '%s\n' ' owner: root:root' + printf '%s\n' " permissions: '0644'" + printf '%s\n' ' content: |' + printf '%s\n' "$_SC_CA_CERT_INDENT" + printf '%s\n' ' - path: /etc/nginx/certs/server.crt' + printf '%s\n' ' owner: root:root' + printf '%s\n' " permissions: '0644'" + printf '%s\n' ' content: |' + printf '%s\n' "$_SC_SERVER_CERT_INDENT" + printf '%s\n' ' - path: /etc/nginx/certs/server.key' + printf '%s\n' ' owner: root:root' + printf '%s\n' " permissions: '0600'" + printf '%s\n' ' content: |' + printf '%s\n' "$_SC_SERVER_KEY_INDENT" + printf '%s\n' ' - path: /etc/chromium/policies/managed/axiomworks-ca.json' + printf '%s\n' ' owner: root:root' + printf '%s\n' " permissions: '0644'" + printf '%s\n' ' content: |' + printf '%s\n' ' {' + printf '%s\n' ' "AdditionalTrustAnchors": [' + printf '%s\n' " \"$_SC_CA_CERT_JSON\"" + printf '%s\n' ' ]' + printf '%s\n' ' }' +} + +generate_user_data() { +cat </dev/null 2>&1; do sleep 2; done + exec chromium --no-first-run --no-default-browser-check --new-window "${HUD_URL}" + - path: /usr/local/share/axiomworks-wallpaper.png + owner: root:root + permissions: '0644' + encoding: b64 + content: | +${WALLPAPER_B64_INDENT} + - path: /usr/share/backgrounds/wallpaper.png + owner: root:root + permissions: '0644' + encoding: b64 + content: | +${WALLPAPER_B64_INDENT} + - path: /home/player/Desktop/Portal.desktop + owner: root:root + permissions: '0755' + content: | + [Desktop Entry] + Type=Application + Name=Axiom Works Portal + Exec=chromium --no-first-run --no-default-browser-check --new-window ${HUD_URL} + Icon=chromium + Terminal=false + - path: /home/player/Desktop/Terminal.desktop + owner: root:root + permissions: '0755' + content: | + [Desktop Entry] + Type=Application + Name=Terminal + Exec=tilix + Icon=utilities-terminal + Terminal=false + Path=/home/player + - path: /usr/local/bin/trust-desktop-launchers + owner: root:root + permissions: '0755' + content: | + #!/bin/bash + 
# Trust every player desktop launcher from the real player login session. + set -u + PATH=/usr/local/bin:/usr/bin:/bin + player_uid="\$(id -u player)" + desktop_dir=/home/player/Desktop + export HOME=/home/player + export USER=player + export LOGNAME=player + export DISPLAY="\${DISPLAY:-:0}" + export XAUTHORITY="\${XAUTHORITY:-/home/player/.Xauthority}" + export XDG_RUNTIME_DIR="/run/user/\$player_uid" + if [ -S "\$XDG_RUNTIME_DIR/bus" ]; then + export DBUS_SESSION_BUS_ADDRESS="unix:path=\$XDG_RUNTIME_DIR/bus" + fi + + metadata_daemon="" + for candidate in /usr/libexec/gvfsd-metadata /usr/lib/gvfs/gvfsd-metadata /usr/lib/x86_64-linux-gnu/gvfs/gvfsd-metadata; do + if [ -x "\$candidate" ]; then + metadata_daemon="\$candidate" + break + fi + done + if [ -n "\$metadata_daemon" ] && ! /usr/bin/pgrep -u "\$player_uid" -x gvfsd-metadata >/dev/null 2>&1; then + "\$metadata_daemon" >/dev/null 2>&1 & + sleep 1 + fi + + for i in \$(/usr/bin/seq 1 20); do + trusted_any=false + failed=false + for launcher in "\$desktop_dir"/*.desktop; do + [ -e "\$launcher" ] || continue + chmod 0755 "\$launcher" 2>/dev/null || true + checksum="\$(/usr/bin/sha256sum "\$launcher" | /usr/bin/awk '{print \$1}')" || { + failed=true + continue + } + if /usr/bin/gio set -t string "\$launcher" metadata::xfce-exe-checksum "\$checksum" 2>/dev/null; then + actual_checksum="\$(/usr/bin/gio info -a metadata::xfce-exe-checksum "\$launcher" 2>/dev/null | /usr/bin/awk -F': ' '/metadata::xfce-exe-checksum:/ {print \$2; exit}')" + owner_mode="\$(/usr/bin/stat -c '%U:%G %a' "\$launcher" 2>/dev/null || true)" + if [ "\$actual_checksum" != "\$checksum" ] || [ "\$owner_mode" != "player:player 755" ]; then + failed=true + continue + fi + trusted_any=true + else + failed=true + fi + done + if [ "\$trusted_any" = true ] && [ "\$failed" = false ]; then + /usr/bin/xfdesktop --reload >/dev/null 2>&1 || /usr/bin/pkill -HUP xfdesktop 2>/dev/null || true + rm -f /home/player/.config/autostart/trust-launchers.desktop + exit 
0 + fi + sleep 1 + done + # gvfsd not ready — will retry next login + exit 1 + - path: /home/player/.local/bin/trust-desktop-launchers.sh + owner: root:root + permissions: '0755' + content: | + #!/bin/bash + exec /usr/local/bin/trust-desktop-launchers + - path: /home/player/.config/autostart/trust-launchers.desktop + owner: root:root + permissions: '0644' + content: | + [Desktop Entry] + Type=Application + Name=Trust Desktop Launchers + Exec=/usr/local/bin/trust-desktop-launchers + Terminal=false + X-GNOME-Autostart-enabled=true + Hidden=false + NoDisplay=true + - path: /home/player/Desktop/VIEWER_HELP.txt + owner: root:root + permissions: '0644' + content: | + Workstation Viewer — Quick Reference + ===================================== + Toggle fullscreen: F11 + Release mouse/kb: Shift+F12 (or Ctrl+Alt on some builds) + Scale display: View → Zoom (or Ctrl+scroll) + Copy from guest: Select text, then right-click → Copy + Paste to guest: Right-click input field → Paste + Switch USB redirect: Input → USB Device Redirection + - path: /home/player/.config/xfce4/desktop/icons.screen0-1264x757.rc + owner: root:root + permissions: '0644' + content: | + [xfdesktop-version-4.10.3+-rcfile_format] + 4.10.3+=true + + [/home/player/Desktop/VIEWER_HELP.txt] + row=6 + col=0 + + [/home/player/Desktop/Terminal.desktop] + row=0 + col=6 + + [/home/player/Desktop/Portal.desktop] + row=0 + col=7 + + [Trash] + row=6 + col=11 + + [/] + row=0 + col=4 + + [/home/player] + row=0 + col=5 + - path: /home/player/.config/xfce4/desktop/icons.screen.latest.rc + owner: root:root + permissions: '0644' + content: | + [xfdesktop-version-4.10.3+-rcfile_format] + 4.10.3+=true + + [/home/player/Desktop/VIEWER_HELP.txt] + row=6 + col=0 + + [/home/player/Desktop/Terminal.desktop] + row=0 + col=6 + + [/home/player/Desktop/Portal.desktop] + row=0 + col=7 + + [Trash] + row=6 + col=11 + + [/] + row=0 + col=4 + + [/home/player] + row=0 + col=5 + - path: 
/home/player/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml + owner: root:root + permissions: '0644' + content: | + + + + + + + - path: /home/player/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml + owner: root:root + permissions: '0644' + content: | + + + + + + + + + + - path: /home/player/.ssh/sc_host_key + owner: root:root + permissions: '0600' + content: | +${PRIVKEY_INDENT} + - path: /home/player/.ssh/config + owner: root:root + permissions: '0600' + content: | +${PLAYER_SSH_CONFIG_INDENT} + - path: /home/player/.config/chromium/Default/Preferences + owner: root:root + permissions: '0644' + content: | + { + "bookmark_bar": { "show_on_all_tabs": true }, + "browser": { + "check_default_browser": false, + "show_home_button": false + }, + "background_mode": { "enabled": false }, + "signin": { "allowed": false }, + "metrics": { "reporting_enabled": false }, + "safebrowsing": { "enabled": false }, + "translate": { "enabled": false } + } + - path: /home/player/.config/chromium/First Run + owner: root:root + permissions: '0644' + content: '' + - path: /home/player/.config/xfce4/xfconf/xfce-perchannel-xml/xsettings.xml + owner: root:root + permissions: '0644' + content: | + + + + + + + + + + + - path: /home/player/.config/xfce4/helpers.rc + owner: root:root + permissions: '0644' + content: | + TerminalEmulator=tilix + - path: /home/player/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-desktop.xml + owner: root:root + permissions: '0644' + content: | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - path: /home/player/.bashrc + owner: root:root + permissions: '0644' + content: | + [ -z "\$PS1" ] && return + export TERM=xterm-256color + export EDITOR=nano + PS1='\[\e[0;32m\]\u@\h\[\e[0m\]:\[\e[0;34m\]\w\[\e[0m\]\$ ' + HISTSIZE=5000 + HISTFILESIZE=10000 + HISTCONTROL=ignoredups:erasedups + shopt -s histappend + alias ll='ls -lh --color=auto' + alias la='ls -lha --color=auto' + alias l='ls -CF 
--color=auto' + alias grep='grep --color=auto' + alias ..='cd ..' + alias ...='cd ../..' + export LS_COLORS='di=0;34:ln=0;36:ex=0;32:' + if [ -f /usr/share/bash-completion/bash_completion ]; then + . /usr/share/bash-completion/bash_completion + fi + - path: /etc/sysctl.d/99-sc-workstation.conf + owner: root:root + permissions: '0644' + content: | + vm.swappiness=10 + vm.vfs_cache_pressure=50 + vm.dirty_ratio=20 + vm.dirty_background_ratio=5 + net.ipv6.conf.all.disable_ipv6=1 + net.ipv6.conf.default.disable_ipv6=1 + - path: /etc/udev/rules.d/90-sysadmin-chronicles-hide-system-disk.rules + owner: root:root + permissions: '0644' + content: | + # Hide the internal VirtIO system disk from desktop file-manager device lists. + KERNEL=="vd[a-z]", ENV{UDISKS_IGNORE}="1" + KERNEL=="vd[a-z][0-9]*", ENV{UDISKS_IGNORE}="1" + - path: /etc/nginx/sites-available/axiomworks + owner: root:root + permissions: '0644' + content: | +$(_nginx_config) +$(_cert_write_files) +runcmd: + - mkdir -p /home/player/Desktop /home/player/projects /home/player/.ssh /home/player/.config/autostart /home/player/.config/xfce4/desktop /home/player/.config/xfce4/xfconf/xfce-perchannel-xml /home/player/.config/chromium/Default /home/opsbridge/.ssh /home/player/.local/bin + - chown -R player:player /home/player + - chown -R opsbridge:opsbridge /home/opsbridge + - passwd -d player + - chmod 700 /home/player/.ssh + - chmod 700 /home/opsbridge/.ssh + - touch /home/player/.ssh/authorized_keys + - touch /home/opsbridge/.ssh/authorized_keys + - chown player:player /home/player/.ssh/authorized_keys + - chown opsbridge:opsbridge /home/opsbridge/.ssh/authorized_keys + - chmod 600 /home/player/.ssh/authorized_keys + - chmod 600 /home/opsbridge/.ssh/authorized_keys + - printf '%s\n' 'Axiom Works workstation ready.' 
> /home/player/notes.txt + - chown player:player /home/player/notes.txt + - mkdir -p /var/lib/lightdm/data + - chown lightdm:lightdm /var/lib/lightdm/data || chown 108:114 /var/lib/lightdm/data || true + - test -f /swapfile || fallocate -l 1G /swapfile + - chmod 600 /swapfile + - mkswap -f /swapfile + - swapon /swapfile || true + - grep -q '^/swapfile ' /etc/fstab || echo '/swapfile none swap sw 0 0' >> /etc/fstab + - ln -sf /etc/nginx/sites-available/axiomworks /etc/nginx/sites-enabled/axiomworks + - mkdir -p /etc/nginx/certs + - test -f /usr/local/share/ca-certificates/axiomworks-ca.crt && update-ca-certificates || true + - mkdir -p /etc/chromium/policies/managed + - | + if [ -f /usr/local/share/ca-certificates/axiomworks-ca.crt ]; then + mkdir -p /home/player/.pki/nssdb + certutil -d sql:/home/player/.pki/nssdb -N --empty-password 2>/dev/null || true + certutil -d sql:/home/player/.pki/nssdb -A -t "CT,," -n "Axiom Works CA" -i /usr/local/share/ca-certificates/axiomworks-ca.crt 2>/dev/null || true + chown -R player:player /home/player/.pki + fi + - rm -f /etc/nginx/sites-enabled/default + - systemctl enable --now nginx + - systemctl enable --now qemu-guest-agent ssh spice-vdagent + - systemctl enable lightdm + - systemctl set-default graphical.target + - DEBIAN_FRONTEND=noninteractive apt-get purge -y plymouth plymouth-label || true + - DEBIAN_FRONTEND=noninteractive apt-get install -y linux-image-amd64 + - cloud_kernels="\$(dpkg-query -W -f='\${Package}\\n' 'linux-image-*-cloud-amd64' 2>/dev/null | tr '\\n' ' ')"; if [ -n "\$cloud_kernels" ]; then DEBIAN_FRONTEND=noninteractive apt-get purge -y linux-image-cloud-amd64 \$cloud_kernels; fi + - update-grub || true + - update-alternatives --set x-www-browser /usr/bin/chromium || true + - update-alternatives --set x-terminal-emulator /usr/bin/tilix || true + - sysctl -p /etc/sysctl.d/99-sc-workstation.conf + - udevadm control --reload-rules || true + - udevadm trigger --subsystem-match=block || true + - systemctl 
enable --now avahi-daemon + - "sed -i 's/^hosts:.*/hosts: files mdns4_minimal [NOTFOUND=return] dns/' /etc/nsswitch.conf" + - systemctl disable --now unattended-upgrades || true + - systemctl disable --now apt-daily.timer apt-daily-upgrade.timer || true + - systemctl disable --now ModemManager || true + - systemctl mask sleep.target suspend.target hibernate.target hybrid-sleep.target + - rm -f /home/player/.config/autostart/game-hud.desktop + - rm -f /home/player/.Xauthority /home/player/.ICEauthority + - find /home/player/Desktop -maxdepth 1 -type f -name '*.desktop' -exec chmod 0755 {} + + - chown -R player:player /home/player +power_state: + mode: reboot + timeout: 30 + condition: true +final_message: "Ares XFCE workstation is ready." +EOF +} diff --git a/tools/vm/quest-prep/Q001-prep.sh b/tools/vm/quest-prep/Q001-prep.sh new file mode 100644 index 0000000..4fffe9c --- /dev/null +++ b/tools/vm/quest-prep/Q001-prep.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Q001-prep.sh — Workstation baseline: SSH key missing +# +# Prepares the workstation VM for Q001 "Welcome Aboard". +# The player's SSH key was never added during provisioning. +# +# What this does: +# - Ensures the player account exists +# - Removes /home/player/.ssh/authorized_keys (key not provisioned) +# - Leaves /var/log/auth.log with a "Permission denied (publickey)" entry +# +# Idempotent: safe to run multiple times. +# AGENT RULES: Never run against a live player session. 
+ +set -euo pipefail + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +DOMAIN="${1:-sc-workstation}" +DRY_RUN=false +[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true + +SC_SSH_KEY="${SC_SSH_KEY:-${HOME}/.ssh/sc_host_key}" +SSH_USER="${SSH_USER:-opsbridge}" +SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o ConnectTimeout=10 -o LogLevel=ERROR -i $SC_SSH_KEY" + +get_vm_ip() { + local domain="$1" + local addr="" + addr="$(virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | grep -v '^127\.' | head -n1 || true)" + if [ -n "$addr" ]; then + printf '%s\n' "$addr" + return 0 + fi + local mac="" + mac="$(virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.*> /var/log/auth.log; rm -f /home/player/.ssh/authorized_keys; echo Q001-prep: authorized_keys removed'" + +echo "Q001-prep: Done." diff --git a/tools/vm/quest-prep/Q002-prep.sh b/tools/vm/quest-prep/Q002-prep.sh new file mode 100644 index 0000000..3787c35 --- /dev/null +++ b/tools/vm/quest-prep/Q002-prep.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Q002-prep.sh — hermes baseline: nginx config syntax error +# +# Prepares sc-web-server for Q002 "Syntax Error in Aisle Four". +# Introduces a deliberate nginx config syntax error that breaks the service. +# +# What this does: +# - Installs nginx if not present +# - Writes a broken /etc/nginx/sites-enabled/axiomworks.conf +# (missing semicolon on the server_name line) +# - Stops nginx so the player finds it down +# - Adds error log evidence +# +# Idempotent: safe to run multiple times. + +set -euo pipefail + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +DOMAIN="${1:-sc-web-server}" +DRY_RUN=false +[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true + +get_vm_ip() { + local domain="$1" + local addr="" + addr="$(virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | grep -v '^127\.' 
| head -n1 || true)" + if [ -n "$addr" ]; then + printf '%s\n' "$addr" + return 0 + fi + local mac="" + mac="$(virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.* /etc/nginx/sites-enabled/axiomworks.conf <<'NGINX_CONF' +server { + listen 80; + server_name axiomworks.internal # <-- MISSING SEMICOLON: this is the bug + root /var/www/axiomworks; + index index.html; + + location / { + try_files \$uri \$uri/ =404; + } +} +NGINX_CONF" + +# Disable the default site to make this the only relevant config +run_in_vm "rm -f /etc/nginx/sites-enabled/default" + +# Stop nginx (it fails to start with bad config) +run_in_vm "systemctl stop nginx || true" + +# Populate nginx error log with the kind of evidence a player would find +run_in_vm "mkdir -p /var/log/nginx && echo '[emerg] unexpected \";\" in /etc/nginx/sites-enabled/axiomworks.conf:3' >> /var/log/nginx/error.log" + +# Create the web root (nginx would serve from here if config were valid) +run_in_vm "mkdir -p /var/www/axiomworks && echo '

Axiom Works

' > /var/www/axiomworks/index.html" + +echo "Q002-prep: Done. nginx is stopped with broken config on $DOMAIN." diff --git a/tools/vm/quest-prep/Q003-prep.sh b/tools/vm/quest-prep/Q003-prep.sh new file mode 100644 index 0000000..95dc2f1 --- /dev/null +++ b/tools/vm/quest-prep/Q003-prep.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Q003-prep.sh — hermes baseline: logrotate missing, nginx access log ballooning +# +# Prepares sc-web-server for Q003 "The Log That Ate the Disk". +# Assumes Q002 is already resolved (nginx is running, config is clean). +# +# What this does: +# - Removes /etc/logrotate.d/nginx (log rotation not configured) +# - Grows /var/log/nginx/access.log to ~80% disk pressure +# - Disk usage should read >85% on /var so player sees the pressure +# +# Idempotent: safe to run multiple times. + +set -euo pipefail + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +DOMAIN="${1:-sc-web-server}" +DRY_RUN=false +[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true + +get_vm_ip() { + local domain="$1" + local addr="" + addr="$(virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | grep -v '^127\.' | head -n1 || true)" + if [ -n "$addr" ]; then + printf '%s\n' "$addr" + return 0 + fi + local mac="" + mac="$(virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.*> /var/log/nginx/access.log" + +echo "Q003-prep: Done. /var/log/nginx/access.log inflated on $DOMAIN." +echo " Check disk pressure with: df -h (on the VM)" diff --git a/tools/vm/quest-prep/Q004-prep.sh b/tools/vm/quest-prep/Q004-prep.sh new file mode 100644 index 0000000..9a5cd8e --- /dev/null +++ b/tools/vm/quest-prep/Q004-prep.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Q004-prep.sh — hermes baseline: web root owned by root, deploy script in place +# +# Prepares sc-web-server for Q004 "Not My Files". +# A bad deploy re-ran as root and chowned the web root to root. +# The deploy script itself is in /opt/deploy/deploy.sh. 
+# +# What this does: +# - Chowns /var/www/axiomworks and all contents to root:root +# - Places a deploy script at /opt/deploy/deploy.sh (chowned player:player) +# - Ensures nginx is running (deploy will fail but nginx serves stale content) +# +# Idempotent: safe to run multiple times. + +set -euo pipefail + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +DOMAIN="${1:-sc-web-server}" +DRY_RUN=false +[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true + +get_vm_ip() { + local domain="$1" + local addr="" + addr="$(virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | grep -v '^127\.' | head -n1 || true)" + if [ -n "$addr" ]; then + printf '%s\n' "$addr" + return 0 + fi + local mac="" + mac="$(virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.* /opt/deploy/deploy.sh <<'DEPLOY_SCRIPT' +#!/usr/bin/env bash +# deploy.sh — Axiom Works web deploy +# Copies build artifacts to /var/www/axiomworks/ +set -e +SRC=\"\${1:-/home/player/build/dist}\" +rsync -av \"\$SRC/\" /var/www/axiomworks/ +echo 'Deploy complete.' +DEPLOY_SCRIPT" +run_in_vm "chown player:player /opt/deploy/deploy.sh && chmod 755 /opt/deploy/deploy.sh" + +# Ensure nginx is running (serves stale content with root-owned files) +run_in_vm "systemctl start nginx || true" + +echo "Q004-prep: Done. /var/www/axiomworks is owned by root on $DOMAIN." +echo " Player must: sudo chown -R player:player /var/www/axiomworks" diff --git a/tools/vm/quest-prep/Q006-post-clean.sh b/tools/vm/quest-prep/Q006-post-clean.sh new file mode 100644 index 0000000..ba5ff6c --- /dev/null +++ b/tools/vm/quest-prep/Q006-post-clean.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# Q006-post-clean.sh — vulcan clean branch state after Q006 +# +# Applies the authored clean outcome of Q006 so seed-vms.sh can materialize +# baseline.post-q006 for later quests. 
+# +# What this does: +# - Enables and starts systemd-timesyncd +# - Verifies archlinux-keyring is installed +# - Replaces pacman.log failure evidence with a healthy update trail +# +# Idempotent: safe to run multiple times. + +set -euo pipefail + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +DOMAIN="${1:-sc-build-machine}" +DRY_RUN=false +[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true + +get_vm_ip() { + local domain="$1" + local addr="" + addr="$(virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | grep -v '^127\.' | head -n1 || true)" + if [ -n "$addr" ]; then + printf '%s\n' "$addr" + return 0 + fi + local mac="" + mac="$(virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.*/dev/null" +run_in_vm "timedatectl set-ntp true || true" +run_in_vm "systemctl enable --now systemd-timesyncd" + +run_in_vm "cat > /var/log/pacman.log <<'PACMAN_LOG' +[2026-04-23T09:02:14-0400] [PACMAN] synchronizing package lists +[2026-04-23T09:02:19-0400] [ALPM] transaction started +[2026-04-23T09:02:19-0400] [ALPM] upgraded archlinux-keyring (20260401-1 -> 20260420-1) +[2026-04-23T09:02:20-0400] [ALPM] transaction completed +PACMAN_LOG" + +run_in_vm "cat > /var/log/axiomworks/time-drift.note <<'NOTE' +Time sync restored. +systemd-timesyncd is enabled and active. +archlinux-keyring is present and package operations are healthy. +NOTE" + +echo "Q006-post-clean: Done. systemd-timesyncd is active and baseline.post-q006 is ready on $DOMAIN." diff --git a/tools/vm/quest-prep/Q006-prep.sh b/tools/vm/quest-prep/Q006-prep.sh new file mode 100644 index 0000000..4e0c67a --- /dev/null +++ b/tools/vm/quest-prep/Q006-prep.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Q006-prep.sh — vulcan baseline: time sync disabled, pacman signature errors logged +# +# Prepares sc-build-machine for Q006 "Time Is A Flat Circle". +# The machine clock is drifting because time sync was disabled, which surfaces +# as pacman signature verification failures. 
+# +# What this does: +# - Disables and stops common NTP services +# - Seeds pacman.log with realistic signature failure evidence +# - Leaves a small operator note pointing at time drift symptoms +# +# Idempotent: safe to run multiple times. + +set -euo pipefail + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +DOMAIN="${1:-sc-build-machine}" +DRY_RUN=false +[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true + +get_vm_ip() { + local domain="$1" + local addr="" + addr="$(virsh domifaddr "$domain" --source agent 2>/dev/null | awk '/ipv4/ {print $4}' | cut -d/ -f1 | grep -v '^127\.' | head -n1 || true)" + if [ -n "$addr" ]; then + printf '%s\n' "$addr" + return 0 + fi + local mac="" + mac="$(virsh dumpxml "$domain" 2>/dev/null | sed -n "s/.*/dev/null || true" +run_in_vm "systemctl disable systemd-timesyncd ntpd chronyd 2>/dev/null || true" +run_in_vm "mkdir -p /var/log/axiomworks /srv/repo /srv/builds" + +run_in_vm "cat > /var/log/pacman.log <<'PACMAN_LOG' +[2026-04-23T08:10:51-0400] [PACMAN] synchronizing package lists +[2026-04-23T08:10:57-0400] [ALPM] transaction started +[2026-04-23T08:10:58-0400] [ALPM] warning: Public keyring not found; have you run 'pacman-key --init'? +[2026-04-23T08:10:58-0400] [ALPM] error: archlinux-keyring: signature from \"Arch Linux Master Key\" is invalid +[2026-04-23T08:10:58-0400] [ALPM] error: failed to commit transaction (invalid or corrupted package (PGP signature)) +[2026-04-23T08:10:58-0400] [ALPM] transaction failed +PACMAN_LOG" + +run_in_vm "cat > /var/log/axiomworks/time-drift.note <<'NOTE' +Builds started failing after the machine clock fell behind. +Symptoms: + - pacman reports invalid or corrupted package (PGP signature) + - signed packages appear to come from the future + - timedatectl shows NTP inactive +NOTE" + +echo "Q006-prep: Done. NTP is disabled and pacman signature failures are seeded on $DOMAIN." 
diff --git a/tools/vm/rebuild-vms.sh b/tools/vm/rebuild-vms.sh new file mode 100755 index 0000000..96a7821 --- /dev/null +++ b/tools/vm/rebuild-vms.sh @@ -0,0 +1,311 @@ +#!/usr/bin/env bash +# Rebuild or revert game virtual machines. +# +# Usage: +# rebuild-vms.sh Interactive menu +# rebuild-vms.sh --vm workstation Rebuild a single VM (interactive) +# rebuild-vms.sh --revert Revert all VMs to baseline snapshot +# rebuild-vms.sh --revert --vm workstation +# rebuild-vms.sh --snapshot --vm workstation --name before-risky-thing +# rebuild-vms.sh --snapshot --all --name pre-shift-4 +# rebuild-vms.sh --revert --name before-risky-thing --vm workstation +# rebuild-vms.sh --dry-run [other flags] + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +source "$PROJECT_ROOT/tools/lib/ui.sh" +source "$PROJECT_ROOT/tools/lib/config.sh" +source "$PROJECT_ROOT/tools/lib/libvirt.sh" +source "$PROJECT_ROOT/tools/lib/vm.sh" + +config_read || true + +_normalize_dir_path() { + local path="${1:-}" + while [[ "$path" == *//* ]]; do + path="${path//\/\//\/}" + done + while [ "$path" != "/" ] && [ "${path%/}" != "$path" ]; do + path="${path%/}" + done + printf '%s\n' "$path" +} + +if [ -n "${SC_GAME_DIR:-}" ]; then + normalized_game_dir="$(_normalize_dir_path "$SC_GAME_DIR")" + if [ "$normalized_game_dir" != "$SC_GAME_DIR" ]; then + SC_GAME_DIR="$normalized_game_dir" + config_write SC_GAME_DIR "$SC_GAME_DIR" + fi +fi + +if [ -n "${SC_IMAGES_DIR:-}" ]; then + normalized_images_dir="$(_normalize_dir_path "$SC_IMAGES_DIR")" +elif [ -n "${SC_GAME_DIR:-}" ]; then + normalized_images_dir="$SC_GAME_DIR/images" +else + normalized_images_dir="" +fi + +if [ -n "$normalized_images_dir" ]; then + if [ "${SC_IMAGES_DIR:-}" != "$normalized_images_dir" ]; then + SC_IMAGES_DIR="$normalized_images_dir" + config_write SC_IMAGES_DIR "$SC_IMAGES_DIR" + fi + export SC_IMAGE_ROOT="$SC_IMAGES_DIR" +fi + +export 
LIBVIRT_DEFAULT_URI="${SC_LIBVIRT_URI:-${LIBVIRT_DEFAULT_URI:-qemu:///system}}" +export SC_POOL_NAME="${SC_POOL_NAME:-sc-images}" +export SC_NETWORK_NAME="${SC_NETWORK_NAME:-sc-internal}" + +# VM display names +declare -A VM_LABEL=( + [sc-workstation]="workstation" + [sc-web-server]="web server" + [sc-build-machine]="build server" +) +declare -A VM_PROFILE=( + [sc-workstation]=workstation + [sc-web-server]=web-server + [sc-build-machine]=build-machine +) +ALL_VMS=(sc-workstation sc-web-server sc-build-machine) + +DRY_RUN=false +MODE="" +SINGLE_VM="" +SNAP_NAME="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=true; shift ;; + --revert) MODE=revert; shift ;; + --snapshot) MODE=snapshot; shift ;; + --vm) SINGLE_VM="sc-$2"; shift 2 ;; + --name) SNAP_NAME="$2"; shift 2 ;; + --all) SINGLE_VM=""; shift ;; + *) echo "Unknown argument: $1"; exit 1 ;; + esac +done + +run() { + if [ "$DRY_RUN" = true ]; then + echo " [dry-run] $*" + else + "$@" + fi +} + +# Revert one VM to its newest baseline snapshot; prints result. +_revert_to_baseline() { + local vm="$1" + local label="${VM_LABEL[$vm]:-$vm}" + local candidate local_snap="" + for candidate in "baseline.recovery" "baseline.day-one" "baseline.clean"; do + if snapshot_exists "$vm" "$candidate" 2>/dev/null; then + local_snap="$candidate" + break + fi + done + if [ -n "$local_snap" ]; then + sc_info "Reverting $label to $local_snap..." 
+ run snapshot_revert "$vm" "$local_snap" + sc_ok "$label reverted to $local_snap" + else + sc_warn "No baseline snapshot found for $vm — skipping" + fi +} + +_target_vms() { + if [ -n "$SINGLE_VM" ]; then + echo "$SINGLE_VM" + else + printf '%s\n' "${ALL_VMS[@]}" + fi +} + +# --------------------------------------------------------------------------- +# Non-interactive flag modes +# --------------------------------------------------------------------------- + +if [ "$MODE" = "revert" ] && [ -n "$SNAP_NAME" ]; then + sc_header "REVERTING TO SNAPSHOT: $SNAP_NAME" + while IFS= read -r vm; do + label="${VM_LABEL[$vm]:-$vm}" + sc_info "Reverting $label..." + if snapshot_exists "$vm" "$SNAP_NAME"; then + run snapshot_revert "$vm" "$SNAP_NAME" + sc_ok "$label reverted to $SNAP_NAME" + else + sc_warn "Snapshot '$SNAP_NAME' not found on $vm — skipping" + fi + done < <(_target_vms) + exit 0 +fi + +if [ "$MODE" = "snapshot" ]; then + [ -n "$SNAP_NAME" ] || { echo " --snapshot requires --name"; exit 1; } + while IFS= read -r vm; do + label="${VM_LABEL[$vm]:-$vm}" + sc_info "Snapshotting $label as '$SNAP_NAME'..." + run vm_snapshot_create "$vm" "$SNAP_NAME" + sc_ok "$label → $SNAP_NAME" + done < <(_target_vms) + exit 0 +fi + +if [ "$MODE" = "revert" ]; then + sc_header "REVERTING TO BASELINE" + while IFS= read -r vm; do + _revert_to_baseline "$vm" + done < <(_target_vms) + exit 0 +fi + +# --------------------------------------------------------------------------- +# Interactive menu +# --------------------------------------------------------------------------- + +sc_header "SYSADMIN CHRONICLES — VM TOOLS" + +while true; do + echo " What would you like to do?" 
+ echo "" + echo " 1) Revert all VMs to last known good (fast — ~30s)" + echo " 2) Rebuild workstation (~8 min)" + echo " 3) Rebuild web server (~4 min)" + echo " 4) Rebuild build server (~5 min)" + echo " 5) Rebuild everything (~20 min)" + echo " 6) Take a snapshot" + echo " 7) Revert to a named snapshot" + echo "" + echo " q) Cancel" + echo "" + printf " > " >/dev/tty + read -r choice "$logfile" 2>&1; then + elapsed=$(( $(date +%s) - start_ts )) + printf "✓ %dm %02ds\n" $(( elapsed / 60 )) $(( elapsed % 60 )) + else + printf "✗\n" + sc_warn "Rebuild failed — see $logfile" + overall_status=1 + continue + fi + # Re-run quest prep and re-snapshot + sc_info "Re-running quest prep for $vm..." + if run bash "$PROJECT_ROOT/tools/setup/seed-vms.sh" --skip-build --vm "${profile//-/_}" \ + >> "$logfile" 2>&1; then + sc_ok "$label rebuild complete" + else + sc_warn "Quest prep had errors — see $logfile" + overall_status=1 + fi + done + [ "$overall_status" -eq 0 ] || exit "$overall_status" + break + ;; + 6) + echo " Take a snapshot" + echo "" + echo " Which VM?" + for i in "${!ALL_VMS[@]}"; do + vm="${ALL_VMS[$i]}" + printf " %d) %s\n" $(( i + 1 )) "${VM_LABEL[$vm]:-$vm}" + done + printf " > " >/dev/tty + read -r vm_choice /dev/tty + read -r snap_name " >/dev/tty + read -r vm_choice /dev/null | grep -v '^$' | sed 's/^/ /' || true + echo "" + printf " Snapshot name to revert to: " >/dev/tty + read -r snap_name /dev/null | grep 'Creation Time' | awk '{print $3, $4}' || echo "")" + [ -n "$snap_date" ] && sc_info "Snapshot date: $snap_date" + if sc_confirm "Revert ${VM_LABEL[$vm]:-$vm} to '$snap_name'?" "N"; then + run vm_snapshot_revert "$vm" "$snap_name" \ + && sc_ok "Reverted to $snap_name" \ + || sc_warn "Revert failed." + fi + break + ;; + *) + sc_warn "Invalid choice — enter 1–7 or q." 
+            ;;
+    esac
+done
+
+echo ""
diff --git a/tools/vm/repair-workstation-launchers.sh b/tools/vm/repair-workstation-launchers.sh
new file mode 100755
index 0000000..63a84d5
--- /dev/null
+++ b/tools/vm/repair-workstation-launchers.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# Repair trusted desktop launcher metadata in an existing sc-workstation VM.
+#
+# Writes a guest-side script to a temp file and runs it in the VM through
+# guest_run_sudo_script (from lib/common.sh). The guest script:
+#   1. fixes ownership and mode of the player's *.desktop launchers,
+#   2. rewrites http://www.axiomworks.corp/ bookmarks to https://,
+#   3. installs /usr/local/bin/trust-desktop-launchers and an autostart entry,
+#   4. runs that helper at once when the player's DBus session bus exists.
+#
+# Environment:
+#   SC_WORKSTATION_DOMAIN  VM name to repair (default: sc-workstation)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/lib/common.sh"
+
+DOMAIN="${SC_WORKSTATION_DOMAIN:-sc-workstation}"
+
+# Remove the temp script on every exit path.
+tmp_script="$(mktemp)"
+trap 'rm -f "$tmp_script"' EXIT
+
+cat > "$tmp_script" <<'GUESTEOF'
+set -euo pipefail
+
+install -d -o player -g player /home/player/Desktop /home/player/.local/bin /home/player/.config/autostart
+find /home/player/Desktop -maxdepth 1 -type f -name '*.desktop' -exec chown player:player {} +
+find /home/player/Desktop -maxdepth 1 -type f -name '*.desktop' -exec chmod 0755 {} +
+if [ -f /home/player/.config/chromium/Default/Bookmarks ]; then
+  sudo -u player sed -i 's#http://www\.axiomworks\.corp/#https://www.axiomworks.corp/#g' /home/player/.config/chromium/Default/Bookmarks
+fi
+
+cat > /usr/local/bin/trust-desktop-launchers <<'SCRIPTEOF'
+#!/bin/bash
+set -u
+PATH=/usr/local/bin:/usr/bin:/bin
+player_uid="$(id -u player)"
+desktop_dir=/home/player/Desktop
+export HOME=/home/player
+export USER=player
+export LOGNAME=player
+export DISPLAY="${DISPLAY:-:0}"
+export XAUTHORITY="${XAUTHORITY:-/home/player/.Xauthority}"
+export XDG_RUNTIME_DIR="/run/user/$player_uid"
+if [ -S "$XDG_RUNTIME_DIR/bus" ]; then
+  export DBUS_SESSION_BUS_ADDRESS="unix:path=$XDG_RUNTIME_DIR/bus"
+fi
+
+# Start gvfsd-metadata for the player when it is installed but not running
+# (presumably needed for the gio metadata writes below - confirm).
+metadata_daemon=""
+for candidate in /usr/libexec/gvfsd-metadata /usr/lib/gvfs/gvfsd-metadata /usr/lib/x86_64-linux-gnu/gvfs/gvfsd-metadata; do
+  if [ -x "$candidate" ]; then
+    metadata_daemon="$candidate"
+    break
+  fi
+done
+if [ -n "$metadata_daemon" ] && ! /usr/bin/pgrep -u "$player_uid" -x gvfsd-metadata >/dev/null 2>&1; then
+  "$metadata_daemon" >/dev/null 2>&1 &
+  sleep 1
+fi
+
+# Up to 20 rounds, one second apart. A round succeeds only when at least one
+# launcher was stamped and none failed verification.
+for i in $(/usr/bin/seq 1 20); do
+  trusted_any=false
+  failed=false
+  for launcher in "$desktop_dir"/*.desktop; do
+    [ -e "$launcher" ] || continue
+    chmod 0755 "$launcher" 2>/dev/null || true
+    # Take sha256sum's exit status directly. Piping it into awk made the
+    # pipeline report awk's status, so this failure branch could never run
+    # and an unreadable launcher was stamped with an empty checksum.
+    checksum="$(/usr/bin/sha256sum "$launcher")" || {
+      failed=true
+      continue
+    }
+    # sha256sum prints "<digest>  <path>"; keep only the digest.
+    checksum="${checksum%% *}"
+    if /usr/bin/gio set -t string "$launcher" metadata::xfce-exe-checksum "$checksum" 2>/dev/null; then
+      actual_checksum="$(/usr/bin/gio info -a metadata::xfce-exe-checksum "$launcher" 2>/dev/null | /usr/bin/awk -F': ' '/metadata::xfce-exe-checksum:/ {print $2; exit}')"
+      owner_mode="$(/usr/bin/stat -c '%U:%G %a' "$launcher" 2>/dev/null || true)"
+      if [ "$actual_checksum" != "$checksum" ] || [ "$owner_mode" != "player:player 755" ]; then
+        failed=true
+        continue
+      fi
+      trusted_any=true
+    else
+      failed=true
+    fi
+  done
+  if [ "$trusted_any" = true ] && [ "$failed" = false ]; then
+    # Everything verified: refresh the desktop and drop the autostart entry.
+    /usr/bin/xfdesktop --reload >/dev/null 2>&1 || /usr/bin/pkill -HUP xfdesktop 2>/dev/null || true
+    rm -f /home/player/.config/autostart/trust-launchers.desktop
+    exit 0
+  fi
+  sleep 1
+done
+exit 1
+SCRIPTEOF
+chmod 0755 /usr/local/bin/trust-desktop-launchers
+
+cat > /home/player/.local/bin/trust-desktop-launchers.sh <<'SCRIPTEOF'
+#!/bin/bash
+exec /usr/local/bin/trust-desktop-launchers
+SCRIPTEOF
+chown player:player /home/player/.local/bin/trust-desktop-launchers.sh
+chmod 0755 /home/player/.local/bin/trust-desktop-launchers.sh
+
+cat > /home/player/.config/autostart/trust-launchers.desktop <<'DESKTOPEOF'
+[Desktop Entry]
+Type=Application
+Name=Trust Desktop Launchers
+Exec=/usr/local/bin/trust-desktop-launchers
+Terminal=false
+X-GNOME-Autostart-enabled=true
+Hidden=false
+NoDisplay=true
+DESKTOPEOF
+chown player:player /home/player/.config/autostart/trust-launchers.desktop
+chmod 0644 /home/player/.config/autostart/trust-launchers.desktop
+
+if [ -S "/run/user/$(id -u player)/bus" ]; then
+  sudo -u player env HOME=/home/player /usr/local/bin/trust-desktop-launchers
+else
+  echo "Player DBus session is not active; repair will retry on next graphical login." >&2
+fi
+GUESTEOF
+
+guest_run_sudo_script "$DOMAIN" "$tmp_script"
+ok "Desktop launcher repair applied to $DOMAIN"
diff --git a/tools/vm/snapshot-all.sh b/tools/vm/snapshot-all.sh
new file mode 100644
index 0000000..08d603b
--- /dev/null
+++ b/tools/vm/snapshot-all.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+# snapshot-all.sh — Snapshot or revert all game VMs at once.
+#
+# Usage:
+#   bash tools/vm/snapshot-all.sh --snapshot <name>          Create named snapshot on all VMs
+#   bash tools/vm/snapshot-all.sh --revert-to <name>         Revert all VMs to named snapshot
+#   bash tools/vm/snapshot-all.sh --list                     List all snapshots per VM
+#   bash tools/vm/snapshot-all.sh --dry-run --revert-to ...  Dry run (no state changes)
+#
+# SAFETY:
+#   - Only operates on sc- prefixed domains.
+#   - Always prints a summary before modifying state.
+#   - --revert-to requires explicit confirmation (skipped with --yes flag).
+#   - This script is for developer use only. It is NOT available in-game.
+#
+# AGENT RULES:
+#   - Never run --revert-to without explicit user instruction.
+#   - Never run against domains that don't start with sc-.
+
+set -euo pipefail
+
+VMS=("sc-workstation" "sc-web-server" "sc-build-machine")
+DRY_RUN=false
+ASSUME_YES=false
+SNAPSHOT_NAME=""
+REVERT_NAME=""
+LIST_MODE=false
+
+# require_value OPTION ARGC
+# Exit with a readable error when OPTION is the last word on the command line.
+# Without this check, `set -u` aborts on the bare "$2" with "unbound variable".
+require_value() {
+  if [ "$2" -lt 2 ]; then
+    echo "Missing value for $1" >&2
+    exit 1
+  fi
+}
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --snapshot)  require_value "$1" "$#"; SNAPSHOT_NAME="$2"; shift 2 ;;
+    --revert-to) require_value "$1" "$#"; REVERT_NAME="$2"; shift 2 ;;
+    --list)      LIST_MODE=true; shift ;;
+    --dry-run)   DRY_RUN=true; shift ;;
+    --yes)       ASSUME_YES=true; shift ;;
+    *)           echo "Unknown argument: $1"; exit 1 ;;
+  esac
+done
+
+# run CMD [ARG...] — execute the command, or only print it under --dry-run.
+run() {
+  if [ "$DRY_RUN" = "true" ]; then
+    echo " [DRY-RUN] $*"
+  else
+    "$@"
+  fi
+}
+
+# guard_prefix DOMAIN — exit 1 unless DOMAIN starts with "sc-".
+guard_prefix() {
+  local dom="$1"
+  if [[ "$dom" != sc-* ]]; then
+    echo "SAFETY: refusing to operate on non-game domain: $dom"
+    exit 1
+  fi
+}
+
+# domain_exists DOMAIN — succeed when `virsh dominfo` can describe DOMAIN.
+domain_exists() {
+  virsh dominfo "$1" &>/dev/null
+}
+
+# ---------------------------------------------------------------------------
+
+if [ "$LIST_MODE" = "true" ]; then
+  echo ""
+  echo "── Snapshots per VM ─────────────────────────────"
+  for dom in "${VMS[@]}"; do
+    echo ""
+    echo " $dom:"
+    if domain_exists "$dom"; then
+      # Capture the names first: `... | sed || echo "(none)"` only reached the
+      # fallback when the pipeline failed, never for an empty snapshot list.
+      names="$(virsh snapshot-list "$dom" --name 2>/dev/null | grep -v '^$' || true)"
+      if [ -n "$names" ]; then
+        printf '%s\n' "$names" | sed 's/^/ /'
+      else
+        echo " (none)"
+      fi
+    else
+      echo " (domain does not exist)"
+    fi
+  done
+  echo ""
+  exit 0
+fi
+
+# ---------------------------------------------------------------------------
+# SNAPSHOT
+# ---------------------------------------------------------------------------
+
+if [ -n "$SNAPSHOT_NAME" ]; then
+  echo ""
+  echo "Creating snapshot '$SNAPSHOT_NAME' on all game VMs..."
+  [ "$DRY_RUN" = "true" ] && echo "[DRY-RUN mode]"
+  echo ""
+  for dom in "${VMS[@]}"; do
+    guard_prefix "$dom"
+    if domain_exists "$dom"; then
+      echo " Snapshotting $dom..."
+      run virsh snapshot-create-as "$dom" "$SNAPSHOT_NAME" \
+        --description "Created by snapshot-all.sh" \
+        --atomic
+      echo " ✓ $dom → $SNAPSHOT_NAME"
+    else
+      echo " ⚠ $dom not found — skipping"
+    fi
+  done
+  echo ""
+  echo "Done."
+  exit 0
+fi
+
+# ---------------------------------------------------------------------------
+# REVERT
+# ---------------------------------------------------------------------------
+
+if [ -n "$REVERT_NAME" ]; then
+  echo ""
+  echo "══════════════════════════════════════════════════"
+  echo " REVERT ALL VMs TO: $REVERT_NAME"
+  echo "══════════════════════════════════════════════════"
+  echo " VMs: ${VMS[*]}"
+  echo " This will DISCARD all unsaved VM state."
+  [ "$DRY_RUN" = "true" ] && echo " [DRY-RUN mode — no changes will be made]"
+  echo ""
+
+  if [ "$ASSUME_YES" = "false" ] && [ "$DRY_RUN" = "false" ]; then
+    # A closed stdin counts as "not confirmed" rather than a silent set -e exit.
+    read -rp " Type YES to confirm revert: " confirm || confirm=""
+    if [ "$confirm" != "YES" ]; then
+      echo " Aborted."
+      exit 0
+    fi
+  fi
+
+  for dom in "${VMS[@]}"; do
+    guard_prefix "$dom"
+    if domain_exists "$dom"; then
+      # Check the snapshot before stopping anything: a mistyped name used to
+      # destroy the running VM and only then fail in snapshot-revert.
+      if ! virsh snapshot-info "$dom" "$REVERT_NAME" &>/dev/null; then
+        echo " ✗ $dom has no snapshot named '$REVERT_NAME' — aborting before it is stopped" >&2
+        exit 1
+      fi
+      echo " Reverting $dom..."
+      # Stop VM first if running
+      if virsh domstate "$dom" 2>/dev/null | grep -q "running"; then
+        run virsh destroy "$dom"
+      fi
+      run virsh snapshot-revert "$dom" "$REVERT_NAME" --running
+      echo " ✓ $dom → $REVERT_NAME"
+    else
+      echo " ⚠ $dom not found — skipping"
+    fi
+  done
+  echo ""
+  echo "Revert complete."
+  exit 0
+fi
+
+# No mode selected
+echo "Usage:"
+echo " bash snapshot-all.sh --snapshot <name>"
+echo " bash snapshot-all.sh --revert-to <name>"
+echo " bash snapshot-all.sh --list"
+echo " Add --dry-run to preview without changes."
+exit 1
diff --git a/tools/vm/suppress-maintenance-noise.sh b/tools/vm/suppress-maintenance-noise.sh
new file mode 100644
index 0000000..3068456
--- /dev/null
+++ b/tools/vm/suppress-maintenance-noise.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# suppress-maintenance-noise.sh — Reduce guest maintenance output noise.
+#
+# Usage: bash tools/vm/suppress-maintenance-noise.sh <domain> [--dry-run]
+#   With --dry-run the script reports what it would do and leaves the guest
+#   untouched.
+#
+# Suppresses on Debian/Ubuntu guests:
+#   - APT periodic background updates
+#   - MOTD dynamic scripts (package counts, landscape-sysinfo, news)
+#   - PAM motd modules (dynamic MOTD printed at login)
+#   - "X updates can be applied immediately" login banner
+#
+# Suppresses on Arch guests:
+#   - pkgfile update timer (if present)
+#   - quiet-mode marker for game to detect
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DRY_RUN=false
+DOMAIN="${1:-}"
+
+if [ -z "$DOMAIN" ]; then
+  echo "Usage: bash tools/vm/suppress-maintenance-noise.sh <domain> [--dry-run]"
+  exit 1
+fi
+
+if [[ "${2:-}" == "--dry-run" ]]; then
+  DRY_RUN=true
+fi
+
+source "$SCRIPT_DIR/lib/common.sh"
+
+# Honour --dry-run: the flag used to be parsed and then ignored, so a "dry
+# run" still rewrote the guest. Stop before any tooling or guest call.
+if [ "$DRY_RUN" = true ]; then
+  info "[dry-run] would suppress maintenance noise on ${DOMAIN} (no changes made)"
+  exit 0
+fi
+
+ensure_vm_tooling
+
+# Clean the temp script up on every exit path, including a failed guest run.
+tmp_script="$(mktemp)"
+trap 'rm -f "$tmp_script"' EXIT
+
+cat > "$tmp_script" <<'EOF'
+# --- Debian/Ubuntu ---
+if command -v apt-get >/dev/null 2>&1; then
+  # Disable APT background periodic tasks
+  mkdir -p /etc/apt/apt.conf.d
+  cat > /etc/apt/apt.conf.d/99sysadmin-chronicles-quiet <<'APT'
+APT::Periodic::Enable "0";
+APT::Periodic::Update-Package-Lists "0";
+APT::Periodic::Unattended-Upgrade "0";
+APT::Periodic::Download-Upgradeable-Packages "0";
+Acquire::Languages "none";
+APT
+
+  # Disable dynamic MOTD scripts that show update counts, ads, news
+  if [ -d /etc/update-motd.d ]; then
+    chmod -x /etc/update-motd.d/* 2>/dev/null || true
+    # Preserve a minimal placeholder so PAM doesn't error
+    printf '#!/bin/sh\n' > /etc/update-motd.d/00-sysadmin-chronicles
+    chmod +x /etc/update-motd.d/00-sysadmin-chronicles
+  fi
+
+  # Remove static /etc/motd content if present
+  if [ -f /etc/motd ]; then
+    printf '' > /etc/motd
+  fi
+
+  # Disable PAM dynamic motd in sshd PAM config (suppresses update counts at login)
+  for pam_file in /etc/pam.d/sshd /etc/pam.d/login; do
+    if [ -f "$pam_file" ]; then
+      sed -i 's/^session\s\+optional\s\+pam_motd\.so/#&/' "$pam_file"
+    fi
+  done
+
+  # Suppress "X updates can be applied" from landscape-sysinfo / update-notifier
+  if [ -f /etc/landscape/client.conf ]; then
+    sed -i '/sysinfo/d' /etc/landscape/client.conf 2>/dev/null || true
+  fi
+  # Disable landscape-sysinfo if installed
+  if command -v landscape-sysinfo >/dev/null 2>&1; then
+    if [ -f /etc/landscape/client.conf ]; then
+      grep -q 'include_sysinfo_plugins' /etc/landscape/client.conf || \
+        printf '[sysinfo]\ninclude_sysinfo_plugins =\n' >> /etc/landscape/client.conf
+    else
+      mkdir -p /etc/landscape
+      printf '[sysinfo]\ninclude_sysinfo_plugins =\n' > /etc/landscape/client.conf
+    fi
+  fi
+
+  # Disable update-notifier login hint (Debian/Ubuntu)
+  if [ -d /etc/profile.d ]; then
+    for f in /etc/profile.d/update-notifier.sh /etc/profile.d/motd-news.sh; do
+      [ -f "$f" ] && chmod -x "$f" 2>/dev/null || true
+    done
+  fi
+fi
+
+# --- Arch Linux ---
+if command -v pacman >/dev/null 2>&1; then
+  # Disable pkgfile update timer if present (produces periodic output)
+  if systemctl list-unit-files pkgfile-update.timer &>/dev/null; then
+    systemctl disable --now pkgfile-update.timer 2>/dev/null || true
+  fi
+
+  # Quiet-mode marker for game to detect
+  mkdir -p /etc/sysadmin-chronicles
+  printf 'managed=true\n' > /etc/sysadmin-chronicles/quiet-mode.conf
+fi
+EOF
+
+info "Suppressing maintenance noise on ${DOMAIN}"
+guest_run_sudo_script "$DOMAIN" "$tmp_script"
+ok "${DOMAIN}: maintenance noise suppressed"
diff --git a/uninstall.sh b/uninstall.sh
new file mode 100755
index 0000000..c5342d8
--- /dev/null
+++ b/uninstall.sh
@@ -0,0 +1,292 @@
+#!/usr/bin/env bash
+# Sysadmin Chronicles — Uninstaller
+# Usage: bash uninstall.sh [--dry-run]
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$SCRIPT_DIR"
+
+source "$PROJECT_ROOT/tools/lib/ui.sh"
+source "$PROJECT_ROOT/tools/lib/config.sh"
+source "$PROJECT_ROOT/tools/lib/libvirt.sh"
+
+DRY_RUN=false
+for arg in "$@"; do
+  [ "$arg" = "--dry-run" ] && DRY_RUN=true
+done
+
+run() {
+  if [ "$DRY_RUN" = true ]; then
+    echo " [dry-run] $*"
+  else
+    "$@"
+  fi
+}
+
+OWNER_USER="${SUDO_USER:-$USER}" +OWNER_HOME="$(getent passwd "$OWNER_USER" | cut -d: -f6)" +OWNER_HOME="${OWNER_HOME:-$HOME}" +SC_LOG_DIR="$OWNER_HOME/.local/share/sysadmin-chronicles" + +export LIBVIRT_DEFAULT_URI="${LIBVIRT_DEFAULT_URI:-qemu:///system}" + +# Load config for SC_IMAGES_DIR +config_read || true + +sc_header "SYSADMIN CHRONICLES — UNINSTALL" +[ "$DRY_RUN" = true ] && echo " [dry-run mode — nothing will be changed]" && echo "" + +# --------------------------------------------------------------------------- +# Menu +# --------------------------------------------------------------------------- + +cat << 'MENU' + What would you like to remove? + + 1) Everything — full uninstall (recommended) + 2) Game world only — remove VMs and snapshots, keep save data and game files + 3) Save data only — reset all saves to new game state + 4) Custom — choose what to remove + + q) Cancel + +MENU + +printf " > " >/dev/tty +read -r choice /dev/null | awk '{print $1}' || echo 0)" + total=$(( total + sz )) + done < <(virsh domblklist "$dom" --details 2>/dev/null | awk '/disk/ && $4 != "-" {print $4}' || true) + done < <(virsh list --all --name 2>/dev/null | grep "^sc-" || true) + [ "$total" -gt 0 ] \ + && numfmt --to=iec-i --suffix=B "$total" 2>/dev/null || echo "unknown" +} + +_saves_size() { + local d="$SC_LOG_DIR/saves" + [ -d "$d" ] && du -sh "$d" 2>/dev/null | awk '{print $1}' || echo "<1 KB" +} + +_images_size() { + local d="${SC_IMAGES_DIR:-}" + [ -n "$d" ] && [ -d "$d" ] && du -sh "$d" 2>/dev/null | awk '{print $1}' || echo "unknown" +} + +# --------------------------------------------------------------------------- +# "Everything" confirmation +# --------------------------------------------------------------------------- + +if [ "$choice" = "1" ]; then + local_vm_sz="$(_vm_total_size)" + local_img_sz="$(_images_size)" + cat << EOF + This will remove: + + Game virtual machines (3 VMs + all snapshots) ~${local_vm_sz} + Game network and storage configuration <1 MB 
+ Game access keys (~/.ssh/sc_host_key) <1 KB + Desktop launcher (if it exists) <1 KB + + System packages (libvirt, QEMU, etc.) are NOT removed. + ↑ See $SC_INSTALL_LOG if you want to remove them manually. + + Keep VM image files on disk? Keeping them saves the ~30-minute + rebuild if you reinstall later. [Y/n — default: keep] + +EOF + printf " Keep images? [Y/n] > " >/dev/tty + read -r _keep_images " >/dev/tty + read -r _confirm /dev/null | grep -q "running"; then + virsh destroy "$dom" >/dev/null 2>&1 || true + fi + while IFS= read -r snap; do + [ -z "$snap" ] && continue + virsh snapshot-delete "$dom" "$snap" >/dev/null 2>&1 || true + done < <(virsh snapshot-list "$dom" --name 2>/dev/null || true) + virsh undefine "$dom" --remove-all-storage >/dev/null 2>&1 \ + || virsh undefine "$dom" >/dev/null 2>&1 || true + else + echo " [dry-run] would destroy and undefine $dom" + fi + sc_ok "$dom removed" + done < <(virsh list --all --name 2>/dev/null | grep "^sc-" || true) +fi + +if [ "$remove_images" = true ]; then + sc_section "Removing VM image files" + local_images="${SC_IMAGES_DIR:-}" + if [ -n "$local_images" ] && [ -d "$local_images" ]; then + sc_info "Removing $local_images" + run rm -rf "$local_images" + sc_ok "Image directory removed" + else + sc_info "(no image directory found — skipping)" + fi +fi + +if [ "$remove_network" = true ]; then + sc_section "Removing game network and storage config" + while IFS= read -r net; do + [ -z "$net" ] && continue + sc_info "Removing network $net" + if [ "$DRY_RUN" = false ]; then + virsh net-destroy "$net" >/dev/null 2>&1 || true + virsh net-undefine "$net" >/dev/null 2>&1 || true + else + echo " [dry-run] would destroy/undefine network $net" + fi + sc_ok "Network $net removed" + done < <(virsh net-list --all --name 2>/dev/null | grep "^sc-" || true) + if virsh pool-list --all 2>/dev/null | grep -q "sc-images"; then + sc_info "Removing storage pool sc-images" + if [ "$DRY_RUN" = false ]; then + virsh pool-destroy sc-images 
>/dev/null 2>&1 || true
+      virsh pool-undefine sc-images >/dev/null 2>&1 || true
+    else
+      echo " [dry-run] would destroy/undefine pool sc-images"
+    fi
+    sc_ok "Storage pool sc-images removed"
+  fi
+fi
+
+if [ "$remove_keys" = true ]; then
+  sc_section "Removing game SSH keys"
+  KEY="$OWNER_HOME/.ssh/sc_host_key"
+  if [ -f "$KEY" ]; then
+    run rm -f "$KEY" "${KEY}.pub"
+    sc_ok "SSH keys removed"
+  else
+    sc_info "(no sc_host_key found — skipping)"
+  fi
+fi
+
+if [ "$remove_saves" = true ]; then
+  sc_section "Resetting save data"
+  SAVES_DIR="$SC_LOG_DIR/saves"
+  if [ -d "$SAVES_DIR" ]; then
+    run rm -rf "$SAVES_DIR"
+    sc_ok "Save data removed"
+  else
+    sc_info "(no save data found — skipping)"
+  fi
+fi
+
+if [ "$remove_launcher" = true ]; then
+  sc_section "Removing desktop launcher"
+  DESKTOP_FILE="$OWNER_HOME/.local/share/applications/sysadmin-chronicles.desktop"
+  if [ -f "$DESKTOP_FILE" ]; then
+    run rm -f "$DESKTOP_FILE"
+    sc_ok "Desktop launcher removed"
+  else
+    sc_info "(no desktop launcher found — skipping)"
+  fi
+fi
+
+if [ "$remove_config" = true ]; then
+  sc_section "Removing install config"
+  run rm -f "$SC_CONFIG_FILE"
+  sc_ok "Config file removed"
+fi
+
+# ---------------------------------------------------------------------------
+
+echo ""
+echo " ──────────────────────────────────────────"
+echo " Uninstall complete."
+echo ""
+echo " Game files (this directory) were not removed."
+if [ -f "$SC_INSTALL_LOG" ]; then
+  echo " Install log kept at: $SC_INSTALL_LOG"
+fi
+echo " ──────────────────────────────────────────"
+echo ""
diff --git a/vm/probes/collect-state.sh b/vm/probes/collect-state.sh
new file mode 100644
index 0000000..29bcd90
--- /dev/null
+++ b/vm/probes/collect-state.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# collect-state.sh — Collect high-level VM state summary.
+#
+# Usage: bash collect-state.sh <domain>
+# Output: JSON to stdout
+#
+# This is an advisory probe — output informs the game but is never the
+# sole basis for quest validation. Host-authoritative checks always win.
+#
+# AGENT RULES:
+#   - Read-only. Never modify VM state.
+#   - Always output valid JSON, even on error (use {"error": "..."}).
+#   - Complete within 15 seconds.
+
+set -euo pipefail
+
+DOMAIN="${1:-}"
+
+# ssh_login_user DOMAIN — print the SSH account used for DOMAIN.
+ssh_login_user() {
+  case "$1" in
+    sc-workstation) printf '%s\n' "opsbridge" ;;
+    *) printf '%s\n' "player" ;;
+  esac
+}
+LOGIN_USER="$(ssh_login_user "$DOMAIN")"
+
+# json_escape STRING — escape backslashes and double quotes so STRING can sit
+# inside a JSON string literal.
+json_escape() {
+  local s=$1 bs='\' dq='"'
+  s=${s//"$bs"/"$bs$bs"}
+  s=${s//"$dq"/"$bs$dq"}
+  printf '%s' "$s"
+}
+
+# int_or VALUE FALLBACK — print the first line of VALUE (whitespace removed)
+# when it is a JSON-valid integer, otherwise FALLBACK.
+int_or() {
+  local nl=$'\n' v
+  v=${1%%"$nl"*}
+  v=${v//[[:space:]]/}
+  if [[ $v =~ ^-?(0|[1-9][0-9]*)$ ]]; then
+    printf '%s\n' "$v"
+  else
+    printf '%s\n' "$2"
+  fi
+}
+
+if [ -z "$DOMAIN" ]; then
+  echo '{"error": "No domain specified"}'
+  exit 0
+fi
+
+DOMAIN_JSON="$(json_escape "$DOMAIN")"
+
+# Get VM IP via guest agent.
+# `|| true`: with `set -e -o pipefail`, a failing virsh or a grep without a
+# match would otherwise end the script before the JSON error below is printed.
+VM_IP=$(virsh domifaddr "$DOMAIN" --source agent 2>/dev/null \
+  | grep ipv4 | awk '{print $4}' | cut -d/ -f1 | head -1 || true)
+
+if [ -z "$VM_IP" ]; then
+  echo '{"error": "Cannot get VM IP — guest agent may not be running", "domain": "'"$DOMAIN_JSON"'"}'
+  exit 0
+fi
+
+SSH_OPTS="-o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 -i $HOME/.ssh/sc_host_key"
+
+# Collect state via SSH. A failed, empty or non-numeric reply falls back to
+# the previous defaults (-1 / 0 / 0) instead of leaving a hole in the JSON.
+DISK_USAGE=$(ssh $SSH_OPTS "$LOGIN_USER"@"$VM_IP" "df -P / | tail -1 | awk '{print \$5}'" 2>/dev/null | tr -d '%' || true)
+UPTIME=$(ssh $SSH_OPTS "$LOGIN_USER"@"$VM_IP" "cat /proc/uptime | awk '{print int(\$1)}'" 2>/dev/null || true)
+SERVICES=$(ssh $SSH_OPTS "$LOGIN_USER"@"$VM_IP" "systemctl list-units --state=failed --no-legend --no-pager | wc -l" 2>/dev/null || true)
+DISK_USAGE="$(int_or "$DISK_USAGE" -1)"
+UPTIME="$(int_or "$UPTIME" 0)"
+SERVICES="$(int_or "$SERVICES" 0)"
+
+cat << JSON
+{
+  "domain": "$DOMAIN_JSON",
+  "ip": "$VM_IP",
+  "disk_usage_root_pct": $DISK_USAGE,
+  "uptime_seconds": $UPTIME,
+  "failed_service_count": $SERVICES,
+  "collected_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+}
+JSON
diff --git a/vm/probes/host-validate.sh b/vm/probes/host-validate.sh
new file mode 100644
index 0000000..dcd4355
--- /dev/null
+++ b/vm/probes/host-validate.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# host-validate.sh — Quest-specific host validation probe.
+#
+# Usage: bash host-validate.sh <domain> <quest_id>
+# Output: JSON to stdout
+#
+# This is a supplementary probe.
+# ValidationService uses SSH-based checks
+# as the authoritative path; this probe provides higher-level summaries
+# for quests that need them.
+#
+# AGENT RULES:
+#   - Read-only. Never modify VM state.
+#   - Output valid JSON always, even on error.
+#   - Quest-specific logic should be minimal — prefer generic rule evaluation.
+
+set -euo pipefail
+
+DOMAIN="${1:-}"
+QUEST_ID="${2:-}"
+
+# ssh_login_user DOMAIN — print the SSH account used for DOMAIN.
+ssh_login_user() {
+  case "$1" in
+    sc-workstation) printf '%s\n' "opsbridge" ;;
+    *) printf '%s\n' "player" ;;
+  esac
+}
+LOGIN_USER="$(ssh_login_user "$DOMAIN")"
+
+# first_line STRING — print STRING up to (not including) its first newline.
+first_line() {
+  local s=$1 nl=$'\n'
+  printf '%s' "${s%%"$nl"*}"
+}
+
+# json_escape STRING — escape backslashes and double quotes so STRING can sit
+# inside a JSON string literal.
+json_escape() {
+  local s=$1 bs='\' dq='"'
+  s=${s//"$bs"/"$bs$bs"}
+  s=${s//"$dq"/"$bs$dq"}
+  printf '%s' "$s"
+}
+
+# int_or VALUE FALLBACK — print the first line of VALUE (whitespace removed)
+# when it is a JSON-valid integer, otherwise FALLBACK.
+int_or() {
+  local v
+  v="$(first_line "$1")"
+  v=${v//[[:space:]]/}
+  if [[ $v =~ ^-?(0|[1-9][0-9]*)$ ]]; then
+    printf '%s\n' "$v"
+  else
+    printf '%s\n' "$2"
+  fi
+}
+
+if [ -z "$DOMAIN" ] || [ -z "$QUEST_ID" ]; then
+  echo '{"error": "Usage: host-validate.sh <domain> <quest_id>"}'
+  exit 0
+fi
+
+DOMAIN_JSON="$(json_escape "$DOMAIN")"
+QUEST_JSON="$(json_escape "$QUEST_ID")"
+
+# `|| true`: with `set -e -o pipefail`, a failing virsh or a grep without a
+# match would otherwise end the script before the JSON error below is printed.
+VM_IP=$(virsh domifaddr "$DOMAIN" --source agent 2>/dev/null \
+  | grep ipv4 | awk '{print $4}' | cut -d/ -f1 | head -1 || true)
+
+if [ -z "$VM_IP" ]; then
+  echo '{"error": "Cannot get VM IP", "domain": "'"$DOMAIN_JSON"'", "quest_id": "'"$QUEST_JSON"'"}'
+  exit 0
+fi
+
+SSH_OPTS="-o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 -i $HOME/.ssh/sc_host_key"
+SSH="ssh $SSH_OPTS $LOGIN_USER@$VM_IP"
+
+case "$QUEST_ID" in
+  Q002)
+    # nginx config validity check
+    # `|| true` instead of `|| echo <default>`: grep -c and systemctl is-active
+    # print their answer AND exit non-zero, so the old fallback appended a
+    # second value ("0\n0", "inactive\ninactive") and broke the JSON.
+    NGINX_OK=$($SSH "nginx -t 2>&1 | grep -c 'syntax is ok'" 2>/dev/null || true)
+    NGINX_OK="$(int_or "$NGINX_OK" 0)"
+    NGINX_ACTIVE=$($SSH "systemctl is-active nginx 2>/dev/null" || true)
+    NGINX_ACTIVE="$(first_line "$NGINX_ACTIVE")"
+    NGINX_ACTIVE="$(json_escape "${NGINX_ACTIVE:-inactive}")"
+    if [ "$NGINX_OK" -gt 0 ]; then NGINX_VALID=true; else NGINX_VALID=false; fi
+    echo "{\"quest_id\": \"Q002\", \"nginx_config_valid\": $NGINX_VALID, \"nginx_state\": \"$NGINX_ACTIVE\"}"
+    ;;
+  Q003)
+    # Disk and logrotate check
+    LOG_SIZE=$($SSH "du -sm /var/log/nginx/access.log 2>/dev/null | awk '{print \$1}'" || true)
+    HAS_LOGROTATE=$($SSH "test -f /etc/logrotate.d/nginx && echo true || echo false" 2>/dev/null || true)
+    DISK_PCT=$($SSH "df -P /var/log | tail -1 | awk '{print \$5}'" | tr -d '%' || true)
+    # A missing log file or a failed SSH call yields an empty reply; normalise
+    # so every field is still a valid JSON number / boolean.
+    LOG_SIZE="$(int_or "$LOG_SIZE" 0)"
+    [ "$HAS_LOGROTATE" = true ] || HAS_LOGROTATE=false
+    DISK_PCT="$(int_or "$DISK_PCT" -1)"
+    echo "{\"quest_id\": \"Q003\", \"access_log_size_mb\": $LOG_SIZE, \"logrotate_configured\": $HAS_LOGROTATE, \"disk_usage_pct\": $DISK_PCT}"
+    ;;
+  Q004)
+    # Web root ownership check
+    OWNER=$($SSH "stat -c '%U:%G' /var/www/axiomworks 2>/dev/null" || true)
+    OWNER="$(first_line "$OWNER")"
+    OWNER="${OWNER:-unknown:unknown}"
+    if [ "$OWNER" = "player:player" ]; then OWNER_OK=true; else OWNER_OK=false; fi
+    echo "{\"quest_id\": \"Q004\", \"webroot_owner\": \"$(json_escape "$OWNER")\", \"correct\": $OWNER_OK}"
+    ;;
+  *)
+    # Generic state for unknown quests
+    echo "{\"quest_id\": \"$QUEST_JSON\", \"domain\": \"$DOMAIN_JSON\", \"note\": \"No specific probe for this quest — use generic validation rules\"}"
+    ;;
+esac