chore: bootstrap lean sysadmin-chronicles repo
Import the runnable game code, content, docs, scripts, and repo guidance while leaving local agent state, dependency installs, build output, and backup copies out of the published tree.
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
{
|
||||
"id": "Q001",
|
||||
"title": "Welcome Aboard",
|
||||
"tier": 1,
|
||||
"primary_vm": "workstation",
|
||||
"required_vms": ["workstation"],
|
||||
"ticket_id": "T001",
|
||||
"baseline_snapshot": "baseline.day-one",
|
||||
"summary": "The player's first task. Their SSH key was never added to the workstation's authorized_keys during provisioning. Marcus walks them through where things are. The fix is trivial but teaches navigation and file inspection.",
|
||||
"clue_fingerprint": {
|
||||
"description": "SSH key is missing from authorized_keys. The provisioning script ran but the key was never appended. The evidence is that ~/.ssh/authorized_keys is absent entirely and /var/log/auth.log shows 'Permission denied (publickey)'.",
|
||||
"evidence": [
|
||||
{ "type": "file_absent", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" },
|
||||
{ "type": "log_contains", "vm": "workstation", "path": "/var/log/auth.log", "contains": "Permission denied (publickey)" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "ssh-dir-exists",
|
||||
"description": "Ensure the .ssh directory exists with correct permissions",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "directory_exists", "vm": "workstation", "path": "/home/player/.ssh" },
|
||||
{ "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh", "mode": "0700" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "authorized-key-present",
|
||||
"description": "Add the provided public key to authorized_keys",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_exists", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" },
|
||||
{ "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "mode": "0600" },
|
||||
{ "type": "file_owner", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "user": "player", "group": "player" }
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "correct-setup",
|
||||
"label": "Correct Setup",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_exists", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" },
|
||||
{ "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "mode": "0600" },
|
||||
{ "type": "file_mode", "vm": "workstation", "path": "/home/player/.ssh", "mode": "0700" },
|
||||
{ "type": "file_owner", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "user": "player", "group": "player" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 1,
|
||||
"world_flags": ["player_ssh_configured"],
|
||||
"follow_up_dialogue": "marcus-Q001-complete-clean",
|
||||
"follow_up_ticket": "T002"
|
||||
},
|
||||
{
|
||||
"id": "permissive-setup",
|
||||
"label": "Permissive Setup",
|
||||
"priority": 50,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_exists", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys" },
|
||||
{ "type": "file_owner", "vm": "workstation", "path": "/home/player/.ssh/authorized_keys", "user": "player", "group": "player" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 0,
|
||||
"world_flags": ["player_ssh_configured", "player_loose_permissions"],
|
||||
"follow_up_dialogue": "marcus-Q001-complete-permissive",
|
||||
"follow_up_ticket": "T002",
|
||||
"_note": "Key is present and owned correctly but permissions are too open. SSH will still reject it. Marcus will mention this later."
|
||||
}
|
||||
],
|
||||
"pressure_profile": null,
|
||||
"blast_radius": [],
|
||||
"unlock_requirements": [],
|
||||
"narrative_phase": "normal_work",
|
||||
"linux_concepts": ["ssh-keygen", "authorized_keys", "file permissions"],
|
||||
"failure_conditions": ["SSH keys not added", "authorized_keys permissions too broad"],
|
||||
"behavior_impact": {
|
||||
"correct-setup": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 },
|
||||
"permissive-setup": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 1, "suspicion_delta": 1 },
|
||||
"default": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": null,
|
||||
"access_requirements": {
|
||||
"minimum_access": { "workstation": "basic_user" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["onboarding", "ssh", "permissions", "workstation"],
|
||||
"internal_notes": "This quest has no time pressure and no incidents. It is purely tutorial. Marcus is present and talkative. The only failure mode is giving up, which cannot happen mechanically."
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
{
|
||||
"id": "Q002",
|
||||
"title": "Syntax Error in Aisle Four",
|
||||
"tier": 1,
|
||||
"primary_vm": "web_server",
|
||||
"required_vms": ["workstation", "web_server"],
|
||||
"ticket_id": "T002",
|
||||
"baseline_snapshot": "baseline.clean",
|
||||
"summary": "Someone edited the nginx configuration and introduced a syntax error. Nginx will not start. The player needs to identify the broken config, fix it, and restore the service. The fault is confined to a single VM (web_server) and produces a single symptom. The evidence is clear in the nginx error output. The config error is a missing semicolon on the listen directive in /etc/nginx/sites-enabled/axiomworks.conf.",
|
||||
"clue_fingerprint": {
|
||||
"description": "nginx -t reveals the syntax error. systemctl status nginx shows the unit failed with an exit code. journalctl -u nginx points at the line. The error is on the listen directive in /etc/nginx/sites-enabled/axiomworks.conf — a missing semicolon.",
|
||||
"evidence": [
|
||||
{ "type": "log_contains", "vm": "web_server", "path": "/var/log/nginx/error.log", "contains": "invalid parameter" },
|
||||
{ "type": "service_state_is", "vm": "web_server", "service": "nginx", "state": "failed" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/nginx/sites-enabled/axiomworks.conf", "contains": "listen 80" }
|
||||
],
|
||||
"_note": "The baseline snapshot has listen 80 without semicolon. nginx -t will report exactly which line. The player does not need to know where the file is in advance — the error output tells them."
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "nginx-running",
|
||||
"description": "Nginx is active and serving requests",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" },
|
||||
{ "type": "port_listening", "vm": "web_server", "port": 80, "protocol": "tcp", "listening": true }
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "config-fixed-enabled",
|
||||
"label": "Fixed and Enabled",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" },
|
||||
{ "type": "service_enabled", "vm": "web_server", "service": "nginx", "enabled": true },
|
||||
{ "type": "port_listening", "vm": "web_server", "port": 80, "protocol": "tcp", "listening": true },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/nginx/sites-enabled/axiomworks.conf", "contains": "listen 80;" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 2,
|
||||
"world_flags": ["nginx_stable", "hermes_web_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q002-complete-clean",
|
||||
"follow_up_ticket": "T003"
|
||||
},
|
||||
{
|
||||
"id": "config-fixed-not-enabled",
|
||||
"label": "Running But Not Enabled",
|
||||
"priority": 60,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" },
|
||||
{ "type": "service_enabled", "vm": "web_server", "service": "nginx", "enabled": false },
|
||||
{ "type": "port_listening", "vm": "web_server", "port": 80, "protocol": "tcp", "listening": true }
|
||||
]
|
||||
},
|
||||
"trust_delta": 1,
|
||||
"world_flags": ["nginx_unstable", "hermes_web_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q002-complete-not-enabled",
|
||||
"follow_up_ticket": "T003",
|
||||
"_note": "Service is running now but will not survive a reboot. Marcus notes this. Sets up a later incident."
|
||||
}
|
||||
],
|
||||
"pressure_profile": "web_outage_escalation",
|
||||
"blast_radius": [],
|
||||
"_blast_radius_note": "I001 removed — I001 triggers only from Q003's quick-fix branch, not from anything in Q002. See OI-007.",
|
||||
"unlock_requirements": ["world_flag:player_ssh_configured"],
|
||||
"narrative_phase": "normal_work",
|
||||
"linux_concepts": ["nginx", "systemctl", "service configuration", "config syntax"],
|
||||
"failure_conditions": ["nginx not running", "service not enabled at boot"],
|
||||
"behavior_impact": {
|
||||
"default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": null,
|
||||
"access_requirements": {
|
||||
"minimum_access": { "web_server": "basic_user" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["services", "nginx", "config", "web_server"],
|
||||
"internal_notes": "This is the first quest on hermes. The player SSHes from ares. They need basic SSH connectivity to be established from Q001. The config file path and the error line number both appear in nginx -t output — no guessing required. The fun is in reading the error correctly and knowing that a failed config means the service was running fine before someone touched it."
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
{
|
||||
"id": "Q003",
|
||||
"title": "The Log That Ate the Disk",
|
||||
"tier": 1,
|
||||
"primary_vm": "web_server",
|
||||
"required_vms": ["workstation", "web_server"],
|
||||
"ticket_id": "T003",
|
||||
"baseline_snapshot": "baseline.clean",
|
||||
"summary": "logrotate is installed but the nginx config for it was accidentally deleted. The access log has grown to fill most of the disk. The player needs to identify the disk pressure, find the cause, clean up the log safely, and restore log rotation. A simple 'rm the log' solution works short-term but sets up a repeat. The proper fix restores the logrotate config.",
|
||||
"clue_fingerprint": {
|
||||
"description": "df -h shows / near capacity. du on /var/log/nginx shows an enormous access.log. /etc/logrotate.d/nginx is absent. The system logrotate timer ran last night and skipped nginx because the config was missing.",
|
||||
"evidence": [
|
||||
{ "type": "disk_usage_above", "vm": "web_server", "path": "/", "threshold_percent": 90 },
|
||||
{ "type": "file_size_above", "vm": "web_server", "path": "/var/log/nginx/access.log", "threshold_bytes": 2000000000 },
|
||||
{ "type": "file_absent", "vm": "web_server", "path": "/etc/logrotate.d/nginx" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "disk-pressure-resolved",
|
||||
"description": "Free disk space to below 70% utilization",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "disk_usage_below",
|
||||
"vm": "web_server",
|
||||
"path": "/",
|
||||
"threshold_percent": 70
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "nginx-still-running",
|
||||
"description": "Nginx must remain operational throughout",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "service_state",
|
||||
"vm": "web_server",
|
||||
"service": "nginx",
|
||||
"state": "active"
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "logrotate-restored",
|
||||
"label": "Proper Fix — Rotation Restored",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "disk_usage_below", "vm": "web_server", "path": "/", "threshold_percent": 70 },
|
||||
{ "type": "file_exists", "vm": "web_server", "path": "/etc/logrotate.d/nginx" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/logrotate.d/nginx", "contains": "rotate" },
|
||||
{ "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 3,
|
||||
"world_flags": ["hermes_logrotate_healthy", "hermes_disk_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q003-complete-clean",
|
||||
"follow_up_ticket": "T004"
|
||||
},
|
||||
{
|
||||
"id": "log-truncated-only",
|
||||
"label": "Quick Fix — Log Cleared, No Rotation",
|
||||
"priority": 50,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "disk_usage_below", "vm": "web_server", "path": "/", "threshold_percent": 70 },
|
||||
{ "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 0,
|
||||
"world_flags": ["hermes_disk_healthy", "hermes_log_pressure_pending"],
|
||||
"follow_up_incident": "I001",
|
||||
"follow_up_dialogue": "marcus-Q003-complete-norotate",
|
||||
"follow_up_ticket": "T004",
|
||||
"_note": "Disk is clear but rotation is not restored. I001 triggers in a few in-game hours and fills the disk again."
|
||||
},
|
||||
{
|
||||
"id": "nginx-killed",
|
||||
"label": "Collateral — Nginx Down",
|
||||
"priority": 200,
|
||||
"validation": {
|
||||
"type": "service_state",
|
||||
"vm": "web_server",
|
||||
"service": "nginx",
|
||||
"state": "inactive"
|
||||
},
|
||||
"trust_delta": -3,
|
||||
"world_flags": ["hermes_web_down", "hermes_disk_healthy"],
|
||||
"follow_up_dialogue": "sarah-Q003-angry",
|
||||
"follow_up_dialogues": ["marcus-Q003-complete-down"],
|
||||
"_note": "Player freed disk by stopping nginx (or deleted the wrong thing). Disk may be clear but the site is down again. Negative branch — should be rare but possible."
|
||||
}
|
||||
],
|
||||
"pressure_profile": "disk_growth_slow",
|
||||
"blast_radius": ["I001"],
|
||||
"unlock_requirements": ["world_flag:player_ssh_configured"],
|
||||
"narrative_phase": "normal_work",
|
||||
"linux_concepts": ["logrotate", "disk usage", "df", "du"],
|
||||
"failure_conditions": ["disk still above threshold", "logrotate not restored", "nginx not running"],
|
||||
"behavior_impact": {
|
||||
"default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": null,
|
||||
"access_requirements": {
|
||||
"minimum_access": { "web_server": "sudo" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["disk", "logs", "logrotate", "nginx", "web_server"],
|
||||
"internal_notes": "This quest teaches df, du, and logrotate. The clue trail is natural — disk alert, find the big file, notice logrotate is not configured. A good player restores the logrotate config from the package default or writes a correct one. A fast player just deletes the log. Both work short-term. The incident I001 makes the fast solution a problem later."
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
{
|
||||
"id": "Q004",
|
||||
"title": "Not My Files",
|
||||
"tier": 1,
|
||||
"primary_vm": "web_server",
|
||||
"required_vms": ["workstation", "web_server"],
|
||||
"ticket_id": "T004",
|
||||
"baseline_snapshot": "baseline.clean",
|
||||
"summary": "A deployment script runs as www-data to copy files into /var/www/axiomworks. Someone ran the script manually as root and now the files are owned by root. The www-data process cannot overwrite them on the next deploy. Sarah is reporting that her last deployment silently failed to apply.",
|
||||
"clue_fingerprint": {
|
||||
"description": "The deploy script lives at /opt/deploy/deploy.sh and runs as www-data via a systemd service. ls -la on /var/www/axiomworks shows files owned by root:root instead of www-data:www-data. The deploy service log shows permission denied errors.",
|
||||
"evidence": [
|
||||
{ "type": "log_contains", "vm": "web_server", "path": "/var/log/deploy.log", "contains": "Permission denied" },
|
||||
{ "type": "file_owner_is_not", "vm": "web_server", "path": "/var/www/axiomworks", "expected_user": "www-data" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/opt/deploy/deploy.sh", "contains": "www-data" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "ownership-corrected",
|
||||
"description": "Correct ownership of the web root",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "file_owner",
|
||||
"vm": "web_server",
|
||||
"path": "/var/www/axiomworks",
|
||||
"user": "www-data",
|
||||
"group": "www-data"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "deploy-can-run",
|
||||
"description": "The deploy service can execute without errors",
|
||||
"check_mode": "explicit",
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks", "user": "www-data", "group": "www-data" },
|
||||
{ "type": "service_state", "vm": "web_server", "service": "nginx", "state": "active" }
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "recursive-chown",
|
||||
"label": "Full Recursive Fix",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks", "user": "www-data", "group": "www-data" },
|
||||
{ "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks/index.html", "user": "www-data", "group": "www-data" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 2,
|
||||
"world_flags": ["hermes_deploy_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q004-complete-clean",
|
||||
"follow_up_dialogues": ["sarah-Q004-complete-clean"]
|
||||
},
|
||||
{
|
||||
"id": "partial-chown",
|
||||
"label": "Partial Fix — Top Directory Only",
|
||||
"priority": 40,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks", "user": "www-data", "group": "www-data" },
|
||||
{ "type": "file_owner", "vm": "web_server", "path": "/var/www/axiomworks/index.html", "user": "root", "group": "root" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 0,
|
||||
"world_flags": ["hermes_deploy_partial"],
|
||||
"follow_up_dialogue": "marcus-Q004-complete-partial",
|
||||
"follow_up_dialogues": ["sarah-Q004-complete-partial"],
|
||||
"_note": "chown without -R. Top dir is correct but child files are still root-owned. Deploy will still fail on individual files."
|
||||
}
|
||||
],
|
||||
"pressure_profile": null,
|
||||
"blast_radius": [],
|
||||
"unlock_requirements": ["world_flag:player_ssh_configured"],
|
||||
"narrative_phase": "normal_work",
|
||||
"linux_concepts": ["chown", "file ownership", "deploy scripts"],
|
||||
"failure_conditions": ["web root ownership not fixed", "deploy service still failing"],
|
||||
"behavior_impact": {
|
||||
"default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": null,
|
||||
"access_requirements": {
|
||||
"minimum_access": { "web_server": "sudo" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["permissions", "ownership", "deploy", "web_server"],
|
||||
"internal_notes": "Teaches chown -R and the importance of recursive operations. The two solution branches are differentiated by whether the player used -R. The explicit check_mode on the second objective means the player can trigger a test deploy to confirm it works."
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
{
|
||||
"id": "Q005",
|
||||
"title": "The Midnight Visitor",
|
||||
"tier": 2,
|
||||
"primary_vm": "web_server",
|
||||
"required_vms": ["workstation", "web_server"],
|
||||
"ticket_id": "T005",
|
||||
"baseline_snapshot": "baseline.post-q004",
|
||||
"summary": "A cron job that runs nightly database backups is executing as root instead of the dedicated backup user. It works, but it's leaving root-owned files in /var/backups/db/ that the backup user can't manage. The symptom is that the backup retention script — which runs as the backup user — fails to delete old backups, and the backup directory is filling up. Dave notices the disk warning. The root cause is a misconfigured crontab entry in /etc/cron.d/db-backup that specifies no user field (defaults to root) instead of the backup user.",
|
||||
"clue_fingerprint": {
|
||||
"description": "Disk is filling in /var/backups/db/. Files in that directory are owned by root. The backup service log shows permission denied when trying to delete old files. /etc/cron.d/db-backup has no user field on the job line — it defaults to root. /etc/passwd shows a backup-agent user exists. The correct entry should specify backup-agent as the executing user.",
|
||||
"evidence": [
|
||||
{ "type": "disk_usage_above", "vm": "web_server", "path": "/var/backups", "threshold_percent": 80 },
|
||||
{ "type": "file_owner_is_not", "vm": "web_server", "path": "/var/backups/db", "expected_user": "backup-agent" },
|
||||
{ "type": "log_contains", "vm": "web_server", "path": "/var/log/backup-agent.log", "contains": "Permission denied" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "db-backup.sh" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "crontab-correct-user",
|
||||
"description": "The cron job runs as backup-agent, not root",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "file_contains",
|
||||
"vm": "web_server",
|
||||
"path": "/etc/cron.d/db-backup",
|
||||
"contains": "backup-agent"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "backup-dir-ownership",
|
||||
"description": "Existing backup files are owned by backup-agent",
|
||||
"check_mode": "explicit",
|
||||
"validation": {
|
||||
"type": "file_owner",
|
||||
"vm": "web_server",
|
||||
"path": "/var/backups/db",
|
||||
"user": "backup-agent",
|
||||
"group": "backup-agent"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "disk-pressure-cleared",
|
||||
"description": "Backup directory is below disk threshold",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "disk_usage_below",
|
||||
"vm": "web_server",
|
||||
"path": "/var/backups",
|
||||
"threshold_percent": 70
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "full-fix",
|
||||
"label": "Full Fix — User Corrected and Ownership Cleaned",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" },
|
||||
{ "type": "file_owner", "vm": "web_server", "path": "/var/backups/db", "user": "backup-agent", "group": "backup-agent" },
|
||||
{ "type": "disk_usage_below", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 }
|
||||
]
|
||||
},
|
||||
"trust_delta": 3,
|
||||
"world_flags": ["hermes_backup_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q005-complete-clean"
|
||||
},
|
||||
{
|
||||
"id": "cron-fixed-only",
|
||||
"label": "Partial — Cron Fixed, Old Files Not Cleaned",
|
||||
"priority": 50,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" },
|
||||
{ "type": "disk_usage_above", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 }
|
||||
]
|
||||
},
|
||||
"trust_delta": 1,
|
||||
"world_flags": ["hermes_backup_partial"],
|
||||
"follow_up_incident": "I002",
|
||||
"follow_up_dialogue": "marcus-Q005-complete-partial"
|
||||
},
|
||||
{
|
||||
"id": "disk-cleared-only",
|
||||
"label": "Wrong Fix — Disk Cleared, Root Still Running Job",
|
||||
"priority": 30,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "disk_usage_below", "vm": "web_server", "path": "/var/backups", "threshold_percent": 70 },
|
||||
{ "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/cron.d/db-backup", "contains": "backup-agent" } }
|
||||
]
|
||||
},
|
||||
"trust_delta": -1,
|
||||
"world_flags": ["hermes_backup_root_running", "hermes_disk_healthy"],
|
||||
"follow_up_incident": "I002",
|
||||
"follow_up_dialogue": "marcus-Q005-complete-wrong"
|
||||
}
|
||||
],
|
||||
"pressure_profile": "disk_growth_slow",
|
||||
"blast_radius": ["I002"],
|
||||
"unlock_requirements": ["world_flag:player_ssh_configured"],
|
||||
"narrative_phase": "unease",
|
||||
"linux_concepts": ["cron", "crontab user field", "backup management", "disk usage"],
|
||||
"failure_conditions": ["cron still running as root", "disk not cleared", "backup directory ownership not fixed"],
|
||||
"behavior_impact": {
|
||||
"full-fix": { "curiosity_delta": 1, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 },
|
||||
"cron-fixed-only": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 },
|
||||
"disk-cleared-only": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 1, "suspicion_delta": 1 },
|
||||
"default": { "curiosity_delta": 0, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": {
|
||||
"id": "q005_backup_agent_history",
|
||||
"description": "backup-agent home directory contains a .bash_history with unusual commands that predate the current cron misconfiguration.",
|
||||
"discovery_method": "Player reads /home/backup-agent/.bash_history",
|
||||
"significance": "Dale configured this cron job. The history shows it was changed deliberately, not by accident."
|
||||
},
|
||||
"access_requirements": {
|
||||
"minimum_access": { "web_server": "sudo" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["cron", "permissions", "backup", "disk", "web_server"],
|
||||
"internal_notes": "This is the first quest where the symptom (disk full) is the same as Q003 but the cause is completely different. Players who jump to 'find the big log' will find the backup directory instead and need to dig further. The cron user field omission is a real and common mistake. The three branches reward finding the root cause vs just clearing the symptom."
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"id": "Q006",
|
||||
"title": "Time Is A Flat Circle",
|
||||
"tier": 2,
|
||||
"primary_vm": "build_machine",
|
||||
"required_vms": ["workstation", "build_machine"],
|
||||
"ticket_id": "T006",
|
||||
"baseline_snapshot": "baseline.clean",
|
||||
"summary": "The build machine (vulcan, Arch Linux) has clock drift. NTP is not running because the service was disabled during a noisy audit period and never re-enabled. The clock is 40 minutes behind. As a result, pacman signature verification is failing — GPG signature timestamps appear to be in the future, which pacman treats as invalid. The player gets a ticket saying builds are broken and package installs fail. They need to diagnose the actual cause (clock drift), fix it (enable and start systemd-timesyncd or another time-sync daemon such as ntpd or chronyd), and then refresh the keyring.",
|
||||
"clue_fingerprint": {
|
||||
"description": "pacman -Syu fails with signature errors. gpg --verify on a downloaded package shows the signature timestamp is in the future relative to local time. timedatectl shows NTP is inactive and the local clock is significantly behind. journalctl -u systemd-timesyncd shows the service was stopped and disabled.",
|
||||
"evidence": [
|
||||
{ "type": "service_state_is", "vm": "build_machine", "service": "systemd-timesyncd", "state": "inactive" },
|
||||
{ "type": "service_enabled_is", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": false },
|
||||
{ "type": "log_contains", "vm": "build_machine", "path": "/var/log/pacman.log", "contains": "invalid or corrupted package (PGP signature)" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "ntp-running",
|
||||
"description": "Time synchronization is active",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" },
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "ntpd", "state": "active" },
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "chronyd", "state": "active" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "ntp-enabled",
|
||||
"description": "Time synchronization is enabled on boot",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "service_enabled", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": true },
|
||||
{ "type": "service_enabled", "vm": "build_machine", "service": "ntpd", "enabled": true },
|
||||
{ "type": "service_enabled", "vm": "build_machine", "service": "chronyd", "enabled": true }
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "package-installs-work",
|
||||
"description": "Package manager can install without signature errors",
|
||||
"check_mode": "explicit",
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" },
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "ntpd", "state": "active" }
|
||||
]
|
||||
},
|
||||
{ "type": "package_installed", "vm": "build_machine", "package": "archlinux-keyring", "installed": true }
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "timesyncd-enabled-keyring-refreshed",
|
||||
"label": "Full Fix — NTP Enabled and Keyring Refreshed",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" },
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "ntpd", "state": "active" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "service_enabled", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": true },
|
||||
{ "type": "service_enabled", "vm": "build_machine", "service": "ntpd", "enabled": true }
|
||||
]
|
||||
},
|
||||
{ "type": "package_installed", "vm": "build_machine", "package": "archlinux-keyring", "installed": true }
|
||||
]
|
||||
},
|
||||
"trust_delta": 3,
|
||||
"world_flags": ["vulcan_ntp_healthy", "vulcan_builds_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q006-complete-clean"
|
||||
},
|
||||
{
|
||||
"id": "ntp-running-not-enabled",
|
||||
"label": "Running But Not Enabled at Boot",
|
||||
"priority": 50,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "build_machine", "service": "systemd-timesyncd", "state": "active" },
|
||||
{ "type": "service_enabled", "vm": "build_machine", "service": "systemd-timesyncd", "enabled": false }
|
||||
]
|
||||
},
|
||||
"trust_delta": 1,
|
||||
"world_flags": ["vulcan_ntp_fragile", "vulcan_builds_healthy"],
|
||||
"follow_up_dialogue": "marcus-Q006-complete-fragile"
|
||||
}
|
||||
],
|
||||
"pressure_profile": null,
|
||||
"blast_radius": [],
|
||||
"unlock_requirements": ["world_flag:player_ssh_configured"],
|
||||
"narrative_phase": "unease",
|
||||
"linux_concepts": ["NTP", "systemd-timesyncd", "Arch Linux", "pacman", "package keyring"],
|
||||
"failure_conditions": ["NTP not enabled at boot", "package manager still failing signature checks"],
|
||||
"behavior_impact": {
|
||||
"default": { "curiosity_delta": 0, "obedience_delta": 1, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": null,
|
||||
"access_requirements": {
|
||||
"minimum_access": { "build_machine": "sudo" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["ntp", "time", "pacman", "arch", "build_machine", "services"],
|
||||
"internal_notes": "First quest on vulcan. Introduces Arch Linux and pacman. The clock drift → GPG failure chain is real and genuinely confusing the first time you encounter it. The use of `or` on the NTP objective allows systemd-timesyncd, ntpd, or chronyd — any of them fixes the problem. The explicit check on package installs requires the player to confirm things work, not just that NTP is running."
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
{
|
||||
"id": "Q007",
|
||||
"title": "Security Theater",
|
||||
"tier": 2,
|
||||
"primary_vm": "web_server",
|
||||
"required_vms": ["workstation", "web_server"],
|
||||
"ticket_id": "T007",
|
||||
"baseline_snapshot": "baseline.post-q004",
|
||||
"summary": "Someone ran a hardening script on hermes that set AllowUsers in sshd_config to only allow a single user: deploy-bot. Now the web-admin group cannot SSH in. Priya filed the ticket after her access was blocked mid-incident response. The AllowUsers directive is correct in intent (locking down SSH) but was applied too aggressively — sshd_config needs to either admit the web-admin group via AllowGroups or list the relevant users in AllowUsers. The player must fix sshd_config and reload sshd without breaking service continuity. Complication: the player must not lock themselves out during the fix, and they must validate that the specific users Priya listed can still SSH.",
|
||||
"clue_fingerprint": {
|
||||
"description": "SSH connection attempts from web-admin accounts fail with 'Permission denied'. sshd_config contains 'AllowUsers deploy-bot' with no other entries. /etc/group shows web-admin group members. The hardening script is in /opt/security/harden-ssh.sh and its log shows it ran last night.",
|
||||
"evidence": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowUsers deploy-bot" },
|
||||
{ "type": "log_contains", "vm": "web_server", "path": "/var/log/auth.log", "contains": "User priya from" },
|
||||
{ "type": "file_exists", "vm": "web_server", "path": "/opt/security/harden-ssh.sh" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "sshd-config-corrected",
|
||||
"description": "sshd_config allows the web-admin group or its members",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups web-admin" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "priya" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "sshd-still-running",
|
||||
"description": "sshd remains active after config change",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "service_state",
|
||||
"vm": "web_server",
|
||||
"service": "sshd",
|
||||
"state": "active"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "deploy-bot-still-allowed",
|
||||
"description": "deploy-bot access is preserved",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "or",
|
||||
"rules": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "deploy-bot" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups" }
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "group-based-config",
|
||||
"label": "Proper Fix — Group-Based AllowGroups",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups web-admin" },
|
||||
{ "type": "service_state", "vm": "web_server", "service": "sshd", "state": "active" },
|
||||
{ "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowUsers" } }
|
||||
]
|
||||
},
|
||||
"trust_delta": 4,
|
||||
"world_flags": ["hermes_ssh_hardened_correct", "priya_access_restored"],
|
||||
"follow_up_dialogue": "priya-Q007-complete-clean",
|
||||
"follow_up_dialogues": ["marcus-Q007-complete-clean"],
|
||||
"_note": "Best fix. Switches from AllowUsers (fragile, breaks with new users) to AllowGroups (durable, group membership handles access). Trust bump is higher because this is the approach that will scale."
|
||||
},
|
||||
{
|
||||
"id": "allowusers-expanded",
|
||||
"label": "Acceptable Fix — AllowUsers Expanded",
|
||||
"priority": 60,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "priya" },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "deploy-bot" },
|
||||
{ "type": "service_state", "vm": "web_server", "service": "sshd", "state": "active" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 1,
|
||||
"world_flags": ["hermes_ssh_allowusers_fragile", "priya_access_restored"],
|
||||
"follow_up_dialogue": "priya-Q007-complete-fragile",
|
||||
"follow_up_dialogues": ["marcus-Q007-complete-fragile"],
|
||||
"_note": "Access is restored but using AllowUsers. Every future new user will need to be manually added. Marcus or Priya will note this later."
|
||||
},
|
||||
{
|
||||
"id": "hardening-removed",
|
||||
"label": "Regression — SSH Restriction Removed Entirely",
|
||||
"priority": 200,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowUsers" } },
|
||||
{ "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/ssh/sshd_config", "contains": "AllowGroups" } },
|
||||
{ "type": "service_state", "vm": "web_server", "service": "sshd", "state": "active" }
|
||||
]
|
||||
},
|
||||
"trust_delta": -3,
|
||||
"world_flags": ["hermes_ssh_unrestricted", "priya_access_restored"],
|
||||
"follow_up_dialogue": "priya-Q007-complete-regression",
|
||||
"follow_up_dialogues": ["marcus-Q007-complete-regression"],
|
||||
"_note": "Player fixed access by removing all restrictions. Priya's access works but the hardening is gone. This is the worst valid outcome — Priya is back in but so is everyone else."
|
||||
}
|
||||
],
|
||||
"pressure_profile": "access_blocked_escalation",
|
||||
"blast_radius": [],
|
||||
"unlock_requirements": ["world_flag:player_ssh_configured"],
|
||||
"narrative_phase": "suspicion",
|
||||
"linux_concepts": ["sshd_config", "AllowGroups", "AllowUsers", "SSH access hardening"],
|
||||
"failure_conditions": ["Priya still locked out", "SSH restrictions removed entirely"],
|
||||
"behavior_impact": {
|
||||
"default": { "curiosity_delta": 1, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": {
|
||||
"id": "q007_dale_ssh_key",
|
||||
"description": "An SSH key in /root/.ssh/authorized_keys on hermes does not belong to any current staff member. Its fingerprint matches no documented key.",
|
||||
"discovery_method": "Player reads /root/.ssh/authorized_keys on hermes",
|
||||
"significance": "Dale had root SSH access to hermes that was never formally revoked."
|
||||
},
|
||||
"access_requirements": {
|
||||
"minimum_access": { "web_server": "sudo" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": ["sudo:web_server:sshd"]
|
||||
},
|
||||
"tags": ["ssh", "security", "hardening", "sshd", "web_server"],
|
||||
"internal_notes": "This quest introduces Priya as a character and establishes that the player's fixes can have security implications, not just operational ones. The 'regression' branch should feel bad — Priya's grateful but Marcus or a later audit will surface it. The proper fix (AllowGroups) tests whether the player knows the difference between AllowUsers and AllowGroups. The sshd reload vs restart distinction matters here — a player who restarts sshd takes the daemon down briefly and, depending on how the service unit is configured, may drop existing connections, which is more disruptive than a reload."
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
{
|
||||
"id": "Q008",
|
||||
"title": "Bad Upstream",
|
||||
"tier": 2,
|
||||
"primary_vm": "web_server",
|
||||
"required_vms": ["workstation", "web_server", "build_machine"],
|
||||
"ticket_id": "T008",
|
||||
"baseline_snapshot": "baseline.post-q006",
|
||||
"summary": "The internal package repository on vulcan is serving a broken version of the axiomworks-app package. A deploy on hermes pulled it in through the internal apt repo and the app is now crashing on startup. The player needs to identify that the problem is in the package (not the app config), trace it back to vulcan, find the broken build artifact, and either roll back the package on hermes or fix the build and republish. This is the first multi-VM quest — investigation crosses from hermes to vulcan.",
|
||||
"clue_fingerprint": {
|
||||
"description": "The app service (axiomworks-app) on hermes is failing. journalctl shows it exits immediately with a non-zero code. The package was updated yesterday via the internal repo at http://vulcan.internal/repo. On vulcan, /srv/repo/axiomworks-app_2.1.1-1_amd64.deb is present but was built from a broken source tarball. The previous version 2.1.0-1 is also in /srv/repo/ and works correctly.",
|
||||
"evidence": [
|
||||
{ "type": "service_state_is", "vm": "web_server", "service": "axiomworks-app", "state": "failed" },
|
||||
{ "type": "log_contains", "vm": "web_server", "path": "/var/log/axiomworks-app.log", "contains": "Exec format error" },
|
||||
{ "type": "file_exists", "vm": "build_machine", "path": "/srv/repo/axiomworks-app_2.1.0-1_amd64.deb" },
|
||||
{ "type": "file_exists", "vm": "build_machine", "path": "/srv/repo/axiomworks-app_2.1.1-1_amd64.deb" }
|
||||
]
|
||||
},
|
||||
"objectives": [
|
||||
{
|
||||
"id": "app-running",
|
||||
"description": "axiomworks-app is active and running",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "service_state",
|
||||
"vm": "web_server",
|
||||
"service": "axiomworks-app",
|
||||
"state": "active"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "app-port-listening",
|
||||
"description": "App is accepting connections on expected port",
|
||||
"check_mode": "passive",
|
||||
"validation": {
|
||||
"type": "port_listening",
|
||||
"vm": "web_server",
|
||||
"port": 8080,
|
||||
"protocol": "tcp",
|
||||
"listening": true
|
||||
}
|
||||
}
|
||||
],
|
||||
"solution_branches": [
|
||||
{
|
||||
"id": "rollback-and-pin",
|
||||
"label": "Rollback to 2.1.0 and Pin Version",
|
||||
"priority": 100,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "web_server", "service": "axiomworks-app", "state": "active" },
|
||||
{ "type": "port_listening", "vm": "web_server", "port": 8080, "protocol": "tcp", "listening": true },
|
||||
{ "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.0", "installed": true },
|
||||
{ "type": "file_contains", "vm": "web_server", "path": "/etc/apt/preferences.d/axiomworks-app", "contains": "Pin: version 2.1.0" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 3,
|
||||
"world_flags": ["hermes_app_running", "hermes_app_pinned_2-1-0", "vulcan_bad_build_known"],
|
||||
"follow_up_dialogue": "marcus-Q008-complete-rollback",
|
||||
"follow_up_dialogues": ["sarah-Q008-complete-pinned"],
|
||||
"_note": "Distinguished from rollback-only by an apt pin on hermes. The player must create an apt preferences file after rolling back."
|
||||
},
|
||||
{
|
||||
"id": "rebuild-and-redeploy",
|
||||
"label": "Rebuild on Vulcan and Redeploy",
|
||||
"priority": 80,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "web_server", "service": "axiomworks-app", "state": "active" },
|
||||
{ "type": "port_listening", "vm": "web_server", "port": 8080, "protocol": "tcp", "listening": true },
|
||||
{ "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.1", "installed": true },
|
||||
{ "type": "file_exists", "vm": "build_machine", "path": "/srv/repo/axiomworks-app_2.1.1-2_amd64.deb" }
|
||||
]
|
||||
},
|
||||
"trust_delta": 4,
|
||||
"world_flags": ["hermes_app_running", "vulcan_build_fixed"],
|
||||
"follow_up_dialogue": "marcus-Q008-complete-rebuild",
|
||||
"follow_up_dialogues": ["sarah-Q008-complete-rebuilt"],
|
||||
"_note": "Player fixed the build on vulcan and redeployed the corrected 2.1.1 package. This is the most thorough fix and earns the highest trust, but it is harder and requires understanding both machines. The rebuilt .deb increments the Debian revision from -1 to -2."
|
||||
},
|
||||
{
|
||||
"id": "rollback-only",
|
||||
"label": "Rollback Only — Version Not Pinned",
|
||||
"priority": 60,
|
||||
"validation": {
|
||||
"type": "and",
|
||||
"rules": [
|
||||
{ "type": "service_state", "vm": "web_server", "service": "axiomworks-app", "state": "active" },
|
||||
{ "type": "port_listening", "vm": "web_server", "port": 8080, "protocol": "tcp", "listening": true },
|
||||
{ "type": "package_installed", "vm": "web_server", "package": "axiomworks-app=2.1.0", "installed": true },
|
||||
{ "type": "not", "rule": { "type": "file_contains", "vm": "web_server", "path": "/etc/apt/preferences.d/axiomworks-app", "contains": "Pin: version 2.1.0" } }
|
||||
]
|
||||
},
|
||||
"trust_delta": 1,
|
||||
"world_flags": ["hermes_app_running", "vulcan_bad_build_known"],
|
||||
"follow_up_incident": "I003",
|
||||
"follow_up_dialogue": "marcus-Q008-complete-unpinned",
|
||||
"follow_up_dialogues": ["sarah-Q008-complete-unpinned"],
|
||||
"_note": "App is running on 2.1.0 but not pinned. No apt preferences pin exists on hermes. The next apt upgrade will pull 2.1.1 back in. I003 re-breaks the app on the next update cycle. The not-rule on the pin file ensures this branch cannot match when rollback-and-pin already matches."
|
||||
}
|
||||
],
|
||||
"pressure_profile": "app_outage_escalation",
|
||||
"blast_radius": ["I003"],
|
||||
"unlock_requirements": [
|
||||
"world_flag:player_ssh_configured",
|
||||
"world_flag:vulcan_ntp_healthy"
|
||||
],
|
||||
"narrative_phase": "suspicion",
|
||||
"linux_concepts": ["apt", "package pinning", "apt preferences", "internal package mirror", "build pipeline"],
|
||||
"failure_conditions": ["axiomworks-app still broken", "bad package not traced to build machine"],
|
||||
"behavior_impact": {
|
||||
"default": { "curiosity_delta": 1, "obedience_delta": 0, "risk_delta": 0, "suspicion_delta": 0 }
|
||||
},
|
||||
"hidden_hook": {
|
||||
"id": "q008_build_log_anomaly",
|
||||
"description": "vulcan's build log for 2.1.1 shows it was triggered by a manual invocation, not the automated pipeline, at 02:14.",
|
||||
"discovery_method": "Player reads /var/log/build-pipeline.log on vulcan and notices the timestamp and manual trigger field",
|
||||
"significance": "The bad build was triggered manually. Someone made the broken build, and it was not the pipeline."
|
||||
},
|
||||
"access_requirements": {
|
||||
"minimum_access": { "build_machine": "sudo", "web_server": "sudo" },
|
||||
"requires_root": false,
|
||||
"temporary_grants_allowed": []
|
||||
},
|
||||
"tags": ["packages", "builds", "multi-vm", "web_server", "build_machine", "deploy"],
|
||||
"internal_notes": "This is the first quest that requires the player to move between two target VMs — hermes and vulcan. The symptom is on hermes but the root cause is on vulcan. Players who don't follow the package trail will spend a long time on hermes looking for a config problem that isn't there. The rebuild branch requires understanding the package build enough to fix the source input and republish a corrected .deb — it's hard but rewarding. The rollback branches are now correctly differentiated: rollback-and-pin requires an apt preferences pin on hermes, and rollback-only explicitly requires its absence via a not-rule."
|
||||
}
|
||||
Reference in New Issue
Block a user