Add diagnostics logging (RAM), UI viewer, and toggles

This commit is contained in:
Aaron
2025-12-13 11:16:57 -05:00
parent 28acb94a6f
commit 48be7a1c61
6 changed files with 386 additions and 9 deletions

View File

@@ -8,6 +8,8 @@ import urllib.parse
import fcntl
from functools import partial
import json as jsonlib
import io
from collections import deque
HOST = "127.0.0.1"
PORT = 4000
@@ -49,6 +51,99 @@ API_PATH = pathlib.Path("/usr/local/bin/pikit-api.py")
BACKUP_ROOT = pathlib.Path("/var/backups/pikit")
TMP_ROOT = pathlib.Path("/var/tmp/pikit-update")
# Diagnostics logging (RAM-only)
DIAG_STATE_FILE = pathlib.Path("/dev/shm/pikit-diag.state") if pathlib.Path("/dev/shm").exists() else pathlib.Path("/tmp/pikit-diag.state")
DIAG_LOG_FILE = pathlib.Path("/dev/shm/pikit-diag.log") if pathlib.Path("/dev/shm").exists() else pathlib.Path("/tmp/pikit-diag.log")
DIAG_MAX_BYTES = 1_048_576 # 1 MiB cap in RAM
DIAG_MAX_ENTRY_CHARS = 2048
DIAG_DEFAULT_STATE = {"enabled": False, "level": "normal"} # level: normal|debug
_diag_state = None
def _load_diag_state():
global _diag_state
if _diag_state is not None:
return _diag_state
try:
if DIAG_STATE_FILE.exists():
_diag_state = json.loads(DIAG_STATE_FILE.read_text())
return _diag_state
except Exception:
pass
_diag_state = DIAG_DEFAULT_STATE.copy()
return _diag_state
def _save_diag_state(enabled=None, level=None):
state = _load_diag_state()
if enabled is not None:
state["enabled"] = bool(enabled)
if level in ("normal", "debug"):
state["level"] = level
try:
DIAG_STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
DIAG_STATE_FILE.write_text(json.dumps(state))
except Exception:
pass
return state
def diag_log(level: str, message: str, meta: dict | None = None):
"""
Append a diagnostic log line to RAM-backed file.
Skips when disabled or when debug level is off.
"""
state = _load_diag_state()
if not state.get("enabled"):
return
if level == "debug" and state.get("level") != "debug":
return
try:
ts = datetime.datetime.utcnow().isoformat() + "Z"
entry = {"ts": ts, "level": level, "msg": message}
if meta:
entry["meta"] = meta
line = json.dumps(entry, separators=(",", ":"))
if len(line) > DIAG_MAX_ENTRY_CHARS:
entry.pop("meta", None)
entry["msg"] = (message or "")[: DIAG_MAX_ENTRY_CHARS - 40] + ""
line = json.dumps(entry, separators=(",", ":"))
line_bytes = (line + "\n").encode()
DIAG_LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
with DIAG_LOG_FILE.open("ab") as f:
f.write(line_bytes)
# Trim file if above cap
if DIAG_LOG_FILE.stat().st_size > DIAG_MAX_BYTES:
with DIAG_LOG_FILE.open("rb") as f:
f.seek(-DIAG_MAX_BYTES, io.SEEK_END)
tail = f.read()
# drop partial first line to keep JSON lines clean
if b"\n" in tail:
tail = tail.split(b"\n", 1)[1]
with DIAG_LOG_FILE.open("wb") as f:
f.write(tail)
except Exception:
# Never break caller
pass
def diag_read(limit=500):
"""Return latest log entries (dicts), newest first."""
if not DIAG_LOG_FILE.exists():
return []
try:
data = DIAG_LOG_FILE.read_bytes()
except Exception:
return []
lines = data.splitlines()[-limit:]
out = []
for line in lines:
try:
out.append(json.loads(line.decode("utf-8", errors="ignore")))
except Exception:
continue
return out[::-1]
def ensure_dir(path: pathlib.Path):
path.mkdir(parents=True, exist_ok=True)
@@ -80,12 +175,17 @@ def normalize_path(path: str | None) -> str:
def dbg(msg):
if not DEBUG_FLAG:
return
API_LOG.parent.mkdir(parents=True, exist_ok=True)
ts = datetime.datetime.utcnow().isoformat()
with API_LOG.open("a") as f:
f.write(f"[{ts}] {msg}\n")
# Legacy debug file logging (when /boot/pikit-debug exists)
if DEBUG_FLAG:
API_LOG.parent.mkdir(parents=True, exist_ok=True)
ts = datetime.datetime.utcnow().isoformat()
with API_LOG.open("a") as f:
f.write(f"[{ts}] {msg}\n")
# Mirror into diagnostics if enabled
try:
diag_log("debug", msg)
except Exception:
pass
def set_ssh_password_auth(allow: bool):
@@ -677,6 +777,7 @@ def check_for_update():
state["message"] = "Another update is running"
save_update_state(state)
return state
diag_log("info", "Update check started", {"channel": state.get("channel") or "dev"})
state["in_progress"] = True
state["progress"] = "Checking for updates…"
save_update_state(state)
@@ -696,10 +797,12 @@ def check_for_update():
else:
state["status"] = "up_to_date"
state["message"] = "Up to date"
diag_log("info", "Update check finished", {"status": state["status"], "latest": str(latest)})
except Exception as e:
state["status"] = "up_to_date"
state["message"] = f"Could not reach update server: {e}"
state["last_check"] = datetime.datetime.utcnow().isoformat() + "Z"
diag_log("error", "Update check failed", {"error": str(e)})
finally:
state["in_progress"] = False
state["progress"] = None
@@ -729,6 +832,7 @@ def apply_update_stub():
state["status"] = "in_progress"
state["progress"] = "Starting update…"
save_update_state(state)
diag_log("info", "Update apply started", {"channel": state.get("channel") or "dev"})
try:
channel = state.get("channel") or os.environ.get("PIKIT_CHANNEL", "dev")
@@ -763,6 +867,7 @@ def apply_update_stub():
state["progress"] = "Downloading release…"
save_update_state(state)
download_file(bundle_url, bundle_path)
diag_log("debug", "Bundle downloaded", {"url": bundle_url, "path": str(bundle_path)})
# Verify hash if provided
expected_hash = None
@@ -774,6 +879,7 @@ def apply_update_stub():
got = sha256_file(bundle_path)
if got.lower() != expected_hash.lower():
raise RuntimeError("Bundle hash mismatch")
diag_log("debug", "Bundle hash verified", {"expected": expected_hash})
state["progress"] = "Staging files…"
save_update_state(state)
@@ -804,14 +910,17 @@ def apply_update_stub():
state["message"] = "Update installed"
state["progress"] = None
save_update_state(state)
diag_log("info", "Update applied", {"version": str(latest)})
except urllib.error.HTTPError as e:
state["status"] = "error"
state["message"] = f"No release available ({e.code})"
diag_log("error", "Update apply HTTP error", {"code": e.code})
except Exception as e:
state["status"] = "error"
state["message"] = f"Update failed: {e}"
state["progress"] = None
save_update_state(state)
diag_log("error", "Update apply failed", {"error": str(e)})
# Attempt rollback if backup exists
backup = choose_rollback_backup()
if backup:
@@ -820,9 +929,11 @@ def apply_update_stub():
state["current_version"] = read_current_version()
state["message"] += f" (rolled back to backup {backup.name})"
save_update_state(state)
diag_log("info", "Rollback after failed update", {"backup": backup.name})
except Exception as re:
state["message"] += f" (rollback failed: {re})"
save_update_state(state)
diag_log("error", "Rollback after failed update failed", {"error": str(re)})
finally:
state["in_progress"] = False
state["progress"] = None
@@ -844,6 +955,7 @@ def rollback_update_stub():
state["status"] = "in_progress"
state["progress"] = "Rolling back…"
save_update_state(state)
diag_log("info", "Rollback started")
backup = choose_rollback_backup()
if not backup:
state["status"] = "error"
@@ -861,9 +973,11 @@ def rollback_update_stub():
ver = get_backup_version(backup)
suffix = f" (version {ver})" if ver else ""
state["message"] = f"Rolled back to backup {backup.name}{suffix}"
diag_log("info", "Rollback complete", {"backup": backup.name, "version": ver})
except Exception as e:
state["status"] = "error"
state["message"] = f"Rollback failed: {e}"
diag_log("error", "Rollback failed", {"error": str(e)})
state["in_progress"] = False
state["progress"] = None
save_update_state(state)
@@ -1090,6 +1204,10 @@ class Handler(BaseHTTPRequestHandler):
return self._send(200, {"text": text})
except Exception as e:
return self._send(500, {"error": str(e)})
elif self.path.startswith("/api/diag/log"):
entries = diag_read()
state = _load_diag_state()
return self._send(200, {"entries": entries, "state": state})
else:
self._send(404, {"error": "not found"})
@@ -1100,6 +1218,7 @@ class Handler(BaseHTTPRequestHandler):
if payload.get("confirm") == "YES":
self._send(200, {"message": "Resetting and rebooting..."})
dbg("Factory reset triggered via API")
diag_log("info", "Factory reset requested")
factory_reset()
else:
self._send(400, {"error": "type YES to confirm"})
@@ -1109,14 +1228,17 @@ class Handler(BaseHTTPRequestHandler):
set_auto_updates(enable)
dbg(f"Auto updates set to {enable}")
state = auto_updates_state()
diag_log("info", "Auto updates toggled", {"enabled": enable})
return self._send(200, {"enabled": state.get("enabled", False), "details": state})
if self.path.startswith("/api/updates/config"):
try:
cfg = set_updates_config(payload or {})
dbg(f"Update settings applied: {cfg}")
diag_log("info", "Update settings saved", cfg)
return self._send(200, cfg)
except Exception as e:
dbg(f"Failed to apply updates config: {e}")
diag_log("error", "Update settings save failed", {"error": str(e)})
return self._send(500, {"error": str(e)})
if self.path.startswith("/api/update/check"):
state = check_for_update()
@@ -1140,6 +1262,7 @@ class Handler(BaseHTTPRequestHandler):
state = load_update_state()
state["auto_check"] = bool(payload.get("enable"))
save_update_state(state)
diag_log("info", "Release auto-check toggled", {"enabled": state["auto_check"]})
return self._send(200, state)
if self.path.startswith("/api/update/channel"):
chan = payload.get("channel", "dev")
@@ -1148,7 +1271,19 @@ class Handler(BaseHTTPRequestHandler):
state = load_update_state()
state["channel"] = chan
save_update_state(state)
diag_log("info", "Release channel set", {"channel": chan})
return self._send(200, state)
if self.path.startswith("/api/diag/log/level"):
state = _save_diag_state(payload.get("enabled"), payload.get("level"))
diag_log("info", "Diag level updated", state)
return self._send(200, {"state": state})
if self.path.startswith("/api/diag/log/clear"):
try:
DIAG_LOG_FILE.unlink(missing_ok=True)
except Exception:
pass
diag_log("info", "Diag log cleared")
return self._send(200, {"cleared": True, "state": _load_diag_state()})
if self.path.startswith("/api/services/add"):
name = payload.get("name")
port = int(payload.get("port", 0))
@@ -1182,6 +1317,7 @@ class Handler(BaseHTTPRequestHandler):
allow_port_lan(port)
except FirewallToolMissing as e:
return self._send(500, {"error": str(e)})
diag_log("info", "Service added", {"name": name, "port": port, "scheme": scheme})
return self._send(200, {"services": services, "message": f"Added {name} on port {port} and opened LAN firewall"})
if self.path.startswith("/api/services/remove"):
port = int(payload.get("port", 0))
@@ -1195,6 +1331,7 @@ class Handler(BaseHTTPRequestHandler):
except FirewallToolMissing as e:
return self._send(500, {"error": str(e)})
save_services(services)
diag_log("info", "Service removed", {"port": port})
return self._send(200, {"services": services, "message": f"Removed service on port {port}"})
if self.path.startswith("/api/services/update"):
port = int(payload.get("port", 0))
@@ -1272,6 +1409,7 @@ class Handler(BaseHTTPRequestHandler):
if not updated:
return self._send(404, {"error": "service not found"})
save_services(services)
diag_log("info", "Service updated", {"port": target_port, "name": new_name or None, "scheme": scheme})
return self._send(200, {"services": services, "message": "Service updated"})
self._send(404, {"error": "not found"})