Implement Phase 1: LangGraph backend MVP

Sets up the full backend foundation for CouncilOS:

- CouncilState TypedDict with all required fields and LangGraph reducers
- Three agent nodes: master_agent (drafts), critic_agent (scores + routes),
  writer_agent (final polish)
- LangGraph graph with cyclic rework loop: Master → Critic → (score < 8:
  back to Master | score ≥ 8: Writer → END)
- Safety valve: MAX_ITERATIONS=5 prevents infinite loops
- FastAPI app with REST endpoints (POST /api/councils/run, GET /api/councils/run/{id})
  and WebSocket endpoint (/ws/council/{run_id}) for real-time agent status events
- In-memory RunStore for Phase 1 (PostgreSQL-backed in Phase 3)
- pytest test suite: state, routing logic, critic parser, agent nodes, API endpoints
- .env.example, .gitignore, docker-compose.yml, Dockerfile

https://claude.ai/code/session_01RfMpt3TbMjZEtK3CAyP5iQ
This commit is contained in:
Claude 2026-02-20 16:33:39 +00:00
parent 34dcfb3dcd
commit 797f02c74d
No known key found for this signature in database
24 changed files with 1472 additions and 0 deletions

1
backend/api/__init__.py Normal file
View file

@ -0,0 +1 @@
"""API route definitions for CouncilOS."""

135
backend/api/routes.py Normal file
View file

@ -0,0 +1,135 @@
"""
REST API routes for CouncilOS.
Endpoints:
POST /api/councils/run Start a new council run (async, returns run_id)
GET /api/councils/run/{run_id} Poll the status/result of a run
GET /api/health Health check
"""
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field
from services.graph_builder import run_council_async
from api.run_store import run_store
router = APIRouter()
# ---------------------------------------------------------------------------
# Request / Response Models
# ---------------------------------------------------------------------------
class CouncilRunRequest(BaseModel):
input_topic: str = Field(
...,
min_length=1,
max_length=10_000,
description="The user's prompt or document content for the council to work on.",
examples=["Erkläre die wichtigsten Konzepte des maschinellen Lernens für Einsteiger."],
)
class CouncilRunResponse(BaseModel):
run_id: str
status: str # "pending" | "running" | "completed" | "failed"
message: str
class CouncilResultResponse(BaseModel):
run_id: str
status: str
final_draft: Optional[str] = None
critic_score: Optional[float] = None
iteration_count: Optional[int] = None
error: Optional[str] = None
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("/health")
async def health_check():
"""Health check endpoint."""
return {"status": "ok", "service": "CouncilOS API"}
@router.post("/councils/run", response_model=CouncilRunResponse, status_code=202)
async def start_council_run(
request: CouncilRunRequest,
background_tasks: BackgroundTasks,
):
"""
Start a new council run.
The run executes asynchronously in the background. Poll
GET /api/councils/run/{run_id} for the result, or connect to the
WebSocket at /ws/council/{run_id} for real-time updates.
"""
run_id = str(uuid.uuid4())
# Register the run as pending in the in-memory store
run_store.create(run_id, request.input_topic)
# Schedule the graph execution as a background task
background_tasks.add_task(_execute_run, run_id, request.input_topic)
return CouncilRunResponse(
run_id=run_id,
status="pending",
message=f"Council run started. Connect to /ws/council/{run_id} for live updates.",
)
@router.get("/councils/run/{run_id}", response_model=CouncilResultResponse)
async def get_council_result(run_id: str):
"""
Retrieve the current status or final result of a council run.
"""
run = run_store.get(run_id)
if run is None:
raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found.")
return CouncilResultResponse(
run_id=run_id,
status=run["status"],
final_draft=run.get("final_draft"),
critic_score=run.get("critic_score"),
iteration_count=run.get("iteration_count"),
error=run.get("error"),
)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
async def _execute_run(run_id: str, input_topic: str) -> None:
"""
Background task that runs the LangGraph council and updates the run store.
"""
run_store.update(run_id, {"status": "running"})
try:
final_state = await run_council_async(
input_topic=input_topic,
run_id=run_id,
on_node_event=lambda nid, node: run_store.update(
nid, {"active_node": node}
),
)
run_store.update(
run_id,
{
"status": "completed",
"final_draft": final_state.get("current_draft"),
"critic_score": final_state.get("critic_score"),
"iteration_count": final_state.get("iteration_count"),
"active_node": "done",
},
)
except Exception as exc: # noqa: BLE001
run_store.update(run_id, {"status": "failed", "error": str(exc)})

47
backend/api/run_store.py Normal file
View file

@ -0,0 +1,47 @@
"""
In-memory run store for Phase 1.
Tracks the status and results of council runs by run_id. This is intentionally
simple for Phase 1. Phase 3+ will replace this with a PostgreSQL-backed store.
"""
from typing import Any, Dict, Optional
import threading
class RunStore:
"""Thread-safe in-memory store for council run state."""
def __init__(self) -> None:
self._store: Dict[str, Dict[str, Any]] = {}
self._lock = threading.Lock()
def create(self, run_id: str, input_topic: str) -> None:
with self._lock:
self._store[run_id] = {
"run_id": run_id,
"input_topic": input_topic,
"status": "pending",
"final_draft": None,
"critic_score": None,
"iteration_count": None,
"active_node": None,
"error": None,
}
def get(self, run_id: str) -> Optional[Dict[str, Any]]:
with self._lock:
return self._store.get(run_id)
def update(self, run_id: str, updates: Dict[str, Any]) -> None:
with self._lock:
if run_id in self._store:
self._store[run_id].update(updates)
def delete(self, run_id: str) -> None:
with self._lock:
self._store.pop(run_id, None)
# Singleton instance shared across the application
run_store = RunStore()

128
backend/api/websocket.py Normal file
View file

@ -0,0 +1,128 @@
"""
WebSocket endpoint for real-time agent status updates.
Clients connect to /ws/council/{run_id} and receive JSON events whenever
an agent node becomes active. This powers the live diagram pulsing in the frontend.
Event format:
{"event": "node_start", "run_id": "...", "node": "master_agent", "iteration": 2}
{"event": "node_complete", "run_id": "...", "node": "critic_agent", "score": 6.5}
{"event": "run_complete", "run_id": "...", "final_draft": "..."}
{"event": "run_failed", "run_id": "...", "error": "..."}
"""
import asyncio
import json
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from api.run_store import run_store
ws_router = APIRouter()
# Active WebSocket connections keyed by run_id
_connections: dict[str, list[WebSocket]] = {}
async def broadcast_event(run_id: str, event: dict) -> None:
"""
Send an event to all WebSocket clients subscribed to a run_id.
Args:
run_id: The council run identifier.
event: The event dict to serialize and broadcast.
"""
clients = _connections.get(run_id, [])
disconnected = []
for ws in clients:
try:
await ws.send_text(json.dumps(event))
except Exception: # noqa: BLE001
disconnected.append(ws)
# Clean up dead connections
for ws in disconnected:
clients.remove(ws)
@ws_router.websocket("/ws/council/{run_id}")
async def council_websocket(websocket: WebSocket, run_id: str):
"""
WebSocket endpoint for live council run updates.
On connect: sends the current run status immediately.
While running: polls the run store and pushes status changes.
On complete/failed: sends a final event and closes the connection.
"""
await websocket.accept()
# Register this client
if run_id not in _connections:
_connections[run_id] = []
_connections[run_id].append(websocket)
try:
# Send current state immediately on connect
run = run_store.get(run_id)
if run is None:
await websocket.send_text(
json.dumps({"event": "error", "message": f"Run '{run_id}' not found."})
)
return
await websocket.send_text(
json.dumps({"event": "connected", "run_id": run_id, "status": run["status"]})
)
# Poll for status changes and push updates
last_node = None
while True:
run = run_store.get(run_id)
if run is None:
break
current_node = run.get("active_node")
if current_node and current_node != last_node:
await websocket.send_text(
json.dumps({
"event": "node_active",
"run_id": run_id,
"node": current_node,
"iteration": run.get("iteration_count"),
})
)
last_node = current_node
if run["status"] == "completed":
await websocket.send_text(
json.dumps({
"event": "run_complete",
"run_id": run_id,
"final_draft": run.get("final_draft"),
"critic_score": run.get("critic_score"),
"iteration_count": run.get("iteration_count"),
})
)
break
if run["status"] == "failed":
await websocket.send_text(
json.dumps({
"event": "run_failed",
"run_id": run_id,
"error": run.get("error"),
})
)
break
await asyncio.sleep(0.5) # 500ms polling interval
except WebSocketDisconnect:
pass
finally:
if run_id in _connections:
try:
_connections[run_id].remove(websocket)
except ValueError:
pass