Implement Phase 1: LangGraph backend MVP
Sets up the full backend foundation for CouncilOS:
- CouncilState TypedDict with all required fields and LangGraph reducers
- Three agent nodes: master_agent (drafts), critic_agent (scores + routes),
writer_agent (final polish)
- LangGraph graph with cyclic rework loop: Master → Critic → (score < 8:
back to Master | score ≥ 8: Writer → END)
- Safety valve: MAX_ITERATIONS=5 prevents infinite loops
- FastAPI app with REST endpoints (POST /api/councils/run, GET /api/councils/run/{id})
and WebSocket endpoint (/ws/council/{run_id}) for real-time agent status events
- In-memory RunStore for Phase 1 (PostgreSQL-backed in Phase 3)
- pytest test suite: state, routing logic, critic parser, agent nodes, API endpoints
- .env.example, .gitignore, docker-compose.yml, Dockerfile
https://claude.ai/code/session_01RfMpt3TbMjZEtK3CAyP5iQ
This commit is contained in:
parent
34dcfb3dcd
commit
797f02c74d
24 changed files with 1472 additions and 0 deletions
1
backend/api/__init__.py
Normal file
1
backend/api/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""API route definitions for CouncilOS."""
|
||||
135
backend/api/routes.py
Normal file
135
backend/api/routes.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""
|
||||
REST API routes for CouncilOS.
|
||||
|
||||
Endpoints:
|
||||
POST /api/councils/run — Start a new council run (async, returns run_id)
|
||||
GET /api/councils/run/{run_id} — Poll the status/result of a run
|
||||
GET /api/health — Health check
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from services.graph_builder import run_council_async
|
||||
from api.run_store import run_store
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Request / Response Models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CouncilRunRequest(BaseModel):
|
||||
input_topic: str = Field(
|
||||
...,
|
||||
min_length=1,
|
||||
max_length=10_000,
|
||||
description="The user's prompt or document content for the council to work on.",
|
||||
examples=["Erkläre die wichtigsten Konzepte des maschinellen Lernens für Einsteiger."],
|
||||
)
|
||||
|
||||
|
||||
class CouncilRunResponse(BaseModel):
|
||||
run_id: str
|
||||
status: str # "pending" | "running" | "completed" | "failed"
|
||||
message: str
|
||||
|
||||
|
||||
class CouncilResultResponse(BaseModel):
|
||||
run_id: str
|
||||
status: str
|
||||
final_draft: Optional[str] = None
|
||||
critic_score: Optional[float] = None
|
||||
iteration_count: Optional[int] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint."""
|
||||
return {"status": "ok", "service": "CouncilOS API"}
|
||||
|
||||
|
||||
@router.post("/councils/run", response_model=CouncilRunResponse, status_code=202)
|
||||
async def start_council_run(
|
||||
request: CouncilRunRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
):
|
||||
"""
|
||||
Start a new council run.
|
||||
|
||||
The run executes asynchronously in the background. Poll
|
||||
GET /api/councils/run/{run_id} for the result, or connect to the
|
||||
WebSocket at /ws/council/{run_id} for real-time updates.
|
||||
"""
|
||||
run_id = str(uuid.uuid4())
|
||||
|
||||
# Register the run as pending in the in-memory store
|
||||
run_store.create(run_id, request.input_topic)
|
||||
|
||||
# Schedule the graph execution as a background task
|
||||
background_tasks.add_task(_execute_run, run_id, request.input_topic)
|
||||
|
||||
return CouncilRunResponse(
|
||||
run_id=run_id,
|
||||
status="pending",
|
||||
message=f"Council run started. Connect to /ws/council/{run_id} for live updates.",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/councils/run/{run_id}", response_model=CouncilResultResponse)
|
||||
async def get_council_result(run_id: str):
|
||||
"""
|
||||
Retrieve the current status or final result of a council run.
|
||||
"""
|
||||
run = run_store.get(run_id)
|
||||
if run is None:
|
||||
raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found.")
|
||||
|
||||
return CouncilResultResponse(
|
||||
run_id=run_id,
|
||||
status=run["status"],
|
||||
final_draft=run.get("final_draft"),
|
||||
critic_score=run.get("critic_score"),
|
||||
iteration_count=run.get("iteration_count"),
|
||||
error=run.get("error"),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _execute_run(run_id: str, input_topic: str) -> None:
|
||||
"""
|
||||
Background task that runs the LangGraph council and updates the run store.
|
||||
"""
|
||||
run_store.update(run_id, {"status": "running"})
|
||||
try:
|
||||
final_state = await run_council_async(
|
||||
input_topic=input_topic,
|
||||
run_id=run_id,
|
||||
on_node_event=lambda nid, node: run_store.update(
|
||||
nid, {"active_node": node}
|
||||
),
|
||||
)
|
||||
run_store.update(
|
||||
run_id,
|
||||
{
|
||||
"status": "completed",
|
||||
"final_draft": final_state.get("current_draft"),
|
||||
"critic_score": final_state.get("critic_score"),
|
||||
"iteration_count": final_state.get("iteration_count"),
|
||||
"active_node": "done",
|
||||
},
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
run_store.update(run_id, {"status": "failed", "error": str(exc)})
|
||||
47
backend/api/run_store.py
Normal file
47
backend/api/run_store.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
"""
|
||||
In-memory run store for Phase 1.
|
||||
|
||||
Tracks the status and results of council runs by run_id. This is intentionally
|
||||
simple for Phase 1. Phase 3+ will replace this with a PostgreSQL-backed store.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
import threading
|
||||
|
||||
|
||||
class RunStore:
|
||||
"""Thread-safe in-memory store for council run state."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._store: Dict[str, Dict[str, Any]] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def create(self, run_id: str, input_topic: str) -> None:
|
||||
with self._lock:
|
||||
self._store[run_id] = {
|
||||
"run_id": run_id,
|
||||
"input_topic": input_topic,
|
||||
"status": "pending",
|
||||
"final_draft": None,
|
||||
"critic_score": None,
|
||||
"iteration_count": None,
|
||||
"active_node": None,
|
||||
"error": None,
|
||||
}
|
||||
|
||||
def get(self, run_id: str) -> Optional[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
return self._store.get(run_id)
|
||||
|
||||
def update(self, run_id: str, updates: Dict[str, Any]) -> None:
|
||||
with self._lock:
|
||||
if run_id in self._store:
|
||||
self._store[run_id].update(updates)
|
||||
|
||||
def delete(self, run_id: str) -> None:
|
||||
with self._lock:
|
||||
self._store.pop(run_id, None)
|
||||
|
||||
|
||||
# Singleton instance shared across the application
|
||||
run_store = RunStore()
|
||||
128
backend/api/websocket.py
Normal file
128
backend/api/websocket.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
"""
|
||||
WebSocket endpoint for real-time agent status updates.
|
||||
|
||||
Clients connect to /ws/council/{run_id} and receive JSON events whenever
|
||||
an agent node becomes active. This powers the live diagram pulsing in the frontend.
|
||||
|
||||
Event format:
|
||||
{"event": "node_start", "run_id": "...", "node": "master_agent", "iteration": 2}
|
||||
{"event": "node_complete", "run_id": "...", "node": "critic_agent", "score": 6.5}
|
||||
{"event": "run_complete", "run_id": "...", "final_draft": "..."}
|
||||
{"event": "run_failed", "run_id": "...", "error": "..."}
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
||||
|
||||
from api.run_store import run_store
|
||||
|
||||
|
||||
ws_router = APIRouter()
|
||||
|
||||
# Active WebSocket connections keyed by run_id
|
||||
_connections: dict[str, list[WebSocket]] = {}
|
||||
|
||||
|
||||
async def broadcast_event(run_id: str, event: dict) -> None:
|
||||
"""
|
||||
Send an event to all WebSocket clients subscribed to a run_id.
|
||||
|
||||
Args:
|
||||
run_id: The council run identifier.
|
||||
event: The event dict to serialize and broadcast.
|
||||
"""
|
||||
clients = _connections.get(run_id, [])
|
||||
disconnected = []
|
||||
|
||||
for ws in clients:
|
||||
try:
|
||||
await ws.send_text(json.dumps(event))
|
||||
except Exception: # noqa: BLE001
|
||||
disconnected.append(ws)
|
||||
|
||||
# Clean up dead connections
|
||||
for ws in disconnected:
|
||||
clients.remove(ws)
|
||||
|
||||
|
||||
@ws_router.websocket("/ws/council/{run_id}")
|
||||
async def council_websocket(websocket: WebSocket, run_id: str):
|
||||
"""
|
||||
WebSocket endpoint for live council run updates.
|
||||
|
||||
On connect: sends the current run status immediately.
|
||||
While running: polls the run store and pushes status changes.
|
||||
On complete/failed: sends a final event and closes the connection.
|
||||
"""
|
||||
await websocket.accept()
|
||||
|
||||
# Register this client
|
||||
if run_id not in _connections:
|
||||
_connections[run_id] = []
|
||||
_connections[run_id].append(websocket)
|
||||
|
||||
try:
|
||||
# Send current state immediately on connect
|
||||
run = run_store.get(run_id)
|
||||
if run is None:
|
||||
await websocket.send_text(
|
||||
json.dumps({"event": "error", "message": f"Run '{run_id}' not found."})
|
||||
)
|
||||
return
|
||||
|
||||
await websocket.send_text(
|
||||
json.dumps({"event": "connected", "run_id": run_id, "status": run["status"]})
|
||||
)
|
||||
|
||||
# Poll for status changes and push updates
|
||||
last_node = None
|
||||
while True:
|
||||
run = run_store.get(run_id)
|
||||
if run is None:
|
||||
break
|
||||
|
||||
current_node = run.get("active_node")
|
||||
if current_node and current_node != last_node:
|
||||
await websocket.send_text(
|
||||
json.dumps({
|
||||
"event": "node_active",
|
||||
"run_id": run_id,
|
||||
"node": current_node,
|
||||
"iteration": run.get("iteration_count"),
|
||||
})
|
||||
)
|
||||
last_node = current_node
|
||||
|
||||
if run["status"] == "completed":
|
||||
await websocket.send_text(
|
||||
json.dumps({
|
||||
"event": "run_complete",
|
||||
"run_id": run_id,
|
||||
"final_draft": run.get("final_draft"),
|
||||
"critic_score": run.get("critic_score"),
|
||||
"iteration_count": run.get("iteration_count"),
|
||||
})
|
||||
)
|
||||
break
|
||||
|
||||
if run["status"] == "failed":
|
||||
await websocket.send_text(
|
||||
json.dumps({
|
||||
"event": "run_failed",
|
||||
"run_id": run_id,
|
||||
"error": run.get("error"),
|
||||
})
|
||||
)
|
||||
break
|
||||
|
||||
await asyncio.sleep(0.5) # 500ms polling interval
|
||||
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
finally:
|
||||
if run_id in _connections:
|
||||
try:
|
||||
_connections[run_id].remove(websocket)
|
||||
except ValueError:
|
||||
pass
|
||||
Loading…
Add table
Add a link
Reference in a new issue