Implement Phase 1: LangGraph backend MVP

Sets up the full backend foundation for CouncilOS:

- CouncilState TypedDict with all required fields and LangGraph reducers
- Three agent nodes: master_agent (drafts), critic_agent (scores + routes),
  writer_agent (final polish)
- LangGraph graph with cyclic rework loop: Master → Critic → (score < 8:
  back to Master | score ≥ 8: Writer → END)
- Safety valve: MAX_ITERATIONS=5 prevents infinite loops
- FastAPI app with REST endpoints (POST /api/councils/run, GET /api/councils/run/{id})
  and WebSocket endpoint (/ws/council/{run_id}) for real-time agent status events
- In-memory RunStore for Phase 1 (PostgreSQL-backed in Phase 3)
- pytest test suite: state, routing logic, critic parser, agent nodes, API endpoints
- .env.example, .gitignore, docker-compose.yml, Dockerfile

https://claude.ai/code/session_01RfMpt3TbMjZEtK3CAyP5iQ
This commit is contained in:
Claude 2026-02-20 16:33:39 +00:00
parent 34dcfb3dcd
commit 797f02c74d
No known key found for this signature in database
24 changed files with 1472 additions and 0 deletions

View file

@ -0,0 +1 @@
"""Pytest test suite for CouncilOS backend."""

99
backend/tests/test_api.py Normal file
View file

@ -0,0 +1,99 @@
"""
Integration tests for the FastAPI REST endpoints.
Uses FastAPI's synchronous TestClient — no real LLM calls are made.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import pytest
from unittest.mock import AsyncMock, patch
from fastapi.testclient import TestClient
from main import app
from api.run_store import run_store
@pytest.fixture(autouse=True)
def clean_run_store():
    """Empty the shared run store around every test.

    The fixture is ``autouse``, so it wraps each test automatically: the
    store is cleared before the test body runs and once more afterwards.
    """

    def _reset():
        # Clears the store's backing container in place.
        run_store._store.clear()

    _reset()
    yield
    _reset()
# Module-level test client shared by the endpoint test classes below.
client = TestClient(app)
class TestHealthEndpoint:
    """Smoke test for the health-check route."""

    def test_health_check_returns_ok(self):
        """GET /api/health answers 200 and reports status "ok"."""
        health = client.get("/api/health")
        assert health.status_code == 200
        payload = health.json()
        assert payload["status"] == "ok"
class TestStartCouncilRun:
    """Tests for POST /api/councils/run."""

    def test_start_run_returns_202_with_run_id(self):
        """A valid topic is accepted with 202, status "pending" and a UUID run id."""
        import uuid

        # api.routes._execute_run is swapped for an AsyncMock while the
        # request is being handled.
        with patch("api.routes._execute_run", new_callable=AsyncMock):
            response = client.post(
                "/api/councils/run",
                json={"input_topic": "Erkläre maschinelles Lernen"},
            )
        assert response.status_code == 202
        data = response.json()
        assert "run_id" in data
        assert data["status"] == "pending"
        assert len(data["run_id"]) == 36  # UUID format
        # The length check alone accepts any 36-character string; parsing the
        # value raises ValueError unless it really is a well-formed UUID.
        uuid.UUID(data["run_id"])

    def test_start_run_rejects_empty_topic(self):
        """An empty topic string is rejected with 422."""
        response = client.post("/api/councils/run", json={"input_topic": ""})
        assert response.status_code == 422  # Pydantic validation error

    def test_start_run_rejects_missing_topic(self):
        """A body without ``input_topic`` is rejected with 422."""
        response = client.post("/api/councils/run", json={})
        assert response.status_code == 422
class TestGetCouncilResult:
    """Tests for GET /api/councils/run/{run_id}; runs are seeded via run_store."""

    def test_get_pending_run(self):
        """A freshly created run is returned with status "pending"."""
        run_store.create("test-run-id", "Test topic")
        reply = client.get("/api/councils/run/test-run-id")
        assert reply.status_code == 200
        body = reply.json()
        assert body["run_id"] == "test-run-id"
        assert body["status"] == "pending"

    def test_get_completed_run(self):
        """Fields written by ``run_store.update`` are echoed for a completed run."""
        completed_fields = {
            "status": "completed",
            "final_draft": "Final polished document.",
            "critic_score": 9.0,
            "iteration_count": 2,
        }
        run_store.create("completed-run", "Topic")
        run_store.update("completed-run", completed_fields)
        reply = client.get("/api/councils/run/completed-run")
        assert reply.status_code == 200
        body = reply.json()
        assert body["status"] == "completed"
        assert body["final_draft"] == "Final polished document."
        assert body["critic_score"] == 9.0
        assert body["iteration_count"] == 2

    def test_get_nonexistent_run_returns_404(self):
        """An id that was never created yields 404."""
        reply = client.get("/api/councils/run/does-not-exist")
        assert reply.status_code == 404

    def test_get_failed_run(self):
        """A failed run is still served with 200 and carries its error text."""
        failure_fields = {
            "status": "failed",
            "error": "API connection timeout",
        }
        run_store.create("failed-run", "Topic")
        run_store.update("failed-run", failure_fields)
        reply = client.get("/api/councils/run/failed-run")
        assert reply.status_code == 200
        body = reply.json()
        assert body["status"] == "failed"
        assert "timeout" in body["error"]

View file

@ -0,0 +1,211 @@
"""
Tests for the LangGraph routing logic.
All LLM calls are mocked — no real API calls are made in these tests.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import pytest
from unittest.mock import patch, MagicMock
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
from services.graph_builder import route_after_critic, create_initial_state
class TestRouteAfterCritic:
    """Unit tests for ``route_after_critic``, the conditional edge router."""

    def _make_state(self, route_decision: str, iteration_count: int = 1) -> CouncilState:
        """Build an initial state and overwrite its routing fields."""
        base = create_initial_state("test topic", "test-run")
        base["iteration_count"] = iteration_count
        base["route_decision"] = route_decision
        return base

    def test_approve_routes_to_writer(self):
        """An "approve" decision routes to the writer."""
        next_node = route_after_critic(self._make_state("approve"))
        assert next_node == "writer_agent"

    def test_rework_routes_to_master(self):
        """A "rework" decision routes back to the master."""
        next_node = route_after_critic(self._make_state("rework"))
        assert next_node == "master_agent"

    def test_empty_decision_defaults_to_rework(self):
        """An empty decision string is treated like rework."""
        next_node = route_after_critic(self._make_state(""))
        assert next_node == "master_agent"

    def test_unknown_decision_defaults_to_rework(self):
        """An unrecognised decision string is treated like rework."""
        next_node = route_after_critic(self._make_state("unknown_value"))
        assert next_node == "master_agent"
class TestCriticAgentParsing:
    """Unit tests for ``_parse_critic_response`` in the critic agent."""

    def test_parse_valid_approve_response(self):
        """A well-formed approve reply yields its score, verdict and feedback."""
        from agents.critic_agent import _parse_critic_response

        parsed_score, parsed_verdict, parsed_feedback = _parse_critic_response(
            "SCORE: 9\nVERDICT: approve\nFEEDBACK:\nExcellent work."
        )
        assert parsed_score == 9.0
        assert parsed_verdict == "approve"
        assert "Excellent" in parsed_feedback

    def test_parse_valid_rework_response(self):
        """A well-formed rework reply yields its score, verdict and feedback."""
        from agents.critic_agent import _parse_critic_response

        parsed_score, parsed_verdict, parsed_feedback = _parse_critic_response(
            "SCORE: 5\nVERDICT: rework\nFEEDBACK:\nNeeds more detail."
        )
        assert parsed_score == 5.0
        assert parsed_verdict == "rework"
        assert "detail" in parsed_feedback

    def test_parse_score_clamped_to_0_10(self):
        """A score of 15 comes back as 10.0."""
        from agents.critic_agent import _parse_critic_response

        parsed_score, _, _ = _parse_critic_response(
            "SCORE: 15\nVERDICT: approve\nFEEDBACK:\nToo high score."
        )
        assert parsed_score == 10.0

    def test_parse_missing_score_defaults_to_0(self):
        """Unstructured text parses to score 0.0 with a rework verdict."""
        from agents.critic_agent import _parse_critic_response

        parsed_score, parsed_verdict, _ = _parse_critic_response(
            "No structured response at all."
        )
        assert parsed_score == 0.0
        assert parsed_verdict == "rework"

    def test_threshold_boundary_exactly_8_approves(self):
        """A score equal to APPROVAL_THRESHOLD parses intact with verdict approve."""
        from agents.critic_agent import _parse_critic_response

        reply = f"SCORE: {APPROVAL_THRESHOLD}\nVERDICT: approve\nFEEDBACK:\nGood."
        parsed_score, parsed_verdict, _ = _parse_critic_response(reply)
        assert parsed_score == APPROVAL_THRESHOLD
        assert parsed_verdict == "approve"
class TestMasterAgentPromptBuilding:
    """Unit tests for the master agent's prompt construction."""

    def test_first_iteration_prompt_has_no_feedback_block(self):
        """With no feedback history the prompt names the topic and has no "Feedback" text."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Test topic", "run-1")
        prompt = _build_master_prompt(state)
        assert "Test topic" in prompt
        # The previous form,
        #   "feedback" not in prompt.lower() or "Feedback" not in prompt
        # was logically identical to this single check: whenever the
        # case-insensitive test holds, the capitalised one holds too. The
        # redundant disjunct only made it look like a case-insensitive
        # assertion, so it is spelled out as what it actually verifies.
        assert "Feedback" not in prompt

    def test_rework_prompt_includes_feedback(self):
        """On rework the prompt contains the current draft and the feedback text."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Test topic", "run-1")
        state["current_draft"] = "My draft"
        state["feedback_history"] = ["Score: 5/10\nNeeds more structure."]
        prompt = _build_master_prompt(state)
        assert "My draft" in prompt
        assert "Needs more structure" in prompt

    def test_rework_prompt_includes_all_feedback_rounds(self):
        """Every feedback entry appears in the prompt, along with the round count."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Topic", "run-2")
        state["current_draft"] = "Draft v2"
        state["feedback_history"] = ["First feedback", "Second feedback"]
        prompt = _build_master_prompt(state)
        assert "First feedback" in prompt
        assert "Second feedback" in prompt
        assert "2 round" in prompt
class TestCriticSafetyValve:
    """Tests for the MAX_ITERATIONS safety valve in the critic agent."""

    def test_safety_valve_forces_approve_at_max_iterations(self):
        """At MAX_ITERATIONS the critic approves with the threshold score."""
        from agents.critic_agent import critic_agent_node

        capped_state = create_initial_state("topic", "run-safety")
        capped_state["iteration_count"] = MAX_ITERATIONS
        capped_state["current_draft"] = "Some draft"
        outcome = critic_agent_node(capped_state)
        assert outcome["route_decision"] == "approve"
        assert outcome["critic_score"] == APPROVAL_THRESHOLD

    def test_safety_valve_not_triggered_below_max(self):
        """Below MAX_ITERATIONS the real LLM call would happen — mock it."""
        from agents.critic_agent import critic_agent_node

        canned_reply = MagicMock(
            content="SCORE: 4\nVERDICT: rework\nFEEDBACK:\nNeeds work."
        )
        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = canned_reply
            pending_state = create_initial_state("topic", "run-below-max")
            pending_state["iteration_count"] = MAX_ITERATIONS - 1
            pending_state["current_draft"] = "Draft"
            outcome = critic_agent_node(pending_state)
        assert outcome["route_decision"] == "rework"
        assert outcome["critic_score"] == 4.0
class TestMasterAgentNode:
    """Integration-style tests for master_agent_node with mocked LLM."""

    def test_master_agent_returns_draft(self):
        """The mocked LLM content becomes the draft; the count goes from 0 to 1."""
        from agents.master_agent import master_agent_node

        canned_reply = MagicMock(content="This is a generated draft about AI.")
        with patch("agents.master_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = canned_reply
            fresh_state = create_initial_state("AI basics", "run-master-1")
            outcome = master_agent_node(fresh_state)
        assert outcome["current_draft"] == "This is a generated draft about AI."
        assert outcome["active_node"] == "master_agent"
        assert outcome["iteration_count"] == 1

    def test_master_agent_increments_iteration_count(self):
        """Starting from iteration 3, the node reports iteration 4."""
        from agents.master_agent import master_agent_node

        canned_reply = MagicMock(content="Draft")
        with patch("agents.master_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = canned_reply
            later_state = create_initial_state("topic", "run-master-2")
            later_state["iteration_count"] = 3
            outcome = master_agent_node(later_state)
        assert outcome["iteration_count"] == 4
class TestWriterAgentNode:
    """Tests for writer_agent_node with mocked LLM."""

    def test_writer_returns_polished_draft(self):
        """The mocked LLM content replaces the draft and the route becomes "done"."""
        from agents.writer_agent import writer_agent_node

        canned_reply = MagicMock(content="Polished and professional document.")
        with patch("agents.writer_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = canned_reply
            draft_state = create_initial_state("Machine Learning", "run-writer-1")
            draft_state["current_draft"] = "Raw draft content"
            outcome = writer_agent_node(draft_state)
        assert outcome["current_draft"] == "Polished and professional document."
        assert outcome["active_node"] == "writer_agent"
        assert outcome["route_decision"] == "done"

View file

@ -0,0 +1,55 @@
"""Tests for the in-memory RunStore."""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from api.run_store import RunStore
class TestRunStore:
    """Behavioural tests for RunStore; each test gets its own instance."""

    def setup_method(self):
        """Create a fresh store before every test method."""
        self.store = RunStore()

    def test_create_and_get(self):
        """A created run can be fetched and starts out pending."""
        self.store.create("run-1", "Test topic")
        fetched = self.store.get("run-1")
        assert fetched is not None
        assert fetched["run_id"] == "run-1"
        assert fetched["input_topic"] == "Test topic"
        assert fetched["status"] == "pending"

    def test_get_nonexistent_returns_none(self):
        """Looking up an unknown id gives None."""
        missing = self.store.get("nonexistent")
        assert missing is None

    def test_update_status(self):
        """An updated status is visible on the next get."""
        self.store.create("run-2", "Topic")
        self.store.update("run-2", {"status": "running"})
        fetched = self.store.get("run-2")
        assert fetched["status"] == "running"

    def test_update_nonexistent_is_noop(self):
        """Updating a non-existent run should not raise."""
        self.store.update("ghost-run", {"status": "running"})

    def test_delete(self):
        """A deleted run can no longer be fetched."""
        self.store.create("run-3", "Topic")
        self.store.delete("run-3")
        assert self.store.get("run-3") is None

    def test_delete_nonexistent_is_noop(self):
        """Deleting a non-existent run should not raise."""
        self.store.delete("ghost-run")

    def test_update_partial_fields(self):
        """Updating some fields leaves the others as they were."""
        self.store.create("run-4", "Topic")
        changes = {"status": "completed", "final_draft": "Result text"}
        self.store.update("run-4", changes)
        fetched = self.store.get("run-4")
        assert fetched["status"] == "completed"
        assert fetched["final_draft"] == "Result text"
        assert fetched["input_topic"] == "Topic"  # original field preserved

    def test_multiple_runs_independent(self):
        """Updating one run does not change another run's status."""
        self.store.create("run-a", "Topic A")
        self.store.create("run-b", "Topic B")
        self.store.update("run-a", {"status": "running"})
        untouched = self.store.get("run-b")
        assert untouched["status"] == "pending"

View file

@ -0,0 +1,44 @@
"""Tests for CouncilState structure and graph_builder helpers."""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
from services.graph_builder import create_initial_state
class TestCouncilState:
    """Checks on the initial state factory, module constants and state shape."""

    def test_initial_state_fields(self):
        """``create_initial_state`` fills every field with its starting value."""
        initial = create_initial_state("Test topic", "run-001")
        assert initial["input_topic"] == "Test topic"
        assert initial["current_draft"] == ""
        assert initial["feedback_history"] == []
        assert initial["route_decision"] == ""
        assert initial["messages"] == []
        assert initial["iteration_count"] == 0
        assert initial["critic_score"] is None
        assert initial["run_id"] == "run-001"
        assert initial["active_node"] == ""

    def test_approval_threshold_value(self):
        """APPROVAL_THRESHOLD is pinned at 8.0."""
        assert APPROVAL_THRESHOLD == 8.0

    def test_max_iterations_value(self):
        """MAX_ITERATIONS is pinned at 5."""
        assert MAX_ITERATIONS == 5

    def test_state_is_typed_dict(self):
        """CouncilState should be instantiable as a plain dict."""
        sample: CouncilState = {
            "input_topic": "AI",
            "current_draft": "draft",
            "feedback_history": ["fb1"],
            "route_decision": "rework",
            "messages": [],
            "iteration_count": 1,
            "critic_score": 6.0,
            "run_id": "x",
            "active_node": "critic_agent",
        }
        assert sample["critic_score"] == 6.0