Implement Phase 1: LangGraph backend MVP

Sets up the full backend foundation for CouncilOS:

- CouncilState TypedDict with all required fields and LangGraph reducers
- Three agent nodes: master_agent (drafts), critic_agent (scores + routes), writer_agent (final polish)
- LangGraph graph with cyclic rework loop: Master → Critic → (score < 8: back to Master | score ≥ 8: Writer → END)
- Safety valve: MAX_ITERATIONS=5 prevents infinite loops
- FastAPI app with REST endpoints (POST /api/councils/run, GET /api/councils/run/{id}) and WebSocket endpoint (/ws/council/{run_id}) for real-time agent status events
- In-memory RunStore for Phase 1 (PostgreSQL-backed in Phase 3)
- pytest test suite: state, routing logic, critic parser, agent nodes, API endpoints
- .env.example, .gitignore, docker-compose.yml, Dockerfile

https://claude.ai/code/session_01RfMpt3TbMjZEtK3CAyP5iQ
2026-02-20 16:33:39 +00:00 · 2026-02-20 16:33:39 +00:00 · 797f02c74d
commit 797f02c74d
parent 34dcfb3dcd
24 changed files with 1472 additions and 0 deletions
--- a/backend/tests/__init__.py
+++ b/backend/tests/__init__.py
@@ -0,0 +1 @@
+"""Pytest test suite for CouncilOS backend."""
--- a/backend/tests/test_api.py
+++ b/backend/tests/test_api.py
@@ -0,0 +1,99 @@
+"""
+Integration tests for the FastAPI REST endpoints.
+
+Uses httpx.AsyncClient with the TestClient pattern — no real LLM calls.
+"""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+import pytest
+from unittest.mock import AsyncMock, patch
+from fastapi.testclient import TestClient
+
+from main import app
+from api.run_store import run_store
+
+
+@pytest.fixture(autouse=True)
+def clean_run_store():
+    """Empty the shared run store before and after every test in this module.
+
+    ``autouse=True`` applies the fixture to each test without the test
+    having to request it by name.
+    """
+    # Start from an empty store so records left by an earlier test are gone.
+    run_store._store.clear()
+    yield
+    # Runs after the test body; drops whatever records the test created.
+    run_store._store.clear()
+
+
+# Module-level test client, shared by every test class in this module.
+client = TestClient(app)
+
+
+class TestHealthEndpoint:
+    def test_health_check_returns_ok(self):
+        response = client.get("/api/health")
+        assert response.status_code == 200
+        assert response.json()["status"] == "ok"
+
+
+class TestStartCouncilRun:
+    def test_start_run_returns_202_with_run_id(self):
+        with patch("api.routes._execute_run", new_callable=AsyncMock):
+            response = client.post(
+                "/api/councils/run",
+                json={"input_topic": "Erkläre maschinelles Lernen"},
+            )
+        assert response.status_code == 202
+        data = response.json()
+        assert "run_id" in data
+        assert data["status"] == "pending"
+        assert len(data["run_id"]) == 36  # UUID format
+
+    def test_start_run_rejects_empty_topic(self):
+        response = client.post("/api/councils/run", json={"input_topic": ""})
+        assert response.status_code == 422  # Pydantic validation error
+
+    def test_start_run_rejects_missing_topic(self):
+        response = client.post("/api/councils/run", json={})
+        assert response.status_code == 422
+
+
+class TestGetCouncilResult:
+    def test_get_pending_run(self):
+        run_store.create("test-run-id", "Test topic")
+        response = client.get("/api/councils/run/test-run-id")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["run_id"] == "test-run-id"
+        assert data["status"] == "pending"
+
+    def test_get_completed_run(self):
+        run_store.create("completed-run", "Topic")
+        run_store.update("completed-run", {
+            "status": "completed",
+            "final_draft": "Final polished document.",
+            "critic_score": 9.0,
+            "iteration_count": 2,
+        })
+        response = client.get("/api/councils/run/completed-run")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "completed"
+        assert data["final_draft"] == "Final polished document."
+        assert data["critic_score"] == 9.0
+        assert data["iteration_count"] == 2
+
+    def test_get_nonexistent_run_returns_404(self):
+        response = client.get("/api/councils/run/does-not-exist")
+        assert response.status_code == 404
+
+    def test_get_failed_run(self):
+        run_store.create("failed-run", "Topic")
+        run_store.update("failed-run", {
+            "status": "failed",
+            "error": "API connection timeout",
+        })
+        response = client.get("/api/councils/run/failed-run")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "failed"
+        assert "timeout" in data["error"]
--- a/backend/tests/test_routing.py
+++ b/backend/tests/test_routing.py
@@ -0,0 +1,211 @@
+"""
+Tests for the LangGraph routing logic.
+
+All LLM calls are mocked — no real API calls are made in these tests.
+"""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+import pytest
+from unittest.mock import patch, MagicMock
+
+from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
+from services.graph_builder import route_after_critic, create_initial_state
+
+
+class TestRouteAfterCritic:
+    """Unit tests for the conditional edge routing function."""
+
+    def _make_state(self, route_decision: str, iteration_count: int = 1) -> CouncilState:
+        state = create_initial_state("test topic", "test-run")
+        state["route_decision"] = route_decision
+        state["iteration_count"] = iteration_count
+        return state
+
+    def test_approve_routes_to_writer(self):
+        state = self._make_state("approve")
+        assert route_after_critic(state) == "writer_agent"
+
+    def test_rework_routes_to_master(self):
+        state = self._make_state("rework")
+        assert route_after_critic(state) == "master_agent"
+
+    def test_empty_decision_defaults_to_rework(self):
+        state = self._make_state("")
+        assert route_after_critic(state) == "master_agent"
+
+    def test_unknown_decision_defaults_to_rework(self):
+        state = self._make_state("unknown_value")
+        assert route_after_critic(state) == "master_agent"
+
+
+class TestCriticAgentParsing:
+    """Unit tests for the critic agent's response parser."""
+
+    def test_parse_valid_approve_response(self):
+        from agents.critic_agent import _parse_critic_response
+
+        content = "SCORE: 9\nVERDICT: approve\nFEEDBACK:\nExcellent work."
+        score, verdict, feedback = _parse_critic_response(content)
+        assert score == 9.0
+        assert verdict == "approve"
+        assert "Excellent" in feedback
+
+    def test_parse_valid_rework_response(self):
+        from agents.critic_agent import _parse_critic_response
+
+        content = "SCORE: 5\nVERDICT: rework\nFEEDBACK:\nNeeds more detail."
+        score, verdict, feedback = _parse_critic_response(content)
+        assert score == 5.0
+        assert verdict == "rework"
+        assert "detail" in feedback
+
+    def test_parse_score_clamped_to_0_10(self):
+        from agents.critic_agent import _parse_critic_response
+
+        content = "SCORE: 15\nVERDICT: approve\nFEEDBACK:\nToo high score."
+        score, verdict, feedback = _parse_critic_response(content)
+        assert score == 10.0
+
+    def test_parse_missing_score_defaults_to_0(self):
+        from agents.critic_agent import _parse_critic_response
+
+        content = "No structured response at all."
+        score, verdict, feedback = _parse_critic_response(content)
+        assert score == 0.0
+        assert verdict == "rework"
+
+    def test_threshold_boundary_exactly_8_approves(self):
+        from agents.critic_agent import _parse_critic_response
+
+        content = f"SCORE: {APPROVAL_THRESHOLD}\nVERDICT: approve\nFEEDBACK:\nGood."
+        score, verdict, _ = _parse_critic_response(content)
+        assert score == APPROVAL_THRESHOLD
+        assert verdict == "approve"
+
+
+class TestMasterAgentPromptBuilding:
+    """Unit tests for the master agent's prompt construction."""
+
+    def test_first_iteration_prompt_has_no_feedback_block(self):
+        from agents.master_agent import _build_master_prompt
+
+        state = create_initial_state("Test topic", "run-1")
+        prompt = _build_master_prompt(state)
+        assert "Test topic" in prompt
+        assert "feedback" not in prompt.lower() or "Feedback" not in prompt
+
+    def test_rework_prompt_includes_feedback(self):
+        from agents.master_agent import _build_master_prompt
+
+        state = create_initial_state("Test topic", "run-1")
+        state["current_draft"] = "My draft"
+        state["feedback_history"] = ["Score: 5/10\nNeeds more structure."]
+        prompt = _build_master_prompt(state)
+        assert "My draft" in prompt
+        assert "Needs more structure" in prompt
+
+    def test_rework_prompt_includes_all_feedback_rounds(self):
+        from agents.master_agent import _build_master_prompt
+
+        state = create_initial_state("Topic", "run-2")
+        state["current_draft"] = "Draft v2"
+        state["feedback_history"] = ["First feedback", "Second feedback"]
+        prompt = _build_master_prompt(state)
+        assert "First feedback" in prompt
+        assert "Second feedback" in prompt
+        assert "2 round" in prompt
+
+
+class TestCriticSafetyValve:
+    """Tests for the MAX_ITERATIONS safety valve in the critic agent."""
+
+    def test_safety_valve_forces_approve_at_max_iterations(self):
+        from agents.critic_agent import critic_agent_node
+
+        state = create_initial_state("topic", "run-safety")
+        state["iteration_count"] = MAX_ITERATIONS
+        state["current_draft"] = "Some draft"
+
+        result = critic_agent_node(state)
+
+        assert result["route_decision"] == "approve"
+        assert result["critic_score"] == APPROVAL_THRESHOLD
+
+    def test_safety_valve_not_triggered_below_max(self):
+        """Below MAX_ITERATIONS the real LLM call would happen — mock it."""
+        from agents.critic_agent import critic_agent_node
+
+        mock_response = MagicMock()
+        mock_response.content = "SCORE: 4\nVERDICT: rework\nFEEDBACK:\nNeeds work."
+
+        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
+            MockLLM.return_value.invoke.return_value = mock_response
+
+            state = create_initial_state("topic", "run-below-max")
+            state["iteration_count"] = MAX_ITERATIONS - 1
+            state["current_draft"] = "Draft"
+
+            result = critic_agent_node(state)
+
+        assert result["route_decision"] == "rework"
+        assert result["critic_score"] == 4.0
+
+
+class TestMasterAgentNode:
+    """Integration-style tests for master_agent_node with mocked LLM."""
+
+    def test_master_agent_returns_draft(self):
+        from agents.master_agent import master_agent_node
+
+        mock_response = MagicMock()
+        mock_response.content = "This is a generated draft about AI."
+
+        with patch("agents.master_agent.ChatAnthropic") as MockLLM:
+            MockLLM.return_value.invoke.return_value = mock_response
+
+            state = create_initial_state("AI basics", "run-master-1")
+            result = master_agent_node(state)
+
+        assert result["current_draft"] == "This is a generated draft about AI."
+        assert result["active_node"] == "master_agent"
+        assert result["iteration_count"] == 1
+
+    def test_master_agent_increments_iteration_count(self):
+        from agents.master_agent import master_agent_node
+
+        mock_response = MagicMock()
+        mock_response.content = "Draft"
+
+        with patch("agents.master_agent.ChatAnthropic") as MockLLM:
+            MockLLM.return_value.invoke.return_value = mock_response
+
+            state = create_initial_state("topic", "run-master-2")
+            state["iteration_count"] = 3
+            result = master_agent_node(state)
+
+        assert result["iteration_count"] == 4
+
+
+class TestWriterAgentNode:
+    """Tests for writer_agent_node with mocked LLM."""
+
+    def test_writer_returns_polished_draft(self):
+        from agents.writer_agent import writer_agent_node
+
+        mock_response = MagicMock()
+        mock_response.content = "Polished and professional document."
+
+        with patch("agents.writer_agent.ChatAnthropic") as MockLLM:
+            MockLLM.return_value.invoke.return_value = mock_response
+
+            state = create_initial_state("Machine Learning", "run-writer-1")
+            state["current_draft"] = "Raw draft content"
+            result = writer_agent_node(state)
+
+        assert result["current_draft"] == "Polished and professional document."
+        assert result["active_node"] == "writer_agent"
+        assert result["route_decision"] == "done"
--- a/backend/tests/test_run_store.py
+++ b/backend/tests/test_run_store.py
@@ -0,0 +1,55 @@
+"""Tests for the in-memory RunStore."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from api.run_store import RunStore
+
+
+class TestRunStore:
+    def setup_method(self):
+        self.store = RunStore()
+
+    def test_create_and_get(self):
+        self.store.create("run-1", "Test topic")
+        run = self.store.get("run-1")
+        assert run is not None
+        assert run["run_id"] == "run-1"
+        assert run["input_topic"] == "Test topic"
+        assert run["status"] == "pending"
+
+    def test_get_nonexistent_returns_none(self):
+        assert self.store.get("nonexistent") is None
+
+    def test_update_status(self):
+        self.store.create("run-2", "Topic")
+        self.store.update("run-2", {"status": "running"})
+        assert self.store.get("run-2")["status"] == "running"
+
+    def test_update_nonexistent_is_noop(self):
+        """Updating a non-existent run should not raise."""
+        self.store.update("ghost-run", {"status": "running"})
+
+    def test_delete(self):
+        self.store.create("run-3", "Topic")
+        self.store.delete("run-3")
+        assert self.store.get("run-3") is None
+
+    def test_delete_nonexistent_is_noop(self):
+        self.store.delete("ghost-run")
+
+    def test_update_partial_fields(self):
+        self.store.create("run-4", "Topic")
+        self.store.update("run-4", {"status": "completed", "final_draft": "Result text"})
+        run = self.store.get("run-4")
+        assert run["status"] == "completed"
+        assert run["final_draft"] == "Result text"
+        assert run["input_topic"] == "Topic"  # original field preserved
+
+    def test_multiple_runs_independent(self):
+        self.store.create("run-a", "Topic A")
+        self.store.create("run-b", "Topic B")
+        self.store.update("run-a", {"status": "running"})
+        assert self.store.get("run-b")["status"] == "pending"
--- a/backend/tests/test_state.py
+++ b/backend/tests/test_state.py
@@ -0,0 +1,44 @@
+"""Tests for CouncilState structure and graph_builder helpers."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
+from services.graph_builder import create_initial_state
+
+
+class TestCouncilState:
+    def test_initial_state_fields(self):
+        state = create_initial_state("Test topic", "run-001")
+        assert state["input_topic"] == "Test topic"
+        assert state["current_draft"] == ""
+        assert state["feedback_history"] == []
+        assert state["route_decision"] == ""
+        assert state["messages"] == []
+        assert state["iteration_count"] == 0
+        assert state["critic_score"] is None
+        assert state["run_id"] == "run-001"
+        assert state["active_node"] == ""
+
+    def test_approval_threshold_value(self):
+        assert APPROVAL_THRESHOLD == 8.0
+
+    def test_max_iterations_value(self):
+        assert MAX_ITERATIONS == 5
+
+    def test_state_is_typed_dict(self):
+        """CouncilState should be instantiable as a plain dict."""
+        state: CouncilState = {
+            "input_topic": "AI",
+            "current_draft": "draft",
+            "feedback_history": ["fb1"],
+            "route_decision": "rework",
+            "messages": [],
+            "iteration_count": 1,
+            "critic_score": 6.0,
+            "run_id": "x",
+            "active_node": "critic_agent",
+        }
+        assert state["critic_score"] == 6.0
				`@ -0,0 +1 @@`
				`"""Pytest test suite for CouncilOS backend."""`