Implement Phase 1: LangGraph backend MVP
Sets up the full backend foundation for CouncilOS:
- CouncilState TypedDict with all required fields and LangGraph reducers
- Three agent nodes: master_agent (drafts), critic_agent (scores + routes),
writer_agent (final polish)
- LangGraph graph with cyclic rework loop: Master → Critic → (score < 8:
back to Master | score ≥ 8: Writer → END)
- Safety valve: MAX_ITERATIONS=5 prevents infinite loops
- FastAPI app with REST endpoints (POST /api/councils/run, GET /api/councils/run/{id})
and WebSocket endpoint (/ws/council/{run_id}) for real-time agent status events
- In-memory RunStore for Phase 1 (PostgreSQL-backed in Phase 3)
- pytest test suite: state, routing logic, critic parser, agent nodes, API endpoints
- .env.example, .gitignore, docker-compose.yml, Dockerfile
https://claude.ai/code/session_01RfMpt3TbMjZEtK3CAyP5iQ
This commit is contained in:
parent
34dcfb3dcd
commit
797f02c74d
24 changed files with 1472 additions and 0 deletions
1
backend/tests/__init__.py
Normal file
1
backend/tests/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Pytest test suite for CouncilOS backend."""
|
||||
99
backend/tests/test_api.py
Normal file
99
backend/tests/test_api.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
"""
|
||||
Integration tests for the FastAPI REST endpoints.
|
||||
|
||||
Uses httpx.AsyncClient with the TestClient pattern — no real LLM calls.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from main import app
|
||||
from api.run_store import run_store
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clean_run_store():
    """Empty the shared run store around every test in this module.

    The store is wiped once before the test body runs and once more after
    it finishes, so no test observes runs left behind by another one.
    """
    run_store._store.clear()
    yield
    run_store._store.clear()
|
||||
|
||||
|
||||
# Module-level test client bound to the FastAPI app; the tests below issue
# their HTTP requests through it.
client = TestClient(app)
|
||||
|
||||
|
||||
class TestHealthEndpoint:
    """Checks for the ``/api/health`` endpoint."""

    def test_health_check_returns_ok(self):
        """A GET on the health route answers 200 with status ``"ok"``."""
        resp = client.get("/api/health")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["status"] == "ok"
|
||||
|
||||
|
||||
class TestStartCouncilRun:
    """Tests for ``POST /api/councils/run``."""

    def test_start_run_returns_202_with_run_id(self):
        """A valid topic is accepted with 202 and a pending run carrying a UUID."""
        import uuid  # local import: only this test needs it

        # Patch ``api.routes._execute_run`` with an AsyncMock for the
        # duration of the request.
        with patch("api.routes._execute_run", new_callable=AsyncMock):
            response = client.post(
                "/api/councils/run",
                json={"input_topic": "Erkläre maschinelles Lernen"},
            )
        assert response.status_code == 202
        data = response.json()
        assert "run_id" in data
        assert data["status"] == "pending"
        assert len(data["run_id"]) == 36  # UUID format
        # The length check alone accepts any 36-character string; parsing
        # raises ValueError (failing the test) when the id is not a UUID.
        uuid.UUID(data["run_id"])

    def test_start_run_rejects_empty_topic(self):
        """An empty ``input_topic`` is rejected with 422."""
        response = client.post("/api/councils/run", json={"input_topic": ""})
        assert response.status_code == 422  # Pydantic validation error

    def test_start_run_rejects_missing_topic(self):
        """A body without ``input_topic`` is rejected with 422."""
        response = client.post("/api/councils/run", json={})
        assert response.status_code == 422
|
||||
|
||||
|
||||
class TestGetCouncilResult:
    """Tests for ``GET /api/councils/run/{run_id}``."""

    def test_get_pending_run(self):
        """A freshly created run is returned with status ``"pending"``."""
        run_store.create("test-run-id", "Test topic")

        resp = client.get("/api/councils/run/test-run-id")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["run_id"] == "test-run-id"
        assert payload["status"] == "pending"

    def test_get_completed_run(self):
        """Fields written to the store for a completed run appear in the response."""
        stored_fields = {
            "status": "completed",
            "final_draft": "Final polished document.",
            "critic_score": 9.0,
            "iteration_count": 2,
        }
        run_store.create("completed-run", "Topic")
        # Hand the store its own copy so the expectations below stay untouched.
        run_store.update("completed-run", dict(stored_fields))

        resp = client.get("/api/councils/run/completed-run")

        assert resp.status_code == 200
        payload = resp.json()
        for field, expected in stored_fields.items():
            assert payload[field] == expected

    def test_get_nonexistent_run_returns_404(self):
        """Requesting a run id that was never created answers 404."""
        resp = client.get("/api/councils/run/does-not-exist")
        assert resp.status_code == 404

    def test_get_failed_run(self):
        """A failed run is still served with 200 and exposes its error text."""
        run_store.create("failed-run", "Topic")
        failure = {
            "status": "failed",
            "error": "API connection timeout",
        }
        run_store.update("failed-run", failure)

        resp = client.get("/api/councils/run/failed-run")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["status"] == "failed"
        assert "timeout" in payload["error"]
|
||||
211
backend/tests/test_routing.py
Normal file
211
backend/tests/test_routing.py
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
"""
|
||||
Tests for the LangGraph routing logic.
|
||||
|
||||
All LLM calls are mocked — no real API calls are made in these tests.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
|
||||
from services.graph_builder import route_after_critic, create_initial_state
|
||||
|
||||
|
||||
class TestRouteAfterCritic:
    """Unit tests for ``route_after_critic``, the conditional-edge router."""

    def _make_state(self, route_decision: str, iteration_count: int = 1) -> CouncilState:
        """Return a fresh initial state with the two routing fields overridden."""
        council_state = create_initial_state("test topic", "test-run")
        council_state["route_decision"] = route_decision
        council_state["iteration_count"] = iteration_count
        return council_state

    def test_approve_routes_to_writer(self):
        """An ``"approve"`` decision sends the flow to the writer agent."""
        target = route_after_critic(self._make_state("approve"))
        assert target == "writer_agent"

    def test_rework_routes_to_master(self):
        """A ``"rework"`` decision sends the flow back to the master agent."""
        target = route_after_critic(self._make_state("rework"))
        assert target == "master_agent"

    def test_empty_decision_defaults_to_rework(self):
        """An empty decision string is routed like a rework."""
        target = route_after_critic(self._make_state(""))
        assert target == "master_agent"

    def test_unknown_decision_defaults_to_rework(self):
        """An unrecognised decision string is routed like a rework."""
        target = route_after_critic(self._make_state("unknown_value"))
        assert target == "master_agent"
|
||||
|
||||
|
||||
class TestCriticAgentParsing:
    """Unit tests for ``_parse_critic_response`` in the critic agent module."""

    def test_parse_valid_approve_response(self):
        """A well-formed approving reply yields its score, verdict and feedback."""
        from agents.critic_agent import _parse_critic_response

        reply = "SCORE: 9\nVERDICT: approve\nFEEDBACK:\nExcellent work."
        parsed_score, parsed_verdict, parsed_feedback = _parse_critic_response(reply)

        assert parsed_score == 9.0
        assert parsed_verdict == "approve"
        assert "Excellent" in parsed_feedback

    def test_parse_valid_rework_response(self):
        """A well-formed rework reply yields its score, verdict and feedback."""
        from agents.critic_agent import _parse_critic_response

        reply = "SCORE: 5\nVERDICT: rework\nFEEDBACK:\nNeeds more detail."
        parsed_score, parsed_verdict, parsed_feedback = _parse_critic_response(reply)

        assert parsed_score == 5.0
        assert parsed_verdict == "rework"
        assert "detail" in parsed_feedback

    def test_parse_score_clamped_to_0_10(self):
        """A score above 10 comes back as 10.0."""
        from agents.critic_agent import _parse_critic_response

        reply = "SCORE: 15\nVERDICT: approve\nFEEDBACK:\nToo high score."
        parsed_score, _, _ = _parse_critic_response(reply)

        assert parsed_score == 10.0

    def test_parse_missing_score_defaults_to_0(self):
        """Unstructured text falls back to score 0.0 and verdict ``"rework"``."""
        from agents.critic_agent import _parse_critic_response

        reply = "No structured response at all."
        parsed_score, parsed_verdict, _ = _parse_critic_response(reply)

        assert parsed_score == 0.0
        assert parsed_verdict == "rework"

    def test_threshold_boundary_exactly_8_approves(self):
        """A reply scored exactly at ``APPROVAL_THRESHOLD`` parses as an approval."""
        from agents.critic_agent import _parse_critic_response

        reply = f"SCORE: {APPROVAL_THRESHOLD}\nVERDICT: approve\nFEEDBACK:\nGood."
        parsed_score, parsed_verdict, _ = _parse_critic_response(reply)

        assert parsed_score == APPROVAL_THRESHOLD
        assert parsed_verdict == "approve"
|
||||
|
||||
|
||||
class TestMasterAgentPromptBuilding:
    """Unit tests for the master agent's prompt construction."""

    def test_first_iteration_prompt_has_no_feedback_block(self):
        """The first-iteration prompt names the topic and has no "Feedback" text."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Test topic", "run-1")
        prompt = _build_master_prompt(state)

        assert "Test topic" in prompt
        # Only the capitalised word is checked: a prompt containing
        # "Feedback" always contains "feedback" once lower-cased, so an
        # additional `"feedback" not in prompt.lower() or ...` disjunct can
        # never change the outcome.
        # NOTE(review): presumably the feedback block header is capitalised
        # and lowercase mentions are allowed in the base prompt -- confirm
        # against _build_master_prompt before tightening this check.
        assert "Feedback" not in prompt

    def test_rework_prompt_includes_feedback(self):
        """With a draft and one feedback entry, both show up in the prompt."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Test topic", "run-1")
        state["current_draft"] = "My draft"
        state["feedback_history"] = ["Score: 5/10\nNeeds more structure."]
        prompt = _build_master_prompt(state)

        assert "My draft" in prompt
        assert "Needs more structure" in prompt

    def test_rework_prompt_includes_all_feedback_rounds(self):
        """Every feedback entry appears and the prompt mentions "2 round"."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Topic", "run-2")
        state["current_draft"] = "Draft v2"
        state["feedback_history"] = ["First feedback", "Second feedback"]
        prompt = _build_master_prompt(state)

        assert "First feedback" in prompt
        assert "Second feedback" in prompt
        assert "2 round" in prompt
|
||||
|
||||
|
||||
class TestCriticSafetyValve:
    """Tests for the MAX_ITERATIONS safety valve in the critic agent."""

    def test_safety_valve_forces_approve_at_max_iterations(self):
        """At MAX_ITERATIONS the critic approves with the threshold score."""
        from agents.critic_agent import critic_agent_node

        state = create_initial_state("topic", "run-safety")
        state["iteration_count"] = MAX_ITERATIONS
        state["current_draft"] = "Some draft"

        # Patch the LLM class even though the valve is expected to bypass it:
        # if the valve ever regresses, the test must fail locally instead of
        # issuing a real API call.
        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            result = critic_agent_node(state)

        assert result["route_decision"] == "approve"
        assert result["critic_score"] == APPROVAL_THRESHOLD
        MockLLM.return_value.invoke.assert_not_called()

    def test_safety_valve_not_triggered_below_max(self):
        """Below MAX_ITERATIONS the real LLM call would happen — mock it."""
        from agents.critic_agent import critic_agent_node

        mock_response = MagicMock()
        mock_response.content = "SCORE: 4\nVERDICT: rework\nFEEDBACK:\nNeeds work."

        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = mock_response

            state = create_initial_state("topic", "run-below-max")
            state["iteration_count"] = MAX_ITERATIONS - 1
            state["current_draft"] = "Draft"

            result = critic_agent_node(state)

        assert result["route_decision"] == "rework"
        assert result["critic_score"] == 4.0
|
||||
|
||||
|
||||
class TestMasterAgentNode:
    """Tests of ``master_agent_node`` with the LLM class replaced by a mock."""

    def test_master_agent_returns_draft(self):
        """The mocked LLM reply becomes the draft and the iteration counter is 1."""
        from agents.master_agent import master_agent_node

        fake_reply = MagicMock(content="This is a generated draft about AI.")

        with patch("agents.master_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            initial = create_initial_state("AI basics", "run-master-1")
            node_output = master_agent_node(initial)

        assert node_output["current_draft"] == "This is a generated draft about AI."
        assert node_output["active_node"] == "master_agent"
        assert node_output["iteration_count"] == 1

    def test_master_agent_increments_iteration_count(self):
        """Starting from iteration 3, the node reports iteration 4."""
        from agents.master_agent import master_agent_node

        fake_reply = MagicMock(content="Draft")

        with patch("agents.master_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            initial = create_initial_state("topic", "run-master-2")
            initial["iteration_count"] = 3
            node_output = master_agent_node(initial)

        assert node_output["iteration_count"] == 4
|
||||
|
||||
|
||||
class TestWriterAgentNode:
    """Tests of ``writer_agent_node`` with the LLM class replaced by a mock."""

    def test_writer_returns_polished_draft(self):
        """The mocked LLM reply replaces the draft and the route becomes ``"done"``."""
        from agents.writer_agent import writer_agent_node

        fake_reply = MagicMock(content="Polished and professional document.")

        with patch("agents.writer_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            initial = create_initial_state("Machine Learning", "run-writer-1")
            initial["current_draft"] = "Raw draft content"
            node_output = writer_agent_node(initial)

        assert node_output["current_draft"] == "Polished and professional document."
        assert node_output["active_node"] == "writer_agent"
        assert node_output["route_decision"] == "done"
|
||||
55
backend/tests/test_run_store.py
Normal file
55
backend/tests/test_run_store.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
"""Tests for the in-memory RunStore."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from api.run_store import RunStore
|
||||
|
||||
|
||||
class TestRunStore:
    """Behavioural tests for ``RunStore``; each test gets its own instance."""

    def setup_method(self):
        """Create a fresh store before every test method."""
        self.store = RunStore()

    def test_create_and_get(self):
        """A created run can be read back with its id, topic and pending status."""
        self.store.create("run-1", "Test topic")

        record = self.store.get("run-1")

        assert record is not None
        assert record["run_id"] == "run-1"
        assert record["input_topic"] == "Test topic"
        assert record["status"] == "pending"

    def test_get_nonexistent_returns_none(self):
        """Looking up an unknown id returns ``None``."""
        missing = self.store.get("nonexistent")
        assert missing is None

    def test_update_status(self):
        """An updated status is visible on the next read."""
        self.store.create("run-2", "Topic")
        self.store.update("run-2", {"status": "running"})

        record = self.store.get("run-2")
        assert record["status"] == "running"

    def test_update_nonexistent_is_noop(self):
        """Updating a non-existent run should not raise."""
        self.store.update("ghost-run", {"status": "running"})

    def test_delete(self):
        """A deleted run can no longer be fetched."""
        self.store.create("run-3", "Topic")
        self.store.delete("run-3")

        assert self.store.get("run-3") is None

    def test_delete_nonexistent_is_noop(self):
        """Deleting a non-existent run should not raise."""
        self.store.delete("ghost-run")

    def test_update_partial_fields(self):
        """Updating some fields leaves the remaining ones as they were."""
        self.store.create("run-4", "Topic")
        changes = {"status": "completed", "final_draft": "Result text"}
        self.store.update("run-4", changes)

        record = self.store.get("run-4")

        assert record["status"] == "completed"
        assert record["final_draft"] == "Result text"
        assert record["input_topic"] == "Topic"  # original field preserved

    def test_multiple_runs_independent(self):
        """Updating one run does not change another run's status."""
        self.store.create("run-a", "Topic A")
        self.store.create("run-b", "Topic B")
        self.store.update("run-a", {"status": "running"})

        untouched = self.store.get("run-b")
        assert untouched["status"] == "pending"
|
||||
44
backend/tests/test_state.py
Normal file
44
backend/tests/test_state.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
"""Tests for CouncilState structure and graph_builder helpers."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
|
||||
from services.graph_builder import create_initial_state
|
||||
|
||||
|
||||
class TestCouncilState:
    """Tests for the ``CouncilState`` shape and the module-level constants."""

    def test_initial_state_fields(self):
        """``create_initial_state`` fills every field with its starting value."""
        state = create_initial_state("Test topic", "run-001")

        expected_values = {
            "input_topic": "Test topic",
            "current_draft": "",
            "feedback_history": [],
            "route_decision": "",
            "messages": [],
            "iteration_count": 0,
            "run_id": "run-001",
            "active_node": "",
        }
        for field, expected in expected_values.items():
            assert state[field] == expected, field
        # Checked by identity rather than equality, so it is kept out of the loop.
        assert state["critic_score"] is None

    def test_approval_threshold_value(self):
        """The approval threshold is 8.0."""
        assert APPROVAL_THRESHOLD == 8.0

    def test_max_iterations_value(self):
        """The iteration cap is 5."""
        assert MAX_ITERATIONS == 5

    def test_state_is_typed_dict(self):
        """CouncilState should be instantiable as a plain dict."""
        sample: CouncilState = {
            "input_topic": "AI",
            "current_draft": "draft",
            "feedback_history": ["fb1"],
            "route_decision": "rework",
            "messages": [],
            "iteration_count": 1,
            "critic_score": 6.0,
            "run_id": "x",
            "active_node": "critic_agent",
        }
        assert sample["critic_score"] == 6.0
|
||||
Loading…
Add table
Add a link
Reference in a new issue