Implement Phase 1: LangGraph backend MVP

Sets up the full backend foundation for CouncilOS:

- CouncilState TypedDict with all required fields and LangGraph reducers
- Three agent nodes: master_agent (drafts), critic_agent (scores + routes),
  writer_agent (final polish)
- LangGraph graph with cyclic rework loop: Master → Critic → (score < 8:
  back to Master | score ≥ 8: Writer → END)
- Safety valve: MAX_ITERATIONS=5 prevents infinite loops
- FastAPI app with REST endpoints (POST /api/councils/run, GET /api/councils/run/{id})
  and WebSocket endpoint (/ws/council/{run_id}) for real-time agent status events
- In-memory RunStore for Phase 1 (PostgreSQL-backed in Phase 3)
- pytest test suite: state, routing logic, critic parser, agent nodes, API endpoints
- .env.example, .gitignore, docker-compose.yml, Dockerfile

https://claude.ai/code/session_01RfMpt3TbMjZEtK3CAyP5iQ
This commit is contained in:
Claude 2026-02-20 16:33:39 +00:00
parent 34dcfb3dcd
commit 797f02c74d
No known key found for this signature in database
24 changed files with 1472 additions and 0 deletions

View file

@ -0,0 +1,127 @@
"""
Critic Agent Node evaluates the current draft and decides whether to approve or rework.
The critic scores the draft from 010 and returns structured feedback.
If the score meets APPROVAL_THRESHOLD, route_decision is set to "approve".
Otherwise it is set to "rework" and the feedback is appended to feedback_history.
"""
import os
import re
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import HumanMessage, SystemMessage
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
_SYSTEM_PROMPT = """You are the Critic AI in a council of expert AIs.
Your job is to rigorously evaluate the quality of a draft document.
You must respond in EXACTLY this format no deviations:
SCORE: <integer 0-10>
VERDICT: <"approve" if score >= 8, otherwise "rework">
FEEDBACK:
<detailed, actionable feedback explaining what must be improved>
Scoring criteria:
- 03: Poor structure, major factual gaps, incoherent
- 46: Adequate but needs significant improvement
- 7: Good but has notable weaknesses
- 89: High quality, minor improvements possible
- 10: Exceptional, publication-ready
Be strict. Only award 8+ if the document genuinely meets high quality standards."""
def _parse_critic_response(content: str) -> tuple[float, str, str]:
"""
Parse the structured critic response.
Returns:
(score, verdict, feedback) tuple.
Falls back to ("rework", full content) on parse failure.
"""
score_match = re.search(r"SCORE:\s*(\d+(?:\.\d+)?)", content)
verdict_match = re.search(r"VERDICT:\s*(approve|rework)", content, re.IGNORECASE)
feedback_match = re.search(r"FEEDBACK:\s*(.*)", content, re.DOTALL)
score = float(score_match.group(1)) if score_match else 0.0
verdict = verdict_match.group(1).lower() if verdict_match else "rework"
feedback = feedback_match.group(1).strip() if feedback_match else content.strip()
# Clamp score to 010
score = max(0.0, min(10.0, score))
return score, verdict, feedback
def critic_agent_node(state: CouncilState) -> dict:
"""
LangGraph node function for the Critic Agent.
Reads current_draft from state, evaluates it, and returns:
- route_decision: "approve" or "rework"
- critic_score: numeric score
- feedback_history: appended with new feedback (if rework)
- active_node: "critic_agent"
Safety valve: if iteration_count >= MAX_ITERATIONS, force approval
to prevent infinite loops.
Args:
state: The current CouncilState.
Returns:
A dict with updated state fields.
"""
# Safety valve: prevent infinite loops
if state.get("iteration_count", 0) >= MAX_ITERATIONS:
return {
"route_decision": "approve",
"critic_score": APPROVAL_THRESHOLD,
"feedback_history": [
f"[Auto-approved after {MAX_ITERATIONS} iterations]"
],
"messages": [],
"active_node": "critic_agent",
}
llm = ChatAnthropic(
model="claude-3-5-sonnet-20241022",
api_key=os.environ.get("ANTHROPIC_API_KEY"),
temperature=0.2, # Low temperature for consistent evaluation
max_tokens=1024,
)
system_msg = SystemMessage(content=_SYSTEM_PROMPT)
user_msg = HumanMessage(
content=(
f"Please evaluate this draft on the topic '{state['input_topic']}':\n\n"
f"{state['current_draft']}"
)
)
response = llm.invoke([system_msg, user_msg])
score, verdict, feedback = _parse_critic_response(response.content)
# Override verdict based on threshold to ensure consistency
if score >= APPROVAL_THRESHOLD:
route_decision = "approve"
else:
route_decision = "rework"
result: dict = {
"critic_score": score,
"route_decision": route_decision,
"messages": [system_msg, user_msg, response],
"active_node": "critic_agent",
}
# Only append feedback if we're sending back for rework
if route_decision == "rework":
result["feedback_history"] = [
f"Score: {score}/10\n{feedback}"
]
return result