Merge pull request #4 from Kenearos/claude/start-implementation-2XWxH
Implement Phase 1: LangGraph backend MVP
This commit is contained in:
commit
06aec41a8a
24 changed files with 1472 additions and 0 deletions
45
.env.example
Normal file
45
.env.example
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
# CouncilOS — Environment Variables Template
|
||||||
|
# Copy this file to .env and fill in your actual values.
|
||||||
|
# NEVER commit the .env file to version control.
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# LLM API Keys
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Anthropic Claude API key (required)
|
||||||
|
ANTHROPIC_API_KEY=
|
||||||
|
|
||||||
|
# OpenAI GPT-4o API key (optional for Phase 1, required from Phase 3)
|
||||||
|
OPENAI_API_KEY=
|
||||||
|
|
||||||
|
# Tavily Search API key (required for Phase 4 web-search tool)
|
||||||
|
TAVILY_API_KEY=
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Database
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# PostgreSQL connection string (required from Phase 2)
|
||||||
|
DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/councilOS
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Vector Database (ChromaDB)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Local directory to persist ChromaDB embeddings (required for Phase 4 PDF tool)
|
||||||
|
CHROMA_PERSIST_DIR=./chroma_db
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Application Settings
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# FastAPI server host and port
|
||||||
|
HOST=0.0.0.0
|
||||||
|
PORT=8000
|
||||||
|
|
||||||
|
# Log level: DEBUG | INFO | WARNING | ERROR
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# CORS: comma-separated list of allowed frontend origins in production
|
||||||
|
# Example: https://my-app.vercel.app,https://www.my-domain.com
|
||||||
|
CORS_ORIGINS=http://localhost:3000
|
||||||
72
.gitignore
vendored
Normal file
72
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
# =============================================================================
|
||||||
|
# Environment & Secrets — NEVER commit these
|
||||||
|
# =============================================================================
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
*.pem
|
||||||
|
*.key
|
||||||
|
secrets/
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Python
|
||||||
|
# =============================================================================
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
ENV/
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
.eggs/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
.mypy_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
htmlcov/
|
||||||
|
.coverage
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Node / Frontend
|
||||||
|
# =============================================================================
|
||||||
|
node_modules/
|
||||||
|
.next/
|
||||||
|
out/
|
||||||
|
.nuxt/
|
||||||
|
dist/
|
||||||
|
.cache/
|
||||||
|
*.log
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
.pnpm-debug.log*
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Database & Vector Store
|
||||||
|
# =============================================================================
|
||||||
|
chroma_db/
|
||||||
|
*.sqlite3
|
||||||
|
*.db
|
||||||
|
postgres_data/
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# IDE & OS
|
||||||
|
# =============================================================================
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Docker
|
||||||
|
# =============================================================================
|
||||||
|
.docker/
|
||||||
19
backend/Dockerfile
Normal file
19
backend/Dockerfile
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
build-essential \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
7
backend/agents/__init__.py
Normal file
7
backend/agents/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
"""Agent node functions for CouncilOS."""
|
||||||
|
|
||||||
|
from .master_agent import master_agent_node
|
||||||
|
from .critic_agent import critic_agent_node
|
||||||
|
from .writer_agent import writer_agent_node
|
||||||
|
|
||||||
|
__all__ = ["master_agent_node", "critic_agent_node", "writer_agent_node"]
|
||||||
127
backend/agents/critic_agent.py
Normal file
127
backend/agents/critic_agent.py
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
"""
|
||||||
|
Critic Agent Node — evaluates the current draft and decides whether to approve or rework.
|
||||||
|
|
||||||
|
The critic scores the draft from 0–10 and returns structured feedback.
|
||||||
|
If the score meets APPROVAL_THRESHOLD, route_decision is set to "approve".
|
||||||
|
Otherwise it is set to "rework" and the feedback is appended to feedback_history.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from langchain_anthropic import ChatAnthropic
|
||||||
|
from langchain_core.messages import HumanMessage, SystemMessage
|
||||||
|
|
||||||
|
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
|
||||||
|
|
||||||
|
|
||||||
|
_SYSTEM_PROMPT = """You are the Critic AI in a council of expert AIs.
|
||||||
|
Your job is to rigorously evaluate the quality of a draft document.
|
||||||
|
|
||||||
|
You must respond in EXACTLY this format — no deviations:
|
||||||
|
|
||||||
|
SCORE: <integer 0-10>
|
||||||
|
VERDICT: <"approve" if score >= 8, otherwise "rework">
|
||||||
|
FEEDBACK:
|
||||||
|
<detailed, actionable feedback explaining what must be improved>
|
||||||
|
|
||||||
|
Scoring criteria:
|
||||||
|
- 0–3: Poor structure, major factual gaps, incoherent
|
||||||
|
- 4–6: Adequate but needs significant improvement
|
||||||
|
- 7: Good but has notable weaknesses
|
||||||
|
- 8–9: High quality, minor improvements possible
|
||||||
|
- 10: Exceptional, publication-ready
|
||||||
|
|
||||||
|
Be strict. Only award 8+ if the document genuinely meets high quality standards."""
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_critic_response(content: str) -> tuple[float, str, str]:
|
||||||
|
"""
|
||||||
|
Parse the structured critic response.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(score, verdict, feedback) tuple.
|
||||||
|
Falls back to ("rework", full content) on parse failure.
|
||||||
|
"""
|
||||||
|
score_match = re.search(r"SCORE:\s*(\d+(?:\.\d+)?)", content)
|
||||||
|
verdict_match = re.search(r"VERDICT:\s*(approve|rework)", content, re.IGNORECASE)
|
||||||
|
feedback_match = re.search(r"FEEDBACK:\s*(.*)", content, re.DOTALL)
|
||||||
|
|
||||||
|
score = float(score_match.group(1)) if score_match else 0.0
|
||||||
|
verdict = verdict_match.group(1).lower() if verdict_match else "rework"
|
||||||
|
feedback = feedback_match.group(1).strip() if feedback_match else content.strip()
|
||||||
|
|
||||||
|
# Clamp score to 0–10
|
||||||
|
score = max(0.0, min(10.0, score))
|
||||||
|
|
||||||
|
return score, verdict, feedback
|
||||||
|
|
||||||
|
|
||||||
|
def critic_agent_node(state: CouncilState) -> dict:
    """
    Critic node for the LangGraph council: score the draft and pick a route.

    Behaviour:
    - When iteration_count has reached MAX_ITERATIONS the LLM is not called;
      the draft is approved with critic_score set to APPROVAL_THRESHOLD and a
      single "[Auto-approved ...]" entry is returned for feedback_history.
      This is the safety valve against endless rework loops.
    - Otherwise the draft is sent to the critic LLM, the reply is parsed, and
      route_decision is derived from the parsed score alone (the verdict text
      in the reply is not used).

    Args:
        state: The current CouncilState; input_topic and current_draft are
            read when the LLM is called.

    Returns:
        A dict of state updates: critic_score, route_decision, messages and
        active_node, plus feedback_history when the route is "rework".
    """
    iterations_so_far = state.get("iteration_count", 0)
    if iterations_so_far >= MAX_ITERATIONS:
        auto_note = f"[Auto-approved after {MAX_ITERATIONS} iterations]"
        return dict(
            route_decision="approve",
            critic_score=APPROVAL_THRESHOLD,
            feedback_history=[auto_note],
            messages=[],
            active_node="critic_agent",
        )

    critic_llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.2,  # Low temperature for consistent evaluation
        max_tokens=1024,
    )

    topic = state["input_topic"]
    draft = state["current_draft"]
    conversation = [
        SystemMessage(content=_SYSTEM_PROMPT),
        HumanMessage(
            content=f"Please evaluate this draft on the topic '{topic}':\n\n{draft}"
        ),
    ]

    reply = critic_llm.invoke(conversation)
    score, _verdict, feedback = _parse_critic_response(reply.content)

    # The numeric score is authoritative; the textual verdict is ignored so
    # the routing always agrees with APPROVAL_THRESHOLD.
    needs_rework = score < APPROVAL_THRESHOLD

    updates: dict = {
        "critic_score": score,
        "route_decision": "rework" if needs_rework else "approve",
        "messages": [*conversation, reply],
        "active_node": "critic_agent",
    }

    # Feedback is only recorded when the draft goes back for another round.
    if needs_rework:
        updates["feedback_history"] = [f"Score: {score}/10\n{feedback}"]

    return updates
|
||||||
75
backend/agents/master_agent.py
Normal file
75
backend/agents/master_agent.py
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
"""
|
||||||
|
Master Agent Node — creates and refines drafts based on critic feedback.
|
||||||
|
|
||||||
|
This agent is the primary content creator. On the first iteration it produces
|
||||||
|
an initial draft. On subsequent iterations it incorporates all feedback from
|
||||||
|
the feedback_history to improve the draft.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from langchain_anthropic import ChatAnthropic
|
||||||
|
from langchain_core.messages import HumanMessage, SystemMessage
|
||||||
|
|
||||||
|
from state import CouncilState
|
||||||
|
|
||||||
|
|
||||||
|
_SYSTEM_PROMPT = """You are the Master AI in a council of expert AIs.
|
||||||
|
Your job is to write high-quality content on the given topic.
|
||||||
|
When you receive critic feedback, carefully incorporate ALL feedback points
|
||||||
|
and produce an improved draft. Be thorough and precise."""
|
||||||
|
|
||||||
|
|
||||||
|
def _build_master_prompt(state: CouncilState) -> str:
    """
    Compose the human-message text sent to the master agent.

    With an empty feedback_history the prompt simply asks for a first draft
    on input_topic. Otherwise it restates the topic, includes current_draft,
    lists every feedback entry as a numbered round and asks for a revision.

    Args:
        state: The current CouncilState.

    Returns:
        The prompt text.
    """
    rounds = state["feedback_history"]
    if not rounds:
        opening = (
            "Please write a comprehensive, well-structured document "
            "on the following topic:\n\n"
        )
        return opening + f"{state['input_topic']}"

    numbered = []
    for index, note in enumerate(rounds):
        numbered.append(f"Feedback round {index + 1}:\n{note}")
    feedback_block = "\n\n---\n".join(numbered)

    parts = [
        f"Topic: {state['input_topic']}\n\n",
        f"Your current draft:\n{state['current_draft']}\n\n",
        f"The critic has provided the following feedback across {len(rounds)} round(s):\n\n",
        f"{feedback_block}\n\n",
        "Please produce an improved draft that fully addresses ALL feedback points above.",
    ]
    return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def master_agent_node(state: CouncilState) -> dict:
    """
    Master node for the LangGraph council: write or revise the draft.

    Builds the prompt from the state via _build_master_prompt, calls the LLM
    once, and stores the reply text as the new current_draft.

    Args:
        state: The current CouncilState.

    Returns:
        A dict of state updates: current_draft, messages, active_node and
        iteration_count (the previous count, defaulting to 0, plus one).
    """
    author_llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.7,
        max_tokens=2048,
    )

    exchange = [
        SystemMessage(content=_SYSTEM_PROMPT),
        HumanMessage(content=_build_master_prompt(state)),
    ]
    reply = author_llm.invoke(exchange)

    completed_rounds = state.get("iteration_count", 0)
    return {
        "current_draft": reply.content,
        "messages": [*exchange, reply],
        "active_node": "master_agent",
        "iteration_count": completed_rounds + 1,
    }
|
||||||
63
backend/agents/writer_agent.py
Normal file
63
backend/agents/writer_agent.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
"""
|
||||||
|
Writer Agent Node — final polishing of an approved draft.
|
||||||
|
|
||||||
|
This agent receives a critic-approved draft and produces the final,
|
||||||
|
publication-ready version with polished formatting and language.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from langchain_anthropic import ChatAnthropic
|
||||||
|
from langchain_core.messages import HumanMessage, SystemMessage
|
||||||
|
|
||||||
|
from state import CouncilState
|
||||||
|
|
||||||
|
|
||||||
|
_SYSTEM_PROMPT = """You are the Writer AI in a council of expert AIs.
|
||||||
|
You receive a draft that has already been approved for quality by the Critic AI.
|
||||||
|
Your job is to give it a final professional polish:
|
||||||
|
|
||||||
|
- Improve sentence flow and readability
|
||||||
|
- Ensure consistent formatting (headers, bullet points, paragraphs)
|
||||||
|
- Fix any grammatical or stylistic issues
|
||||||
|
- Do NOT change the factual content or overall structure
|
||||||
|
- Preserve all key information from the draft
|
||||||
|
|
||||||
|
Return only the polished document — no meta-commentary."""
|
||||||
|
|
||||||
|
|
||||||
|
def writer_agent_node(state: CouncilState) -> dict:
    """
    Writer node for the LangGraph council: polish the approved draft.

    Sends current_draft (together with input_topic for context) to the LLM
    and replaces the draft with the reply.

    Args:
        state: The current CouncilState.

    Returns:
        A dict of state updates: current_draft (the polished text), messages,
        active_node, and route_decision set to "done".
    """
    polisher = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.4,
        max_tokens=4096,
    )

    request_text = (
        "Please give a final professional polish to this approved document "
        f"on the topic '{state['input_topic']}':\n\n"
        f"{state['current_draft']}"
    )
    exchange = [
        SystemMessage(content=_SYSTEM_PROMPT),
        HumanMessage(content=request_text),
    ]

    polished = polisher.invoke(exchange)

    return {
        "current_draft": polished.content,
        "messages": [*exchange, polished],
        "active_node": "writer_agent",
        "route_decision": "done",
    }
|
||||||
1
backend/api/__init__.py
Normal file
1
backend/api/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
"""API route definitions for CouncilOS."""
|
||||||
135
backend/api/routes.py
Normal file
135
backend/api/routes.py
Normal file
|
|
@ -0,0 +1,135 @@
|
||||||
|
"""
|
||||||
|
REST API routes for CouncilOS.
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
POST /api/councils/run — Start a new council run (async, returns run_id)
|
||||||
|
GET /api/councils/run/{run_id} — Poll the status/result of a run
|
||||||
|
GET /api/health — Health check
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from typing import Optional
|
||||||
|
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from services.graph_builder import run_council_async
|
||||||
|
from api.run_store import run_store
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Request / Response Models
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class CouncilRunRequest(BaseModel):
    """Request body for starting a council run."""

    # Required, 1 to 10,000 characters.
    input_topic: str = Field(
        ...,
        description="The user's prompt or document content for the council to work on.",
        examples=["Erkläre die wichtigsten Konzepte des maschinellen Lernens für Einsteiger."],
        min_length=1,
        max_length=10_000,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class CouncilRunResponse(BaseModel):
    """Acknowledgement returned when a council run has been accepted."""

    # Identifier to use when polling or subscribing for this run.
    run_id: str
    # One of: "pending" | "running" | "completed" | "failed"
    status: str
    # Human-readable note for the client.
    message: str
|
||||||
|
|
||||||
|
|
||||||
|
class CouncilResultResponse(BaseModel):
    """Status snapshot of a council run; result fields stay None until set."""

    run_id: str
    status: str
    # Populated by the background task once the run has completed.
    final_draft: Optional[str] = None
    critic_score: Optional[float] = None
    iteration_count: Optional[int] = None
    # Populated by the background task when the run has failed.
    error: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/health")
async def health_check():
    """Liveness probe: returns a fixed status payload."""
    payload = {"status": "ok", "service": "CouncilOS API"}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/councils/run", response_model=CouncilRunResponse, status_code=202)
async def start_council_run(
    request: CouncilRunRequest,
    background_tasks: BackgroundTasks,
):
    """
    Accept a new council run and schedule it in the background.

    A fresh UUID is generated, the run is registered as "pending" in the
    in-memory store, and the graph execution is queued as a background task.
    Clients can poll GET /api/councils/run/{run_id} for the result or connect
    to the WebSocket at /ws/council/{run_id} for real-time updates.

    Returns:
        A CouncilRunResponse carrying the new run_id and status "pending".
    """
    topic = request.input_topic
    new_run_id = str(uuid.uuid4())

    # Register first so the run is already visible when the task starts.
    run_store.create(new_run_id, topic)
    background_tasks.add_task(_execute_run, new_run_id, topic)

    return CouncilRunResponse(
        run_id=new_run_id,
        status="pending",
        message=f"Council run started. Connect to /ws/council/{new_run_id} for live updates.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/councils/run/{run_id}", response_model=CouncilResultResponse)
async def get_council_result(run_id: str):
    """
    Return the current status, and any result so far, of a council run.

    Raises:
        HTTPException: 404 when no run with this id is in the store.
    """
    record = run_store.get(run_id)
    if record is None:
        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found.")

    # These fields are optional in the store record; absent keys become None.
    optional_fields = ("final_draft", "critic_score", "iteration_count", "error")
    extras = {field: record.get(field) for field in optional_fields}

    return CouncilResultResponse(run_id=run_id, status=record["status"], **extras)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Internal helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _execute_run(run_id: str, input_topic: str) -> None:
    """
    Background task that runs the LangGraph council and updates the run store.

    The run is marked "running", the council graph is awaited, and the store
    is then updated to "completed" with the final draft, score and iteration
    count. Any exception marks the run "failed"; it is logged with its
    traceback and never re-raised, because nothing awaits this task's result.

    Args:
        run_id: Identifier of the run, as registered in the run store.
        input_topic: The prompt the council works on.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    run_store.update(run_id, {"status": "running"})
    try:
        final_state = await run_council_async(
            input_topic=input_topic,
            run_id=run_id,
            # Mirror node progress into the store for the polling clients.
            on_node_event=lambda nid, node: run_store.update(
                nid, {"active_node": node}
            ),
        )
        run_store.update(
            run_id,
            {
                "status": "completed",
                "final_draft": final_state.get("current_draft"),
                "critic_score": final_state.get("critic_score"),
                "iteration_count": final_state.get("iteration_count"),
                "active_node": "done",
            },
        )
    except Exception as exc:  # noqa: BLE001
        # Without this log the traceback is lost: only str(exc) reaches the
        # store, and that can be empty for exceptions raised without a message.
        logging.getLogger(__name__).exception("Council run %s failed", run_id)
        run_store.update(
            run_id,
            {"status": "failed", "error": str(exc) or repr(exc)},
        )
|
||||||
47
backend/api/run_store.py
Normal file
47
backend/api/run_store.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
"""
|
||||||
|
In-memory run store for Phase 1.
|
||||||
|
|
||||||
|
Tracks the status and results of council runs by run_id. This is intentionally
|
||||||
|
simple for Phase 1. Phase 3+ will replace this with a PostgreSQL-backed store.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
import threading
|
||||||
|
|
||||||
|
|
||||||
|
class RunStore:
    """
    Thread-safe in-memory store for council run state.

    Each run is a flat dict keyed by run_id. Every access to the underlying
    mapping happens under a single lock, and readers receive copies so no
    record is ever observed or modified outside that lock.
    """

    def __init__(self) -> None:
        self._store: Dict[str, Dict[str, Any]] = {}
        self._lock = threading.Lock()

    def create(self, run_id: str, input_topic: str) -> None:
        """Register a new run in the "pending" state, replacing any existing entry."""
        with self._lock:
            self._store[run_id] = {
                "run_id": run_id,
                "input_topic": input_topic,
                "status": "pending",
                "final_draft": None,
                "critic_score": None,
                "iteration_count": None,
                "active_node": None,
                "error": None,
            }

    def get(self, run_id: str) -> Optional[Dict[str, Any]]:
        """
        Return a snapshot of the run record, or None if the run is unknown.

        The snapshot is a shallow copy: handing out the stored dict itself
        would let callers read it while update() mutates it, or mutate it
        themselves, outside the lock.
        """
        with self._lock:
            record = self._store.get(run_id)
            return None if record is None else dict(record)

    def update(self, run_id: str, updates: Dict[str, Any]) -> None:
        """Merge updates into an existing run; unknown run_ids are ignored."""
        with self._lock:
            if run_id in self._store:
                self._store[run_id].update(updates)

    def delete(self, run_id: str) -> None:
        """Remove a run; does nothing if the run is unknown."""
        with self._lock:
            self._store.pop(run_id, None)
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance shared across the application
|
||||||
|
run_store = RunStore()
|
||||||
128
backend/api/websocket.py
Normal file
128
backend/api/websocket.py
Normal file
|
|
@ -0,0 +1,128 @@
|
||||||
|
"""
|
||||||
|
WebSocket endpoint for real-time agent status updates.
|
||||||
|
|
||||||
|
Clients connect to /ws/council/{run_id} and receive JSON events whenever
|
||||||
|
an agent node becomes active. This powers the live diagram pulsing in the frontend.
|
||||||
|
|
||||||
|
Event format:
|
||||||
|
{"event": "node_start", "run_id": "...", "node": "master_agent", "iteration": 2}
|
||||||
|
{"event": "node_complete", "run_id": "...", "node": "critic_agent", "score": 6.5}
|
||||||
|
{"event": "run_complete", "run_id": "...", "final_draft": "..."}
|
||||||
|
{"event": "run_failed", "run_id": "...", "error": "..."}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
||||||
|
|
||||||
|
from api.run_store import run_store
|
||||||
|
|
||||||
|
|
||||||
|
ws_router = APIRouter()
|
||||||
|
|
||||||
|
# Active WebSocket connections keyed by run_id
|
||||||
|
_connections: dict[str, list[WebSocket]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
async def broadcast_event(run_id: str, event: dict) -> None:
    """
    Send an event to all WebSocket clients subscribed to a run_id.

    Clients whose send fails are treated as disconnected and removed from the
    subscription list. Nothing happens when the run has no subscribers.

    Args:
        run_id: The council run identifier.
        event: The event dict to serialize and broadcast.

    Raises:
        TypeError: If there are subscribers and the event is not
            JSON-serializable. Serialization happens once, up front, so a bad
            event is reported instead of every client being dropped.
    """
    clients = _connections.get(run_id)
    if not clients:
        return

    payload = json.dumps(event)
    disconnected = []

    # Iterate over a snapshot: clients may connect or disconnect while this
    # coroutine is suspended in send_text, which would mutate the live list.
    for ws in list(clients):
        try:
            await ws.send_text(payload)
        except Exception:  # noqa: BLE001
            disconnected.append(ws)

    # Clean up dead connections
    for ws in disconnected:
        try:
            clients.remove(ws)
        except ValueError:
            # Already removed by the endpoint's own cleanup in the meantime.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
@ws_router.websocket("/ws/council/{run_id}")
async def council_websocket(websocket: WebSocket, run_id: str):
    """
    WebSocket endpoint for live council run updates.

    On connect: sends the current run status immediately, or an "error"
    event if the run is unknown.
    While running: polls the run store every 500 ms and pushes a
    "node_active" event whenever the active node changes.
    On complete/failed: sends a final "run_complete" / "run_failed" event and
    returns, which ends the connection.

    The socket is always unregistered on exit, and the per-run subscriber
    list is dropped once it is empty.
    """
    await websocket.accept()

    # Register this client
    _connections.setdefault(run_id, []).append(websocket)

    try:
        # Send current state immediately on connect
        run = run_store.get(run_id)
        if run is None:
            await websocket.send_text(
                json.dumps({"event": "error", "message": f"Run '{run_id}' not found."})
            )
            return

        await websocket.send_text(
            json.dumps({"event": "connected", "run_id": run_id, "status": run["status"]})
        )

        # Poll for status changes and push updates
        last_node = None
        while True:
            run = run_store.get(run_id)
            if run is None:
                # The run was deleted from the store while we were polling.
                break

            current_node = run.get("active_node")
            if current_node and current_node != last_node:
                await websocket.send_text(
                    json.dumps({
                        "event": "node_active",
                        "run_id": run_id,
                        "node": current_node,
                        "iteration": run.get("iteration_count"),
                    })
                )
                last_node = current_node

            if run["status"] == "completed":
                await websocket.send_text(
                    json.dumps({
                        "event": "run_complete",
                        "run_id": run_id,
                        "final_draft": run.get("final_draft"),
                        "critic_score": run.get("critic_score"),
                        "iteration_count": run.get("iteration_count"),
                    })
                )
                break

            if run["status"] == "failed":
                await websocket.send_text(
                    json.dumps({
                        "event": "run_failed",
                        "run_id": run_id,
                        "error": run.get("error"),
                    })
                )
                break

            await asyncio.sleep(0.5)  # 500ms polling interval

    except WebSocketDisconnect:
        pass
    finally:
        clients = _connections.get(run_id)
        if clients is not None:
            try:
                clients.remove(websocket)
            except ValueError:
                pass
            # Drop the empty list; otherwise _connections keeps one entry for
            # every run_id that was ever connected to.
            if not clients:
                _connections.pop(run_id, None)
|
||||||
59
backend/main.py
Normal file
59
backend/main.py
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
"""
|
||||||
|
CouncilOS — FastAPI application entrypoint.
|
||||||
|
|
||||||
|
Start the server:
|
||||||
|
uvicorn main:app --reload --port 8000
|
||||||
|
|
||||||
|
API Overview:
|
||||||
|
POST /api/councils/run — Start a council run
|
||||||
|
GET /api/councils/run/{run_id} — Poll run status/result
|
||||||
|
GET /api/health — Health check
|
||||||
|
WS /ws/council/{run_id} — Real-time agent status events
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from api.routes import router
from api.websocket import ws_router
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan hook.

    Prints a message before the application starts serving and another one
    after it has stopped; no resources are acquired or released.
    """
    startup_notice = "CouncilOS API starting up..."
    shutdown_notice = "CouncilOS API shutting down..."

    print(startup_notice)
    yield
    print(shutdown_notice)
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(
    title="CouncilOS API",
    description=(
        "Backend for the CouncilOS multi-agent AI pipeline platform. "
        "Orchestrates LangGraph council runs and streams real-time agent "
        "status via WebSockets."
    ),
    version="0.1.0",
    lifespan=lifespan,
)

# CORS — origins come from the CORS_ORIGINS environment variable (a
# comma-separated list, as documented in .env.example). When the variable is
# unset or empty, all origins are allowed, which is the development default;
# set it in production to restrict access to the real frontend origins.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ORIGINS", "").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins or ["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount REST routes under /api prefix
app.include_router(router, prefix="/api")

# Mount WebSocket routes (no prefix — path is /ws/council/{run_id})
app.include_router(ws_router)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
|
||||||
6
backend/pytest.ini
Normal file
6
backend/pytest.ini
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
[pytest]
|
||||||
|
testpaths = tests
|
||||||
|
asyncio_mode = auto
|
||||||
|
python_files = test_*.py
|
||||||
|
python_classes = Test*
|
||||||
|
python_functions = test_*
|
||||||
38
backend/requirements.txt
Normal file
38
backend/requirements.txt
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
# Core AI orchestration
|
||||||
|
langgraph>=0.2.0
|
||||||
|
langchain>=0.2.0
|
||||||
|
langchain-anthropic>=0.1.0
|
||||||
|
langchain-openai>=0.1.0
|
||||||
|
|
||||||
|
# Backend API
|
||||||
|
fastapi>=0.111.0
|
||||||
|
uvicorn[standard]>=0.30.0
|
||||||
|
websockets>=12.0
|
||||||
|
python-multipart>=0.0.9
|
||||||
|
|
||||||
|
# Database
|
||||||
|
asyncpg>=0.29.0
|
||||||
|
sqlalchemy[asyncio]>=2.0.0
|
||||||
|
alembic>=1.13.0
|
||||||
|
|
||||||
|
# Vector DB (PDF tool)
|
||||||
|
chromadb>=0.5.0
|
||||||
|
pypdf>=4.0.0
|
||||||
|
|
||||||
|
# Search tool
|
||||||
|
tavily-python>=0.3.0
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
pydantic>=2.0.0
|
||||||
|
pydantic-settings>=2.0.0
|
||||||
|
|
||||||
|
# Linting and formatting
|
||||||
|
ruff>=0.4.0
|
||||||
|
black>=24.0.0
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
pytest>=8.0.0
|
||||||
|
pytest-asyncio>=0.23.0
|
||||||
|
pytest-mock>=3.14.0
|
||||||
|
httpx>=0.27.0
|
||||||
1
backend/services/__init__.py
Normal file
1
backend/services/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
"""Service modules for CouncilOS backend."""
|
||||||
131
backend/services/graph_builder.py
Normal file
131
backend/services/graph_builder.py
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
"""
|
||||||
|
Graph Builder — constructs the LangGraph execution graph for council runs.
|
||||||
|
|
||||||
|
Phase 1: Hard-coded test graph:
|
||||||
|
User Input → Master Agent → Critic Agent → (score < 8: back to Master | score ≥ 8: Writer Agent)
|
||||||
|
|
||||||
|
Phase 3 (future): This module will be extended to build graphs dynamically
|
||||||
|
from JSON blueprints stored in PostgreSQL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from typing import Any, Callable, Optional
|
||||||
|
from langgraph.graph import StateGraph, END
|
||||||
|
|
||||||
|
from state import CouncilState
|
||||||
|
from agents import master_agent_node, critic_agent_node, writer_agent_node
|
||||||
|
|
||||||
|
|
||||||
|
def route_after_critic(state: CouncilState) -> str:
    """
    Pick the node that runs after the critic has reviewed the draft.

    Reads ``route_decision`` from the state (treated as "rework" when the key
    is absent). Only the exact value "approve" moves the run on to the writer;
    every other value sends the draft back to the master agent.

    Returns:
        "writer_agent" when the decision is "approve", otherwise "master_agent".
    """
    approved = state.get("route_decision", "rework") == "approve"
    return "writer_agent" if approved else "master_agent"
|
||||||
|
|
||||||
|
|
||||||
|
def build_council_graph(
    on_node_start: Optional[Callable[[str, str], Any]] = None,
) -> StateGraph:
    """
    Assemble and compile the hard-coded Phase 1 council graph.

    Topology:
        master_agent → critic_agent → route_after_critic →
            master_agent (rework loop) | writer_agent → END

    Args:
        on_node_start: Optional callback with the signature
            (run_id: str, node_name: str) -> Any, intended for emitting
            WebSocket events when a node begins executing.
            NOTE(review): this builder accepts the argument but neither calls
            nor stores it, so no events are emitted from here yet.

    Returns:
        The object produced by ``StateGraph.compile()``, ready for invocation.
    """
    workflow = StateGraph(CouncilState)

    # Register each agent under the node name the edges below refer to.
    agent_nodes = (
        ("master_agent", master_agent_node),
        ("critic_agent", critic_agent_node),
        ("writer_agent", writer_agent_node),
    )
    for node_name, node_fn in agent_nodes:
        workflow.add_node(node_name, node_fn)

    # Every run starts with the master agent, whose draft always goes to the critic.
    workflow.set_entry_point("master_agent")
    workflow.add_edge("master_agent", "critic_agent")

    # route_after_critic returns a node name; the mapping is the identity.
    critic_targets = {name: name for name in ("master_agent", "writer_agent")}
    workflow.add_conditional_edges("critic_agent", route_after_critic, critic_targets)

    # The writer is the last node before the graph terminates.
    workflow.add_edge("writer_agent", END)

    return workflow.compile()
|
||||||
|
|
||||||
|
|
||||||
|
def create_initial_state(input_topic: str, run_id: str) -> CouncilState:
    """
    Build the starting CouncilState for a brand-new council run.

    Only the topic and run id come from the caller; every other field starts
    empty (blank strings, empty lists, a zero iteration counter, no score).

    Args:
        input_topic: The user's prompt or document content.
        run_id: Unique identifier for this run (used in WebSocket events).

    Returns:
        A freshly initialised CouncilState dict.
    """
    starting_fields = {
        "input_topic": input_topic,
        "current_draft": "",
        "feedback_history": [],
        "route_decision": "",
        "messages": [],
        "iteration_count": 0,
        "critic_score": None,
        "run_id": run_id,
        "active_node": "",
    }
    return CouncilState(**starting_fields)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_council_async(
    input_topic: str,
    run_id: str,
    on_node_event: Optional[Callable[[str, str], Any]] = None,
) -> CouncilState:
    """
    Execute a full council run without blocking the event loop.

    Builds the Phase 1 graph, creates a fresh initial state and runs the
    graph's synchronous ``invoke`` in the loop's default executor.

    Args:
        input_topic: The user's prompt.
        run_id: Unique identifier for this run.
        on_node_event: Optional callback for WebSocket node events. It is
            forwarded to ``build_council_graph`` as ``on_node_start``.

    Returns:
        The final state returned by the graph's ``invoke`` call.
    """
    graph = build_council_graph(on_node_start=on_node_event)
    initial_state = create_initial_state(input_topic, run_id)

    # graph.invoke is synchronous, so hand it to the default thread pool to
    # keep the FastAPI event loop responsive. get_running_loop() is the
    # supported way to obtain the loop from inside a coroutine;
    # get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    final_state = await loop.run_in_executor(None, graph.invoke, initial_state)

    return final_state
|
||||||
47
backend/state.py
Normal file
47
backend/state.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
"""
|
||||||
|
CouncilState — the central data structure passed between all agents in LangGraph.
|
||||||
|
|
||||||
|
All agents must read from and write to this TypedDict. Agents must not store
|
||||||
|
state internally; everything passes through CouncilState.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Annotated, List, Optional
|
||||||
|
import operator
|
||||||
|
from typing_extensions import TypedDict
|
||||||
|
|
||||||
|
|
||||||
|
class CouncilState(TypedDict):
    """
    The global state shared across all agents in a council run.

    Fields:
        input_topic: The user's original prompt or uploaded PDF content.
        current_draft: The document currently being worked on.
        feedback_history: All critic feedback accumulated across loop iterations.
                          Declared with an operator.add reducer, like
                          ``messages`` — agents append here, never overwrite.
        route_decision: Routing signal used by conditional edges.
                        Values: "rework" | "approve" | custom strings.
        messages: LLM message history (system prompts + responses).
                  Uses operator.add reducer so messages accumulate.
        iteration_count: Loop counter for the draft/critique cycle.
                         NOTE(review): the routing tests expect the master
                         agent to increment this on every pass, including the
                         first draft — confirm against the agent code.
        critic_score: The numeric score (0–10) assigned by the critic agent;
                      None until a score has been set.
        run_id: Unique identifier for this council run (for WebSocket events).
        active_node: Name of the currently executing agent node (for UI updates).
    """

    input_topic: str
    current_draft: str
    # Reducer: list concatenation via operator.add.
    feedback_history: Annotated[List[str], operator.add]
    route_decision: str
    # Reducer: list concatenation via operator.add; element type is left open.
    messages: Annotated[list, operator.add]
    iteration_count: int
    critic_score: Optional[float]
    run_id: str
    active_node: str
|
||||||
|
|
||||||
|
|
||||||
|
# Approval threshold: critic score must reach this value to exit the loop
|
||||||
|
APPROVAL_THRESHOLD = 8.0
|
||||||
|
|
||||||
|
# Safety limit: maximum number of rework iterations before forcing approval
|
||||||
|
MAX_ITERATIONS = 5
|
||||||
1
backend/tests/__init__.py
Normal file
1
backend/tests/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
"""Pytest test suite for CouncilOS backend."""
|
||||||
99
backend/tests/test_api.py
Normal file
99
backend/tests/test_api.py
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
"""
|
||||||
|
Integration tests for the FastAPI REST endpoints.
|
||||||
|
|
||||||
|
Uses FastAPI's synchronous TestClient — no real LLM calls.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from main import app
|
||||||
|
from api.run_store import run_store
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def clean_run_store():
    """Empty the shared run store both before and after every test."""

    def _wipe() -> None:
        # Reaches into the store's private dict to drop all runs.
        run_store._store.clear()

    _wipe()
    yield
    _wipe()
|
||||||
|
|
||||||
|
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
|
||||||
|
class TestHealthEndpoint:
    """GET /api/health liveness probe."""

    def test_health_check_returns_ok(self):
        """The health endpoint answers 200 with a status of "ok"."""
        resp = client.get("/api/health")
        body = resp.json() if resp.status_code == 200 else None
        assert resp.status_code == 200
        assert body["status"] == "ok"
|
||||||
|
|
||||||
|
|
||||||
|
class TestStartCouncilRun:
    """POST /api/councils/run: acceptance of valid requests and input validation."""

    def test_start_run_returns_202_with_run_id(self):
        """A valid topic is accepted with 202, a pending status and a UUID run_id."""
        import uuid  # local import: only this test needs it

        # Replace the background execution so no graph or LLM work is started.
        with patch("api.routes._execute_run", new_callable=AsyncMock):
            response = client.post(
                "/api/councils/run",
                json={"input_topic": "Erkläre maschinelles Lernen"},
            )

        assert response.status_code == 202
        data = response.json()
        assert "run_id" in data
        assert data["status"] == "pending"
        assert len(data["run_id"]) == 36  # canonical hyphenated UUID length
        # Length alone would accept any 36-character string; parsing raises
        # ValueError for anything that is not actually a UUID.
        uuid.UUID(data["run_id"])

    def test_start_run_rejects_empty_topic(self):
        """An empty topic string fails request validation."""
        response = client.post("/api/councils/run", json={"input_topic": ""})
        assert response.status_code == 422  # Pydantic validation error

    def test_start_run_rejects_missing_topic(self):
        """A body without input_topic fails request validation."""
        response = client.post("/api/councils/run", json={})
        assert response.status_code == 422
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetCouncilResult:
    """GET /api/councils/run/{run_id}: lookups for each run status."""

    @staticmethod
    def _seed(run_id, topic, fields=None):
        """Create a run in the shared store and optionally patch some fields."""
        run_store.create(run_id, topic)
        if fields is not None:
            run_store.update(run_id, fields)

    def test_get_pending_run(self):
        """A freshly created run is reported as pending."""
        self._seed("test-run-id", "Test topic")
        resp = client.get("/api/councils/run/test-run-id")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["run_id"] == "test-run-id"
        assert payload["status"] == "pending"

    def test_get_completed_run(self):
        """A completed run exposes its draft, score and iteration count."""
        self._seed(
            "completed-run",
            "Topic",
            {
                "status": "completed",
                "final_draft": "Final polished document.",
                "critic_score": 9.0,
                "iteration_count": 2,
            },
        )
        resp = client.get("/api/councils/run/completed-run")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["status"] == "completed"
        assert payload["final_draft"] == "Final polished document."
        assert payload["critic_score"] == 9.0
        assert payload["iteration_count"] == 2

    def test_get_nonexistent_run_returns_404(self):
        """Asking for an unknown run id yields 404."""
        resp = client.get("/api/councils/run/does-not-exist")
        assert resp.status_code == 404

    def test_get_failed_run(self):
        """A failed run is still returned with 200 and carries its error text."""
        self._seed(
            "failed-run",
            "Topic",
            {"status": "failed", "error": "API connection timeout"},
        )
        resp = client.get("/api/councils/run/failed-run")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["status"] == "failed"
        assert "timeout" in payload["error"]
|
||||||
211
backend/tests/test_routing.py
Normal file
211
backend/tests/test_routing.py
Normal file
|
|
@ -0,0 +1,211 @@
|
||||||
|
"""
|
||||||
|
Tests for the LangGraph routing logic.
|
||||||
|
|
||||||
|
All LLM calls are mocked — no real API calls are made in these tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
|
||||||
|
from services.graph_builder import route_after_critic, create_initial_state
|
||||||
|
|
||||||
|
|
||||||
|
class TestRouteAfterCritic:
    """Checks which node the conditional edge selects for each route_decision."""

    def _make_state(self, route_decision: str, iteration_count: int = 1) -> CouncilState:
        """Return an initial state with the given decision and iteration count set."""
        prepared = create_initial_state("test topic", "test-run")
        prepared.update(
            route_decision=route_decision,
            iteration_count=iteration_count,
        )
        return prepared

    def test_approve_routes_to_writer(self):
        """"approve" hands the draft to the writer."""
        next_node = route_after_critic(self._make_state("approve"))
        assert next_node == "writer_agent"

    def test_rework_routes_to_master(self):
        """"rework" loops back to the master agent."""
        next_node = route_after_critic(self._make_state("rework"))
        assert next_node == "master_agent"

    def test_empty_decision_defaults_to_rework(self):
        """An empty decision is treated like a rework request."""
        next_node = route_after_critic(self._make_state(""))
        assert next_node == "master_agent"

    def test_unknown_decision_defaults_to_rework(self):
        """Any unrecognised decision is treated like a rework request."""
        next_node = route_after_critic(self._make_state("unknown_value"))
        assert next_node == "master_agent"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCriticAgentParsing:
    """Checks how the critic's raw text reply is split into score, verdict and feedback."""

    @staticmethod
    def _parse(raw_reply):
        """Import the parser lazily and run it on the given reply text."""
        from agents.critic_agent import _parse_critic_response

        return _parse_critic_response(raw_reply)

    def test_parse_valid_approve_response(self):
        """A well-formed approving reply is parsed field by field."""
        score, verdict, feedback = self._parse(
            "SCORE: 9\nVERDICT: approve\nFEEDBACK:\nExcellent work."
        )
        assert score == 9.0
        assert verdict == "approve"
        assert "Excellent" in feedback

    def test_parse_valid_rework_response(self):
        """A well-formed rework reply is parsed field by field."""
        score, verdict, feedback = self._parse(
            "SCORE: 5\nVERDICT: rework\nFEEDBACK:\nNeeds more detail."
        )
        assert score == 5.0
        assert verdict == "rework"
        assert "detail" in feedback

    def test_parse_score_clamped_to_0_10(self):
        """A score above the scale is capped at 10."""
        score, verdict, feedback = self._parse(
            "SCORE: 15\nVERDICT: approve\nFEEDBACK:\nToo high score."
        )
        assert score == 10.0

    def test_parse_missing_score_defaults_to_0(self):
        """An unstructured reply falls back to score 0 and a rework verdict."""
        score, verdict, feedback = self._parse("No structured response at all.")
        assert score == 0.0
        assert verdict == "rework"

    def test_threshold_boundary_exactly_8_approves(self):
        """A reply scored exactly at the approval threshold keeps its approve verdict."""
        score, verdict, _ = self._parse(
            f"SCORE: {APPROVAL_THRESHOLD}\nVERDICT: approve\nFEEDBACK:\nGood."
        )
        assert score == APPROVAL_THRESHOLD
        assert verdict == "approve"
|
||||||
|
|
||||||
|
|
||||||
|
class TestMasterAgentPromptBuilding:
    """Unit tests for the master agent's prompt construction."""

    def test_first_iteration_prompt_has_no_feedback_block(self):
        """With no feedback yet, the prompt carries the topic but no "Feedback" text."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Test topic", "run-1")
        prompt = _build_master_prompt(state)
        assert "Test topic" in prompt
        # The previous form, `"feedback" not in prompt.lower() or "Feedback" not
        # in prompt`, is logically equivalent to this single check: whenever
        # "Feedback" occurs, the lower-cased test fails as well. Only the
        # capitalised word is therefore asserted absent.
        assert "Feedback" not in prompt

    def test_rework_prompt_includes_feedback(self):
        """On a rework pass the prompt includes the current draft and the critic feedback."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Test topic", "run-1")
        state["current_draft"] = "My draft"
        state["feedback_history"] = ["Score: 5/10\nNeeds more structure."]
        prompt = _build_master_prompt(state)
        assert "My draft" in prompt
        assert "Needs more structure" in prompt

    def test_rework_prompt_includes_all_feedback_rounds(self):
        """Every accumulated feedback entry appears, along with the round count."""
        from agents.master_agent import _build_master_prompt

        state = create_initial_state("Topic", "run-2")
        state["current_draft"] = "Draft v2"
        state["feedback_history"] = ["First feedback", "Second feedback"]
        prompt = _build_master_prompt(state)
        assert "First feedback" in prompt
        assert "Second feedback" in prompt
        assert "2 round" in prompt
|
||||||
|
|
||||||
|
|
||||||
|
class TestCriticSafetyValve:
    """Tests for the MAX_ITERATIONS safety valve in the critic agent."""

    def test_safety_valve_forces_approve_at_max_iterations(self):
        """At MAX_ITERATIONS the critic approves with the threshold score, without an LLM call."""
        from agents.critic_agent import critic_agent_node

        # Patch the LLM class so a regression in the valve can never trigger a
        # real API call from the test suite, and so the "no LLM call" property
        # can be asserted explicitly.
        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            state = create_initial_state("topic", "run-safety")
            state["iteration_count"] = MAX_ITERATIONS
            state["current_draft"] = "Some draft"

            result = critic_agent_node(state)

        assert result["route_decision"] == "approve"
        assert result["critic_score"] == APPROVAL_THRESHOLD
        MockLLM.return_value.invoke.assert_not_called()

    def test_safety_valve_not_triggered_below_max(self):
        """Below MAX_ITERATIONS the real LLM call would happen — mock it."""
        from agents.critic_agent import critic_agent_node

        mock_response = MagicMock()
        mock_response.content = "SCORE: 4\nVERDICT: rework\nFEEDBACK:\nNeeds work."

        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = mock_response

            state = create_initial_state("topic", "run-below-max")
            state["iteration_count"] = MAX_ITERATIONS - 1
            state["current_draft"] = "Draft"

            result = critic_agent_node(state)

        # The mocked reply is used as-is: the valve did not override it.
        assert result["route_decision"] == "rework"
        assert result["critic_score"] == 4.0
|
||||||
|
|
||||||
|
|
||||||
|
class TestMasterAgentNode:
    """Runs master_agent_node against a stubbed LLM and inspects the state update."""

    @staticmethod
    def _run_master(state, reply_text):
        """Invoke the master node with ChatAnthropic patched to answer reply_text."""
        from agents.master_agent import master_agent_node

        fake_reply = MagicMock()
        fake_reply.content = reply_text
        with patch("agents.master_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            return master_agent_node(state)

    def test_master_agent_returns_draft(self):
        """The LLM reply becomes the draft and the first pass counts as iteration 1."""
        outcome = self._run_master(
            create_initial_state("AI basics", "run-master-1"),
            "This is a generated draft about AI.",
        )
        assert outcome["current_draft"] == "This is a generated draft about AI."
        assert outcome["active_node"] == "master_agent"
        assert outcome["iteration_count"] == 1

    def test_master_agent_increments_iteration_count(self):
        """Each pass through the master node raises the iteration count by one."""
        seeded = create_initial_state("topic", "run-master-2")
        seeded["iteration_count"] = 3
        outcome = self._run_master(seeded, "Draft")
        assert outcome["iteration_count"] == 4
|
||||||
|
|
||||||
|
|
||||||
|
class TestWriterAgentNode:
    """Runs writer_agent_node against a stubbed LLM and inspects the state update."""

    def test_writer_returns_polished_draft(self):
        """The writer replaces the draft with the LLM reply and marks the run done."""
        from agents.writer_agent import writer_agent_node

        fake_reply = MagicMock()
        fake_reply.content = "Polished and professional document."

        draft_state = create_initial_state("Machine Learning", "run-writer-1")
        draft_state["current_draft"] = "Raw draft content"

        with patch("agents.writer_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            outcome = writer_agent_node(draft_state)

        expected = {
            "current_draft": "Polished and professional document.",
            "active_node": "writer_agent",
            "route_decision": "done",
        }
        for field, value in expected.items():
            assert outcome[field] == value
|
||||||
55
backend/tests/test_run_store.py
Normal file
55
backend/tests/test_run_store.py
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
"""Tests for the in-memory RunStore."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
|
||||||
|
from api.run_store import RunStore
|
||||||
|
|
||||||
|
|
||||||
|
class TestRunStore:
    """Unit tests for RunStore, each run against a fresh, private instance."""

    def setup_method(self):
        """Give every test its own empty store."""
        self.store = RunStore()

    def test_create_and_get(self):
        """A created run can be read back and starts as pending."""
        self.store.create("run-1", "Test topic")
        run = self.store.get("run-1")
        assert run is not None
        assert run["run_id"] == "run-1"
        assert run["input_topic"] == "Test topic"
        assert run["status"] == "pending"

    def test_get_nonexistent_returns_none(self):
        """Looking up an unknown id returns None."""
        assert self.store.get("nonexistent") is None

    def test_update_status(self):
        """An update changes the stored status."""
        self.store.create("run-2", "Topic")
        self.store.update("run-2", {"status": "running"})
        assert self.store.get("run-2")["status"] == "running"

    def test_update_nonexistent_is_noop(self):
        """Updating a non-existent run should not raise."""
        self.store.update("ghost-run", {"status": "running"})
        # A no-op must not create the run as a side effect either.
        assert self.store.get("ghost-run") is None

    def test_delete(self):
        """A deleted run can no longer be read."""
        self.store.create("run-3", "Topic")
        self.store.delete("run-3")
        assert self.store.get("run-3") is None

    def test_delete_nonexistent_is_noop(self):
        """Deleting a non-existent run should not raise and leaves the store empty of it."""
        self.store.delete("ghost-run")
        assert self.store.get("ghost-run") is None

    def test_update_partial_fields(self):
        """An update merges the given fields and keeps the others."""
        self.store.create("run-4", "Topic")
        self.store.update("run-4", {"status": "completed", "final_draft": "Result text"})
        run = self.store.get("run-4")
        assert run["status"] == "completed"
        assert run["final_draft"] == "Result text"
        assert run["input_topic"] == "Topic"  # original field preserved

    def test_multiple_runs_independent(self):
        """Updating one run leaves another run untouched."""
        self.store.create("run-a", "Topic A")
        self.store.create("run-b", "Topic B")
        self.store.update("run-a", {"status": "running"})
        assert self.store.get("run-b")["status"] == "pending"
|
||||||
44
backend/tests/test_state.py
Normal file
44
backend/tests/test_state.py
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
"""Tests for CouncilState structure and graph_builder helpers."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
|
||||||
|
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
|
||||||
|
from services.graph_builder import create_initial_state
|
||||||
|
|
||||||
|
|
||||||
|
class TestCouncilState:
    """Checks the initial state defaults, the module constants and dict compatibility."""

    def test_initial_state_fields(self):
        """create_initial_state fills in the caller's values and blank defaults."""
        fresh = create_initial_state("Test topic", "run-001")

        # Fields compared by equality.
        expected_values = {
            "input_topic": "Test topic",
            "current_draft": "",
            "feedback_history": [],
            "route_decision": "",
            "messages": [],
            "iteration_count": 0,
            "run_id": "run-001",
            "active_node": "",
        }
        for field, value in expected_values.items():
            assert fresh[field] == value

        # The score is checked by identity: it must be None, not merely falsy.
        assert fresh["critic_score"] is None

    def test_approval_threshold_value(self):
        """The approval threshold is pinned at 8.0."""
        assert APPROVAL_THRESHOLD == 8.0

    def test_max_iterations_value(self):
        """The iteration safety limit is pinned at 5."""
        assert MAX_ITERATIONS == 5

    def test_state_is_typed_dict(self):
        """CouncilState should be instantiable as a plain dict."""
        literal_state: CouncilState = dict(
            input_topic="AI",
            current_draft="draft",
            feedback_history=["fb1"],
            route_decision="rework",
            messages=[],
            iteration_count=1,
            critic_score=6.0,
            run_id="x",
            active_node="critic_agent",
        )
        assert literal_state["critic_score"] == 6.0
|
||||||
7
backend/tools/__init__.py
Normal file
7
backend/tools/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
"""
|
||||||
|
Agent tools for CouncilOS.
|
||||||
|
|
||||||
|
Phase 4 will add:
|
||||||
|
- web_search_tool: Tavily Search API wrapper
|
||||||
|
- pdf_reader_tool: PyPDF + ChromaDB vector store wrapper
|
||||||
|
"""
|
||||||
54
docker-compose.yml
Normal file
54
docker-compose.yml
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
version: "3.9"
|
||||||
|
|
||||||
|
# CouncilOS — local development environment
|
||||||
|
# Usage:
|
||||||
|
# docker compose up -d # Start all services
|
||||||
|
# docker compose down # Stop all services
|
||||||
|
# docker compose logs -f api # Follow API logs
|
||||||
|
|
||||||
|
services:
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# PostgreSQL — stores council blueprints (used from Phase 2)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
db:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: councilOS
|
||||||
|
POSTGRES_USER: user
|
||||||
|
POSTGRES_PASSWORD: password
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U user -d councilOS"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CouncilOS API — FastAPI + LangGraph backend
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: ./backend
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgresql+asyncpg://user:password@db:5432/councilOS
|
||||||
|
volumes:
|
||||||
|
- ./backend:/app
|
||||||
|
- chroma_data:/app/chroma_db
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres_data:
|
||||||
|
chroma_data:
|
||||||
Loading…
Add table
Add a link
Reference in a new issue