Merge pull request #4 from Kenearos/claude/start-implementation-2XWxH

Implement Phase 1: LangGraph backend MVP
2026-02-20 17:51:01 +01:00 · 2026-02-20 17:51:01 +01:00 · 06aec41a8a
commit 06aec41a8a
parent 34dcfb3dcd 797f02c74d
24 changed files with 1472 additions and 0 deletions
--- a/.env.example
+++ b/.env.example
@ -0,0 +1,45 @@
 # CouncilOS — Environment Variables Template
 # Copy this file to .env and fill in your actual values.
 # NEVER commit the .env file to version control.
 # =============================================================================
 # LLM API Keys
 # =============================================================================
 # Anthropic Claude API key (required)
 ANTHROPIC_API_KEY=
 # OpenAI GPT-4o API key (optional for Phase 1, required from Phase 3)
 OPENAI_API_KEY=
 # Tavily Search API key (required for Phase 4 web-search tool)
 TAVILY_API_KEY=
 # =============================================================================
 # Database
 # =============================================================================
 # PostgreSQL connection string (required from Phase 2)
 DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/councilOS
 # =============================================================================
 # Vector Database (ChromaDB)
 # =============================================================================
 # Local directory to persist ChromaDB embeddings (required for Phase 4 PDF tool)
 CHROMA_PERSIST_DIR=./chroma_db
 # =============================================================================
 # Application Settings
 # =============================================================================
 # FastAPI server host and port
 HOST=0.0.0.0
 PORT=8000
 # Log level: DEBUG | INFO | WARNING | ERROR
 LOG_LEVEL=INFO
 # CORS: comma-separated list of allowed frontend origins in production
 # Example: https://my-app.vercel.app,https://www.my-domain.com
 CORS_ORIGINS=http://localhost:3000
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,72 @@
 # =============================================================================
 # Environment & Secrets — NEVER commit these
 # =============================================================================
 .env
 .env.local
 .env.*.local
 *.pem
 *.key
 secrets/
 # =============================================================================
 # Python
 # =============================================================================
 __pycache__/
 *.py[cod]
 *$py.class
 *.so
 .Python
 .venv/
 venv/
 env/
 ENV/
 *.egg-info/
 dist/
 build/
 .eggs/
 pip-wheel-metadata/
 .mypy_cache/
 .ruff_cache/
 .pytest_cache/
 htmlcov/
 .coverage
 coverage.xml
 *.cover
 # =============================================================================
 # Node / Frontend
 # =============================================================================
 node_modules/
 .next/
 out/
 .nuxt/
 dist/
 .cache/
 *.log
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
 .pnpm-debug.log*
 # =============================================================================
 # Database & Vector Store
 # =============================================================================
 chroma_db/
 *.sqlite3
 *.db
 postgres_data/
 # =============================================================================
 # IDE & OS
 # =============================================================================
 .idea/
 .vscode/
 *.swp
 *.swo
 .DS_Store
 Thumbs.db
 # =============================================================================
 # Docker
 # =============================================================================
 .docker/
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -0,0 +1,19 @@
 # Runtime image for the CouncilOS backend: installs the Python dependencies
 # and serves the FastAPI app (main:app) with uvicorn on port 8000.
 FROM python:3.11-slim
 WORKDIR /app
 # Install system dependencies (build-essential supplies a compiler toolchain,
 # presumably for Python packages that build native extensions — confirm).
 # The apt package lists are removed in the same layer to keep the image small.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies. requirements.txt is copied on its own first so
 # this layer can be reused from cache when only application code changes.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY . .
 # Port the uvicorn server below listens on
 EXPOSE 8000
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/backend/agents/init.py
+++ b/backend/agents/init.py
@ -0,0 +1,7 @@
 """Agent node functions for CouncilOS."""
 from .master_agent import master_agent_node
 from .critic_agent import critic_agent_node
 from .writer_agent import writer_agent_node
 __all__ = ["master_agent_node", "critic_agent_node", "writer_agent_node"]
--- a/backend/agents/critic_agent.py
+++ b/backend/agents/critic_agent.py
@ -0,0 +1,127 @@
 """
 Critic Agent Node — evaluates the current draft and decides whether to approve or rework.
 The critic scores the draft from 0–10 and returns structured feedback.
 If the score meets APPROVAL_THRESHOLD, route_decision is set to "approve".
 Otherwise it is set to "rework" and the feedback is appended to feedback_history.
 """
 import os
 import re
 from langchain_anthropic import ChatAnthropic
 from langchain_core.messages import HumanMessage, SystemMessage
 from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
 # System prompt sent to the critic model by critic_agent_node. The
 # SCORE / VERDICT / FEEDBACK layout it demands is the format that
 # _parse_critic_response's regular expressions look for, so the two must be
 # changed together.
 _SYSTEM_PROMPT = """You are the Critic AI in a council of expert AIs.
 Your job is to rigorously evaluate the quality of a draft document.
 You must respond in EXACTLY this format — no deviations:
 SCORE: <integer 0-10>
 VERDICT: <"approve" if score >= 8, otherwise "rework">
 FEEDBACK:
 <detailed, actionable feedback explaining what must be improved>
 Scoring criteria:
 - 0–3: Poor structure, major factual gaps, incoherent
 - 4–6: Adequate but needs significant improvement
 - 7:   Good but has notable weaknesses
 - 8–9: High quality, minor improvements possible
 - 10:  Exceptional, publication-ready
 Be strict. Only award 8+ if the document genuinely meets high quality standards."""
 def _parse_critic_response(content: str) -> tuple[float, str, str]:
    """
    Parse the structured critic response.
    Returns:
        (score, verdict, feedback) tuple.
        Falls back to ("rework", full content) on parse failure.
    """
    score_match = re.search(r"SCORE:\s*(\d+(?:\.\d+)?)", content)
    verdict_match = re.search(r"VERDICT:\s*(approve|rework)", content, re.IGNORECASE)
    feedback_match = re.search(r"FEEDBACK:\s*(.*)", content, re.DOTALL)
    score = float(score_match.group(1)) if score_match else 0.0
    verdict = verdict_match.group(1).lower() if verdict_match else "rework"
    feedback = feedback_match.group(1).strip() if feedback_match else content.strip()
    # Clamp score to 0–10
    score = max(0.0, min(10.0, score))
    return score, verdict, feedback
 def critic_agent_node(state: CouncilState) -> dict:
    """
    Score the current draft and decide where the graph goes next.
    When state["iteration_count"] has reached MAX_ITERATIONS the LLM is not
    called at all: the draft is approved with critic_score set to
    APPROVAL_THRESHOLD and an "[Auto-approved ...]" note is added to
    feedback_history, so the master/critic loop always terminates.
    Otherwise the draft is sent to the critic model, the reply is parsed with
    _parse_critic_response, and the routing decision is derived from the
    numeric score alone (the verdict text written by the model is ignored).
    Args:
        state: The current CouncilState; input_topic and current_draft are read.
    Returns:
        A dict of state updates: critic_score, route_decision, messages and
        active_node, plus feedback_history when the decision is "rework" or
        the draft was auto-approved.
    """
    # Safety valve: stop looping once the iteration budget is used up.
    if state.get("iteration_count", 0) >= MAX_ITERATIONS:
        auto_note = f"[Auto-approved after {MAX_ITERATIONS} iterations]"
        return {
            "route_decision": "approve",
            "critic_score": APPROVAL_THRESHOLD,
            "feedback_history": [auto_note],
            "messages": [],
            "active_node": "critic_agent",
        }
    critic_llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.2,  # Low temperature for consistent evaluation
        max_tokens=1024,
    )
    instructions = SystemMessage(content=_SYSTEM_PROMPT)
    topic = state["input_topic"]
    draft = state["current_draft"]
    evaluation_request = HumanMessage(
        content=f"Please evaluate this draft on the topic '{topic}':\n\n{draft}"
    )
    reply = critic_llm.invoke([instructions, evaluation_request])
    score, _model_verdict, feedback = _parse_critic_response(reply.content)
    # The threshold, not the model's own verdict, decides the route, so score
    # and decision can never disagree.
    route_decision = "approve" if score >= APPROVAL_THRESHOLD else "rework"
    updates: dict = {
        "critic_score": score,
        "route_decision": route_decision,
        "messages": [instructions, evaluation_request, reply],
        "active_node": "critic_agent",
    }
    # Feedback is only recorded when the draft goes back to the master agent.
    if route_decision == "rework":
        updates["feedback_history"] = [f"Score: {score}/10\n{feedback}"]
    return updates
--- a/backend/agents/master_agent.py
+++ b/backend/agents/master_agent.py
@ -0,0 +1,75 @@
 """
 Master Agent Node — creates and refines drafts based on critic feedback.
 This agent is the primary content creator. On the first iteration it produces
 an initial draft. On subsequent iterations it incorporates all feedback from
 the feedback_history to improve the draft.
 """
 import os
 from langchain_anthropic import ChatAnthropic
 from langchain_core.messages import HumanMessage, SystemMessage
 from state import CouncilState
 # System prompt sent as the SystemMessage on every master_agent_node call,
 # for both the first draft and later rework rounds.
 _SYSTEM_PROMPT = """You are the Master AI in a council of expert AIs.
 Your job is to write high-quality content on the given topic.
 When you receive critic feedback, carefully incorporate ALL feedback points
 and produce an improved draft. Be thorough and precise."""
 def _build_master_prompt(state: CouncilState) -> str:
    """
    Build the user message for the master agent from the current state.
    With an empty feedback_history this is a plain "write a document" request
    for the topic. Otherwise the prompt contains the topic, the current draft
    and every feedback entry (numbered from 1, separated by "---" lines),
    followed by the instruction to address all of it.
    Args:
        state: The current CouncilState.
    Returns:
        The prompt text to send as the HumanMessage.
    """
    history = state["feedback_history"]
    topic = state["input_topic"]
    if not history:
        return (
            "Please write a comprehensive, well-structured document on the following topic:\n\n"
            + topic
        )
    numbered_rounds = [
        f"Feedback round {round_no}:\n{entry}"
        for round_no, entry in enumerate(history, start=1)
    ]
    feedback_block = "\n\n---\n".join(numbered_rounds)
    sections = [
        f"Topic: {topic}\n\n",
        f"Your current draft:\n{state['current_draft']}\n\n",
        f"The critic has provided the following feedback across {len(history)} round(s):\n\n",
        f"{feedback_block}\n\n",
        "Please produce an improved draft that fully addresses ALL feedback points above.",
    ]
    return "".join(sections)
 def master_agent_node(state: CouncilState) -> dict:
    """
    LangGraph node that writes the first draft or reworks the existing one.
    The prompt comes from _build_master_prompt, so critic feedback already in
    the state is included automatically. Each call increases iteration_count
    by one, starting from 0 when the field is absent.
    Args:
        state: The current CouncilState.
    Returns:
        State updates: current_draft (the model output), messages (system
        prompt, user prompt and response), active_node and iteration_count.
    """
    master_llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.7,
        max_tokens=2048,
    )
    conversation = [
        SystemMessage(content=_SYSTEM_PROMPT),
        HumanMessage(content=_build_master_prompt(state)),
    ]
    reply = master_llm.invoke(conversation)
    completed_iterations = state.get("iteration_count", 0) + 1
    return {
        "current_draft": reply.content,
        "messages": [*conversation, reply],
        "active_node": "master_agent",
        "iteration_count": completed_iterations,
    }
--- a/backend/agents/writer_agent.py
+++ b/backend/agents/writer_agent.py
@ -0,0 +1,63 @@
 """
 Writer Agent Node — final polishing of an approved draft.
 This agent receives a critic-approved draft and produces the final,
 publication-ready version with polished formatting and language.
 """
 import os
 from langchain_anthropic import ChatAnthropic
 from langchain_core.messages import HumanMessage, SystemMessage
 from state import CouncilState
 # System prompt sent as the SystemMessage by writer_agent_node; it limits the
 # writer to polishing language and formatting of the approved draft.
 _SYSTEM_PROMPT = """You are the Writer AI in a council of expert AIs.
 You receive a draft that has already been approved for quality by the Critic AI.
 Your job is to give it a final professional polish:
 - Improve sentence flow and readability
 - Ensure consistent formatting (headers, bullet points, paragraphs)
 - Fix any grammatical or stylistic issues
 - Do NOT change the factual content or overall structure
 - Preserve all key information from the draft
 Return only the polished document — no meta-commentary."""
 def writer_agent_node(state: CouncilState) -> dict:
    """
    LangGraph node that polishes the approved draft into the final document.
    Args:
        state: The current CouncilState; input_topic and current_draft are read.
    Returns:
        State updates: current_draft replaced by the polished text, messages
        (system prompt, user prompt and response), active_node set to
        "writer_agent" and route_decision set to "done".
    """
    writer_llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.4,
        max_tokens=4096,
    )
    instructions = SystemMessage(content=_SYSTEM_PROMPT)
    topic = state["input_topic"]
    approved_draft = state["current_draft"]
    polish_request = HumanMessage(
        content=(
            "Please give a final professional polish to this approved document "
            f"on the topic '{topic}':\n\n{approved_draft}"
        )
    )
    reply = writer_llm.invoke([instructions, polish_request])
    updates = {
        "current_draft": reply.content,
        "messages": [instructions, polish_request, reply],
        "active_node": "writer_agent",
        "route_decision": "done",
    }
    return updates
--- a/backend/api/init.py
+++ b/backend/api/init.py
@ -0,0 +1 @@
 """API route definitions for CouncilOS."""
--- a/backend/api/routes.py
+++ b/backend/api/routes.py
@ -0,0 +1,135 @@
 """
 REST API routes for CouncilOS.
 Endpoints:
    POST /api/councils/run    — Start a new council run (async, returns run_id)
    GET  /api/councils/run/{run_id}  — Poll the status/result of a run
    GET  /api/health          — Health check
 """
 import uuid
 from typing import Optional
 from fastapi import APIRouter, HTTPException, BackgroundTasks
 from pydantic import BaseModel, Field
 from services.graph_builder import run_council_async
 from api.run_store import run_store
 router = APIRouter()
 # ---------------------------------------------------------------------------
 # Request / Response Models
 # ---------------------------------------------------------------------------
 class CouncilRunRequest(BaseModel):
    # Request body for POST /councils/run.
    # input_topic is required (the `...` default) and must be 1–10,000
    # characters long; other lengths are rejected by validation.
    input_topic: str = Field(
        ...,
        min_length=1,
        max_length=10_000,
        description="The user's prompt or document content for the council to work on.",
        examples=["Erkläre die wichtigsten Konzepte des maschinellen Lernens für Einsteiger."],
    )
 class CouncilRunResponse(BaseModel):
    # Response body of POST /councils/run, returned before the run has executed.
    run_id: str  # identifier to use for polling and for the WebSocket path
    status: str  # "pending" | "running" | "completed" | "failed"
    message: str  # human-readable hint for the client
 class CouncilResultResponse(BaseModel):
    # Response body of GET /councils/run/{run_id}. The optional fields mirror
    # the run store entry and stay None until the run has written them.
    run_id: str
    status: str
    final_draft: Optional[str] = None
    critic_score: Optional[float] = None
    iteration_count: Optional[int] = None
    error: Optional[str] = None  # set by _execute_run when the run failed
 # ---------------------------------------------------------------------------
 # Endpoints
 # ---------------------------------------------------------------------------
@router.get("/health")
 async def health_check():
    """Health check endpoint."""
    return {"status": "ok", "service": "CouncilOS API"}
@router.post("/councils/run", response_model=CouncilRunResponse, status_code=202)
 async def start_council_run(
    request: CouncilRunRequest,
    background_tasks: BackgroundTasks,
 ):
    """
    Start a new council run.
    The run executes asynchronously in the background. Poll
    GET /api/councils/run/{run_id} for the result, or connect to the
    WebSocket at /ws/council/{run_id} for real-time updates.
    """
    run_id = str(uuid.uuid4())
    # Register the run as pending in the in-memory store
    run_store.create(run_id, request.input_topic)
    # Schedule the graph execution as a background task
    background_tasks.add_task(_execute_run, run_id, request.input_topic)
    return CouncilRunResponse(
        run_id=run_id,
        status="pending",
        message=f"Council run started. Connect to /ws/council/{run_id} for live updates.",
    )
@router.get("/councils/run/{run_id}", response_model=CouncilResultResponse)
 async def get_council_result(run_id: str):
    """
    Retrieve the current status or final result of a council run.
    """
    run = run_store.get(run_id)
    if run is None:
        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found.")
    return CouncilResultResponse(
        run_id=run_id,
        status=run["status"],
        final_draft=run.get("final_draft"),
        critic_score=run.get("critic_score"),
        iteration_count=run.get("iteration_count"),
        error=run.get("error"),
    )
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
 async def _execute_run(run_id: str, input_topic: str) -> None:
    """
    Background task: run the council graph and record the outcome.
    The run is marked "running", then run_council_async is awaited. On success
    the final draft, critic score and iteration count are stored with status
    "completed"; any exception is caught and stored as status "failed" with
    the exception text, so the task itself never raises.
    """
    def _record_active_node(nid, node):
        # Handed to the graph runner so it can report the executing node.
        return run_store.update(nid, {"active_node": node})
    run_store.update(run_id, {"status": "running"})
    try:
        final_state = await run_council_async(
            input_topic=input_topic,
            run_id=run_id,
            on_node_event=_record_active_node,
        )
        outcome = {"status": "completed"}
        outcome["final_draft"] = final_state.get("current_draft")
        outcome["critic_score"] = final_state.get("critic_score")
        outcome["iteration_count"] = final_state.get("iteration_count")
        outcome["active_node"] = "done"
        run_store.update(run_id, outcome)
    except Exception as exc:  # noqa: BLE001
        run_store.update(run_id, {"status": "failed", "error": str(exc)})
--- a/backend/api/run_store.py
+++ b/backend/api/run_store.py
@ -0,0 +1,47 @@
 """
 In-memory run store for Phase 1.
 Tracks the status and results of council runs by run_id. This is intentionally
 simple for Phase 1. Phase 3+ will replace this with a PostgreSQL-backed store.
 """
 from typing import Any, Dict, Optional
 import threading
 class RunStore:
    """
    Thread-safe in-memory store for council run state.
    Every access to the underlying dict happens under one lock. Readers get
    a copy of a record rather than the record itself, so a caller can never
    observe a half-applied update or change stored state by accident; all
    changes go through update().
    """
    def __init__(self) -> None:
        self._store: Dict[str, Dict[str, Any]] = {}
        self._lock = threading.Lock()
    def create(self, run_id: str, input_topic: str) -> None:
        """Register a new run with status "pending" and all result fields None.
        An existing record with the same run_id is replaced.
        """
        with self._lock:
            self._store[run_id] = {
                "run_id": run_id,
                "input_topic": input_topic,
                "status": "pending",
                "final_draft": None,
                "critic_score": None,
                "iteration_count": None,
                "active_node": None,
                "error": None,
            }
    def get(self, run_id: str) -> Optional[Dict[str, Any]]:
        """Return a snapshot (shallow copy) of the run record, or None if unknown."""
        with self._lock:
            record = self._store.get(run_id)
            # Copy while holding the lock: the caller keeps using the result
            # after the lock is released, when update() may run concurrently.
            return None if record is None else dict(record)
    def update(self, run_id: str, updates: Dict[str, Any]) -> None:
        """Merge updates into an existing record; unknown run_ids are ignored."""
        with self._lock:
            record = self._store.get(run_id)
            if record is not None:
                record.update(updates)
    def delete(self, run_id: str) -> None:
        """Remove a run record; does nothing if the run_id is unknown."""
        with self._lock:
            self._store.pop(run_id, None)
 # Singleton instance shared across the application
 run_store = RunStore()
--- a/backend/api/websocket.py
+++ b/backend/api/websocket.py
@ -0,0 +1,128 @@
 """
 WebSocket endpoint for real-time agent status updates.
 Clients connect to /ws/council/{run_id} and receive JSON events whenever
 an agent node becomes active. This powers the live diagram pulsing in the frontend.
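 Event format (events sent by the /ws/council/{run_id} handler in this module):
    {"event": "connected", "run_id": "...", "status": "running"}
    {"event": "node_active", "run_id": "...", "node": "master_agent", "iteration": 2}
    {"event": "run_complete", "run_id": "...", "final_draft": "...", "critic_score": 8.0, "iteration_count": 3}
    {"event": "run_failed", "run_id": "...", "error": "..."}
    {"event": "error", "message": "Run '...' not found."}
 Note: the "node_start" and "node_complete" events (with "iteration" / "score"
 fields) are not emitted by the polling handler; it reports node changes as
 "node_active" instead.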
 """
 import asyncio
 import json
 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 from api.run_store import run_store
 ws_router = APIRouter()
 # Active WebSocket connections keyed by run_id
 _connections: dict[str, list[WebSocket]] = {}
 async def broadcast_event(run_id: str, event: dict) -> None:
    """
    Send an event to all WebSocket clients subscribed to a run_id.
    The event is serialized once, before any client is contacted. Clients
    whose send fails are treated as disconnected and removed from the
    subscriber list; the remaining clients still receive the event.
    Args:
        run_id: The council run identifier.
        event:  The event dict to serialize and broadcast.
    Raises:
        TypeError: If event cannot be serialized to JSON and at least one
            client is subscribed. Serialization happens outside the per-client
            error handling so a bad event is not mistaken for dead connections.
    """
    clients = _connections.get(run_id)
    if not clients:
        return
    payload = json.dumps(event)
    disconnected = []
    # Iterate over a snapshot: other coroutines may add or remove clients
    # while this one is suspended in send_text().
    for ws in list(clients):
        try:
            await ws.send_text(payload)
        except Exception:  # noqa: BLE001
            disconnected.append(ws)
    # Clean up dead connections (they may already have removed themselves)
    for ws in disconnected:
        if ws in clients:
            clients.remove(ws)
@ws_router.websocket("/ws/council/{run_id}")
 async def council_websocket(websocket: WebSocket, run_id: str):
    """
    WebSocket endpoint for live council run updates.
    On connect: sends the current run status immediately.
    While running: polls the run store and pushes status changes.
    On complete/failed: sends a final event and closes the connection.
    """
    await websocket.accept()
    # Register this client
    if run_id not in _connections:
        _connections[run_id] = []
    _connections[run_id].append(websocket)
    try:
        # Send current state immediately on connect
        run = run_store.get(run_id)
        if run is None:
            await websocket.send_text(
                json.dumps({"event": "error", "message": f"Run '{run_id}' not found."})
            )
            return
        await websocket.send_text(
            json.dumps({"event": "connected", "run_id": run_id, "status": run["status"]})
        )
        # Poll for status changes and push updates
        last_node = None
        while True:
            run = run_store.get(run_id)
            if run is None:
                break
            current_node = run.get("active_node")
            if current_node and current_node != last_node:
                await websocket.send_text(
                    json.dumps({
                        "event": "node_active",
                        "run_id": run_id,
                        "node": current_node,
                        "iteration": run.get("iteration_count"),
                    })
                )
                last_node = current_node
            if run["status"] == "completed":
                await websocket.send_text(
                    json.dumps({
                        "event": "run_complete",
                        "run_id": run_id,
                        "final_draft": run.get("final_draft"),
                        "critic_score": run.get("critic_score"),
                        "iteration_count": run.get("iteration_count"),
                    })
                )
                break
            if run["status"] == "failed":
                await websocket.send_text(
                    json.dumps({
                        "event": "run_failed",
                        "run_id": run_id,
                        "error": run.get("error"),
                    })
                )
                break
            await asyncio.sleep(0.5)  # 500ms polling interval
    except WebSocketDisconnect:
        pass
    finally:
        if run_id in _connections:
            try:
                _connections[run_id].remove(websocket)
            except ValueError:
                pass
--- a/backend/main.py
+++ b/backend/main.py
@ -0,0 +1,59 @@
 """
 CouncilOS — FastAPI application entrypoint.
 Start the server:
    uvicorn main:app --reload --port 8000
 API Overview:
    POST /api/councils/run          — Start a council run
    GET  /api/councils/run/{run_id} — Poll run status/result
    GET  /api/health                — Health check
    WS   /ws/council/{run_id}       — Real-time agent status events
 """
 import os
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from api.routes import router
 from api.websocket import ws_router
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Application lifespan: startup and shutdown logic.
    Code before the yield runs at startup and code after it at shutdown.
    Currently both sides only print a status line.
    """
    print("CouncilOS API starting up...")
    yield
    print("CouncilOS API shutting down...")
 # The ASGI application object; this is the "main:app" target given to uvicorn.
 app = FastAPI(
    title="CouncilOS API",
    description=(
        "Backend for the CouncilOS multi-agent AI pipeline platform. "
        "Orchestrates LangGraph council runs and streams real-time agent "
        "status via WebSockets."
    ),
    version="0.1.0",
    lifespan=lifespan,
 )
 # CORS — allowed origins come from the CORS_ORIGINS environment variable
 # (comma-separated, see .env.example). When it is unset, empty or contains
 # "*", every origin is allowed for local development, but credentials are
 # then switched off: a wildcard origin combined with credentials would let
 # any website make credentialed requests to this API.
 _cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ORIGINS", "").split(",")
    if origin.strip()
 ]
 _cors_allow_all = not _cors_origins or "*" in _cors_origins
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"] if _cors_allow_all else _cors_origins,
    allow_credentials=not _cors_allow_all,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # Mount REST routes under /api prefix
 app.include_router(router, prefix="/api")
 # Mount WebSocket routes (no prefix — path is /ws/council/{run_id})
 app.include_router(ws_router)
 # Development entry point for `python main.py`: serves on all interfaces,
 # port 8000, with uvicorn's auto-reload switched on.
 if __name__ == "__main__":
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
--- a/backend/pytest.ini
+++ b/backend/pytest.ini
@ -0,0 +1,6 @@
 [pytest]
 testpaths = tests
 asyncio_mode = auto
 python_files = test_*.py
 python_classes = Test*
 python_functions = test_*
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -0,0 +1,38 @@
 # Core AI orchestration
 langgraph>=0.2.0
 langchain>=0.2.0
 langchain-anthropic>=0.1.0
 langchain-openai>=0.1.0
 # Backend API
 fastapi>=0.111.0
 uvicorn[standard]>=0.30.0
 websockets>=12.0
 python-multipart>=0.0.9
 # Database
 asyncpg>=0.29.0
 sqlalchemy[asyncio]>=2.0.0
 alembic>=1.13.0
 # Vector DB (PDF tool)
 chromadb>=0.5.0
 pypdf>=4.0.0
 # Search tool
 tavily-python>=0.3.0
 # Utilities
 python-dotenv>=1.0.0
 pydantic>=2.0.0
 pydantic-settings>=2.0.0
 # Linting and formatting
 ruff>=0.4.0
 black>=24.0.0
 # Testing
 pytest>=8.0.0
 pytest-asyncio>=0.23.0
 pytest-mock>=3.14.0
 httpx>=0.27.0
--- a/backend/services/init.py
+++ b/backend/services/init.py
@ -0,0 +1 @@
 """Service modules for CouncilOS backend."""
--- a/backend/services/graph_builder.py
+++ b/backend/services/graph_builder.py
@ -0,0 +1,131 @@
 """
 Graph Builder — constructs the LangGraph execution graph for council runs.
 Phase 1: Hard-coded test graph:
    User Input → Master Agent → Critic Agent → (score < 8: back to Master | score ≥ 8: Writer Agent)
 Phase 3 (future): This module will be extended to build graphs dynamically
 from JSON blueprints stored in PostgreSQL.
 """
 import asyncio
 from typing import Any, Callable, Optional
 from langgraph.graph import StateGraph, END
 from state import CouncilState
 from agents import master_agent_node, critic_agent_node, writer_agent_node
 def route_after_critic(state: CouncilState) -> str:
    """
    Conditional edge function: choose the node that follows the critic.
    Only an explicit "approve" in state["route_decision"] leads to the writer;
    any other value, or a missing one, sends the draft back for rework.
    Returns:
        "writer_agent" when the draft is approved, otherwise "master_agent".
    """
    approved = state.get("route_decision", "rework") == "approve"
    return "writer_agent" if approved else "master_agent"
 def build_council_graph(
    on_node_start: Optional[Callable[[str, str], Any]] = None,
 ) -> StateGraph:
    """
    Build and compile the Phase 1 hard-coded council graph.
    Graph topology:
        master_agent → critic_agent → (conditional) → master_agent | writer_agent → END
    Args:
        on_node_start: Optional callback invoked right before a node function
                       runs. Signature: (run_id: str, node_name: str) -> Any,
                       where run_id is read from the state's "run_id" field.
                       It is called synchronously from whichever thread
                       executes the graph and its return value is ignored, so
                       a coroutine function passed here would not be awaited.
                       Used to report the active node for real-time UI updates.
    Returns:
        The compiled graph (the result of StateGraph.compile()), ready for
        invocation.
    """
    def _with_start_hook(node_name: str, node_fn: Callable[[CouncilState], dict]):
        # Without a callback the node function is registered untouched.
        if on_node_start is None:
            return node_fn
        def _hooked(state: CouncilState) -> dict:
            # Announce the node first, then run it unchanged.
            on_node_start(state.get("run_id", ""), node_name)
            return node_fn(state)
        return _hooked
    graph = StateGraph(CouncilState)
    # Register agent nodes
    graph.add_node("master_agent", _with_start_hook("master_agent", master_agent_node))
    graph.add_node("critic_agent", _with_start_hook("critic_agent", critic_agent_node))
    graph.add_node("writer_agent", _with_start_hook("writer_agent", writer_agent_node))
    # Define edges
    graph.set_entry_point("master_agent")
    graph.add_edge("master_agent", "critic_agent")
    # Conditional edge: critic decides whether to rework or approve
    graph.add_conditional_edges(
        "critic_agent",
        route_after_critic,
        {
            "master_agent": "master_agent",
            "writer_agent": "writer_agent",
        },
    )
    # Writer is the terminal node
    graph.add_edge("writer_agent", END)
    return graph.compile()
 def create_initial_state(
    input_topic: str,
    run_id: str,
 ) -> CouncilState:
    """
    Build the starting CouncilState for a new council run.
    Only the topic and the run id carry caller data; every other field starts
    empty (empty string or list, iteration_count 0, critic_score None).
    Args:
        input_topic: The user's prompt or document content.
        run_id:      Unique identifier for this run (used in WebSocket events).
    Returns:
        An initialized CouncilState dict.
    """
    blank_fields = {
        "input_topic": input_topic,
        "current_draft": "",
        "feedback_history": [],
        "route_decision": "",
        "messages": [],
        "iteration_count": 0,
        "critic_score": None,
        "run_id": run_id,
        "active_node": "",
    }
    return CouncilState(**blank_fields)
 async def run_council_async(
    input_topic: str,
    run_id: str,
    on_node_event: Optional[Callable[[str, str], Any]] = None,
 ) -> CouncilState:
    """
    Execute a full council run without blocking the event loop.
    Args:
        input_topic:   The user's prompt.
        run_id:        Unique identifier for this run.
        on_node_event: Optional callback passed to build_council_graph as
                       on_node_start.
    Returns:
        The final state returned by the graph's invoke() call.
    """
    graph = build_council_graph(on_node_start=on_node_event)
    initial_state = create_initial_state(input_topic, run_id)
    # graph.invoke is synchronous, so it runs in the default thread pool
    # instead of on the event loop. get_running_loop() is the supported way to
    # obtain the loop inside a coroutine; get_event_loop() is deprecated for
    # this use.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, graph.invoke, initial_state)
--- a/backend/state.py
+++ b/backend/state.py
@ -0,0 +1,47 @@
 """
 CouncilState — the central data structure passed between all agents in LangGraph.
 All agents must read from and write to this TypedDict. Agents must not store
 state internally; everything passes through CouncilState.
 """
 from typing import Annotated, List, Optional
 import operator
 from typing_extensions import TypedDict
 class CouncilState(TypedDict):
    """
    The global state shared across all agents in a council run.
    Fields:
        input_topic:        The user's original prompt or uploaded PDF content.
        current_draft:      The document currently being worked on. Written by
                            the master agent and finally by the writer agent.
        feedback_history:   All critic feedback accumulated across loop iterations.
                            Uses the operator.add reducer: a node returns a list
                            with only its new entries and they are appended.
        route_decision:     Routing signal used by conditional edges.
                            Values: "rework" | "approve" (set by the critic),
                            "done" (set by the writer), "" initially, or
                            custom strings.
        messages:           LLM message history (system prompts + responses).
                            Uses operator.add reducer so messages accumulate.
        iteration_count:    Number of drafts the master agent has produced so
                            far; it is incremented on every master agent run,
                            including the first draft.
        critic_score:       The numeric score (0–10) assigned by the critic agent;
                            None until the critic has run.
        run_id:             Unique identifier for this council run (for WebSocket events).
        active_node:        Name of the agent node that last updated the state
                            (for UI updates).
    """
    input_topic: str
    current_draft: str
    feedback_history: Annotated[List[str], operator.add]
    route_decision: str
    messages: Annotated[list, operator.add]
    iteration_count: int
    critic_score: Optional[float]
    run_id: str
    active_node: str
 # Approval threshold: critic score must reach this value to exit the loop
 # (the critic approves when score >= APPROVAL_THRESHOLD).
 APPROVAL_THRESHOLD = 8.0
 # Safety limit: once iteration_count (drafts written by the master agent)
 # reaches this value, the critic approves without evaluating the draft.
 MAX_ITERATIONS = 5
--- a/backend/tests/init.py
+++ b/backend/tests/init.py
@ -0,0 +1 @@
 """Pytest test suite for CouncilOS backend."""
--- a/backend/tests/test_api.py
+++ b/backend/tests/test_api.py
@ -0,0 +1,99 @@
 """
 Integration tests for the FastAPI REST endpoints.
 Uses FastAPI's synchronous TestClient — no real LLM calls.
 """
 import sys
 import os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 import pytest
 from unittest.mock import AsyncMock, patch
 from fastapi.testclient import TestClient
 from main import app
 from api.run_store import run_store
@pytest.fixture(autouse=True)
 def clean_run_store():
    """
    Reset the shared run store around each test.

    Declared with autouse=True, so tests get it without requesting it. The
    store's internal ``_store`` container is emptied before the test body runs
    (everything up to the ``yield``) and emptied again afterwards.
    """
    # Setup: start every test from an empty store.
    run_store._store.clear()
    yield
    # Teardown: drop whatever the test created.
    run_store._store.clear()
 # Module-level test client bound to the FastAPI app; shared by all test classes below.
 client = TestClient(app)
 class TestHealthEndpoint:
    """Smoke test for the health-check route."""

    def test_health_check_returns_ok(self):
        """GET /api/health answers 200 with a JSON body whose status is "ok"."""
        health = client.get("/api/health")
        assert health.status_code == 200
        body = health.json()
        assert body["status"] == "ok"
 class TestStartCouncilRun:
    """Tests for POST /api/councils/run: accepted requests and validation failures."""

    def test_start_run_returns_202_with_run_id(self):
        """A valid topic is answered with 202, status "pending" and a UUID run_id."""
        import uuid  # function-scope import keeps this test self-contained

        # Swap api.routes._execute_run for an AsyncMock so the request does not
        # run the real executor.
        with patch("api.routes._execute_run", new_callable=AsyncMock):
            response = client.post(
                "/api/councils/run",
                json={"input_topic": "Erkläre maschinelles Lernen"},
            )
        assert response.status_code == 202
        data = response.json()
        assert "run_id" in data
        assert data["status"] == "pending"
        assert len(data["run_id"]) == 36  # UUID format
        # A length check alone accepts any 36-character string; parsing the value
        # raises ValueError unless it really is a UUID.
        uuid.UUID(data["run_id"])

    def test_start_run_rejects_empty_topic(self):
        """An empty topic string is rejected with 422."""
        response = client.post("/api/councils/run", json={"input_topic": ""})
        assert response.status_code == 422  # Pydantic validation error

    def test_start_run_rejects_missing_topic(self):
        """A body without input_topic is rejected with 422."""
        response = client.post("/api/councils/run", json={})
        assert response.status_code == 422
 class TestGetCouncilResult:
    """Tests for GET /api/councils/run/{run_id} across pending, completed, failed and unknown runs."""

    def test_get_pending_run(self):
        """A freshly created run is reported as pending under its own id."""
        run_store.create("test-run-id", "Test topic")
        resp = client.get("/api/councils/run/test-run-id")
        assert resp.status_code == 200
        body = resp.json()
        assert body["run_id"] == "test-run-id"
        assert body["status"] == "pending"

    def test_get_completed_run(self):
        """Fields written via run_store.update are echoed back by the endpoint."""
        stored_fields = {
            "status": "completed",
            "final_draft": "Final polished document.",
            "critic_score": 9.0,
            "iteration_count": 2,
        }
        run_store.create("completed-run", "Topic")
        run_store.update("completed-run", dict(stored_fields))
        resp = client.get("/api/councils/run/completed-run")
        assert resp.status_code == 200
        body = resp.json()
        for field, expected in stored_fields.items():
            assert body[field] == expected

    def test_get_nonexistent_run_returns_404(self):
        """An id that was never created yields 404."""
        missing = client.get("/api/councils/run/does-not-exist")
        assert missing.status_code == 404

    def test_get_failed_run(self):
        """A failed run is still served with 200 and carries its error text."""
        run_store.create("failed-run", "Topic")
        failure = {"status": "failed", "error": "API connection timeout"}
        run_store.update("failed-run", failure)
        resp = client.get("/api/councils/run/failed-run")
        assert resp.status_code == 200
        body = resp.json()
        assert body["status"] == "failed"
        assert "timeout" in body["error"]
--- a/backend/tests/test_routing.py
+++ b/backend/tests/test_routing.py
@ -0,0 +1,211 @@
 """
 Tests for the LangGraph routing logic.
 All LLM calls are mocked — no real API calls are made in these tests.
 """
 import sys
 import os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 import pytest
 from unittest.mock import patch, MagicMock
 from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
 from services.graph_builder import route_after_critic, create_initial_state
 class TestRouteAfterCritic:
    """Checks which node name route_after_critic returns for each route_decision value."""

    def _make_state(self, route_decision: str, iteration_count: int = 1) -> CouncilState:
        """Return a fresh initial state with the two routing-related fields overridden."""
        prepared = create_initial_state("test topic", "test-run")
        prepared.update(route_decision=route_decision, iteration_count=iteration_count)
        return prepared

    def test_approve_routes_to_writer(self):
        assert route_after_critic(self._make_state("approve")) == "writer_agent"

    def test_rework_routes_to_master(self):
        assert route_after_critic(self._make_state("rework")) == "master_agent"

    def test_empty_decision_defaults_to_rework(self):
        # An empty decision is expected to fall back to the rework target.
        assert route_after_critic(self._make_state("")) == "master_agent"

    def test_unknown_decision_defaults_to_rework(self):
        # Any unrecognised value is expected to fall back to the rework target too.
        assert route_after_critic(self._make_state("unknown_value")) == "master_agent"
 class TestCriticAgentParsing:
    """Exercises _parse_critic_response on well-formed, out-of-range and unstructured text."""

    @staticmethod
    def _parse(raw):
        """Import the parser lazily (as each test did inline) and run it on ``raw``."""
        from agents.critic_agent import _parse_critic_response
        return _parse_critic_response(raw)

    def test_parse_valid_approve_response(self):
        parsed_score, parsed_verdict, parsed_feedback = self._parse(
            "SCORE: 9\nVERDICT: approve\nFEEDBACK:\nExcellent work."
        )
        assert parsed_score == 9.0
        assert parsed_verdict == "approve"
        assert "Excellent" in parsed_feedback

    def test_parse_valid_rework_response(self):
        parsed_score, parsed_verdict, parsed_feedback = self._parse(
            "SCORE: 5\nVERDICT: rework\nFEEDBACK:\nNeeds more detail."
        )
        assert parsed_score == 5.0
        assert parsed_verdict == "rework"
        assert "detail" in parsed_feedback

    def test_parse_score_clamped_to_0_10(self):
        # A score above the scale is expected to come back as 10.0.
        parsed = self._parse("SCORE: 15\nVERDICT: approve\nFEEDBACK:\nToo high score.")
        assert parsed[0] == 10.0

    def test_parse_missing_score_defaults_to_0(self):
        # Unstructured text is expected to yield score 0.0 and a "rework" verdict.
        parsed_score, parsed_verdict, _ = self._parse("No structured response at all.")
        assert parsed_score == 0.0
        assert parsed_verdict == "rework"

    def test_threshold_boundary_exactly_8_approves(self):
        raw = f"SCORE: {APPROVAL_THRESHOLD}\nVERDICT: approve\nFEEDBACK:\nGood."
        parsed_score, parsed_verdict, _ = self._parse(raw)
        assert parsed_score == APPROVAL_THRESHOLD
        assert parsed_verdict == "approve"
 class TestMasterAgentPromptBuilding:
    """Unit tests for the master agent's prompt construction."""

    def test_first_iteration_prompt_has_no_feedback_block(self):
        """An untouched initial state yields a prompt with the topic and no "Feedback" marker."""
        from agents.master_agent import _build_master_prompt
        state = create_initial_state("Test topic", "run-1")
        prompt = _build_master_prompt(state)
        assert "Test topic" in prompt
        # The previous form, `"feedback" not in prompt.lower() or "Feedback" not in prompt`,
        # is logically identical to this single check: any prompt containing "Feedback"
        # also contains "feedback" once lower-cased, so the first disjunct never decided
        # the outcome. Lower-case mentions of "feedback" remain permitted, as before.
        assert "Feedback" not in prompt

    def test_rework_prompt_includes_feedback(self):
        """With a draft and one feedback entry, both appear in the prompt."""
        from agents.master_agent import _build_master_prompt
        state = create_initial_state("Test topic", "run-1")
        state["current_draft"] = "My draft"
        state["feedback_history"] = ["Score: 5/10\nNeeds more structure."]
        prompt = _build_master_prompt(state)
        assert "My draft" in prompt
        assert "Needs more structure" in prompt

    def test_rework_prompt_includes_all_feedback_rounds(self):
        """Every feedback entry is included, and the prompt mentions the round count."""
        from agents.master_agent import _build_master_prompt
        state = create_initial_state("Topic", "run-2")
        state["current_draft"] = "Draft v2"
        state["feedback_history"] = ["First feedback", "Second feedback"]
        prompt = _build_master_prompt(state)
        assert "First feedback" in prompt
        assert "Second feedback" in prompt
        assert "2 round" in prompt
 class TestCriticSafetyValve:
    """Tests for the MAX_ITERATIONS safety valve in the critic agent."""

    def test_safety_valve_forces_approve_at_max_iterations(self):
        """At MAX_ITERATIONS the critic approves with APPROVAL_THRESHOLD and never invokes the LLM."""
        from agents.critic_agent import critic_agent_node
        state = create_initial_state("topic", "run-safety")
        state["iteration_count"] = MAX_ITERATIONS
        state["current_draft"] = "Some draft"
        # Patch the LLM class even though the valve should short-circuit: if the
        # valve ever regresses, this test must fail locally instead of attempting
        # a real API call.
        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            result = critic_agent_node(state)
        MockLLM.return_value.invoke.assert_not_called()
        assert result["route_decision"] == "approve"
        assert result["critic_score"] == APPROVAL_THRESHOLD

    def test_safety_valve_not_triggered_below_max(self):
        """Below MAX_ITERATIONS the real LLM call would happen — mock it."""
        from agents.critic_agent import critic_agent_node
        mock_response = MagicMock()
        mock_response.content = "SCORE: 4\nVERDICT: rework\nFEEDBACK:\nNeeds work."
        with patch("agents.critic_agent.ChatAnthropic") as MockLLM:
            MockLLM.return_value.invoke.return_value = mock_response
            state = create_initial_state("topic", "run-below-max")
            state["iteration_count"] = MAX_ITERATIONS - 1
            state["current_draft"] = "Draft"
            result = critic_agent_node(state)
        # The mocked verdict and score must pass through unchanged.
        assert result["route_decision"] == "rework"
        assert result["critic_score"] == 4.0
 class TestMasterAgentNode:
    """Runs master_agent_node against a patched ChatAnthropic and inspects the returned update."""

    def test_master_agent_returns_draft(self):
        """The mocked LLM content becomes current_draft; the first call reports iteration 1."""
        from agents.master_agent import master_agent_node
        fake_reply = MagicMock(content="This is a generated draft about AI.")
        with patch("agents.master_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            initial = create_initial_state("AI basics", "run-master-1")
            outcome = master_agent_node(initial)
        assert outcome["current_draft"] == "This is a generated draft about AI."
        assert outcome["active_node"] == "master_agent"
        assert outcome["iteration_count"] == 1

    def test_master_agent_increments_iteration_count(self):
        """Starting from iteration 3, the node reports iteration 4."""
        from agents.master_agent import master_agent_node
        fake_reply = MagicMock(content="Draft")
        with patch("agents.master_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            initial = create_initial_state("topic", "run-master-2")
            initial["iteration_count"] = 3
            outcome = master_agent_node(initial)
        assert outcome["iteration_count"] == 4
 class TestWriterAgentNode:
    """Runs writer_agent_node against a patched ChatAnthropic."""

    def test_writer_returns_polished_draft(self):
        """The mocked LLM content replaces the draft and the route is marked "done"."""
        from agents.writer_agent import writer_agent_node
        fake_reply = MagicMock(content="Polished and professional document.")
        with patch("agents.writer_agent.ChatAnthropic") as llm_cls:
            llm_cls.return_value.invoke.return_value = fake_reply
            initial = create_initial_state("Machine Learning", "run-writer-1")
            initial["current_draft"] = "Raw draft content"
            outcome = writer_agent_node(initial)
        assert outcome["current_draft"] == "Polished and professional document."
        assert outcome["active_node"] == "writer_agent"
        assert outcome["route_decision"] == "done"
--- a/backend/tests/test_run_store.py
+++ b/backend/tests/test_run_store.py
@ -0,0 +1,55 @@
 """Tests for the in-memory RunStore."""
 import sys
 import os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from api.run_store import RunStore
 class TestRunStore:
    """Behavioural tests for RunStore; every test works on its own fresh instance."""

    def setup_method(self):
        # A new store per test method, so entries never leak between tests.
        self.store = RunStore()

    def test_create_and_get(self):
        self.store.create("run-1", "Test topic")
        fetched = self.store.get("run-1")
        assert fetched is not None
        assert fetched["run_id"] == "run-1"
        assert fetched["input_topic"] == "Test topic"
        assert fetched["status"] == "pending"

    def test_get_nonexistent_returns_none(self):
        lookup = self.store.get("nonexistent")
        assert lookup is None

    def test_update_status(self):
        store = self.store
        store.create("run-2", "Topic")
        store.update("run-2", {"status": "running"})
        assert store.get("run-2")["status"] == "running"

    def test_update_nonexistent_is_noop(self):
        """Updating a non-existent run should not raise."""
        self.store.update("ghost-run", {"status": "running"})

    def test_delete(self):
        store = self.store
        store.create("run-3", "Topic")
        store.delete("run-3")
        assert store.get("run-3") is None

    def test_delete_nonexistent_is_noop(self):
        """Deleting a non-existent run should not raise."""
        self.store.delete("ghost-run")

    def test_update_partial_fields(self):
        changes = {"status": "completed", "final_draft": "Result text"}
        self.store.create("run-4", "Topic")
        self.store.update("run-4", changes)
        fetched = self.store.get("run-4")
        assert fetched["status"] == "completed"
        assert fetched["final_draft"] == "Result text"
        assert fetched["input_topic"] == "Topic"  # original field preserved

    def test_multiple_runs_independent(self):
        for run_id, topic in (("run-a", "Topic A"), ("run-b", "Topic B")):
            self.store.create(run_id, topic)
        self.store.update("run-a", {"status": "running"})
        # Updating one run must leave the other untouched.
        assert self.store.get("run-b")["status"] == "pending"
--- a/backend/tests/test_state.py
+++ b/backend/tests/test_state.py
@ -0,0 +1,44 @@
 """Tests for CouncilState structure and graph_builder helpers."""
 import sys
 import os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
 from services.graph_builder import create_initial_state
 class TestCouncilState:
    """Checks the defaults produced by create_initial_state and the module constants."""

    def test_initial_state_fields(self):
        """A new state carries the topic and run id; every other field is empty/zero/None."""
        fresh = create_initial_state("Test topic", "run-001")
        expected_values = {
            "input_topic": "Test topic",
            "current_draft": "",
            "feedback_history": [],
            "route_decision": "",
            "messages": [],
            "iteration_count": 0,
            "run_id": "run-001",
            "active_node": "",
        }
        for field, expected in expected_values.items():
            assert fresh[field] == expected
        assert fresh["critic_score"] is None

    def test_approval_threshold_value(self):
        assert 8.0 == APPROVAL_THRESHOLD

    def test_max_iterations_value(self):
        assert 5 == MAX_ITERATIONS

    def test_state_is_typed_dict(self):
        """CouncilState should be instantiable as a plain dict."""
        sample: CouncilState = {
            "input_topic": "AI",
            "current_draft": "draft",
            "feedback_history": ["fb1"],
            "route_decision": "rework",
            "messages": [],
            "iteration_count": 1,
            "critic_score": 6.0,
            "run_id": "x",
            "active_node": "critic_agent",
        }
        assert sample["critic_score"] == 6.0
--- a/backend/tools/__init__.py
+++ b/backend/tools/__init__.py
@ -0,0 +1,7 @@
 """
 Agent tools for CouncilOS.
 Phase 4 will add:
 - web_search_tool: Tavily Search API wrapper
 - pdf_reader_tool: PyPDF + ChromaDB vector store wrapper
 """
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,54 @@
 version: "3.9"
 # CouncilOS — local development environment
 # Usage:
 #   docker compose up -d          # Start all services
 #   docker compose down           # Stop all services
 #   docker compose logs -f api    # Follow API logs
 services:
  # ---------------------------------------------------------------------------
  # PostgreSQL — stores council blueprints (used from Phase 2)
  # ---------------------------------------------------------------------------
  db:
    image: postgres:16-alpine
    restart: unless-stopped
    # Local-development credentials only; they match the DATABASE_URL given to
    # the api service in this file.
    environment:
      POSTGRES_DB: councilOS
      POSTGRES_USER: user
      POSTGRES_PASSWORD: password
    # Publishes Postgres on the host's port 5432.
    ports:
      - "5432:5432"
    # Named volume (declared under the top-level `volumes:` key) holding the data directory.
    volumes:
      - postgres_data:/var/lib/postgresql/data
    # The api service's depends_on waits for this check to report healthy.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U user -d councilOS"]
      interval: 5s
      timeout: 5s
      retries: 5
  # ---------------------------------------------------------------------------
  # CouncilOS API — FastAPI + LangGraph backend
  # ---------------------------------------------------------------------------
  api:
    # Image is built from ./backend/Dockerfile.
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    ports:
      - "8000:8000"
    env_file:
      - .env
    # Values under `environment` take precedence over those loaded via env_file,
    # so inside the container the database host is the `db` service rather than
    # whatever host .env names.
    environment:
      DATABASE_URL: postgresql+asyncpg://user:password@db:5432/councilOS
    volumes:
      # Bind-mounts the backend source into the container; paired with --reload below.
      - ./backend:/app
      # Named volume for ChromaDB data — presumably the CHROMA_PERSIST_DIR location; confirm.
      - chroma_data:/app/chroma_db
    # Start only after the db healthcheck reports healthy.
    depends_on:
      db:
        condition: service_healthy
    command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
 volumes:
  postgres_data:
  chroma_data:
		`@ -0,0 +1 @@`
							`"""Service modules for CouncilOS backend."""`
		`@ -0,0 +1 @@`
							`"""Pytest test suite for CouncilOS backend."""`