"""
Critic Agent Node — evaluates the current draft and decides whether to
approve or rework.

The critic scores the draft from 0–10 and returns structured feedback.
If the score meets APPROVAL_THRESHOLD, route_decision is set to "approve".
Otherwise it is set to "rework" and the feedback is returned under
feedback_history.
"""

import os
import re

from langchain_anthropic import ChatAnthropic
from langchain_core.messages import HumanMessage, SystemMessage

from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS

# NOTE: the "8" mentioned in the prompt is guidance for the model only; the
# actual routing decision compares the parsed score with APPROVAL_THRESHOLD.
_SYSTEM_PROMPT = """You are the Critic AI in a council of expert AIs.
Your job is to rigorously evaluate the quality of a draft document.

You must respond in EXACTLY this format — no deviations:

SCORE: <number from 0 to 10>
VERDICT: <"approve" if score >= 8, otherwise "rework">
FEEDBACK: <specific, actionable feedback explaining the score>

Scoring criteria:
- 0–3: Poor structure, major factual gaps, incoherent
- 4–6: Adequate but needs significant improvement
- 7: Good but has notable weaknesses
- 8–9: High quality, minor improvements possible
- 10: Exceptional, publication-ready

Be strict. Only award 8+ if the document genuinely meets high quality standards."""

# Compiled once at import time. The score pattern tolerates optional Markdown
# emphasis around the label or the number ("**SCORE:** 8", "SCORE: **8**"),
# which would otherwise be a parse failure and silently yield a score of 0.
_SCORE_RE = re.compile(r"SCORE\**\s*:\s*\**\s*(\d+(?:\.\d+)?)")
_FEEDBACK_RE = re.compile(r"FEEDBACK:\s*(.*)", re.DOTALL)


def _content_to_text(content: object) -> str:
    """
    Flatten a chat message ``content`` payload into a single string.

    Args:
        content: Either a plain string, or a list whose items are strings
            or dicts carrying a string under the "text" key. Any other
            value is converted with ``str()``.

    Returns:
        The concatenated text. List items that are neither strings nor
        dicts with a string "text" entry are skipped.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "".join(parts)
    return str(content)


def _parse_critic_response(content: str) -> tuple[float, str]:
    """
    Parse the structured critic response.

    The routing decision is derived exclusively from the numeric score so
    that the APPROVAL_THRESHOLD constant is the single source of truth —
    the LLM-reported VERDICT string is intentionally not returned.

    Args:
        content: The critic's reply. A plain string is expected; a list of
            content blocks is also tolerated and flattened to text first.

    Returns:
        (score, feedback) tuple.
        score: float clamped to 0–10, defaults to 0.0 on parse failure.
        feedback: FEEDBACK block text, or full content on parse failure.
    """
    text = _content_to_text(content)

    score_match = _SCORE_RE.search(text)
    feedback_match = _FEEDBACK_RE.search(text)

    score = float(score_match.group(1)) if score_match else 0.0
    feedback = feedback_match.group(1).strip() if feedback_match else text.strip()

    # Clamp score to 0–10
    score = max(0.0, min(10.0, score))

    return score, feedback


def critic_agent_node(state: CouncilState) -> dict:
    """
    LangGraph node function for the Critic Agent.

    Reads input_topic and current_draft from state, asks the critic model
    to evaluate the draft, and returns:
    - route_decision: "approve" or "rework"
    - critic_score: numeric score
    - messages: the system, user and response messages of this evaluation
      (an empty list when the safety valve fires)
    - feedback_history: a one-element list with the new feedback, present
      only when the decision is "rework" (presumably merged into the
      existing history by a reducer on CouncilState — confirm in state.py)
    - active_node: "critic_agent"

    Safety valve: if iteration_count >= MAX_ITERATIONS, force approval
    without calling the model, to prevent infinite loops.

    Args:
        state: The current CouncilState.

    Returns:
        A dict with updated state fields.
    """
    # Safety valve: prevent infinite loops. No evaluation is performed, so
    # the threshold itself is reported as the score.
    if state.get("iteration_count", 0) >= MAX_ITERATIONS:
        return {
            "route_decision": "approve",
            "critic_score": APPROVAL_THRESHOLD,
            "messages": [],
            "active_node": "critic_agent",
        }

    llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
        temperature=0.2,  # Low temperature for consistent evaluation
        max_tokens=1024,
    )

    system_msg = SystemMessage(content=_SYSTEM_PROMPT)
    user_msg = HumanMessage(
        content=(
            f"Please evaluate this draft on the topic '{state['input_topic']}':\n\n"
            f"{state['current_draft']}"
        )
    )

    response = llm.invoke([system_msg, user_msg])
    score, feedback = _parse_critic_response(response.content)

    # Route decision is derived solely from the numeric score against
    # APPROVAL_THRESHOLD — the LLM's own VERDICT string is not trusted.
    route_decision = "approve" if score >= APPROVAL_THRESHOLD else "rework"

    result: dict = {
        "critic_score": score,
        "route_decision": route_decision,
        "messages": [system_msg, user_msg, response],
        "active_node": "critic_agent",
    }

    # Only include feedback if we're sending back for rework
    if route_decision == "rework":
        result["feedback_history"] = [f"Score: {score}/10\n{feedback}"]

    return result