128 lines
4.1 KiB
Python
128 lines
4.1 KiB
Python
"""
|
||
Critic Agent Node — evaluates the current draft and decides whether to approve or rework.
|
||
|
||
The critic scores the draft from 0–10 and returns structured feedback.
|
||
If the score meets APPROVAL_THRESHOLD, route_decision is set to "approve".
|
||
Otherwise it is set to "rework" and the feedback is appended to feedback_history.
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
from langchain_anthropic import ChatAnthropic
|
||
from langchain_core.messages import HumanMessage, SystemMessage
|
||
|
||
from state import CouncilState, APPROVAL_THRESHOLD, MAX_ITERATIONS
|
||
|
||
|
||
_SYSTEM_PROMPT = """You are the Critic AI in a council of expert AIs.
|
||
Your job is to rigorously evaluate the quality of a draft document.
|
||
|
||
You must respond in EXACTLY this format — no deviations:
|
||
|
||
SCORE: <integer 0-10>
|
||
VERDICT: <"approve" if score >= 8, otherwise "rework">
|
||
FEEDBACK:
|
||
<detailed, actionable feedback explaining what must be improved>
|
||
|
||
Scoring criteria:
|
||
- 0–3: Poor structure, major factual gaps, incoherent
|
||
- 4–6: Adequate but needs significant improvement
|
||
- 7: Good but has notable weaknesses
|
||
- 8–9: High quality, minor improvements possible
|
||
- 10: Exceptional, publication-ready
|
||
|
||
Be strict. Only award 8+ if the document genuinely meets high quality standards."""
|
||
|
||
|
||
def _parse_critic_response(content: str) -> tuple[float, str]:
|
||
"""
|
||
Parse the structured critic response.
|
||
|
||
The routing decision is derived exclusively from the numeric score so that
|
||
the APPROVAL_THRESHOLD constant is the single source of truth — the
|
||
LLM-reported VERDICT string is intentionally not returned.
|
||
|
||
Returns:
|
||
(score, feedback) tuple.
|
||
score: float clamped to 0–10, defaults to 0.0 on parse failure.
|
||
feedback: FEEDBACK block text, or full content on parse failure.
|
||
"""
|
||
score_match = re.search(r"SCORE:\s*(\d+(?:\.\d+)?)", content)
|
||
feedback_match = re.search(r"FEEDBACK:\s*(.*)", content, re.DOTALL)
|
||
|
||
score = float(score_match.group(1)) if score_match else 0.0
|
||
feedback = feedback_match.group(1).strip() if feedback_match else content.strip()
|
||
|
||
# Clamp score to 0–10
|
||
score = max(0.0, min(10.0, score))
|
||
|
||
return score, feedback
|
||
|
||
|
||
def critic_agent_node(state: CouncilState) -> dict:
|
||
"""
|
||
LangGraph node function for the Critic Agent.
|
||
|
||
Reads current_draft from state, evaluates it, and returns:
|
||
- route_decision: "approve" or "rework"
|
||
- critic_score: numeric score
|
||
- feedback_history: appended with new feedback (if rework)
|
||
- active_node: "critic_agent"
|
||
|
||
Safety valve: if iteration_count >= MAX_ITERATIONS, force approval
|
||
to prevent infinite loops.
|
||
|
||
Args:
|
||
state: The current CouncilState.
|
||
|
||
Returns:
|
||
A dict with updated state fields.
|
||
"""
|
||
# Safety valve: prevent infinite loops
|
||
if state.get("iteration_count", 0) >= MAX_ITERATIONS:
|
||
return {
|
||
"route_decision": "approve",
|
||
"critic_score": APPROVAL_THRESHOLD,
|
||
"messages": [],
|
||
"active_node": "critic_agent",
|
||
}
|
||
|
||
llm = ChatAnthropic(
|
||
model="claude-3-5-sonnet-20241022",
|
||
api_key=os.environ.get("ANTHROPIC_API_KEY"),
|
||
temperature=0.2, # Low temperature for consistent evaluation
|
||
max_tokens=1024,
|
||
)
|
||
|
||
system_msg = SystemMessage(content=_SYSTEM_PROMPT)
|
||
user_msg = HumanMessage(
|
||
content=(
|
||
f"Please evaluate this draft on the topic '{state['input_topic']}':\n\n"
|
||
f"{state['current_draft']}"
|
||
)
|
||
)
|
||
|
||
response = llm.invoke([system_msg, user_msg])
|
||
score, feedback = _parse_critic_response(response.content)
|
||
|
||
# Route decision is derived solely from the numeric score against
|
||
# APPROVAL_THRESHOLD — the LLM's own VERDICT string is not trusted.
|
||
if score >= APPROVAL_THRESHOLD:
|
||
route_decision = "approve"
|
||
else:
|
||
route_decision = "rework"
|
||
|
||
result: dict = {
|
||
"critic_score": score,
|
||
"route_decision": route_decision,
|
||
"messages": [system_msg, user_msg, response],
|
||
"active_node": "critic_agent",
|
||
}
|
||
|
||
# Only append feedback if we're sending back for rework
|
||
if route_decision == "rework":
|
||
result["feedback_history"] = [
|
||
f"Score: {score}/10\n{feedback}"
|
||
]
|
||
|
||
return result
|