322 lines
12 KiB
Python
322 lines
12 KiB
Python
"""
|
|
Core Prompt Analyzer agent.
|
|
|
|
Takes a prompt, calls Claude via the Anthropic API, and returns
|
|
structured analysis: scores, mistakes, rewritten prompt, and token comparison.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from typing import Optional
|
|
|
|
import tiktoken
|
|
|
|
from prompt_analyzer.anthropic_client import AnthropicClient
|
|
from prompt_analyzer.context_store import ContextStore
|
|
from prompt_analyzer.models import (
|
|
AnalysisResult,
|
|
AnalysisMetadata,
|
|
Scores,
|
|
Score,
|
|
Mistake,
|
|
TokenComparison,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
SYSTEM_PROMPT = """You are an expert Prompt Quality Analyzer. Your job is to analyze a given prompt and return a structured JSON assessment.
|
|
|
|
ANALYZE THE PROMPT ON THESE 5 DIMENSIONS (score each 0-100):
|
|
|
|
1. **Clarity** (0-100): How unambiguous is the prompt? Can it be misinterpreted? Are instructions precise?
|
|
2. **Token Efficiency** (0-100): How concise is the prompt? Are there redundant words, repeated instructions, or unnecessary filler? Higher = more efficient.
|
|
3. **Goal Alignment** (0-100): Does the prompt clearly state what output is expected? Is the desired result format, length, and style specified?
|
|
4. **Structure** (0-100): Is the prompt well-organized? Does it have logical flow, proper sections, and clear instruction ordering?
|
|
5. **Vagueness Index** (0-100): How many vague/ambiguous phrases exist? ("make it good", "do something nice", "be creative"). Score 0 = extremely vague, 100 = no vagueness at all.
|
|
|
|
ALSO:
|
|
- **Identify specific mistakes** in the prompt. For each mistake, provide:
|
|
- `type`: one of: vague_instruction, missing_context, redundancy, contradiction, poor_formatting, missing_output_format, unclear_scope, overly_complex
|
|
- `text`: the exact problematic text from the prompt (null if the mistake is about something missing)
|
|
- `suggestion`: a concrete fix
|
|
|
|
- **Rewrite the prompt** to be optimal — maximum clarity, minimum tokens, best structure. The rewrite should accomplish the exact same goal as the original.
|
|
|
|
{project_context}
|
|
|
|
RESPOND WITH ONLY VALID JSON in this exact format (no markdown, no code fences, just the JSON):
|
|
{{
|
|
"overall_score": <number 0-100, weighted average: clarity 25%, token_efficiency 20%, goal_alignment 25%, structure 15%, vagueness_index 15%>,
|
|
"scores": {{
|
|
"clarity": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
|
|
"token_efficiency": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
|
|
"goal_alignment": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
|
|
"structure": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
|
|
"vagueness_index": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }}
|
|
}},
|
|
"mistakes": [
|
|
{{ "type": "<type>", "text": "<problematic text or null>", "suggestion": "<fix>" }}
|
|
],
|
|
"rewritten_prompt": "<the optimized version of the prompt>"
|
|
}}"""
|
|
|
|
|
|
class PromptAnalyzer:
|
|
"""
|
|
AI-powered prompt quality analyzer.
|
|
|
|
Usage:
|
|
analyzer = PromptAnalyzer()
|
|
result = await analyzer.analyze("Your prompt here")
|
|
|
|
For context-aware analysis (enterprise):
|
|
result = await analyzer.analyze(
|
|
prompt="...",
|
|
project_id="customer_support",
|
|
source_agent="planner",
|
|
)
|
|
"""
|
|
|
|
def __init__(self):
|
|
self.llm = AnthropicClient()
|
|
self.context_store = ContextStore()
|
|
# Use cl100k_base tokenizer (closest to Claude's tokenization)
|
|
try:
|
|
self.tokenizer = tiktoken.get_encoding("cl100k_base")
|
|
except Exception:
|
|
self.tokenizer = None
|
|
logger.warning("tiktoken not available, token counts will be estimated")
|
|
|
|
def _count_tokens(self, text: str) -> int:
|
|
"""Count tokens in text."""
|
|
if self.tokenizer:
|
|
return len(self.tokenizer.encode(text))
|
|
# Rough estimate: ~4 chars per token
|
|
return len(text) // 4
|
|
|
|
async def analyze(
|
|
self,
|
|
prompt: str,
|
|
context: Optional[str] = None,
|
|
project_id: Optional[str] = None,
|
|
source_agent: Optional[str] = None,
|
|
target_agent: Optional[str] = None,
|
|
) -> AnalysisResult:
|
|
"""
|
|
Analyze a prompt and return structured quality assessment.
|
|
|
|
Args:
|
|
prompt: The prompt to analyze
|
|
context: Optional goal/context for the prompt
|
|
project_id: Optional project ID for context-aware analysis
|
|
source_agent: Optional agent that authored this prompt
|
|
target_agent: Optional agent this prompt is directed to
|
|
|
|
Returns:
|
|
AnalysisResult with scores, mistakes, and rewritten prompt
|
|
"""
|
|
logger.info(
|
|
"Analyzing prompt (length=%d, project=%s, agent=%s)",
|
|
len(prompt),
|
|
project_id,
|
|
source_agent,
|
|
)
|
|
|
|
# Build context-aware system prompt
|
|
project_context = self.context_store.build_context_summary(
|
|
project_id, source_agent
|
|
)
|
|
system_prompt = SYSTEM_PROMPT.format(project_context=project_context)
|
|
|
|
# Build user message
|
|
user_message = self._build_user_message(prompt, context)
|
|
|
|
# Call Gemini
|
|
raw_response = await self.llm.invoke(system_prompt, user_message)
|
|
|
|
# Parse the JSON response
|
|
result = self._parse_response(raw_response, prompt, project_id, source_agent, target_agent)
|
|
|
|
# Update context store (if project-aware)
|
|
if project_id:
|
|
analysis_dict = result.model_dump()
|
|
self.context_store.append_history(project_id, analysis_dict)
|
|
self.context_store.update_patterns(project_id, analysis_dict)
|
|
if source_agent:
|
|
self.context_store.update_agent_context(
|
|
project_id, source_agent, analysis_dict
|
|
)
|
|
|
|
return result
|
|
|
|
def _build_user_message(
|
|
self, prompt: str, context: Optional[str] = None
|
|
) -> str:
|
|
"""Build the user message sent to Claude."""
|
|
parts = ["PROMPT TO ANALYZE:\n---"]
|
|
parts.append(prompt)
|
|
parts.append("---")
|
|
|
|
if context:
|
|
parts.append(f"\nCONTEXT/GOAL: {context}")
|
|
|
|
return "\n".join(parts)
|
|
|
|
def _extract_json(self, raw: str) -> str:
|
|
"""Extract JSON from Claude's response, handling various formatting."""
|
|
import re
|
|
|
|
cleaned = raw.strip()
|
|
|
|
# 1. Remove markdown code fences (```json ... ``` or ``` ... ```)
|
|
fence_pattern = re.compile(r'```(?:json)?\s*\n?(.*?)\n?\s*```', re.DOTALL)
|
|
match = fence_pattern.search(cleaned)
|
|
if match:
|
|
cleaned = match.group(1).strip()
|
|
|
|
# 2. If the response doesn't start with {, try to find the JSON object
|
|
if not cleaned.startswith("{"):
|
|
brace_start = cleaned.find("{")
|
|
if brace_start != -1:
|
|
cleaned = cleaned[brace_start:]
|
|
|
|
# 3. Find the matching closing brace
|
|
if cleaned.startswith("{"):
|
|
depth = 0
|
|
in_string = False
|
|
escape = False
|
|
end_pos = len(cleaned)
|
|
for i, ch in enumerate(cleaned):
|
|
if escape:
|
|
escape = False
|
|
continue
|
|
if ch == '\\' and in_string:
|
|
escape = True
|
|
continue
|
|
if ch == '"' and not escape:
|
|
in_string = not in_string
|
|
continue
|
|
if in_string:
|
|
continue
|
|
if ch == '{':
|
|
depth += 1
|
|
elif ch == '}':
|
|
depth -= 1
|
|
if depth == 0:
|
|
end_pos = i + 1
|
|
break
|
|
cleaned = cleaned[:end_pos]
|
|
|
|
# 4. Fix trailing commas before } or ] (common LLM mistake)
|
|
cleaned = re.sub(r',\s*([}\]])', r'\1', cleaned)
|
|
|
|
return cleaned
|
|
|
|
def _parse_response(
|
|
self,
|
|
raw: str,
|
|
original_prompt: str,
|
|
project_id: Optional[str],
|
|
source_agent: Optional[str],
|
|
target_agent: Optional[str],
|
|
) -> AnalysisResult:
|
|
"""Parse Claude's JSON response into an AnalysisResult."""
|
|
cleaned = self._extract_json(raw)
|
|
|
|
try:
|
|
data = json.loads(cleaned)
|
|
except json.JSONDecodeError as e:
|
|
logger.error("Failed to parse Claude response as JSON: %s", e)
|
|
logger.error("Raw response (first 500 chars): %s", raw[:500])
|
|
logger.error("Cleaned response (first 500 chars): %s", cleaned[:500])
|
|
# Return a fallback result
|
|
return self._fallback_result(original_prompt, str(e), project_id, source_agent, target_agent)
|
|
|
|
# Build structured result
|
|
try:
|
|
scores_data = data.get("scores", {})
|
|
scores = Scores(
|
|
clarity=Score(**scores_data.get("clarity", {"score": 0, "reasoning": "N/A"})),
|
|
token_efficiency=Score(**scores_data.get("token_efficiency", {"score": 0, "reasoning": "N/A"})),
|
|
goal_alignment=Score(**scores_data.get("goal_alignment", {"score": 0, "reasoning": "N/A"})),
|
|
structure=Score(**scores_data.get("structure", {"score": 0, "reasoning": "N/A"})),
|
|
vagueness_index=Score(**scores_data.get("vagueness_index", {"score": 0, "reasoning": "N/A"})),
|
|
)
|
|
|
|
mistakes = [
|
|
Mistake(**m) for m in data.get("mistakes", [])
|
|
]
|
|
|
|
rewritten = data.get("rewritten_prompt", original_prompt)
|
|
original_tokens = self._count_tokens(original_prompt)
|
|
rewritten_tokens = self._count_tokens(rewritten)
|
|
savings = (
|
|
round((1 - rewritten_tokens / original_tokens) * 100, 1)
|
|
if original_tokens > 0
|
|
else 0.0
|
|
)
|
|
|
|
return AnalysisResult(
|
|
original_prompt=original_prompt,
|
|
overall_score=data.get("overall_score", 0),
|
|
scores=scores,
|
|
mistakes=mistakes,
|
|
rewritten_prompt=rewritten,
|
|
token_comparison=TokenComparison(
|
|
original_tokens=original_tokens,
|
|
rewritten_tokens=rewritten_tokens,
|
|
savings_percent=savings,
|
|
),
|
|
metadata=AnalysisMetadata(
|
|
project_id=project_id,
|
|
source_agent=source_agent,
|
|
target_agent=target_agent,
|
|
mode="agent" if source_agent else "human",
|
|
),
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error("Failed to build AnalysisResult: %s", e)
|
|
return self._fallback_result(original_prompt, str(e), project_id, source_agent, target_agent)
|
|
|
|
def _fallback_result(
|
|
self,
|
|
prompt: str,
|
|
error: str,
|
|
project_id: Optional[str],
|
|
source_agent: Optional[str],
|
|
target_agent: Optional[str],
|
|
) -> AnalysisResult:
|
|
"""Return a fallback result when parsing fails."""
|
|
fallback_score = Score(score=0, reasoning=f"Analysis failed: {error}")
|
|
return AnalysisResult(
|
|
original_prompt=prompt,
|
|
overall_score=0,
|
|
scores=Scores(
|
|
clarity=fallback_score,
|
|
token_efficiency=fallback_score,
|
|
goal_alignment=fallback_score,
|
|
structure=fallback_score,
|
|
vagueness_index=fallback_score,
|
|
),
|
|
mistakes=[
|
|
Mistake(
|
|
type="analysis_error",
|
|
text=None,
|
|
suggestion=f"Re-run analysis. Error: {error}",
|
|
)
|
|
],
|
|
rewritten_prompt=prompt,
|
|
token_comparison=TokenComparison(
|
|
original_tokens=self._count_tokens(prompt),
|
|
rewritten_tokens=self._count_tokens(prompt),
|
|
savings_percent=0.0,
|
|
),
|
|
metadata=AnalysisMetadata(
|
|
project_id=project_id,
|
|
source_agent=source_agent,
|
|
target_agent=target_agent,
|
|
mode="agent" if source_agent else "human",
|
|
),
|
|
)
|