prompt-analyser/prompt_analyzer/analyzer.py

"""
Core Prompt Analyzer agent.

Takes a prompt, calls Claude via the Anthropic API, and returns
structured analysis: scores, mistakes, rewritten prompt, and token comparison.
"""

import json
import logging
from typing import Optional

import tiktoken

from prompt_analyzer.anthropic_client import AnthropicClient
from prompt_analyzer.context_store import ContextStore
from prompt_analyzer.models import (
    AnalysisResult,
    AnalysisMetadata,
    Scores,
    Score,
    Mistake,
    TokenComparison,
)

logger = logging.getLogger(__name__)

SYSTEM_PROMPT = """You are an expert Prompt Quality Analyzer. Your job is to analyze a given prompt and return a structured JSON assessment.

ANALYZE THE PROMPT ON THESE 5 DIMENSIONS (score each 0-100):

1. **Clarity** (0-100): How unambiguous is the prompt? Can it be misinterpreted? Are instructions precise?
2. **Token Efficiency** (0-100): How concise is the prompt? Are there redundant words, repeated instructions, or unnecessary filler? Higher = more efficient.
3. **Goal Alignment** (0-100): Does the prompt clearly state what output is expected? Is the desired result format, length, and style specified?
4. **Structure** (0-100): Is the prompt well-organized? Does it have logical flow, proper sections, and clear instruction ordering?
5. **Vagueness Index** (0-100): How many vague/ambiguous phrases exist? ("make it good", "do something nice", "be creative"). Score 0 = extremely vague, 100 = no vagueness at all.

ALSO:
- **Identify specific mistakes** in the prompt. For each mistake, provide:
  - `type`: one of: vague_instruction, missing_context, redundancy, contradiction, poor_formatting, missing_output_format, unclear_scope, overly_complex
  - `text`: the exact problematic text from the prompt (null if the mistake is about something missing)
  - `suggestion`: a concrete fix

- **Rewrite the prompt** to be optimal — maximum clarity, minimum tokens, best structure. The rewrite should accomplish the exact same goal as the original.

{project_context}

RESPOND WITH ONLY VALID JSON in this exact format (no markdown, no code fences, just the JSON):
{{
  "overall_score": <number 0-100, weighted average: clarity 25%, token_efficiency 20%, goal_alignment 25%, structure 15%, vagueness_index 15%>,
  "scores": {{
    "clarity": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
    "token_efficiency": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
    "goal_alignment": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
    "structure": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }},
    "vagueness_index": {{ "score": <0-100>, "reasoning": "<1-2 sentences>" }}
  }},
  "mistakes": [
    {{ "type": "<type>", "text": "<problematic text or null>", "suggestion": "<fix>" }}
  ],
  "rewritten_prompt": "<the optimized version of the prompt>"
}}"""


class PromptAnalyzer:
    """
    AI-powered prompt quality analyzer.

    Usage:
        analyzer = PromptAnalyzer()
        result = await analyzer.analyze("Your prompt here")

    For context-aware analysis (enterprise):
        result = await analyzer.analyze(
            prompt="...",
            project_id="customer_support",
            source_agent="planner",
        )
    """

    def __init__(self):
        self.llm = AnthropicClient()
        self.context_store = ContextStore()
        # Use cl100k_base tokenizer (closest to Claude's tokenization)
        try:
            self.tokenizer = tiktoken.get_encoding("cl100k_base")
        except Exception:
            self.tokenizer = None
            logger.warning("tiktoken not available, token counts will be estimated")

    def _count_tokens(self, text: str) -> int:
        """Count tokens in text."""
        if self.tokenizer:
            return len(self.tokenizer.encode(text))
        # Rough estimate: ~4 chars per token
        return len(text) // 4

    async def analyze(
        self,
        prompt: str,
        context: Optional[str] = None,
        project_id: Optional[str] = None,
        source_agent: Optional[str] = None,
        target_agent: Optional[str] = None,
    ) -> AnalysisResult:
        """
        Analyze a prompt and return structured quality assessment.

        Args:
            prompt: The prompt to analyze
            context: Optional goal/context for the prompt
            project_id: Optional project ID for context-aware analysis
            source_agent: Optional agent that authored this prompt
            target_agent: Optional agent this prompt is directed to

        Returns:
            AnalysisResult with scores, mistakes, and rewritten prompt
        """
        logger.info(
            "Analyzing prompt (length=%d, project=%s, agent=%s)",
            len(prompt),
            project_id,
            source_agent,
        )

        # Build context-aware system prompt
        project_context = self.context_store.build_context_summary(
            project_id, source_agent
        )
        system_prompt = SYSTEM_PROMPT.format(project_context=project_context)

        # Build user message
        user_message = self._build_user_message(prompt, context)

        # Call Gemini
        raw_response = await self.llm.invoke(system_prompt, user_message)

        # Parse the JSON response
        result = self._parse_response(raw_response, prompt, project_id, source_agent, target_agent)

        # Update context store (if project-aware)
        if project_id:
            analysis_dict = result.model_dump()
            self.context_store.append_history(project_id, analysis_dict)
            self.context_store.update_patterns(project_id, analysis_dict)
            if source_agent:
                self.context_store.update_agent_context(
                    project_id, source_agent, analysis_dict
                )

        return result

    def _build_user_message(
        self, prompt: str, context: Optional[str] = None
    ) -> str:
        """Build the user message sent to Claude."""
        parts = ["PROMPT TO ANALYZE:\n---"]
        parts.append(prompt)
        parts.append("---")

        if context:
            parts.append(f"\nCONTEXT/GOAL: {context}")

        return "\n".join(parts)

    def _extract_json(self, raw: str) -> str:
        """Extract JSON from Claude's response, handling various formatting."""
        import re

        cleaned = raw.strip()

        # 1. Remove markdown code fences (```json ... ``` or ``` ... ```)
        fence_pattern = re.compile(r'```(?:json)?\s*\n?(.*?)\n?\s*```', re.DOTALL)
        match = fence_pattern.search(cleaned)
        if match:
            cleaned = match.group(1).strip()

        # 2. If the response doesn't start with {, try to find the JSON object
        if not cleaned.startswith("{"):
            brace_start = cleaned.find("{")
            if brace_start != -1:
                cleaned = cleaned[brace_start:]

        # 3. Find the matching closing brace
        if cleaned.startswith("{"):
            depth = 0
            in_string = False
            escape = False
            end_pos = len(cleaned)
            for i, ch in enumerate(cleaned):
                if escape:
                    escape = False
                    continue
                if ch == '\\' and in_string:
                    escape = True
                    continue
                if ch == '"' and not escape:
                    in_string = not in_string
                    continue
                if in_string:
                    continue
                if ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                    if depth == 0:
                        end_pos = i + 1
                        break
            cleaned = cleaned[:end_pos]

        # 4. Fix trailing commas before } or ] (common LLM mistake)
        cleaned = re.sub(r',\s*([}\]])', r'\1', cleaned)

        return cleaned

    def _parse_response(
        self,
        raw: str,
        original_prompt: str,
        project_id: Optional[str],
        source_agent: Optional[str],
        target_agent: Optional[str],
    ) -> AnalysisResult:
        """Parse Claude's JSON response into an AnalysisResult."""
        cleaned = self._extract_json(raw)

        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError as e:
            logger.error("Failed to parse Claude response as JSON: %s", e)
            logger.error("Raw response (first 500 chars): %s", raw[:500])
            logger.error("Cleaned response (first 500 chars): %s", cleaned[:500])
            # Return a fallback result
            return self._fallback_result(original_prompt, str(e), project_id, source_agent, target_agent)

        # Build structured result
        try:
            scores_data = data.get("scores", {})
            scores = Scores(
                clarity=Score(**scores_data.get("clarity", {"score": 0, "reasoning": "N/A"})),
                token_efficiency=Score(**scores_data.get("token_efficiency", {"score": 0, "reasoning": "N/A"})),
                goal_alignment=Score(**scores_data.get("goal_alignment", {"score": 0, "reasoning": "N/A"})),
                structure=Score(**scores_data.get("structure", {"score": 0, "reasoning": "N/A"})),
                vagueness_index=Score(**scores_data.get("vagueness_index", {"score": 0, "reasoning": "N/A"})),
            )

            mistakes = [
                Mistake(**m) for m in data.get("mistakes", [])
            ]

            rewritten = data.get("rewritten_prompt", original_prompt)
            original_tokens = self._count_tokens(original_prompt)
            rewritten_tokens = self._count_tokens(rewritten)
            savings = (
                round((1 - rewritten_tokens / original_tokens) * 100, 1)
                if original_tokens > 0
                else 0.0
            )

            return AnalysisResult(
                original_prompt=original_prompt,
                overall_score=data.get("overall_score", 0),
                scores=scores,
                mistakes=mistakes,
                rewritten_prompt=rewritten,
                token_comparison=TokenComparison(
                    original_tokens=original_tokens,
                    rewritten_tokens=rewritten_tokens,
                    savings_percent=savings,
                ),
                metadata=AnalysisMetadata(
                    project_id=project_id,
                    source_agent=source_agent,
                    target_agent=target_agent,
                    mode="agent" if source_agent else "human",
                ),
            )

        except Exception as e:
            logger.error("Failed to build AnalysisResult: %s", e)
            return self._fallback_result(original_prompt, str(e), project_id, source_agent, target_agent)

    def _fallback_result(
        self,
        prompt: str,
        error: str,
        project_id: Optional[str],
        source_agent: Optional[str],
        target_agent: Optional[str],
    ) -> AnalysisResult:
        """Return a fallback result when parsing fails."""
        fallback_score = Score(score=0, reasoning=f"Analysis failed: {error}")
        return AnalysisResult(
            original_prompt=prompt,
            overall_score=0,
            scores=Scores(
                clarity=fallback_score,
                token_efficiency=fallback_score,
                goal_alignment=fallback_score,
                structure=fallback_score,
                vagueness_index=fallback_score,
            ),
            mistakes=[
                Mistake(
                    type="analysis_error",
                    text=None,
                    suggestion=f"Re-run analysis. Error: {error}",
                )
            ],
            rewritten_prompt=prompt,
            token_comparison=TokenComparison(
                original_tokens=self._count_tokens(prompt),
                rewritten_tokens=self._count_tokens(prompt),
                savings_percent=0.0,
            ),
            metadata=AnalysisMetadata(
                project_id=project_id,
                source_agent=source_agent,
                target_agent=target_agent,
                mode="agent" if source_agent else "human",
            ),
        )