289 lines
10 KiB
Python
289 lines
10 KiB
Python
"""SQLite database for storing analysis results and aggregations."""
|
|
|
|
import json
import logging
from collections import Counter
from typing import Optional

import aiosqlite

from prompt_analyzer.config import ANALYTICS_DB_PATH
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Location of the SQLite database, taken from the project configuration.
# Every function in this module opens its own connection to this path.
DB_PATH = ANALYTICS_DB_PATH

# Schema of the single ``analyses`` table: one row per stored analysis.
# ``rewrite_used`` has no default and is not part of the INSERT issued by
# ``store_analysis``, so it stays NULL until ``mark_rewrite_used`` sets it.
CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS analyses (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp TEXT NOT NULL,
    mode TEXT NOT NULL DEFAULT 'human',
    source_agent TEXT,
    target_agent TEXT,
    project_id TEXT,
    original_prompt TEXT NOT NULL,
    rewritten_prompt TEXT,
    overall_score INTEGER NOT NULL DEFAULT 0,
    clarity INTEGER NOT NULL DEFAULT 0,
    token_efficiency INTEGER NOT NULL DEFAULT 0,
    goal_alignment INTEGER NOT NULL DEFAULT 0,
    structure INTEGER NOT NULL DEFAULT 0,
    vagueness_index INTEGER NOT NULL DEFAULT 0,
    mistake_count INTEGER NOT NULL DEFAULT 0,
    mistakes_json TEXT,
    original_tokens INTEGER NOT NULL DEFAULT 0,
    rewritten_tokens INTEGER NOT NULL DEFAULT 0,
    token_savings_percent REAL NOT NULL DEFAULT 0.0,
    rewrite_used INTEGER,
    full_result_json TEXT
);
"""

# Secondary indexes on the timestamp, project_id and source_agent columns.
# Each statement uses IF NOT EXISTS, so running them repeatedly is harmless.
CREATE_INDEX_SQL = [
    "CREATE INDEX IF NOT EXISTS idx_timestamp ON analyses(timestamp);",
    "CREATE INDEX IF NOT EXISTS idx_project ON analyses(project_id);",
    "CREATE INDEX IF NOT EXISTS idx_source ON analyses(source_agent);",
]
|
|
|
|
|
|
async def init_db():
    """Create the ``analyses`` table and its indexes if they are missing.

    Opens a connection to ``DB_PATH``, runs the table DDL followed by every
    index statement, commits, and logs the database location.
    """
    # Table first, then the indexes that refer to it.
    ddl_statements = [CREATE_TABLE_SQL, *CREATE_INDEX_SQL]
    async with aiosqlite.connect(DB_PATH) as conn:
        for statement in ddl_statements:
            await conn.execute(statement)
        await conn.commit()
    logger.info("Database initialized at %s", DB_PATH)
|
|
|
|
|
|
async def store_analysis(result_dict: dict) -> int:
    """Persist one analysis result and return the id of the new row.

    Args:
        result_dict: Analysis payload. The keys read here are ``scores``
            (dimension name mapped to a dict carrying ``score``),
            ``metadata``, ``token_comparison``, ``mistakes``,
            ``original_prompt``, ``rewritten_prompt`` and ``overall_score``.
            For the four container keys, a value of ``None`` is treated the
            same as a missing key.

    Returns:
        The ``id`` of the inserted row.
    """
    # ``dict.get(key, default)`` only covers *missing* keys; ``or`` also covers
    # keys that are present but None, which would otherwise crash below.
    scores = result_dict.get("scores") or {}
    meta = result_dict.get("metadata") or {}
    tc = result_dict.get("token_comparison") or {}
    mistakes = result_dict.get("mistakes") or []

    def _get_score(dim: str) -> int:
        """Return the score stored for *dim*, or 0 when absent or malformed."""
        val = scores.get(dim)
        if isinstance(val, dict):
            # The score columns are NOT NULL, so never pass None through.
            return val.get("score") or 0
        return 0

    async with aiosqlite.connect(DB_PATH) as db:
        cursor = await db.execute(
            """INSERT INTO analyses (
                timestamp, mode, source_agent, target_agent, project_id,
                original_prompt, rewritten_prompt,
                overall_score, clarity, token_efficiency, goal_alignment,
                structure, vagueness_index,
                mistake_count, mistakes_json,
                original_tokens, rewritten_tokens, token_savings_percent,
                full_result_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                meta.get("timestamp", ""),
                meta.get("mode", "human"),
                meta.get("source_agent"),
                meta.get("target_agent"),
                meta.get("project_id"),
                result_dict.get("original_prompt", ""),
                result_dict.get("rewritten_prompt", ""),
                result_dict.get("overall_score", 0),
                _get_score("clarity"),
                _get_score("token_efficiency"),
                _get_score("goal_alignment"),
                _get_score("structure"),
                _get_score("vagueness_index"),
                len(mistakes),
                # default=str stringifies anything json cannot encode natively.
                json.dumps(mistakes, default=str),
                tc.get("original_tokens", 0),
                tc.get("rewritten_tokens", 0),
                tc.get("savings_percent", 0.0),
                # The untouched payload is kept as well, for later inspection.
                json.dumps(result_dict, default=str),
            ),
        )
        await db.commit()
        row_id = cursor.lastrowid
    logger.info("Stored analysis id=%d", row_id)
    return row_id
|
|
|
|
|
|
async def get_interactions(
    limit: int = 50,
    offset: int = 0,
    project_id: Optional[str] = None,
) -> list[dict]:
    """Return one page of stored analyses, highest id first.

    Args:
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the page starts.
        project_id: When truthy, only rows with this ``project_id`` are
            returned; otherwise no filter is applied.

    Returns:
        One plain dict per row, keyed by column name.
    """
    # Pick the statement and its parameters up front so a single execute
    # call serves both the filtered and the unfiltered case.
    if project_id:
        query = "SELECT * FROM analyses WHERE project_id = ? ORDER BY id DESC LIMIT ? OFFSET ?"
        params = (project_id, limit, offset)
    else:
        query = "SELECT * FROM analyses ORDER BY id DESC LIMIT ? OFFSET ?"
        params = (limit, offset)

    async with aiosqlite.connect(DB_PATH) as conn:
        # Row objects expose column names, which dict() needs below.
        conn.row_factory = aiosqlite.Row
        result = await conn.execute(query, params)
        records = await result.fetchall()
        return [dict(record) for record in records]
|
|
|
|
|
|
async def get_total_count(project_id: Optional[str] = None) -> int:
    """Return how many analyses are stored.

    Args:
        project_id: When truthy, count only rows with this ``project_id``.

    Returns:
        The row count, or 0 if the query yields no row.
    """
    query = "SELECT COUNT(*) FROM analyses"
    params: tuple = ()
    if project_id:
        query = "SELECT COUNT(*) FROM analyses WHERE project_id = ?"
        params = (project_id,)

    async with aiosqlite.connect(DB_PATH) as conn:
        result = await conn.execute(query, params)
        first = await result.fetchone()

    if not first:
        return 0
    return first[0]
|
|
|
|
|
|
async def get_overview_stats() -> dict:
    """Return the aggregate figures for the dashboard overview.

    Returns:
        A dict with the keys ``total_interactions``, ``human_count``,
        ``agent_count``, ``avg_overall_score``, ``avg_token_savings``,
        ``rewrite_acceptance_rate``, ``total_mistakes_found``,
        ``total_tokens`` and ``avg_tokens_per_prompt``. Every value is 0 when
        the table is empty. ``rewrite_acceptance_rate`` is the percentage of
        rows with ``rewrite_used = 1`` among the rows whose ``rewrite_used``
        is not NULL.
    """
    async with aiosqlite.connect(DB_PATH) as db:
        # Columns are read by alias, not by position: positional indexes
        # silently go stale whenever the SELECT list is reordered.
        db.row_factory = aiosqlite.Row
        cursor = await db.execute("""
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN mode = 'human' THEN 1 ELSE 0 END) as human_count,
                SUM(CASE WHEN mode = 'agent' THEN 1 ELSE 0 END) as agent_count,
                AVG(overall_score) as avg_score,
                AVG(token_savings_percent) as avg_savings,
                SUM(mistake_count) as total_mistakes,
                SUM(CASE WHEN rewrite_used = 1 THEN 1 ELSE 0 END) as rewrites_used,
                SUM(CASE WHEN rewrite_used IS NOT NULL THEN 1 ELSE 0 END) as rewrites_decided,
                SUM(original_tokens) as total_tokens,
                AVG(original_tokens) as avg_tokens
            FROM analyses
        """)
        row = await cursor.fetchone()

    if row is None or row["total"] == 0:
        return {
            "total_interactions": 0,
            "human_count": 0,
            "agent_count": 0,
            "avg_overall_score": 0,
            "avg_token_savings": 0,
            "rewrite_acceptance_rate": 0,
            "total_mistakes_found": 0,
            "total_tokens": 0,
            "avg_tokens_per_prompt": 0,
        }

    # Acceptance rate = accepted rewrites / rewrites with a recorded decision.
    accepted = row["rewrites_used"] or 0
    decided = row["rewrites_decided"] or 0
    acceptance_rate = (accepted / decided * 100) if decided > 0 else 0

    return {
        "total_interactions": row["total"],
        "human_count": row["human_count"] or 0,
        "agent_count": row["agent_count"] or 0,
        "avg_overall_score": round(row["avg_score"] or 0, 1),
        "avg_token_savings": round(row["avg_savings"] or 0, 1),
        "rewrite_acceptance_rate": round(acceptance_rate, 1),
        "total_mistakes_found": row["total_mistakes"] or 0,
        "total_tokens": row["total_tokens"] or 0,
        "avg_tokens_per_prompt": round(row["avg_tokens"] or 0, 1),
    }
|
|
|
|
|
|
async def get_trends(hours: Optional[int] = None, days: int = 30) -> list[dict]:
    """Return average score and row count per time bucket.

    Args:
        hours: When not None, look back this many hours and bucket by hour.
        days: Used only when ``hours`` is None: look back this many days and
            bucket by calendar day.

    Returns:
        Buckets in ascending order, each a dict with ``date`` (the bucket
        label), ``avg_score`` (rounded to one decimal) and ``count``.
    """
    if hours is not None:
        # Group by hour for short time ranges.
        period_expr = "strftime('%Y-%m-%d %H:00', timestamp)"
        window = f"-{hours} hours"
    else:
        # Group by day for longer ranges.
        period_expr = "DATE(timestamp)"
        window = f"-{days} days"

    # period_expr is one of the two literals above, never caller input, so
    # interpolating it is safe; the look-back window stays a bound parameter.
    #
    # The stored value is normalised with datetime() before comparing:
    # datetime('now', ...) yields 'YYYY-MM-DD HH:MM:SS', and a plain text
    # comparison against an ISO-8601 value with a 'T' separator would sort
    # every row from the cutoff day after the cutoff regardless of its time.
    # NOTE(review): the stored timestamp format is chosen by the caller of
    # store_analysis and is not visible here — confirm it is ISO-8601.
    query = f"""
        SELECT
            {period_expr} as period,
            AVG(overall_score) as avg_score,
            COUNT(*) as count
        FROM analyses
        WHERE datetime(timestamp) >= datetime('now', ?)
        GROUP BY {period_expr}
        ORDER BY period ASC
    """

    async with aiosqlite.connect(DB_PATH) as db:
        cursor = await db.execute(query, (window,))
        rows = await cursor.fetchall()

    return [
        {"date": period, "avg_score": round(avg_score or 0, 1), "count": count}
        for period, avg_score, count in rows
    ]
|
|
|
|
|
|
async def get_mistake_frequencies(limit: int = 10) -> list[dict]:
    """Return the most frequent mistake types across all stored analyses.

    Args:
        limit: Maximum number of mistake types to return.

    Returns:
        Dicts with ``type``, ``count`` and ``percentage`` (share of all
        counted mistakes, rounded to one decimal), most frequent first.
    """
    async with aiosqlite.connect(DB_PATH) as db:
        cursor = await db.execute("SELECT mistakes_json FROM analyses WHERE mistakes_json IS NOT NULL")
        rows = await cursor.fetchall()

    counts: Counter = Counter()
    for (raw,) in rows:
        try:
            mistakes = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Unreadable payload: skip the row rather than fail the report.
            continue
        if not isinstance(mistakes, list):
            continue
        for entry in mistakes:
            # store_analysis serialises with default=str, so an entry may be
            # a plain string instead of a dict; calling .get on it would
            # raise AttributeError and abort the whole report.
            mistake_type = entry.get("type", "unknown") if isinstance(entry, dict) else "unknown"
            if not isinstance(mistake_type, str):
                # Covers null and unhashable values (lists, dicts).
                mistake_type = "unknown"
            counts[mistake_type] += 1

    # "or 1" avoids dividing by zero when nothing was counted.
    total = sum(counts.values()) or 1
    # most_common() orders by descending count and keeps first-seen order
    # for ties; slicing afterwards keeps the original [:limit] semantics.
    top = counts.most_common()[:limit]
    return [
        {"type": name, "count": hits, "percentage": round(hits / total * 100, 1)}
        for name, hits in top
    ]
|
|
|
|
|
|
async def get_agent_leaderboard() -> list[dict]:
    """Return per-agent prompt counts and average scores, best average first.

    Rows without a ``source_agent`` are ignored. ``weakest_dimension`` and
    ``most_common_mistake`` are always None and ``improvement_trend`` is
    always the placeholder "—"; none of them is computed here.
    """
    query = """
        SELECT
            source_agent,
            COUNT(*) as total_prompts,
            AVG(overall_score) as avg_score
        FROM analyses
        WHERE source_agent IS NOT NULL
        GROUP BY source_agent
        ORDER BY avg_score DESC
    """
    async with aiosqlite.connect(DB_PATH) as conn:
        result = await conn.execute(query)
        agent_rows = await result.fetchall()

    return [
        {
            "agent_id": agent,
            "total_prompts": prompt_count,
            "avg_score": round(mean_score, 1),
            "weakest_dimension": None,
            "most_common_mistake": None,
            "improvement_trend": "—",
        }
        for agent, prompt_count, mean_score in agent_rows
    ]
|
|
|
|
|
|
async def mark_rewrite_used(analysis_id: int, used: bool) -> None:
    """Record whether the rewritten prompt of an analysis was used.

    Args:
        analysis_id: ``id`` of the row in ``analyses`` to update.
        used: Truthy stores 1 in ``rewrite_used``, falsy stores 0.
    """
    flag = 1 if used else 0
    params = (flag, analysis_id)
    async with aiosqlite.connect(DB_PATH) as conn:
        await conn.execute("UPDATE analyses SET rewrite_used = ? WHERE id = ?", params)
        await conn.commit()
|