Spaces:
Sleeping
Sleeping
File size: 3,087 Bytes
d4b664a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | from sqlalchemy import Column, Integer, String, Float, Boolean, DateTime, Text, JSON
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime
# Shared declarative base class; every ORM model defined in this file
# subclasses it.
# NOTE(review): importing declarative_base from sqlalchemy.ext.declarative is
# deprecated as of SQLAlchemy 1.4 (moved to sqlalchemy.orm) — confirm the
# installed version before changing the import.
Base = declarative_base()
class ConversationLog(Base):
    """ORM mapping for the ``conversation_logs`` table.

    A row stores one query/response pair plus its session identifier,
    category, prompt version, retry count and creation timestamp.
    """

    __tablename__ = "conversation_logs"

    # --- Identity -----------------------------------------------------
    # Integer primary key, additionally flagged for an index.
    id = Column(Integer, primary_key=True, index=True)
    # Session identifier (up to 64 characters), indexed.
    session_id = Column(String(64), index=True)

    # --- Exchange content ---------------------------------------------
    # Both text fields are mandatory (NOT NULL).
    query = Column(Text, nullable=False)
    response = Column(Text, nullable=False)

    # --- Metadata -----------------------------------------------------
    category = Column(String(64))
    # Falls back to "standard" when no value is supplied.
    prompt_version = Column(String(32), default="standard")
    # Starts at 0 when no value is supplied.
    retry_count = Column(Integer, default=0)

    # The callable itself is passed (not its result), so it is evaluated
    # per insert; datetime.utcnow yields a naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)
class EvaluationResult(Base):
    """ORM mapping for the ``evaluation_results`` table.

    A row stores the evaluated query/response pair together with the
    code-eval outcome, the LLM-judge scores, hallucination findings, the
    trust score and final verdict, retry bookkeeping and a
    deployment-readiness flag.
    """

    __tablename__ = "evaluation_results"

    # --- Identity -----------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    # Plain indexed integer; no ForeignKey is declared here.
    # NOTE(review): presumably refers to ConversationLog.id — confirm.
    conversation_id = Column(Integer, index=True)
    session_id = Column(String(64), index=True)

    # --- Evaluated exchange (both mandatory) --------------------------
    query = Column(Text, nullable=False)
    response = Column(Text, nullable=False)

    # --- Code eval ----------------------------------------------------
    code_eval_result = Column(String(16))  # PASS/FAIL
    code_eval_details = Column(Text)

    # --- LLM judge scores (each float defaults to 0.0) ----------------
    policy_compliance = Column(Float, default=0.0)
    faithfulness = Column(Float, default=0.0)
    relevance = Column(Float, default=0.0)
    tone = Column(Float, default=0.0)
    correctness = Column(Float, default=0.0)
    judge_verdict = Column(String(16))
    judge_reasoning = Column(Text)

    # --- Hallucination ------------------------------------------------
    hallucination_detected = Column(Boolean, default=False)
    hallucination_details = Column(Text)
    # Severity label; "none" when no value is supplied.
    hallucination_severity = Column(String(16), default="none")

    # --- Trust score --------------------------------------------------
    trust_score = Column(Float, default=0.0)
    final_verdict = Column(String(16))  # PASS/FAIL

    # --- Retry --------------------------------------------------------
    is_retry = Column(Boolean, default=False)
    # The three fields below are explicitly nullable.
    # NOTE(review): original_eval_id presumably points at another row of
    # this table; no ForeignKey is declared — confirm.
    original_eval_id = Column(Integer, nullable=True)
    retry_trust_score = Column(Float, nullable=True)
    score_improvement = Column(Float, nullable=True)

    # --- Deployment ---------------------------------------------------
    deployment_ready = Column(Boolean, default=False)

    # Callable default evaluated per insert; naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)
class FailureLog(Base):
    """ORM mapping for the ``failure_logs`` table.

    A row stores a query/response pair alongside a primary failure reason,
    JSON-typed lists of findings and suggestions, a corrected response and
    a severity label.
    """

    __tablename__ = "failure_logs"

    # --- Identity -----------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    # Plain indexed integer; no ForeignKey is declared here.
    # NOTE(review): presumably refers to EvaluationResult.id — confirm.
    eval_id = Column(Integer, index=True)
    # Unlike the other models' session_id, this one is not indexed.
    session_id = Column(String(64))

    # --- Failed exchange (nullable in this table) ---------------------
    query = Column(Text)
    response = Column(Text)

    # --- Failure analysis ---------------------------------------------
    primary_failure_reason = Column(Text)
    # JSON-typed columns; the stored structure is not defined in this file.
    policy_violations = Column(JSON)
    hallucinations = Column(JSON)
    improvement_suggestions = Column(JSON)
    corrected_response = Column(Text)
    severity = Column(String(16))

    # Callable default evaluated per insert; naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)
class PromptOptimizationLog(Base):
    """ORM mapping for the ``prompt_optimization_logs`` table.

    A row stores a query with an original and an optimized response, the
    trust score recorded for each, the score improvement and the prompt
    version that was used.
    """

    __tablename__ = "prompt_optimization_logs"

    # --- Identity -----------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    session_id = Column(String(64))  # not indexed in this table

    # --- Input --------------------------------------------------------
    query = Column(Text)

    # --- Before optimization ------------------------------------------
    original_response = Column(Text)
    original_trust_score = Column(Float)

    # --- After optimization -------------------------------------------
    optimized_response = Column(Text)
    optimized_trust_score = Column(Float)

    # --- Outcome ------------------------------------------------------
    # Stored as its own column; nothing in this model computes it.
    score_improvement = Column(Float)
    prompt_version_used = Column(String(32))

    # Callable default evaluated per insert; naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)
|