Spaces:
Sleeping
Sleeping
"""
Compare different LLM backbones (Llama 3, Mixtral, Gemma) by running the same
queries through each one and printing the answers with their faithfulness scores.
"""
| import sys | |
| import os | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from core.config import settings | |
| from modules.reasoning_engine import create_reasoning_engine | |
| from evaluation.metrics import MetricsRunner | |
def run_comparison(agent_name: str, queries: list, backbones=None) -> None:
    """Run the same queries against several LLM backbones and print the results.

    For each backbone, ``settings.LLM_BACKBONE`` is temporarily overridden, a
    fresh reasoning engine is created, and every query is passed to
    ``engine.reason(agent_name, query)``. The query, the first 100 characters
    of ``res['answer']`` and the faithfulness value returned by
    ``MetricsRunner.extract_faithfulness`` are printed ("N/A" when that value
    is ``None``).

    Args:
        agent_name: Name of the agent passed through to ``engine.reason``.
        queries: Queries to run against every backbone.
        backbones: Optional iterable of backbone names to compare. Defaults to
            ``("llama3", "mixtral", "gemma")`` when omitted or ``None``.

    Returns:
        None. All output goes to stdout.

    Notes:
        An exception raised while creating the engine or answering a query is
        reported and the comparison moves on to the next backbone; the
        remaining queries for the failing backbone are skipped.
        ``settings.LLM_BACKBONE`` is always put back to its prior state, even
        if the comparison is interrupted.
    """
    if backbones is None:
        backbones = ("llama3", "mixtral", "gemma")

    metrics = MetricsRunner()

    # A private sentinel distinguishes "attribute was absent" from "attribute
    # was None", so the cleanup below does not leave a stray None behind.
    missing = object()
    original_backbone = getattr(settings, "LLM_BACKBONE", missing)

    try:
        for bb in backbones:
            settings.LLM_BACKBONE = bb
            print(f"\n--- Testing Backbone: {bb} ---")
            try:
                # Must recreate engine so GroqClient picks up config
                engine = create_reasoning_engine()
                for q in queries:
                    res = engine.reason(agent_name, q)
                    faithfulness = metrics.extract_faithfulness(res)
                    print(f"Q: {q}")
                    print(f"A ({bb}): {res['answer'][:100]}...")
                    if faithfulness is None:
                        print("Faithfulness: N/A")
                    else:
                        print(f"Faithfulness: {faithfulness:.3f}")
            except Exception as e:
                # Deliberate best-effort: one broken backbone must not stop
                # the comparison of the others.
                print(f"Failed to run with backbone {bb}: {e}")
    finally:
        if original_backbone is missing:
            # The attribute did not exist before this call; remove the
            # temporary override instead of inventing a None value.
            try:
                delattr(settings, "LLM_BACKBONE")
            except AttributeError:
                # Nothing to remove (e.g. the override was never applied).
                pass
            print("\nRemoved temporary LLM_BACKBONE override (it was not set before)")
        else:
            settings.LLM_BACKBONE = original_backbone
            print(f"\nRestored LLM_BACKBONE to: {original_backbone}")
if __name__ == "__main__":
    # Script entry point: run the comparison on a single sample query.
    # Replace 'medical_agent' with an actual compiled agent name
    agent = "medical_agent"
    sample_queries = ["What are the symptoms of a common cold?"]
    run_comparison(agent, sample_queries)