"""Benchmark runner for ORA models.

Runs a fixed set of test queries against a named LLM model and writes the
results as timestamped JSON reports under ``important/benchmarks``.
"""

import asyncio
import json
import os
from datetime import datetime

from app.services.llm import LLMService
from app.core.config import settings


class BenchmarkManager:
    """Run the canned benchmark queries against a model and save a JSON report."""

    # Fixed query set so runs across different models are directly comparable.
    TEST_QUERIES = [
        "What is the biblical basis for Grace vs Works?",
        "Explain the role of the Holy Spirit in a believer's daily life.",
        "How should a Christian respond to suffering according to the book of Job?",
        "Explain the concept of 'Stillness' or 'Sabbath' in modern terms.",
        "What does the Bible say about caring for the poor and marginalized?",
    ]

    RESULTS_DIR = "important/benchmarks"

    def __init__(self):
        # exist_ok=True already tolerates a pre-existing directory, so the
        # former os.path.exists() pre-check was redundant (and race-prone).
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        self.llm = LLMService()

    async def run_benchmark(self, model_name: str) -> None:
        """Run every entry of TEST_QUERIES against *model_name* and save a report.

        Temporarily patches ``settings.MODEL_NAME`` so LLMService targets the
        requested model; the original value is restored in a ``finally`` block
        so settings are never left corrupted, even on unexpected errors.

        Args:
            model_name: Model identifier (e.g. ``"llama3.2:1b"``); also used,
                sanitized, in the report filename.
        """
        print(f"\n{'='*50}")
        print(f"Benchmark: Running tests for model {model_name}...")
        print(f"{'='*50}")

        # Patch the model name in settings
        original_model = settings.MODEL_NAME
        settings.MODEL_NAME = model_name

        results = {
            "model": model_name,
            "timestamp": datetime.now().isoformat(),
            "tests": []
        }

        try:
            for query in self.TEST_QUERIES:
                print(f"Testing query: {query}")
                try:
                    # Re-initialize or clear offline state if needed
                    self.llm.is_offline = False
                    response = await self.llm.generate_response(query)
                    results["tests"].append({
                        "query": query,
                        "response": response.get("content", ""),
                        "tool_calls": str(response.get("tool_calls", []))
                    })
                except Exception as e:
                    # Record the failure but keep benchmarking the remaining
                    # queries — one bad query shouldn't abort the whole run.
                    print(f"Error during benchmark: {str(e)}")
                    results["tests"].append({"query": query, "error": str(e)})
        finally:
            # Restore original settings even if an error escaped the loop.
            settings.MODEL_NAME = original_model

        # Save results; sanitize the model name so it is filename-safe.
        safe_model_name = model_name.replace(':', '_').replace('/', '_')
        filename = f"benchmark_{safe_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        out_path = os.path.join(self.RESULTS_DIR, filename)
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)

        # Report the actual path written (previously printed a placeholder).
        print(f"Benchmark: Results saved to {out_path}")


if __name__ == "__main__":
    benchmark = BenchmarkManager()

    async def main():
        print("Starting ORA Model Benchmarks...")

        # Test Gabriel-Mini (llama3.2:1b)
        await benchmark.run_benchmark("llama3.2:1b")

        print("\nBenchmarks complete. Visit important/benchmarks/ to see the reports.")

    asyncio.run(main())