"""ORA benchmark script (scripts/benchmark_models.py).

Runs a fixed suite of test queries against a configured LLM model and
saves the responses as timestamped JSON reports.
"""
import asyncio
import json
import os
from datetime import datetime
from app.services.llm import LLMService
from app.core.config import settings
class BenchmarkManager:
    """Run a fixed suite of test queries against an LLM model and persist
    the responses as a timestamped JSON report under ``RESULTS_DIR``.
    """

    # Queries sent once each per benchmark run to probe model quality.
    TEST_QUERIES = [
        "What is the biblical basis for Grace vs Works?",
        "Explain the role of the Holy Spirit in a believer's daily life.",
        "How should a Christian respond to suffering according to the book of Job?",
        "Explain the concept of 'Stillness' or 'Sabbath' in modern terms.",
        "What does the Bible say about caring for the poor and marginalized?"
    ]
    # Directory where benchmark reports are written.
    RESULTS_DIR = "important/benchmarks"

    def __init__(self):
        # exist_ok=True already tolerates a pre-existing directory, so the
        # previous os.path.exists() pre-check was redundant (and race-prone).
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        self.llm = LLMService()

    async def run_benchmark(self, model_name: str) -> None:
        """Benchmark *model_name* against TEST_QUERIES and save a JSON report.

        Temporarily patches ``settings.MODEL_NAME`` so the shared LLMService
        targets the requested model; the original value is always restored.
        Per-query failures are recorded in the report rather than aborting.
        """
        print(f"\n{'='*50}")
        print(f"Benchmark: Running tests for model {model_name}...")
        print(f"{'='*50}")

        # Patch the model name in settings; restore in `finally` so an
        # unexpected error cannot leave the global setting clobbered.
        original_model = settings.MODEL_NAME
        settings.MODEL_NAME = model_name

        # Capture one timestamp so the report body and filename agree.
        started_at = datetime.now()
        results = {
            "model": model_name,
            "timestamp": started_at.isoformat(),
            "tests": []
        }
        try:
            for query in self.TEST_QUERIES:
                print(f"Testing query: {query}")
                try:
                    # Re-initialize or clear offline state if needed
                    self.llm.is_offline = False
                    response = await self.llm.generate_response(query)
                    results["tests"].append({
                        "query": query,
                        "response": response.get("content", ""),
                        "tool_calls": str(response.get("tool_calls", []))
                    })
                except Exception as e:
                    # Record the failure in the report and continue with the
                    # remaining queries (best-effort benchmark).
                    print(f"Error during benchmark: {str(e)}")
                    results["tests"].append({"query": query, "error": str(e)})
        finally:
            # Restore original settings
            settings.MODEL_NAME = original_model

        # Sanitize characters that are invalid or ambiguous in filenames
        # (e.g. "llama3.2:1b" -> "llama3.2_1b").
        safe_model_name = model_name.replace(':', '_').replace('/', '_')
        filename = f"benchmark_{safe_model_name}_{started_at.strftime('%Y%m%d_%H%M%S')}.json"
        out_path = os.path.join(self.RESULTS_DIR, filename)
        # ensure_ascii=False keeps non-ASCII response text readable in the report.
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        # Fix: report the actual output path (original printed the literal
        # placeholder "(unknown)").
        print(f"Benchmark: Results saved to {out_path}")
if __name__ == "__main__":
    # Build the manager up front so setup errors surface before the event
    # loop starts.
    benchmark = BenchmarkManager()

    async def main() -> None:
        """Drive the full benchmark suite for the configured models."""
        print("Starting ORA Model Benchmarks...")
        # 2. Test Gabriel-Mini (llama3.2:1b)
        await benchmark.run_benchmark("llama3.2:1b")
        print("\nBenchmarks complete. Visit important/benchmarks/ to see the reports.")

    asyncio.run(main())