import asyncio
import json
import os
from datetime import datetime
from app.services.llm import LLMService
from app.core.config import settings

class BenchmarkManager:
    """Runs a fixed set of test queries against an LLM model and saves the results as JSON."""

    TEST_QUERIES = [
        "What is the biblical basis for Grace vs Works?",
        "Explain the role of the Holy Spirit in a believer's daily life.",
        "How should a Christian respond to suffering according to the book of Job?",
        "Explain the concept of 'Stillness' or 'Sabbath' in modern terms.",
        "What does the Bible say about caring for the poor and marginalized?"
    ]
    
    RESULTS_DIR = "important/benchmarks"

    def __init__(self):
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        self.llm = LLMService()

    async def run_benchmark(self, model_name: str):
        """Run all TEST_QUERIES against the given model and save a timestamped JSON report."""
        print(f"\n{'='*50}")
        print(f"Benchmark: Running tests for model {model_name}...")
        print(f"{'='*50}")
        
        # Patch the model name in settings
        original_model = settings.MODEL_NAME
        settings.MODEL_NAME = model_name
        
        results = {
            "model": model_name,
            "timestamp": datetime.now().isoformat(),
            "tests": []
        }

        try:
            for query in self.TEST_QUERIES:
                print(f"Testing query: {query}")
                try:
                    # Clear any offline state so each query hits the live model
                    self.llm.is_offline = False
                    response = await self.llm.generate_response(query)
                    results["tests"].append({
                        "query": query,
                        "response": response.get("content", ""),
                        "tool_calls": str(response.get("tool_calls", []))
                    })
                except Exception as e:
                    print(f"Error during benchmark: {str(e)}")
                    results["tests"].append({"query": query, "error": str(e)})
        finally:
            # Restore the original model name even if a test fails unexpectedly
            settings.MODEL_NAME = original_model

        # Save results
        safe_model_name = model_name.replace(':', '_').replace('/', '_')
        filename = f"benchmark_{safe_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(os.path.join(self.RESULTS_DIR, filename), "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)
        print(f"Benchmark: Results saved to {filename}")

if __name__ == "__main__":
    benchmark = BenchmarkManager()
    
    async def main():
        print("Starting ORA Model Benchmarks...")
        
        # Test Gabriel-Mini (llama3.2:1b)
        await benchmark.run_benchmark("llama3.2:1b")
        
        print("\nBenchmarks complete. Visit important/benchmarks/ to see the reports.")

    asyncio.run(main())