"""ORA benchmark script (scripts/benchmark_models.py).

Runs a fixed suite of test queries against a configured LLM model and
saves the responses as timestamped JSON reports.
"""
import asyncio
import json
import os
from datetime import datetime
from app.services.llm import LLMService
from app.core.config import settings
class BenchmarkManager:
    """Run a fixed suite of test queries against an LLM model and persist
    the responses as a timestamped JSON report under ``RESULTS_DIR``.
    """

    # Queries sent once each per benchmark run to probe model quality.
    TEST_QUERIES = [
        "What is the biblical basis for Grace vs Works?",
        "Explain the role of the Holy Spirit in a believer's daily life.",
        "How should a Christian respond to suffering according to the book of Job?",
        "Explain the concept of 'Stillness' or 'Sabbath' in modern terms.",
        "What does the Bible say about caring for the poor and marginalized?"
    ]
    # Directory where benchmark reports are written.
    RESULTS_DIR = "important/benchmarks"

    def __init__(self):
        # exist_ok=True already tolerates a pre-existing directory, so the
        # previous os.path.exists() pre-check was redundant (and race-prone).
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        self.llm = LLMService()

    async def run_benchmark(self, model_name: str) -> None:
        """Benchmark *model_name* against TEST_QUERIES and save a JSON report.

        Temporarily patches ``settings.MODEL_NAME`` so the shared LLMService
        targets the requested model; the original value is always restored.
        Per-query failures are recorded in the report rather than aborting.
        """
        print(f"\n{'='*50}")
        print(f"Benchmark: Running tests for model {model_name}...")
        print(f"{'='*50}")

        # Patch the model name in settings; restore in `finally` so an
        # unexpected error cannot leave the global setting clobbered.
        original_model = settings.MODEL_NAME
        settings.MODEL_NAME = model_name

        # Capture one timestamp so the report body and filename agree.
        started_at = datetime.now()
        results = {
            "model": model_name,
            "timestamp": started_at.isoformat(),
            "tests": []
        }
        try:
            for query in self.TEST_QUERIES:
                print(f"Testing query: {query}")
                try:
                    # Re-initialize or clear offline state if needed
                    self.llm.is_offline = False
                    response = await self.llm.generate_response(query)
                    results["tests"].append({
                        "query": query,
                        "response": response.get("content", ""),
                        "tool_calls": str(response.get("tool_calls", []))
                    })
                except Exception as e:
                    # Record the failure in the report and continue with the
                    # remaining queries (best-effort benchmark).
                    print(f"Error during benchmark: {str(e)}")
                    results["tests"].append({"query": query, "error": str(e)})
        finally:
            # Restore original settings
            settings.MODEL_NAME = original_model

        # Sanitize characters that are invalid or ambiguous in filenames
        # (e.g. "llama3.2:1b" -> "llama3.2_1b").
        safe_model_name = model_name.replace(':', '_').replace('/', '_')
        filename = f"benchmark_{safe_model_name}_{started_at.strftime('%Y%m%d_%H%M%S')}.json"
        out_path = os.path.join(self.RESULTS_DIR, filename)
        # ensure_ascii=False keeps non-ASCII response text readable in the report.
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        # Fix: report the actual output path (original printed the literal
        # placeholder "(unknown)").
        print(f"Benchmark: Results saved to {out_path}")
if __name__ == "__main__":
    # Build the manager up front so setup errors surface before the event
    # loop starts.
    benchmark = BenchmarkManager()

    async def main() -> None:
        """Drive the full benchmark suite for the configured models."""
        print("Starting ORA Model Benchmarks...")
        # 2. Test Gabriel-Mini (llama3.2:1b)
        await benchmark.run_benchmark("llama3.2:1b")
        print("\nBenchmarks complete. Visit important/benchmarks/ to see the reports.")

    asyncio.run(main())