"""Run the benchmark evaluation script against a local benchmarking DB.

Loads samples from ``_Benchmarking_DB.json``, evaluates them with
``BenchmarkEvaluator``, and prints accuracy / sample counts.
"""

import logging

from benchmarking import BenchmarkEvaluator

# Module-level logger (the original referenced an undefined `logger`,
# which raised NameError exactly when an error needed to be reported).
logger = logging.getLogger(__name__)


def main():
    """Run the benchmark and print a short results summary.

    Any exception raised while benchmarking is caught at this top-level
    boundary and logged (with traceback) instead of crashing the script.
    """
    # Configuration
    model_name = "aaditya/Llama3-OpenBioLLM-8B"
    # BUGFIX: `model_type` was used below but never defined (NameError).
    # TODO(review): confirm the intended value — the commented example at the
    # bottom of this file suggests something like ModelType.INSTRUCT.
    model_type = None
    FILE_PATH = "_Benchmarking_DB.json"

    # Initialize evaluator
    evaluator = BenchmarkEvaluator(model_name, model_type)

    try:
        # Run benchmark
        results = evaluator.run_benchmark(FILE_PATH)

        # Print results
        print("\nBenchmark Results:")
        print(f"Accuracy: {results['accuracy']:.2%}")
        print(f"Total samples: {results['total_samples']}")
        print(f"Processed samples: {results['processed_samples']}")
    except Exception as e:
        # logger.exception records the full traceback; lazy %-formatting
        # avoids building the message unless the record is emitted.
        logger.exception("Error during benchmarking: %s", e)


if __name__ == "__main__":
    main()


# Kept-for-reference example of driving a raw VLLM instruct client directly:
# instruct_client = VLLMClient(
#     model_path="meta-llama/Llama-3.2-3B-Instruct",
#     model_type=ModelType.INSTRUCT
# )
# # Single prompt with instruct model
# response = instruct_client.send_message(
#     system="You are a helpful assistant.",
#     content="What is the capital of France?",
#     max_tokens=16000,
#     temperature=0.0
# )
# print("Instruct model response:", response)