"""Run the benchmark evaluation script against a local benchmarking DB.

Loads samples from ``_Benchmarking_DB.json``, evaluates them with
``BenchmarkEvaluator``, and prints accuracy / sample counts.
"""

import logging

from benchmarking import BenchmarkEvaluator

# Module-level logger (the original referenced an undefined `logger`,
# which raised NameError exactly when an error needed to be reported).
logger = logging.getLogger(__name__)


def main():
    """Run the benchmark and print a short results summary.

    Any exception raised while benchmarking is caught at this top-level
    boundary and logged (with traceback) instead of crashing the script.
    """
    # Configuration
    model_name = "aaditya/Llama3-OpenBioLLM-8B"
    # BUGFIX: `model_type` was used below but never defined (NameError).
    # TODO(review): confirm the intended value — the commented example at the
    # bottom of this file suggests something like ModelType.INSTRUCT.
    model_type = None
    FILE_PATH = "_Benchmarking_DB.json"

    # Initialize evaluator
    evaluator = BenchmarkEvaluator(model_name, model_type)

    try:
        # Run benchmark
        results = evaluator.run_benchmark(FILE_PATH)

        # Print results
        print("\nBenchmark Results:")
        print(f"Accuracy: {results['accuracy']:.2%}")
        print(f"Total samples: {results['total_samples']}")
        print(f"Processed samples: {results['processed_samples']}")
    except Exception as e:
        # logger.exception records the full traceback; lazy %-formatting
        # avoids building the message unless the record is emitted.
        logger.exception("Error during benchmarking: %s", e)


if __name__ == "__main__":
    main()


# Kept-for-reference example of driving a raw VLLM instruct client directly:
# instruct_client = VLLMClient(
#     model_path="meta-llama/Llama-3.2-3B-Instruct",
#     model_type=ModelType.INSTRUCT
# )
# # Single prompt with instruct model
# response = instruct_client.send_message(
#     system="You are a helpful assistant.",
#     content="What is the capital of France?",
#     max_tokens=16000,
#     temperature=0.0
# )
# print("Instruct model response:", response)