import logging

from benchmarking import BenchmarkEvaluator
def main():
    """Run the OpenBioLLM benchmark over the local JSON dataset and print a summary.

    Catches any exception raised during evaluator construction or the benchmark
    run at this top-level boundary, logs it with a traceback, and returns
    without printing results.
    """
    logger = logging.getLogger(__name__)

    # Configuration
    model_name = "aaditya/Llama3-OpenBioLLM-8B"
    # NOTE(review): `model_type` was referenced but never defined in the
    # original (NameError at runtime). "instruct" is a placeholder —
    # TODO confirm the value BenchmarkEvaluator expects.
    model_type = "instruct"
    FILE_PATH = "_Benchmarking_DB.json"

    try:
        # Construct the evaluator inside the try so that configuration errors
        # (bad model name/type) are caught and logged like benchmark errors.
        evaluator = BenchmarkEvaluator(model_name, model_type)
        results = evaluator.run_benchmark(FILE_PATH)
    except Exception:
        # logger.exception records the message plus the full traceback.
        logger.exception("Error during benchmarking")
        return

    # Results are only printed on a fully successful run.
    print("\nBenchmark Results:")
    print(f"Accuracy: {results['accuracy']:.2%}")
    print(f"Total samples: {results['total_samples']}")
    print(f"Processed samples: {results['processed_samples']}")
if __name__ == "__main__":
    main()

# NOTE(review): removed a stale commented-out VLLMClient usage example here —
# it referenced names (VLLMClient, ModelType) that are not imported in this
# file. Recover it from version control history if the snippet is needed.