import gradio as gr
import openai
import pandas as pd

# Example API key setup (for testing only; do not commit a real key)
openai.api_key = "YOUR_API_KEY"

def evaluate_prompt(model_name, prompt_text):
    """
    Run a single evaluation against the OpenAI API (legacy openai<1.0 interface).
    Returns the model output text and the total token count.
    """
    if model_name.lower() == "gpt-4":
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt_text}],
            temperature=0.5,
        )
        output_text = response["choices"][0]["message"]["content"]
        tokens = response["usage"]["total_tokens"]
        return f"Output:\n{output_text}\n\nTokens used: {tokens}"
    else:
        return "Model not supported yet"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## MotionEval: AI Model Evaluation MVP")
    model_name = gr.Textbox(label="Model Name (e.g. GPT-4)")
    prompt_text = gr.Textbox(label="Prompt", lines=5)
    run_button = gr.Button("Run Evaluation")
    output = gr.Textbox(label="Result", lines=15)
    run_button.click(fn=evaluate_prompt, inputs=[model_name, prompt_text], outputs=output)

demo.launch()
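Hard-coding the key works for a quick local test, but in practice the key would usually be read from the environment instead. A minimal sketch, assuming the conventional OPENAI_API_KEY variable name:

import os
import openai

# Pull the key from the environment so it never lands in the repo (variable name is an assumption)
openai.api_key = os.environ.get("OPENAI_API_KEY", "")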
import pandas as pd

def batch_evaluate(file, model_name):
    # The uploaded CSV must have a column called 'prompt'
    df = pd.read_csv(file.name)
    results = []
    for idx, row in df.iterrows():
        prompt_text = row["prompt"]
        response = openai.ChatCompletion.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt_text}],
            temperature=0.5,
        )
        results.append({
            "prompt": prompt_text,
            "model": model_name,
            "response": response["choices"][0]["message"]["content"],
        })
    return pd.DataFrame(results)

with gr.Blocks() as demo:
    csv_input = gr.File(label="Upload CSV with 'prompt' column")
    model_input = gr.Textbox(label="Model Name")
    run_button = gr.Button("Run Batch Eval")
    output_table = gr.Dataframe(headers=["prompt", "model", "response"])
    run_button.click(batch_evaluate, inputs=[csv_input, model_input], outputs=output_table)

demo.launch()
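For reference, this is the kind of file batch_evaluate expects. The only requirement implied by the code is a column named 'prompt'; the rows below are purely illustrative:

prompt
"Summarize the plot of Hamlet in two sentences."
"List three creative uses for a paperclip."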
import tempfile

def results_to_csv(df):
    # gr.File expects a file path as output, so write the results to a temporary CSV
    tmp = tempfile.NamedTemporaryFile(suffix=".csv", delete=False)
    tmp.close()
    df.to_csv(tmp.name, index=False)
    return tmp.name

# These components belong inside the same gr.Blocks context as output_table
download_btn = gr.Button("Download CSV")
download_file = gr.File()
download_btn.click(results_to_csv, inputs=output_table, outputs=download_file)
def metrics(df):
    # Per-model summary: average character length of the responses
    df["length"] = df["response"].apply(len)
    summary = df.groupby("model").agg(avg_length=("length", "mean")).reset_index()
    return summary
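The metrics helper above is not hooked into the UI. One way to wire it up, assuming it sits inside the same gr.Blocks context as output_table from the batch-eval snippet (the component names here are illustrative):

metrics_button = gr.Button("Compute Metrics")
metrics_table = gr.Dataframe(headers=["model", "avg_length"])
# output_table passes its current contents to metrics() as a pandas DataFrame
metrics_button.click(metrics, inputs=output_table, outputs=metrics_table)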