Spaces:
Runtime error
Runtime error
| import html | |
| import json | |
| import logging | |
| import gradio as gr | |
| import numpy as np | |
| from dotenv import load_dotenv | |
| from pages.summarization_playground import custom_css, generate_answer | |
| from utils.metric import metric_rouge_score | |
| from utils.model import Model | |
# Load environment variables (e.g. model API keys) from a local .env file
# before any model calls are made.
load_dotenv()
def display_results(response_list):
    """Render batch-evaluation results as a single HTML report.

    Args:
        response_list: list of dicts, each with keys ``dialogue``,
            ``summary``, ``response`` (strings) and
            ``metric_score`` -> ``{"rouge_score": float}``.

    Returns:
        An HTML string: an overall mean ROUGE score header followed by one
        collapsible ``<details>`` section per datapoint showing dialogue,
        reference summary, and model response side by side.
    """
    # Guard the empty case: np.mean([]) would emit a RuntimeWarning and
    # render "Overall Score: nan".
    if not response_list:
        return "<h2>No results to display.</h2>"

    overall_score = np.mean([r["metric_score"]["rouge_score"] for r in response_list])
    html_output = f"<h2>Overall Score: {overall_score:.2f}</h2>"

    for i, item in enumerate(response_list, 1):
        rouge_score = item["metric_score"]["rouge_score"]
        # Escape user/model text so it cannot inject HTML, and preserve
        # line breaks inside the <pre> panes.
        dialogue = html.escape(item["dialogue"]).replace("\n", "<br>")
        summary = html.escape(item["summary"]).replace("\n", "<br>")
        response = html.escape(item["response"]).replace("\n", "<br>")

        html_output += f"""
        <details>
            <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
            <div style="display: flex; justify-content: space-between;">
                <div style="width: 30%;">
                    <h3>Dialogue</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{dialogue}</pre>
                </div>
                <div style="width: 30%;">
                    <h3>Summary</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{summary}</pre>
                </div>
                <div style="width: 30%;">
                    <h3>Response</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{response}</pre>
                </div>
            </div>
        </details>
        """

    return html_output
def process(model_selection, prompt, num=10):
    """Run the selected model over every datapoint in the test set and
    return an HTML report of ROUGE-scored results.

    Args:
        model_selection: model identifier passed through to ``generate_answer``.
        prompt: base prompting template; each datapoint's required output
            format is appended to it.
        num: currently unused; kept for interface compatibility.
            TODO(review): either wire it up (e.g. limit the number of
            datapoints) or drop it once callers are audited.

    Returns:
        HTML string produced by ``display_results``.
    """
    response_list = []

    # json.load reads and parses in one step (no intermediate string).
    with open("test_samples/test_data.json", "r") as file:
        dataset = json.load(file)

    for i, data in enumerate(dataset):
        # Lazy %-style args so formatting only happens if the record is emitted.
        logging.info("Start testing datapoint %d", i + 1)
        dialogue = data["dialogue"]
        # Renamed from `format` to avoid shadowing the builtin.
        output_format = data["format"]
        summary = data["summary"]
        response = generate_answer(
            dialogue, model_selection, prompt + f" Output following {output_format} format."
        )
        rouge_score = metric_rouge_score(response, summary)
        response_list.append(
            {
                "dialogue": dialogue,
                "summary": summary,
                "response": response,
                "metric_score": {"rouge_score": rouge_score},
            }
        )
        logging.info("Complete testing datapoint %d", i + 1)

    return display_results(response_list)
def create_batch_evaluation_interface():
    """Build the Gradio Blocks UI for batch evaluation.

    Lets the user pick a model and a prompting template, then runs
    ``process`` over the test dataset on submit and shows the HTML report.

    Returns:
        The constructed ``gr.Blocks`` demo (not yet launched).
    """
    with gr.Blocks(
        theme=gr.themes.Soft(spacing_size="sm", text_size="sm"), css=custom_css
    ) as demo:
        # Fixed typos in the user-facing text: "though" -> "through",
        # "test_data.josn" -> "test_data.json".
        gr.Markdown(
            "## Here are evaluation setups. It will run through datapoints in test_data.json to generate and evaluate. Show results once finished."
        )
        model_dropdown = gr.Dropdown(
            choices=Model.__model_list__,
            label="Choose a model",
            value=Model.__model_list__[0],
        )
        # snake_case rename (was Template_text); purely local.
        template_text = gr.Textbox(
            value="""Summarize the following dialogue""",
            label="Input Prompting Template",
            lines=8,
            placeholder="Input your prompts",
        )
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.HTML(label="Results")

        submit_button.click(
            process, inputs=[model_dropdown, template_text], outputs=output
        )

    return demo
# Script entry point: build the evaluation UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_batch_evaluation_interface()
    demo.launch()