Spaces:
Build error
Build error
| import evaluate | |
| import sys | |
| from pathlib import Path | |
| from evaluate.utils import infer_gradio_input_types, json_to_string_type, parse_readme, parse_gradio_data, parse_test_cases | |
def launch_gradio_widget(metric):
    """Launch an interactive Gradio demo for ``metric``.

    Builds a ``gr.Interface`` whose input is a two-row dataframe with one
    column per metric feature and whose output is a textbox labelled with
    ``metric.name``, then starts it with ``iface.launch()``.

    Args:
        metric: Metric object exposing ``features`` (a mapping of feature
            name to feature type, or a list of such mappings), ``name`` and
            ``compute(**kwargs)``.

    Raises:
        ImportError: If ``gradio`` is not installed.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        import gradio as gr
    except ImportError:
        logger.error("To create a metric widget with Gradio make sure gradio is installed.")
        raise

    # sys.path[0] is the directory of the script that was launched; the
    # description file is looked up next to it.
    local_path = Path(sys.path[0])

    # if there are several input types, use first as default.
    if isinstance(metric.features, list):
        features = metric.features[0]
    else:
        features = metric.features
    feature_names, feature_types = zip(*features.items())
    gradio_input_types = infer_gradio_input_types(feature_types)

    def compute(data):
        """Convert the submitted table into keyword arguments and score it."""
        return metric.compute(**parse_gradio_data(data, gradio_input_types))

    header_html = '''<div style="max-width:800px; margin:auto; float:center; margin-top:0; margin-bottom:0; padding:0;">
<img src="https://huggingface.co/spaces/xu1998hz/sescore/resolve/main/img/logo_sescore.png" style="margin:0; padding:0; margin-top:-10px; margin-bottom:-50px;">
</div>
<h2 style='margin-top: 5pt; padding-top:10pt;'>About <i>SEScore</i></h2>
<p><b>SEScore</b> is a reference-based text-generation evaluation metric that requires no pre-human-annotated error data,
described in our paper <a href="https://arxiv.org/abs/2210.05035"><b>"Not All Errors are Equal: Learning Text Generation Metrics using
Stratified Error Synthesis"</b></a> from EMNLP 2022.</p>
<p>Its effectiveness over prior methods like BLEU, BERTScore, BARTScore, PRISM, COMET and BLEURT has been demonstrated on a diverse set of language generation tasks, including
translation, captioning, and web text generation. <a href="https://twitter.com/LChoshen/status/1580136005654700033">Readers have even described SEScore as "one unsupervised evaluation to rule them all"</a>
and we are very excited to share it with you!</p>
<h2 style='margin-top: 10pt; padding-top:0;'>Try it yourself!</h2>
<p>Provide sample (gold) reference text and (model output) predicted text below and see how SEScore rates them! It is most performant
in a relative ranking setting, so in general <b>it will rank better predictions higher than worse ones.</b> Providing useful
absolute numbers based on SEScore is an ongoing direction of investigation.</p>
'''.replace('\n', ' ')

    tail_markdown = parse_readme(local_path / "description.md")

    # Gradio 3+ exposes components at the top level; the `gr.inputs` and
    # `gr.outputs` namespaces were deprecated in 3.x and removed in 4.0.
    # Fall back to them only when running against an older install.
    dataframe_cls = getattr(gr, "Dataframe", None) or gr.inputs.Dataframe
    textbox_cls = getattr(gr, "Textbox", None) or gr.outputs.Textbox

    iface = gr.Interface(
        fn=compute,
        inputs=dataframe_cls(
            headers=feature_names,
            col_count=len(feature_names),
            row_count=2,
            datatype=json_to_string_type(gradio_input_types),
        ),
        outputs=textbox_cls(label=metric.name),
        description=header_html,
        # title=f"SEScore Metric Usage Example",
        article=tail_markdown,
        # TODO: load test cases and use them to populate examples
        # examples=[parse_test_cases(test_cases, feature_names, gradio_input_types)]
    )
    iface.launch()
# Script entry: load the SEScore metric by its identifier and serve its demo.
# Both statements run as soon as this file is executed or imported.
module = evaluate.load(
    "xu1998hz/sescore",
)
launch_gradio_widget(metric=module)