# Page-scrape residue (not part of the program): the hosting Space reported "Runtime error".
| import os | |
| import json | |
| import shutil | |
| import gradio as gr | |
| from datasets import load_dataset | |
| from huggingface_hub import upload_file | |
| from io import StringIO | |
| import pandas as pd | |
| import datetime | |
# Hub access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset repository that receives the submitted translations.
DIALOGUES_DATASET = "ArmelRandy/MT_dialogues"
def load_data():
    """Download the source dataset that annotators translate from.

    Returns:
        Whatever ``load_dataset`` yields for ``ArmelR/oasst1_guanaco_english``.
        The rest of this file uses it as a mapping from split name to an
        indexable collection of rows with ``prompt`` and ``completion`` fields.
    """
    # NOTE(review): recent `datasets` releases deprecate `use_auth_token` in
    # favour of `token` — confirm against the version this Space pins before
    # changing it.
    return load_dataset(
        "ArmelR/oasst1_guanaco_english",
        use_auth_token=HF_TOKEN,
    )
# Load every split once at import time; the callbacks below index into it.
samples = load_data()
# Split names offered in the "Dataset split" dropdown.
splits = list(samples)
# Target languages offered in the "Translation language" dropdown.
languages = ["Wolof"]

# Startup diagnostics: where the process runs and where this script lives.
_working_dir = os.getcwd()
print(f"current directory {_working_dir}")
_script_dir = os.path.dirname(os.path.realpath(__file__))
print(f"total path {_script_dir}")
# Extra CSS handed to `gr.Blocks(css=...)` when the interface is built.
# NOTE(review): no component in the visible layout sets `elem_id` to
# "banner-image" or "chat-message", so these rules may currently match
# nothing — confirm before relying on them.
custom_css = """
#banner-image {
display: block;
margin-left: auto;
margin-right: auto;
}
#chat-message {
font-size: 14px;
min-height: 300px;
}
"""
def caller_split(s):
    """Reset the viewer to the first example of split ``s``.

    Args:
        s: Name of the split picked in the dropdown; used as a key into the
            module-level ``samples`` mapping.

    Returns:
        ``(0, prompt, completion)``: the slider index reset to zero, followed
        by the two text fields of row 0 of that split.
    """
    first_row = samples[s][0]
    return 0, first_row["prompt"], first_row["completion"]
def identity(index, split):
    """Return the prompt/completion pair to display for ``index`` in ``split``.

    The slider feeding this callback has an upper bound that is independent of
    the size of the selected split, so the requested index is clamped to the
    rows that actually exist instead of raising ``IndexError``.

    Args:
        index: Row number coming from the slider. Coerced with ``int()`` in
            case the UI delivers it as a float.
        split: Name of the split; used as a key into the module-level
            ``samples`` mapping.

    Returns:
        ``(prompt, completion)`` of the selected row, or two empty strings
        when the split has no rows.
    """
    rows = samples[split]
    total = len(rows)
    if total == 0:
        return "", ""

    # Keep the position inside [0, total - 1] whatever the slider reports.
    position = max(0, min(int(index), total - 1))
    row = rows[position]
    return row["prompt"], row["completion"]
def save(index, language, split, prompt, completion):
    """Upload one translated pair to the Hub and move on to the next example.

    A submission is accepted only when both translated fields contain
    non-blank text. Accepted submissions are serialised as a one-line JSONL
    record and uploaded to ``DIALOGUES_DATASET`` under
    ``<date>/<hour>/prompts_<timestamp>.jsonl``; the form is then cleared and
    the viewer advances by one row (staying on the last row at the end of the
    split). Rejected submissions upload nothing and leave the typed text in
    place so partial work is not lost.

    Args:
        index: Row number from the slider. Clamped to the rows available in
            ``split`` and coerced with ``int()``.
        language: Target language label stored with the record.
        split: Name of the split; used as a key into the module-level
            ``samples`` mapping.
        prompt: Translated prompt typed by the user.
        completion: Translated completion typed by the user.

    Returns:
        ``(index, source_prompt, source_completion, translated_prompt,
        translated_completion)`` in the order of the output components wired
        to the Submit button.
    """
    rows = samples[split]
    total = len(rows)
    if total == 0:
        # Nothing to translate in this split; leave the form untouched.
        return 0, "", "", prompt, completion

    # The slider's upper bound is unrelated to the split size, so make sure
    # the index refers to a row that exists before using or recording it.
    current = max(0, min(int(index), total - 1))

    has_prompt = bool(prompt and prompt.strip())
    has_completion = bool(completion and completion.strip())
    if not (has_prompt and has_completion):
        # Incomplete submission: keep showing the same example and hand the
        # user's text back instead of wiping it.
        row = rows[current]
        return current, row["prompt"], row["completion"], prompt, completion

    # One clock reading for both the file name and the folder, so the two can
    # never disagree across an hour or day boundary.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.%f")
    file_name = f"prompts_{timestamp}.jsonl"

    print("Saving ...")
    record = {
        "prompt": prompt,
        "completion": completion,
        "language": language,
        "index": current,
    }
    # With no target path, to_json returns the JSONL text directly.
    payload = pd.DataFrame([record]).to_json(orient="records", lines=True)

    # Push to Hub
    upload_file(
        path_in_repo=f"{now.date()}/{now.hour}/{file_name}",
        path_or_fileobj=payload.encode(),
        repo_id=DIALOGUES_DATASET,
        token=HF_TOKEN,
        repo_type="dataset",
    )

    # Clear the form and show the following example (or stay on the last one).
    next_index = min(current + 1, total - 1)
    upcoming = rows[next_index]
    return next_index, upcoming["prompt"], upcoming["completion"], "", ""
# Seed the source boxes with the example the controls point at on startup
# (row 0 of the first split); otherwise they stay empty until the user moves
# the slider or switches split.
_first_rows = samples[splits[0]]
if len(_first_rows) > 0:
    _initial_prompt = _first_rows[0]["prompt"]
    _initial_completion = _first_rows[0]["completion"]
else:
    _initial_prompt = _initial_completion = ""

# Highest row index that exists in any split, used as the slider's upper bound
# instead of a fixed constant.
_max_index = max(0, max(len(samples[name]) for name in splits) - 1)

# NOTE(review): the nesting below is reconstructed from a paste that lost its
# indentation — confirm it matches the intended layout.
with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    gr.HTML("""<h1 align="center">MT💫</h1>""")

    # Source example: split selector, row slider, and the original texts.
    with gr.Blocks():
        with gr.Row():
            split = gr.Dropdown(choices=splits, label="Dataset split", value=splits[0])
        with gr.Row():
            index_example = gr.Slider(
                minimum=0,
                maximum=_max_index,
                step=1,
                value=0,
                interactive=True,
                info="Index of the chosen instruction-output pair.",
            )
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="prompt", value=_initial_prompt)
            with gr.Column():
                completion = gr.Code(label="Completion", value=_initial_completion)

    # Translation form: target language, the two translated texts, and Submit.
    with gr.Blocks():
        with gr.Row():
            language = gr.Dropdown(
                choices=languages,
                label="Translation language",
                value=languages[0],
            )
        with gr.Row():
            with gr.Column():
                translated_prompt = gr.Textbox(label="Translated prompt")
            with gr.Column():
                translated_completion = gr.Textbox(label="Translated completion")
        with gr.Row():
            button = gr.Button(value="Submit")

    # Switching split jumps back to its first example.
    split.change(
        caller_split,
        inputs=[split],
        outputs=[index_example, prompt, completion],
    )
    # Releasing the slider shows the example at the chosen index.
    index_example.release(
        identity,
        inputs=[index_example, split],
        outputs=[prompt, completion],
    )
    # Submit uploads the translation and refreshes all five fields.
    button.click(
        save,
        inputs=[index_example, language, split, translated_prompt, translated_completion],
        outputs=[index_example, prompt, completion, translated_prompt, translated_completion],
    )

demo.launch(debug=True)