Spaces:

MaroueneA
/

feedback-app

Sleeping

App Files Files Community

MaroueneA committed on Feb 1, 2025

Commit

07de731

verified ·

1 Parent(s): e5f3c05

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -142

app.py CHANGED Viewed

@@ -1,142 +1,118 @@
-import gradio as gr
-from datasets import load_dataset, Dataset
-import pandas as pd
-import os
-from huggingface_hub import HfApi
-# Read the Hugging Face token from the environment variable
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# Authenticate with Hugging Face
-api = HfApi(token=HF_TOKEN)
-# Load the dataset from Hugging Face
-try:
-    # Replace with your dataset file
-    dataset = load_dataset("MaroueneA/feedback-dataset",
-                           data_files="dataset.csv")
-    df = dataset["train"].to_pandas()
-    if "CurrentPromptIndex" not in df.columns:
-        df["CurrentPromptIndex"] = 0  # Initialize the column
-except Exception as e:
-    print(f"Error loading dataset: {e}")
-    df = pd.DataFrame(columns=[
-        "Prompt", "LLM1 Response", "LLM2 Response", "Human judges quality", "Human judges correctness",
-        "Human judges relevance", "Human LLM1 Tunisian usage score", "Human LLM2 Tunisian usage score",
-        "Human LLM1 flagged issues", "Human LLM2 flagged issues", "Evaluated", "CurrentPromptIndex"
-    ])
-# Function to save feedback to the dataset
-def save_feedback(prompt_idx, preference, factual_accuracy, relevance, llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian):
-    # Update the dataset with feedback
-    df.at[prompt_idx, "Human judges quality"] = str(preference)
-    df.at[prompt_idx, "Human judges correctness"] = str(factual_accuracy)
-    df.at[prompt_idx, "Human judges relevance"] = str(relevance)
-    df.at[prompt_idx, "Human LLM1 Tunisian usage score"] = int(llm1_tunisian)
-    df.at[prompt_idx, "Human LLM2 Tunisian usage score"] = int(llm2_tunisian)
-    df.at[prompt_idx, "Human LLM1 flagged issues"] = ", ".join(llm1_issues)
-    df.at[prompt_idx, "Human LLM2 flagged issues"] = ", ".join(llm2_issues)
-    df.at[prompt_idx, "Evaluated"] = True
-    # Convert the updated DataFrame back to a Hugging Face Dataset
-    updated_dataset = Dataset.from_pandas(df)
-    # Push the updated dataset back to Hugging Face
-    updated_dataset.push_to_hub("MaroueneA/feedback-dataset", token=HF_TOKEN)
-# Function to get the next unevaluated prompt
-def get_next_prompt():
-    # Get the current prompt index from the dataset
-    current_prompt_idx = df["CurrentPromptIndex"].iloc[0]
-    # Iterate through the DataFrame to find the next unevaluated prompt
-    for idx in range(current_prompt_idx, len(df)):
-        if not df.at[idx, "Evaluated"]:
-            # Update the current prompt index in the dataset
-            df.at[0, "CurrentPromptIndex"] = idx
-            return df.iloc[idx]
-    return None
-# Initialize the first prompt
-current_prompt = get_next_prompt()
-if current_prompt is not None:
-    initial_prompt = current_prompt["Prompt"]
-    initial_llm1 = current_prompt["LLM1 response"]
-    initial_llm2 = current_prompt["LLM2 response"]
-else:
-    initial_prompt = "No prompts available."
-    initial_llm1 = ""
-    initial_llm2 = ""
-# Function to submit feedback and get the next prompt
-def submit_feedback(preference, factual_accuracy, relevance, llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian):
-    # Get the next unevaluated prompt
-    next_prompt = get_next_prompt()
-    if next_prompt is None:
-        return "No more prompts available.", "", "", "Feedback saved successfully!"
-    # Save feedback to the dataset
-    save_feedback(df["CurrentPromptIndex"].iloc[0], preference, factual_accuracy,
-                  relevance, llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian)
-    # Increment the prompt index and save it to the dataset
-    df.at[0, "CurrentPromptIndex"] += 1
-    print(f"Updated Prompt Index: {df['CurrentPromptIndex'].iloc[0]}")
-    # Return the next prompt and responses
-    return next_prompt["Prompt"], next_prompt["LLM1 response"], next_prompt["LLM2 response"], "Feedback saved successfully!"
-# Gradio interface
-with gr.Blocks() as demo:
-    with gr.Row():
-        prompt = gr.Textbox(
-            label="Prompt", value=initial_prompt, interactive=False)
-    with gr.Row():
-        llm1_response = gr.Textbox(
-            label="LLM1 Response", value=initial_llm1, interactive=False)
-        llm2_response = gr.Textbox(
-            label="LLM2 Response", value=initial_llm2, interactive=False)
-    with gr.Row():
-        preference = gr.Radio(
-            ["LLM1", "LLM2", "Tie", "Both are bad"], label="Which response do you prefer?")
-        factual_accuracy = gr.Radio(
-            ["LLM1", "LLM2", "Tie", "Both are bad"], label="Which response is more factually accurate?")
-        relevance = gr.Radio(["LLM1", "LLM2", "Tie", "Both are bad"],
-                             label="Which response better addresses the prompt?")
-    with gr.Row():
-        llm1_issues = gr.CheckboxGroup(
-            ["Hate Speech", "Not Arabic", "Inappropriate Content", "Sexual Content",
-                "Untruthful Info", "Violent Content", "Personal Information"],
-            label="Does Response 1 contain any issues?"
-        )
-        llm2_issues = gr.CheckboxGroup(
-            ["Hate Speech", "Not Arabic", "Inappropriate Content", "Sexual Content",
-                "Untruthful Info", "Violent Content", "Personal Information"],
-            label="Does Response 2 contain any issues?"
-        )
-    with gr.Row():
-        llm1_tunisian = gr.Radio(
-            [0, 1, 2], label="Rate LLM1's use of Tunisian Arabic")
-        llm2_tunisian = gr.Radio(
-            [0, 1, 2], label="Rate LLM2's use of Tunisian Arabic")
-    with gr.Row():
-        submit_btn = gr.Button("Submit Feedback and Next Prompt")
-    # Submit feedback and load the next prompt
-    submit_btn.click(
-        submit_feedback,
-        inputs=[preference, factual_accuracy, relevance,
-                llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian],
-        outputs=[prompt, llm1_response,
-                 llm2_response, gr.Textbox(label="Status")]
-    )
-# Launch the app
-demo.launch()

+import gradio as gr
+from datasets import load_dataset, Dataset
+import pandas as pd
+import os
+from huggingface_hub import HfApi
+# Read the Hugging Face token from the environment variable
+# (os.environ.get returns None when HF_TOKEN is not set).
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Hub API client created with that token
+api = HfApi(token=HF_TOKEN)
+# Load the feedback CSV from the Hub into the module-level DataFrame `df`.
+# Any loading failure is printed and replaced by an empty frame that carries
+# the columns the rest of this file reads and writes.
+try:
+    dataset = load_dataset("MaroueneA/feedback-dataset", data_files="dataset.csv")  # Replace with your dataset file
+    df = dataset["train"].to_pandas()
+    if "CurrentPromptIndex" not in df.columns:
+        df["CurrentPromptIndex"] = 0  # Initialize the column as an integer
+    if "Evaluated" not in df.columns:
+        # get_next_prompt() reads this column for every row, so make sure it
+        # exists; a dataset without it has no evaluated rows yet.
+        df["Evaluated"] = False
+except Exception as e:
+    print(f"Error loading dataset: {e}")
+    # The response columns are spelled "LLM1 response" / "LLM2 response" here
+    # because that is the key every reader in this file uses.
+    # NOTE(review): confirm this casing against the header of dataset.csv.
+    df = pd.DataFrame(columns=[
+        "Prompt", "LLM1 response", "LLM2 response", "Human judges quality", "Human judges correctness",
+        "Human judges relevance", "Human LLM1 Tunisian usage score", "Human LLM2 Tunisian usage score",
+        "Human LLM1 flagged issues", "Human LLM2 flagged issues", "Evaluated", "CurrentPromptIndex"
+    ])
+def save_feedback(prompt_idx, preference, factual_accuracy, relevance, llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian):
+    """Write one set of judgments into row ``prompt_idx`` of ``df`` and push it to the Hub.
+
+    Args:
+        prompt_idx: Row label in the module-level ``df`` to update.
+        preference: Stored as ``str(preference)`` in "Human judges quality".
+        factual_accuracy: Stored as a string in "Human judges correctness".
+        relevance: Stored as a string in "Human judges relevance".
+        llm1_issues: Iterable of issue labels for response 1, stored
+            comma-separated; ``None`` is treated as no issues.
+        llm2_issues: Same, for response 2.
+        llm1_tunisian: Score for response 1; must be convertible with ``int``.
+        llm2_tunisian: Score for response 2; must be convertible with ``int``.
+
+    Raises:
+        TypeError, ValueError: If a Tunisian usage score cannot be converted
+            to ``int`` (for example when it is ``None``). The DataFrame is
+            left unmodified in that case.
+    """
+    # Do every conversion that can fail *before* touching the DataFrame, so a
+    # bad input never leaves a half-written row behind.
+    llm1_score = int(llm1_tunisian)
+    llm2_score = int(llm2_tunisian)
+    llm1_flags = ", ".join(llm1_issues or [])
+    llm2_flags = ", ".join(llm2_issues or [])
+    # Update the dataset with feedback
+    df.at[prompt_idx, "Human judges quality"] = str(preference)
+    df.at[prompt_idx, "Human judges correctness"] = str(factual_accuracy)
+    df.at[prompt_idx, "Human judges relevance"] = str(relevance)
+    df.at[prompt_idx, "Human LLM1 Tunisian usage score"] = llm1_score
+    df.at[prompt_idx, "Human LLM2 Tunisian usage score"] = llm2_score
+    df.at[prompt_idx, "Human LLM1 flagged issues"] = llm1_flags
+    df.at[prompt_idx, "Human LLM2 flagged issues"] = llm2_flags
+    df.at[prompt_idx, "Evaluated"] = True
+    # Convert the updated DataFrame back to a Hugging Face Dataset
+    updated_dataset = Dataset.from_pandas(df)
+    # Push the updated dataset back to Hugging Face.
+    # NOTE(review): the loader at the top of this file reads
+    # data_files="dataset.csv"; confirm that what push_to_hub uploads is what
+    # gets read back on the next start, otherwise saved feedback is not reloaded.
+    updated_dataset.push_to_hub("MaroueneA/feedback-dataset", token=HF_TOKEN)
+def get_next_prompt():
+    """Return the next unevaluated row of ``df``, or ``None`` if there is none.
+
+    The scan starts at the index stored in row 0 of "CurrentPromptIndex" and
+    runs to the end of the frame. When a row is found, its index is written
+    back to ``df.at[0, "CurrentPromptIndex"]`` and the row is returned as a
+    pandas Series.
+    """
+    # The fallback frame built when loading fails has no rows; .iloc[0] would
+    # raise IndexError on it.
+    if df.empty:
+        return None
+    # Get the current prompt index from the dataset and convert it to an
+    # integer; a blank (NaN) cell means no progress was stored yet.
+    stored_idx = df["CurrentPromptIndex"].iloc[0]
+    current_prompt_idx = 0 if pd.isna(stored_idx) else int(stored_idx)
+    # Iterate through the DataFrame to find the next unevaluated prompt
+    for idx in range(current_prompt_idx, len(df)):
+        evaluated = df.at[idx, "Evaluated"]
+        # A missing flag counts as "not evaluated"; NaN is truthy, so a plain
+        # `not evaluated` would silently skip such rows.
+        if pd.isna(evaluated) or not evaluated:
+            df.at[0, "CurrentPromptIndex"] = idx  # Update the current prompt index in the dataset
+            return df.iloc[idx]
+    return None
+# Values shown in the three read-only text boxes when the UI first renders:
+# taken from the first unevaluated row, or placeholders when there is none.
+current_prompt = get_next_prompt()
+if current_prompt is None:
+    initial_prompt, initial_llm1, initial_llm2 = "No prompts available.", "", ""
+else:
+    initial_prompt = current_prompt["Prompt"]
+    initial_llm1 = current_prompt["LLM1 response"]
+    initial_llm2 = current_prompt["LLM2 response"]
+def submit_feedback(preference, factual_accuracy, relevance, llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian):
+    """Save the judgments for the prompt on screen, then return the next prompt.
+
+    Args:
+        preference, factual_accuracy, relevance: Selected comparison choices.
+        llm1_issues, llm2_issues: Flagged issue labels for each response.
+        llm1_tunisian, llm2_tunisian: Tunisian Arabic usage scores.
+
+    Returns:
+        A 4-tuple ``(prompt, llm1_response, llm2_response, status)`` for the
+        four output components wired to the submit button.
+    """
+    # The prompt on screen is the first unevaluated row at or after the stored
+    # index: that is what get_next_prompt() selected when it was displayed.
+    shown = get_next_prompt()
+    if shown is None:
+        return "No more prompts available.", "", "", "No feedback saved: there is no prompt left to evaluate."
+    # save_feedback() converts both scores with int(); keep the same prompt on
+    # screen and ask for the missing rating instead of raising on None.
+    if llm1_tunisian is None or llm2_tunisian is None:
+        return (shown["Prompt"], shown["LLM1 response"], shown["LLM2 response"],
+                "Please rate both responses' use of Tunisian Arabic before submitting.")
+    # Save feedback to the row that was actually displayed
+    shown_idx = int(df["CurrentPromptIndex"].iloc[0])
+    save_feedback(shown_idx, preference, factual_accuracy, relevance, llm1_issues, llm2_issues, llm1_tunisian, llm2_tunisian)
+    # That row is now marked evaluated, so this call moves on to the following
+    # unevaluated row and stores its index. Fetching the row only after saving
+    # is what keeps the display and the saved row in step; returning the row
+    # fetched before saving would show the same prompt again.
+    upcoming = get_next_prompt()
+    print(f"Updated Prompt Index: {df['CurrentPromptIndex'].iloc[0]}")
+    if upcoming is None:
+        return "No more prompts available.", "", "", "Feedback saved successfully!"
+    # Return the next prompt and responses
+    return upcoming["Prompt"], upcoming["LLM1 response"], upcoming["LLM2 response"], "Feedback saved successfully!"
+# Gradio interface
+# Choice sets reused by several widgets below.
+_COMPARISON_CHOICES = ("LLM1", "LLM2", "Tie", "Both are bad")
+_ISSUE_CHOICES = (
+    "Hate Speech", "Not Arabic", "Inappropriate Content", "Sexual Content",
+    "Untruthful Info", "Violent Content", "Personal Information",
+)
+_TUNISIAN_SCORES = (0, 1, 2)
+with gr.Blocks() as demo:
+    # Read-only display of the prompt and the two responses under review
+    with gr.Row():
+        prompt = gr.Textbox(value=initial_prompt, label="Prompt", interactive=False)
+    with gr.Row():
+        llm1_response = gr.Textbox(value=initial_llm1, label="LLM1 Response", interactive=False)
+        llm2_response = gr.Textbox(value=initial_llm2, label="LLM2 Response", interactive=False)
+    # Three side-by-side comparison questions
+    with gr.Row():
+        preference = gr.Radio(list(_COMPARISON_CHOICES), label="Which response do you prefer?")
+        factual_accuracy = gr.Radio(list(_COMPARISON_CHOICES), label="Which response is more factually accurate?")
+        relevance = gr.Radio(list(_COMPARISON_CHOICES), label="Which response better addresses the prompt?")
+    # Per-response issue flags
+    with gr.Row():
+        llm1_issues = gr.CheckboxGroup(list(_ISSUE_CHOICES), label="Does Response 1 contain any issues?")
+        llm2_issues = gr.CheckboxGroup(list(_ISSUE_CHOICES), label="Does Response 2 contain any issues?")
+    # Per-response Tunisian Arabic usage score
+    with gr.Row():
+        llm1_tunisian = gr.Radio(list(_TUNISIAN_SCORES), label="Rate LLM1's use of Tunisian Arabic")
+        llm2_tunisian = gr.Radio(list(_TUNISIAN_SCORES), label="Rate LLM2's use of Tunisian Arabic")
+    with gr.Row():
+        submit_btn = gr.Button("Submit Feedback and Next Prompt")
+    # Status line, created after the button row and outside any row
+    status_box = gr.Textbox(label="Status")
+    # Submit feedback and load the next prompt
+    feedback_inputs = [
+        preference, factual_accuracy, relevance,
+        llm1_issues, llm2_issues,
+        llm1_tunisian, llm2_tunisian,
+    ]
+    feedback_outputs = [prompt, llm1_response, llm2_response, status_box]
+    submit_btn.click(submit_feedback, inputs=feedback_inputs, outputs=feedback_outputs)
+# Launch the app
+demo.launch()