import gradio as gr
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Which Google Sheet to open, and which tab/worksheet inside it holds the rows.
SPREADSHEET_NAME = "dataset"
WORKSHEET_NAME = "sheet1"

# Module-level session state, filled in by init_gsheets():
#   ws            - the opened gspread worksheet
#   df            - DataFrame built from the worksheet values
#   current_index - df index label of the row being reviewed (None when done)
ws = None
df = None
current_index = None

def init_gsheets():
    """
    Authenticate with Google Sheets and load the entire worksheet into a DataFrame.

    Credentials are read from the service-account key file "creds.json".
    The worksheet's first row is used as the column headers and every
    following row becomes one DataFrame row.

    Sets the module globals:
      * ws            - the opened worksheet
      * df            - DataFrame of the worksheet contents
      * current_index - index label of the first row whose
                        "Human judges quality" cell is empty, or None
                        when no such row exists

    Raises:
        ValueError: if the worksheet contains no header row.
        KeyError: if the "Human judges quality" column is missing.
    """
    global df, current_index, ws

    # Scopes for Google Sheets
    scope = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]

    creds = ServiceAccountCredentials.from_json_keyfile_name("creds.json", scope)
    gc = gspread.authorize(creds)
    sh = gc.open(SPREADSHEET_NAME)
    ws = sh.worksheet(WORKSHEET_NAME)

    # Read all values from the sheet
    data = ws.get_all_values()

    # An empty worksheet yields no header row; fail with a clear message
    # instead of an IndexError on data[0] further down.
    if not data or not data[0]:
        raise ValueError(
            f"Worksheet '{WORKSHEET_NAME}' in spreadsheet '{SPREADSHEET_NAME}' "
            "is empty: a header row is required."
        )

    df = pd.DataFrame(data[1:], columns=data[0])  # row 1 = headers

    # A row counts as unreviewed while its "Human judges quality" cell is empty
    unreviewed_rows = df[df["Human judges quality"] == ""]
    current_index = None if unreviewed_rows.empty else unreviewed_rows.index[0]

def get_prompt_data():
    """
    Fetch the texts to display for the row currently under review.

    Returns a 4-tuple (prompt, llm1_response, llm2_response, all_done).
    When no row is left (current_index is None) the prompt slot carries a
    "done" message, both responses are empty strings and all_done is True.
    """
    if current_index is not None:
        record = df.loc[current_index]
        return (
            record["Prompt"],
            record["LLM1 response"],
            record["LLM2 response"],
            False,  # all_done
        )

    # Nothing left to review
    return "All rows have been reviewed.", "", "", True

def save_and_load_next(
    preference,
    factual_accuracy,
    relevance,
    llm1_issues,
    llm2_issues,
    llm1_tunisian_score,
    llm2_tunisian_score
):
    """
    Save feedback for the current row and load the next unreviewed row.

    1) Rejects the submission if no preference was chosen
    2) Saves feedback for the current row to the DataFrame and Google Sheets
    3) Moves to the next unreviewed row
    4) Returns the next prompt + updated status message in one step

    Args:
        preference: value for "Human judges quality". Required, because an
            empty cell in that column is what marks a row as unreviewed.
        factual_accuracy: value for "Human judges correctness".
        relevance: value for "Human judges relevance".
        llm1_issues: iterable of issue labels for LLM1 (may be empty/None);
            stored as a ", "-joined string.
        llm2_issues: same as llm1_issues, for LLM2.
        llm1_tunisian_score: value for "Human LLM1 Tunisian usage score".
        llm2_tunisian_score: value for "Human LLM2 Tunisian usage score".

    Returns:
        A 4-tuple (prompt, llm1_response, llm2_response, status_message).
    """
    global df, current_index, ws

    # If we're out of rows, just return "all done"
    if current_index is None:
        return (
            "All rows have been reviewed.",  # prompt
            "",                              # LLM1 resp
            "",                              # LLM2 resp
            "No more rows to review!"        # status message
        )

    def texts_for(index):
        """Return (prompt, LLM1 response, LLM2 response) for a df row."""
        record = df.loc[index]
        return record["Prompt"], record["LLM1 response"], record["LLM2 response"]

    # Without a preference the row would stop matching the `== ""` unreviewed
    # filter below (None != "") and be skipped without a usable judgment.
    # Keep the reviewer on the same row instead.
    if not preference:
        prompt, llm1resp, llm2resp = texts_for(current_index)
        return (
            prompt,
            llm1resp,
            llm2resp,
            "Please select which response you prefer before submitting."
        )

    # Column header -> value to store, in the order the cells are written
    feedback = {
        "Human judges quality": preference,
        "Human judges correctness": factual_accuracy,
        "Human judges relevance": relevance,
        "Human LLM1 flagged issues": ", ".join(llm1_issues) if llm1_issues else "",
        "Human LLM2 flagged issues": ", ".join(llm2_issues) if llm2_issues else "",
        "Human LLM1 Tunisian usage score": llm1_tunisian_score,
        "Human LLM2 Tunisian usage score": llm2_tunisian_score,
    }

    # 1. Update the in-memory DataFrame
    for column_name, value in feedback.items():
        df.at[current_index, column_name] = value

    # 2. Write updates back to Google Sheets
    sheet_row = int(current_index) + 2  # row 1 is headers, df index is 0-based
    headers = list(df.columns)
    for column_name, value in feedback.items():
        col_index = headers.index(column_name) + 1  # 1-based indexing
        ws.update_cell(sheet_row, col_index, value)

    # 3. Move to the next unreviewed row
    unreviewed_rows = df[df["Human judges quality"] == ""]
    if unreviewed_rows.empty:
        current_index = None
        return (
            "All rows have been reviewed.",  # prompt
            "",                              # LLM1 resp
            "",                              # LLM2 resp
            "All rows have been reviewed. Thank you!"  # status message
        )

    current_index = unreviewed_rows.index[0]
    prompt, llm1resp, llm2resp = texts_for(current_index)
    return (
        prompt,
        llm1resp,
        llm2resp,
        "Feedback saved! Moving to the next prompt..."
    )

def on_load():
    """
    Produce the initial UI values when the interface loads.

    Returns (prompt, llm1_response, llm2_response, status_message); the
    status is a "done" notice when nothing is left to review and an empty
    string otherwise.
    """
    prompt, llm1resp, llm2resp, all_done = get_prompt_data()
    status = "No next prompt. All done." if all_done else ""
    return prompt, llm1resp, llm2resp, status

# ---- Load the worksheet and locate the first unreviewed row
init_gsheets()

# Choice lists shared by several widgets below
COMPARISON_CHOICES = ("LLM1", "LLM2", "Tie", "Both are bad")
ISSUE_CHOICES = (
    "Hate Speech",
    "Not Arabic",
    "Inappropriate Content",
    "Sexual Content",
    "Untruthful Info",
    "Violent Content",
    "Personal Information",
)
TUNISIAN_SCORE_CHOICES = (0, 1, 2)

# ---- Build Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# LLM Responses Evaluation (Google Sheets)")

    # Read-only display of the row under review, plus a status line
    prompt_text = gr.Textbox(label="Prompt", interactive=False)
    llm1_text = gr.Textbox(label="LLM1 Response", interactive=False)
    llm2_text = gr.Textbox(label="LLM2 Response", interactive=False)
    status_msg = gr.Markdown()

    # Pairwise comparison questions
    preference = gr.Radio(
        choices=list(COMPARISON_CHOICES),
        label="Which response do you prefer?",
    )
    factual_accuracy = gr.Radio(
        choices=list(COMPARISON_CHOICES),
        label="Which response is more factually accurate?",
    )
    relevance = gr.Radio(
        choices=list(COMPARISON_CHOICES),
        label="Which response better addresses the prompt?",
    )

    # Per-response issue flags
    llm1_issues = gr.CheckboxGroup(
        choices=list(ISSUE_CHOICES),
        label="Does Response 1 contain any issues?",
    )
    llm2_issues = gr.CheckboxGroup(
        choices=list(ISSUE_CHOICES),
        label="Does Response 2 contain any issues?",
    )

    # Per-response Tunisian Arabic usage rating (defaults to 0)
    llm1_tunisian_score = gr.Radio(
        choices=list(TUNISIAN_SCORE_CHOICES),
        label="Rate LLM1's use of Tunisian Arabic",
        value=0,
    )
    llm2_tunisian_score = gr.Radio(
        choices=list(TUNISIAN_SCORE_CHOICES),
        label="Rate LLM2's use of Tunisian Arabic",
        value=0,
    )

    submit_btn = gr.Button("Submit Feedback")

    # One callback both stores the feedback and shows the next prompt
    submit_btn.click(
        fn=save_and_load_next,
        inputs=[
            preference,
            factual_accuracy,
            relevance,
            llm1_issues,
            llm2_issues,
            llm1_tunisian_score,
            llm2_tunisian_score,
        ],
        outputs=[prompt_text, llm1_text, llm2_text, status_msg],
    )

    # When the page first loads, show the first unreviewed prompt
    demo.load(
        fn=on_load,
        inputs=[],
        outputs=[prompt_text, llm1_text, llm2_text, status_msg],
    )

demo.launch()