import logging
import tempfile

import gradio as gr
import pandas as pd
from transformers import pipeline

# Load pretrained BERT NER pipeline.
# `aggregation_strategy="simple"` is the supported spelling of the deprecated
# `grouped_entities=True` flag: adjacent tokens that share an entity type are
# merged into one result carrying an `entity_group` label.
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")

# Extract entities from text
def extract_entities(text):
    """Run the NER pipeline on one text value and return its entities.

    Args:
        text: The value to analyse. Anything that is not a non-blank string
            (``None``, a NaN float from an empty CSV cell, a number, ``""``)
            yields no entities and is never sent to the model.

    Returns:
        A list of ``(word, label)`` tuples, where ``word`` is the entity's
        ``word`` field with every ``"##"`` removed and ``label`` is its
        ``entity_group``. The list is empty when the input is unusable or
        the pipeline fails.
    """
    if not isinstance(text, str) or not text.strip():
        return []

    try:
        entities = ner_pipeline(text)
        return [(e["word"].replace("##", ""), e["entity_group"]) for e in entities]
    except Exception:
        # Best-effort on purpose: one bad row must not abort a whole CSV run.
        # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit
        # through and leaves a trace of what went wrong.
        logging.getLogger(__name__).exception(
            "NER extraction failed for text starting with %r", text[:80]
        )
        return []

# Main function: take uploaded CSV and process it
def process_csv(file):
    """Extract named entities from every row of an uploaded CSV.

    Args:
        file: The upload handed over by the UI: either a string path or an
            object exposing the path as ``.name``. ``None`` means nothing
            was uploaded.

    Returns:
        A ``(table, csv_path)`` tuple. On success ``table`` is a DataFrame
        with one row per extracted entity and the columns ``text``,
        ``entity`` and ``label`` (present even when no entity was found),
        and ``csv_path`` is the path of a freshly written CSV file holding
        the same rows. When no file was supplied or the CSV has no ``text``
        column, ``table`` is a one-cell DataFrame carrying the error message
        and ``csv_path`` is ``None``.

    Raises:
        Whatever ``pandas.read_csv`` raises for an unreadable or malformed
        file; those errors are not intercepted here.
    """
    if file is None:
        return pd.DataFrame({"error": ["❌ Please upload a CSV file first"]}), None

    # Uploads may arrive as a plain path string or as an object with `.name`.
    csv_path = file if isinstance(file, str) else file.name
    df = pd.read_csv(csv_path)

    # Check for 'text' column
    if "text" not in df.columns:
        # Wrapped in a DataFrame because a bare string given to a
        # gr.Dataframe output is interpreted as a path to a CSV file.
        message = "❌ CSV must contain a column named 'text'"
        return pd.DataFrame({"error": [message]}), None

    # Flatten to 1 entity per row
    rows = [
        {"text": text, "entity": word, "label": label}
        for text in df["text"]
        for word, label in extract_entities(text)
    ]
    # Explicit columns keep the header intact when no entity was found.
    flat_df = pd.DataFrame(rows, columns=["text", "entity", "label"])

    # A unique file per call: a fixed name in the working directory would be
    # shared, and overwritten, by concurrent requests. The file is left on
    # disk (delete=False) so it can still be served after this returns.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="ner_results_",
        suffix=".csv",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as handle:
        flat_df.to_csv(handle, index=False)

    return flat_df, handle.name

# Gradio UI
def build_ui():
    """Assemble the Blocks layout for the CSV entity extractor and return it.

    The page consists of a heading, a row holding the CSV upload and the
    trigger button, a table for the extracted entities and a download slot.
    Clicking the button passes the upload to ``process_csv`` and routes its
    two results to the table and the download slot, in that order.
    """
    with gr.Blocks() as app:
        gr.Markdown("## 🧠 BERT NER Extractor (Upload CSV with 'text' column)")

        with gr.Row():
            csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
            run_button = gr.Button("Extract Entities")

        results_table = gr.Dataframe(label="Extracted Entities")
        results_file = gr.File(label="📥 Download CSV")

        run_button.click(
            fn=process_csv,
            inputs=csv_upload,
            outputs=[results_table, results_file],
        )

    return app

# Run the app: build the interface, then start serving it.
if __name__ == "__main__":
    app = build_ui()
    app.launch()