import gradio as gr import pandas as pd from transformers import pipeline # Load pretrained BERT NER pipeline ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", grouped_entities=True) # Extract entities from text def extract_entities(text): try: entities = ner_pipeline(text) return [(e['word'].replace("##", ""), e['entity_group']) for e in entities] except: return [] # Main function: take uploaded CSV and process it def process_csv(file): df = pd.read_csv(file.name) # Check for 'text' column if "text" not in df.columns: return "❌ CSV must contain a column named 'text'", None df["entities"] = df["text"].apply(extract_entities) # Flatten to 1 entity per row rows = [] for _, row in df.iterrows(): for word, label in row["entities"]: rows.append({ "text": row["text"], "entity": word, "label": label }) flat_df = pd.DataFrame(rows) flat_csv_path = "ner_results.csv" flat_df.to_csv(flat_csv_path, index=False) return flat_df, flat_csv_path # Gradio UI def build_ui(): with gr.Blocks() as demo: gr.Markdown("## 🧠 BERT NER Extractor (Upload CSV with 'text' column)") with gr.Row(): file_input = gr.File(label="Upload CSV", file_types=[".csv"]) submit_btn = gr.Button("Extract Entities") entity_output = gr.Dataframe(label="Extracted Entities") download_output = gr.File(label="📥 Download CSV") submit_btn.click(fn=process_csv, inputs=file_input, outputs=[entity_output, download_output]) return demo # Run the app if __name__ == "__main__": build_ui().launch()