Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from transformers import pipeline | |
| # Load pretrained BERT NER pipeline | |
# aggregation_strategy="simple" merges word pieces into whole entities and
# yields the "entity_group" key read below; it is the documented replacement
# for the deprecated grouped_entities=True flag.
ner_pipeline = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
| # Extract entities from text | |
def extract_entities(text):
    """Run the NER pipeline on one text and return the entities it finds.

    Args:
        text: The string to analyse. Anything that is not a non-blank string
            (for example ``None`` or the NaN pandas uses for an empty cell)
            yields no entities.

    Returns:
        A list of ``(word, label)`` tuples, one per entity group, with any
        "##" word-piece markers removed from the word. The list is empty when
        there is nothing to analyse or the pipeline call fails.
    """
    import logging

    # Skip the model entirely for input that carries no text.
    if not isinstance(text, str) or not text.strip():
        return []

    try:
        entities = ner_pipeline(text)
        return [
            (entity["word"].replace("##", ""), entity["entity_group"])
            for entity in entities
        ]
    except Exception:
        # Stay best-effort so one bad row does not abort a whole CSV, but log
        # the failure instead of hiding it. Unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        logging.getLogger(__name__).exception(
            "NER extraction failed; returning no entities"
        )
        return []
| # Main function: take uploaded CSV and process it | |
def process_csv(file):
    """Extract entities from an uploaded CSV and save them to a new CSV.

    Args:
        file: The upload to process: either a path string or an object whose
            ``name`` attribute holds the path. ``None`` means nothing was
            uploaded.

    Returns:
        A ``(frame, path)`` pair. On success ``frame`` has one row per entity
        with the columns "text", "entity" and "label", and ``path`` is the CSV
        file it was written to. On failure ``frame`` is a one-row table with a
        single "error" column and ``path`` is ``None``.
    """
    import os
    import tempfile

    def failure(message):
        # The first return value feeds a dataframe output, so problems are
        # reported as a table rather than as a bare string.
        return pd.DataFrame({"error": [message]}), None

    if file is None:
        return failure("❌ Please upload a CSV file")

    # Accept both a plain path and an upload object exposing ``.name``.
    csv_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        return failure(f"❌ Could not read CSV: {exc}")

    # Check for 'text' column
    if "text" not in df.columns:
        return failure("❌ CSV must contain a column named 'text'")

    # Flatten to one output row per (text, entity) pair.
    rows = [
        {"text": text, "entity": word, "label": label}
        for text in df["text"]
        for word, label in extract_entities(text)
    ]
    # Naming the columns keeps the header even when no entity was found.
    flat_df = pd.DataFrame(rows, columns=["text", "entity", "label"])

    # Write into a fresh directory per call so concurrent requests cannot
    # overwrite each other's results, while keeping the download file name.
    out_dir = tempfile.mkdtemp(prefix="ner_")
    flat_csv_path = os.path.join(out_dir, "ner_results.csv")
    flat_df.to_csv(flat_csv_path, index=False)
    return flat_df, flat_csv_path
| # Gradio UI | |
def build_ui():
    """Assemble the Gradio Blocks app and return it without launching it."""
    # NOTE(review): indentation was lost in the reviewed copy; this assumes
    # only the upload control and the button sit inside the row -- confirm.
    with gr.Blocks() as app:
        gr.Markdown("## 🧠 BERT NER Extractor (Upload CSV with 'text' column)")

        with gr.Row():
            upload = gr.File(label="Upload CSV", file_types=[".csv"])
            run_button = gr.Button("Extract Entities")

        results_table = gr.Dataframe(label="Extracted Entities")
        results_file = gr.File(label="📥 Download CSV")

        # One click feeds the upload to process_csv and fills both outputs.
        run_button.click(
            fn=process_csv,
            inputs=upload,
            outputs=[results_table, results_file],
        )

    return app
| # Run the app | |
if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    app = build_ui()
    app.launch()