# NOTE: hosting-page header captured along with this file (not program code):
#   ma4389's picture / Upload 2 files / 52dc64a verified
import logging

import gradio as gr
import pandas as pd
from transformers import pipeline
# Load the pretrained BERT NER pipeline once at import time so every call
# reuses the same model instance.
# ``aggregation_strategy="simple"`` is the supported replacement for the
# deprecated ``grouped_entities=True``: adjacent tokens with the same label
# are merged and each result carries an ``entity_group`` key.
ner_pipeline = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
# Extract entities from text
def extract_entities(text):
    """Return the named entities found in *text* as ``(word, label)`` pairs.

    Args:
        text: The string to tag. Anything that is not a non-blank string
            (``None``, a NaN float from an empty CSV cell, ``""``) yields
            an empty result without invoking the model.

    Returns:
        A list of ``(word, entity_group)`` tuples, with any ``##`` word-piece
        markers stripped from the word. An empty list when there is nothing
        to tag or when tagging fails.
    """
    # Empty CSV cells arrive as NaN floats; skip them explicitly instead of
    # relying on the model call blowing up.
    if not isinstance(text, str) or not text.strip():
        return []

    try:
        entities = ner_pipeline(text)
        return [(e["word"].replace("##", ""), e["entity_group"]) for e in entities]
    except Exception:
        # Best effort: one bad row must not abort a whole batch, but the
        # failure is logged rather than silently discarded. KeyboardInterrupt
        # and SystemExit are deliberately no longer swallowed.
        logging.getLogger(__name__).exception(
            "Entity extraction failed for text %.80r", text
        )
        return []
def _error_frame(message):
    """Wrap *message* in a one-cell DataFrame so a table output can show it."""
    return pd.DataFrame({"error": [message]})


# Main function: take uploaded CSV and process it
def process_csv(file):
    """Run entity extraction over the ``text`` column of an uploaded CSV.

    Args:
        file: The upload to process: either an object exposing the file path
            as ``.name`` or the path itself as a string. ``None`` when
            nothing was uploaded.

    Returns:
        A ``(table, csv_path)`` tuple. On success ``table`` is a DataFrame
        with one row per detected entity (columns ``text``, ``entity``,
        ``label``) and ``csv_path`` is the path of the CSV file it was
        written to. On invalid input ``table`` is a one-cell DataFrame
        holding the error message and ``csv_path`` is ``None``.
    """
    if file is None:
        return _error_frame("❌ Please upload a CSV file first"), None

    # Accept both a file wrapper (path in ``.name``) and a plain path string.
    source_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(source_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        return _error_frame(f"❌ Could not read CSV: {err}"), None

    # Check for 'text' column
    if "text" not in df.columns:
        # Returned as a DataFrame (not a bare string) so the first element of
        # the result is always something a table component can display.
        return _error_frame("❌ CSV must contain a column named 'text'"), None

    # Flatten to 1 entity per row
    rows = []
    for text in df["text"]:
        for word, label in extract_entities(text):
            rows.append({"text": text, "entity": word, "label": label})

    # Explicit columns keep the header present even when no entity was found.
    flat_df = pd.DataFrame(rows, columns=["text", "entity", "label"])
    flat_csv_path = "ner_results.csv"
    flat_df.to_csv(flat_csv_path, index=False)
    return flat_df, flat_csv_path
# Gradio UI
def build_ui():
    """Assemble and return the Blocks app (not yet launched).

    The page has a heading, a row with the CSV upload and the trigger
    button, then the entity table and the download link. Clicking the button
    feeds the upload to ``process_csv`` and routes its two results to the
    table and the download component.
    """
    # NOTE(review): nesting was reconstructed from a paste that lost its
    # indentation — confirm the Row holds only the upload and the button.
    with gr.Blocks() as app:
        gr.Markdown("## 🧠 BERT NER Extractor (Upload CSV with 'text' column)")

        with gr.Row():
            csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
            run_button = gr.Button("Extract Entities")

        results_table = gr.Dataframe(label="Extracted Entities")
        results_file = gr.File(label="📥 Download CSV")

        run_button.click(
            fn=process_csv,
            inputs=csv_upload,
            outputs=[results_table, results_file],
        )

    return app
# Run the app
# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    app = build_ui()
    app.launch()