Spaces:

ma4389
/

NER_BERT_Cyber_Data_

Sleeping

NER_BERT_Cyber_Data_ / app.py

Upload 2 files

52dc64a verified 7 months ago

1.8 kB

	import gradio as gr
	import pandas as pd
	from transformers import pipeline

	# Load the pretrained BERT NER pipeline once, at import time.
	# aggregation_strategy="simple" merges word pieces into whole entities and is
	# the supported replacement for the deprecated grouped_entities=True flag.
	ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")

	# Extract entities from text
	def extract_entities(text):
	    """Run the NER pipeline on one text and return its entities.

	    Args:
	        text: The text to analyse. Anything that is not a non-blank string
	            (for example ``None`` or a NaN from an empty CSV cell) is
	            treated as "no text".

	    Returns:
	        A list of ``(word, entity_group)`` tuples, with any ``"##"`` marker
	        stripped from the word. The list is empty when there is no usable
	        text or when extraction fails.
	    """
	    # Function-scope import keeps this block self-contained.
	    import logging

	    # Skip the model call entirely for input it cannot process.
	    if not isinstance(text, str) or not text.strip():
	        return []

	    try:
	        entities = ner_pipeline(text)
	        return [(e["word"].replace("##", ""), e["entity_group"]) for e in entities]
	    except Exception:
	        # Stay best-effort (one bad row must not abort a whole file), but
	        # record the failure and let KeyboardInterrupt/SystemExit propagate.
	        logging.getLogger(__name__).exception(
	            "NER extraction failed; returning no entities"
	        )
	        return []

	# Main function: take uploaded CSV and process it
	def process_csv(file):
	    """Extract named entities from the 'text' column of an uploaded CSV.

	    Args:
	        file: The upload handed over by the UI. Either a plain path string
	            or an object exposing the path as ``.name``. ``None`` means
	            nothing was uploaded.

	    Returns:
	        A 2-tuple. On success: a DataFrame with one row per entity
	        (columns ``text``, ``entity``, ``label``) and the path of the CSV
	        file that was written. On failure: an error message string and
	        ``None``.
	    """
	    # Clicking the button without an upload passes None; report it instead
	    # of failing on ``None.name``.
	    if file is None:
	        return "❌ Please upload a CSV file first", None

	    # Accept both a bare path string and an upload object carrying ``.name``.
	    csv_path = file if isinstance(file, str) else file.name
	    df = pd.read_csv(csv_path)

	    # Check for 'text' column
	    # NOTE(review): the message is returned in the slot wired to the
	    # Dataframe output; confirm the UI renders a plain string there.
	    if "text" not in df.columns:
	        return "❌ CSV must contain a column named 'text'", None

	    # Flatten to 1 entity per row
	    rows = [
	        {"text": text, "entity": word, "label": label}
	        for text in df["text"]
	        for word, label in extract_entities(text)
	    ]

	    # Explicit columns keep the header even when no entity was found.
	    flat_df = pd.DataFrame(rows, columns=["text", "entity", "label"])

	    # NOTE(review): fixed file name in the working directory; concurrent
	    # requests would overwrite each other's results.
	    flat_csv_path = "ner_results.csv"
	    flat_df.to_csv(flat_csv_path, index=False)

	    return flat_df, flat_csv_path

	# Gradio UI
	def build_ui():
	    """Assemble the Gradio Blocks interface and return it.

	    Layout: a heading, a row holding the CSV upload and the trigger button,
	    then a table and a download slot that are filled by ``process_csv``
	    when the button is clicked.
	    """
	    with gr.Blocks() as app:
	        gr.Markdown("## 🧠 BERT NER Extractor (Upload CSV with 'text' column)")

	        # Inputs: upload widget and trigger button side by side.
	        with gr.Row():
	            csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
	            run_button = gr.Button("Extract Entities")

	        # Outputs: entity table plus the generated CSV for download.
	        results_table = gr.Dataframe(label="Extracted Entities")
	        results_file = gr.File(label="📥 Download CSV")

	        run_button.click(
	            fn=process_csv,
	            inputs=csv_upload,
	            outputs=[results_table, results_file],
	        )

	    return app

	# Entry point: build the interface and start serving it when run as a script.
	if __name__ == "__main__":
	    demo = build_ui()
	    demo.launch()