# transaction_ner/app.py
# Streamlit demo: extract named entities and aggregation-type phrases
# (COUNT / SUM / AVG / MIN / MAX) from natural-language transaction queries.
import subprocess
import sys

import spacy
import streamlit as st
from spacy.matcher import Matcher
# Ensure the English language model is installed, then load it.
# Downloading on every startup is slow and needs network access, so only
# download when the package is missing.
if not spacy.util.is_package("en_core_web_lg"):
    # Use sys.executable so the download installs into the interpreter
    # actually running this app — a bare "python" on PATH may be a
    # different environment. check=True makes a failed download raise
    # here instead of crashing later inside spacy.load().
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_lg"],
        check=True,
    )

# Load the large English pipeline (NER is used below via doc.ents).
nlp = spacy.load("en_core_web_lg")

# Token-based Matcher sharing the pipeline's vocab; used to tag
# aggregation phrases such as "how many" -> COUNT.
matcher = Matcher(nlp.vocab)
# Token patterns for detecting the aggregation type implied by a query.
def _phrase_pattern(phrase):
    """Turn a space-separated phrase into a case-insensitive Matcher pattern."""
    return [{"LOWER": token} for token in phrase.split()]

# One list of trigger phrases per aggregation label.
_AGG_PHRASES = {
    "COUNT": ["how many", "how frequent", "how often", "count", "total number"],
    "SUM": ["how much", "total amount", "sum of", "sum"],
    "AVG": ["usually paid", "usually spend", "average", "avg", "per month", "usual amount"],
    "MIN": ["minimum", "min", "lowest", "the least"],
    "MAX": ["maximum", "max", "highest", "the most"],
}

# Expand every phrase into its token-pattern form, keyed by label.
patterns = {
    label: [_phrase_pattern(phrase) for phrase in phrases]
    for label, phrases in _AGG_PHRASES.items()
}
# Register each group of token patterns under its aggregation label, so a
# match's match_id resolves back to COUNT / SUM / AVG / MIN / MAX.
for agg_label, token_patterns in patterns.items():
    matcher.add(agg_label, token_patterns)
# --- Streamlit UI ---
st.title("Entity Extraction")

# Canned example queries the user can pick instead of typing.
_SAMPLE_QUERIES = [
    "How much do I spend on average on groceries?",
    "Which is the most I have spent at Nike?",
    "How much do I usually spend at McDonalds per month?",
    "How much do I usually spend at McDonalds?",
    "How much do I spend at McDonalds per month?",
    "How many transactions do I have with Parkichen in December 2024?",
    "How often do I shop at Peek & Cloppenburg?",
    "How frequent do I dine at Parkitchen?",
]

# A clicked pill pre-fills the free-text box below it.
picked = st.pills("Sample questions about transactions", _SAMPLE_QUERIES, selection_mode="single")
query = st.text_area("Enter query:", value=picked if picked else "")

if query:
    doc = nlp(query)      # run the full spaCy pipeline (incl. NER)
    found = matcher(doc)  # rule-based aggregation-phrase matches

    st.subheader("Extracted Entities and matched patterns")

    # Named entities from the statistical model.
    for entity in doc.ents:
        st.markdown(
            f"<span class='entity'>{entity.text} <span class='label'>{entity.label_}</span></span>",
            unsafe_allow_html=True,
        )

    # Spans matched by the hand-written aggregation patterns.
    for match_id, start, end in found:
        matched_span = doc[start:end]
        agg_label = nlp.vocab.strings[match_id]  # resolve hash -> label string
        st.markdown(
            f"<span class='entity'>{matched_span.text} <span class='label'>{agg_label}</span></span>",
            unsafe_allow_html=True,
        )
# Custom CSS for the entity "chips" rendered above.
_CHIP_CSS = """
<style>
.entity {
    display: inline-block;
    padding: 0.25em 0.4em;
    margin: 0 0.25em 0.25em 0;
    border-radius: 0.25rem;
    background: #e2e2e2;
    border: 1px solid #cccccc;
}
.label {
    font-size: 0.75em;
    font-weight: bold;
    color: #333333;
    margin-left: 0.5em;
}
</style>
"""
st.markdown(_CHIP_CSS, unsafe_allow_html=True)