Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import spacy | |
| from spacy.matcher import Matcher | |
| import subprocess | |
@st.cache_resource
def _load_language_model(model_name="en_core_web_lg"):
    """Return the spaCy pipeline for *model_name*, installing it if missing.

    Streamlit re-executes this script on every user interaction, so the
    loader is wrapped in ``st.cache_resource``: the model is loaded once
    per server process instead of once per rerun.

    Args:
        model_name: Name of the spaCy model package to load.

    Returns:
        The loaded spaCy ``Language`` pipeline.

    Raises:
        subprocess.CalledProcessError: If the model download fails.
        OSError: If the model still cannot be loaded after the download.
    """
    import importlib
    import sys

    try:
        return spacy.load(model_name)
    except OSError:
        # spacy.load raises OSError when the model package is not installed.
        # Download with the interpreter running this app (not whatever
        # "python" happens to be on PATH) and fail loudly on a bad exit code.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
        return spacy.load(model_name)


# Shared spaCy pipeline used for NER and tokenisation.
nlp = _load_language_model()

# Token-based Matcher bound to the pipeline's vocabulary. It is rebuilt on
# every rerun so that patterns added later do not accumulate across reruns.
matcher = Matcher(nlp.vocab)
# Trigger phrases for each aggregation type. Every phrase is a
# whitespace-separated sequence of lower-cased tokens.
_AGG_TRIGGER_PHRASES = {
    "COUNT": (
        "how many",
        "how frequent",
        "how often",
        "count",
        "total number",
    ),
    "SUM": (
        "how much",
        "total amount",
        "sum of",
        "sum",
    ),
    "AVG": (
        "usually paid",
        "usually spend",
        "average",
        "avg",
        "per month",
        "usual amount",
    ),
    "MIN": (
        "minimum",
        "min",
        "lowest",
        "the least",
    ),
    "MAX": (
        "maximum",
        "max",
        "highest",
        "the most",
    ),
}

# Matcher patterns keyed by aggregation label: each phrase becomes a list of
# {"LOWER": token} specs, one per word, so matching is case-insensitive.
patterns = {
    agg_label: [
        [{"LOWER": word} for word in phrase.split()]
        for phrase in phrases
    ]
    for agg_label, phrases in _AGG_TRIGGER_PHRASES.items()
}
# Register every aggregation label (COUNT, SUM, AVG, MIN, MAX) together with
# its list of token patterns; the label becomes the match key.
for agg_label in patterns:
    matcher.add(agg_label, patterns[agg_label])
# --- Streamlit UI: title, sample questions, and the free-text query box ---
st.title("Entity Extraction")

# Sample questions offered as clickable pills.
options = [
    "How much do I spend on average on groceries?",
    "Which is the most I have spent at Nike?",
    "How much do I usually spend at McDonalds per month?",
    "How much do I usually spend at McDonalds?",
    "How much do I spend at McDonalds per month?",
    "How many transactions do I have with Parkichen in December 2024?",
    "How often do I shop at Peek & Cloppenburg?",
    "How frequent do I dine at Parkitchen?",
]

# At most one pill can be active; nothing selected yields a falsy value.
selection = st.pills(
    "Sample questions about transactions",
    options,
    selection_mode="single",
)

# Pre-fill the query box with the chosen sample, or leave it empty.
default_query = selection or ""
user_input = st.text_area("Enter query:", value=default_query)
def _entity_chip(text, label):
    """Return the HTML for one highlighted chip showing *text* and *label*.

    Both values are HTML-escaped: *text* comes from the user's query and is
    rendered with ``unsafe_allow_html=True``, so unescaped markup in the
    query would otherwise be injected into the page.

    Args:
        text: The matched span or entity text.
        label: The entity label or matched pattern label.

    Returns:
        An HTML string using the ``entity`` and ``label`` CSS classes.
    """
    import html

    return (
        f"<span class='entity'>{html.escape(text)} "
        f"<span class='label'>{html.escape(label)}</span></span>"
    )


if user_input:
    # Run the spaCy pipeline, then the rule-based matcher on the result.
    doc = nlp(user_input)
    matches = matcher(doc)

    st.subheader("Extracted Entities and matched patterns")

    # Named entities found by the model.
    for ent in doc.ents:
        st.markdown(_entity_chip(ent.text, ent.label_), unsafe_allow_html=True)

    # Aggregation phrases found by the matcher.
    for match_id, start, end in matches:
        span = doc[start:end]
        # match_id is a hash; look up the string label it was registered as.
        label = nlp.vocab.strings[match_id]
        st.markdown(_entity_chip(span.text, label), unsafe_allow_html=True)
# Stylesheet for the "entity" chips and their "label" badges rendered above.
_ENTITY_CSS = """
<style>
.entity {
display: inline-block;
padding: 0.25em 0.4em;
margin: 0 0.25em 0.25em 0;
border-radius: 0.25rem;
background: #e2e2e2;
border: 1px solid #cccccc;
}
.label {
font-size: 0.75em;
font-weight: bold;
color: #333333;
margin-left: 0.5em;
}
</style>
"""

# Inject the stylesheet into the page as raw HTML.
st.markdown(_ENTITY_CSS, unsafe_allow_html=True)