Spaces:

JasonTPhillipsJr
/

SpaGAN

Sleeping

App Files Community

JasonTPhillipsJr committed on Nov 12, 2024

Commit

7837592

verified ·

1 Parent(s): 52ad8f8

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -5

app.py CHANGED Viewed

@@ -116,14 +116,14 @@ def get_bert_embedding(review_text):
 #Get SpaBERT Embedding for geo-entity
-def get_spaBert_embedding(entity):
     entity_index = entity_index_dict.get(entity.lower(), None)
     if entity_index is None:
         if(dev_mode == True):
             st.write("Got Bert embedding for: ", entity)
         return get_bert_embedding(entity)                            #Fallback in-case SpaBERT could not resolve entity to retrieve embedding. Rare-cases only.
     else:
-        st.write("Pseudo Sentnece:",pseudo_sentences[entity_index])
         if(dev_mode == True):
             st.write("Got SpaBert embedding for: ", entity)
         return spaBERT_embeddings[entity_index]
@@ -134,11 +134,12 @@ def processSpatialEntities(review, nlp):
     doc = nlp(review)
     entity_spans = [(ent.start, ent.end, ent.text, ent.label_) for ent in doc.ents]
     token_embeddings = []
     # Iterate over each entity span and process only geo entities
     for start, end, text, label in entity_spans:
         if label in ['FAC', 'ORG', 'LOC', 'GPE']:  # Filter to geo-entities
-            spaBert_emb = get_spaBert_embedding(text)
             token_embeddings.append(spaBert_emb)
             if(dev_mode == True):
                 st.write("Geo-Entity Found in review: ", text)
@@ -146,7 +147,7 @@ def processSpatialEntities(review, nlp):
     token_embeddings = torch.stack(token_embeddings, dim=0)
     processed_embedding = token_embeddings.mean(dim=0)  # Shape: (768)
     #processed_embedding = processed_embedding.unsqueeze(0)  # Shape: (1, 768)
-    return processed_embedding
 #Initialize discriminator module
@@ -262,7 +263,7 @@ selected_review = example_reviews[selected_key]
 if st.button("Process Review"):
     if selected_review.strip():
         bert_embedding = get_bert_embedding(selected_review)
-        spaBert_embedding = processSpatialEntities(selected_review,nlp)
         combined_embedding = torch.cat((bert_embedding,spaBert_embedding),dim=-1)
         if(dev_mode == True):
@@ -290,6 +291,10 @@ if st.button("Process Review"):
         # Display the highlighted text with HTML support
         st.markdown(highlighted_text, unsafe_allow_html=True)
         #Display the models prediction
         if(prediction == 0):
             st.write("Prediction: Not Spam")

 #Get SpaBERT Embedding for geo-entity
+def get_spaBert_embedding(entity,current_pseudo_sentences):
     entity_index = entity_index_dict.get(entity.lower(), None)
     if entity_index is None:
         if(dev_mode == True):
             st.write("Got Bert embedding for: ", entity)
         return get_bert_embedding(entity)                            #Fallback in-case SpaBERT could not resolve entity to retrieve embedding. Rare-cases only.
     else:
+        current_pseudo_sentences.append(pseudo_sentences[entity_index])
         if(dev_mode == True):
             st.write("Got SpaBert embedding for: ", entity)
         return spaBERT_embeddings[entity_index]
     doc = nlp(review)
     entity_spans = [(ent.start, ent.end, ent.text, ent.label_) for ent in doc.ents]
     token_embeddings = []
+    current_pseudo_sentences = []
     # Iterate over each entity span and process only geo entities
     for start, end, text, label in entity_spans:
         if label in ['FAC', 'ORG', 'LOC', 'GPE']:  # Filter to geo-entities
+            spaBert_emb = get_spaBert_embedding(text,current_pseudo_sentences)
             token_embeddings.append(spaBert_emb)
             if(dev_mode == True):
                 st.write("Geo-Entity Found in review: ", text)
     token_embeddings = torch.stack(token_embeddings, dim=0)
     processed_embedding = token_embeddings.mean(dim=0)  # Shape: (768)
     #processed_embedding = processed_embedding.unsqueeze(0)  # Shape: (1, 768)
+    return processed_embedding,current_pseudo_sentences
 #Initialize discriminator module
 if st.button("Process Review"):
     if selected_review.strip():
         bert_embedding = get_bert_embedding(selected_review)
+        spaBert_embedding, current_pseudo_sentences = processSpatialEntities(selected_review,nlp)
         combined_embedding = torch.cat((bert_embedding,spaBert_embedding),dim=-1)
         if(dev_mode == True):
         # Display the highlighted text with HTML support
         st.markdown(highlighted_text, unsafe_allow_html=True)
+        #Display pseudo sentences found
+        for sentence in current_pseudo_sentences:
+            st.write("Pseudo-Sentence: ", sentence)
         #Display the models prediction
         if(prediction == 0):
             st.write("Prediction: Not Spam")