Spaces:
Sleeping
Sleeping
Nikhil Singh
committed on
Commit
·
bd2022e
1
Parent(s):
86fc40d
more fixes
Browse files
- app.py +30 -4
- requirements.txt +1 -1
- spaces.yml +2 -2
app.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
from mailparser import parse_from_string
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
from gliner import GLiNER
|
|
|
|
|
|
|
| 7 |
import spacy
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
nlp = spacy.load("en_core_web_sm")
|
| 10 |
_MODEL = {}
|
|
@@ -48,6 +52,26 @@ def get_model(model_name: str = None):
|
|
| 48 |
|
| 49 |
return _MODEL[model_name]
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def present(email_content, labels):
|
| 52 |
email = accept_mail(email_content)
|
| 53 |
cleaned_text = clean_email(email)
|
|
@@ -55,13 +79,15 @@ def present(email_content, labels):
|
|
| 55 |
sentence_list = get_sentences(further_cleaned_text)
|
| 56 |
# entity_info = '\n'.join([f"{text}: {label}" for text, label in entities])
|
| 57 |
|
|
|
|
|
|
|
| 58 |
email_info = {
|
| 59 |
"Subject": email.subject,
|
| 60 |
"From": email.from_,
|
| 61 |
"To": email.to,
|
| 62 |
"Date": email.date,
|
| 63 |
"Cleaned Body": further_cleaned_text,
|
| 64 |
-
|
| 65 |
}
|
| 66 |
return [email_info[key] for key in email_info]
|
| 67 |
|
|
@@ -81,7 +107,7 @@ demo = gr.Interface(
|
|
| 81 |
gr.components.Textbox(label="To"),
|
| 82 |
gr.components.Textbox(label="Date"),
|
| 83 |
gr.components.Textbox(label="Cleaned Body"),
|
| 84 |
-
|
| 85 |
],
|
| 86 |
title="Email Info",
|
| 87 |
description="Enter the email content below to view its details and detected entities."
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
|
|
|
|
| 3 |
from mailparser import parse_from_string
|
| 4 |
from bs4 import BeautifulSoup
|
| 5 |
from gliner import GLiNER
|
| 6 |
+
from typing import Dict, Union, List
|
| 7 |
+
|
| 8 |
import spacy
|
| 9 |
+
import re
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
_MODEL = {}
|
|
|
|
| 52 |
|
| 53 |
return _MODEL[model_name]
|
| 54 |
|
| 55 |
+
def parse_query(sentences: List[str], labels: Union[str, list], threshold: float = 0.3, nested_ner: bool = False, model_name: str = None) -> List[Dict[str, Union[str, list]]]:
|
| 56 |
+
model = get_model(model_name)
|
| 57 |
+
|
| 58 |
+
if isinstance(labels, str):
|
| 59 |
+
labels = [i.strip() for i in labels.split(",")]
|
| 60 |
+
|
| 61 |
+
results = []
|
| 62 |
+
|
| 63 |
+
for sentence in sentences:
|
| 64 |
+
_entities = model.predict_entities(sentence, labels, threshold=threshold)
|
| 65 |
+
|
| 66 |
+
entities = []
|
| 67 |
+
|
| 68 |
+
for entity in _entities:
|
| 69 |
+
entities.append(entity)
|
| 70 |
+
|
| 71 |
+
results.append({"sentence": sentence, "entities": entities})
|
| 72 |
+
|
| 73 |
+
return results
|
| 74 |
+
|
| 75 |
def present(email_content, labels):
|
| 76 |
email = accept_mail(email_content)
|
| 77 |
cleaned_text = clean_email(email)
|
|
|
|
| 79 |
sentence_list = get_sentences(further_cleaned_text)
|
| 80 |
# entity_info = '\n'.join([f"{text}: {label}" for text, label in entities])
|
| 81 |
|
| 82 |
+
result = parse_query(sentence_list, labels, threshold=0.3, nested_ner=False, model_name="urchade/gliner_base")
|
| 83 |
+
|
| 84 |
email_info = {
|
| 85 |
"Subject": email.subject,
|
| 86 |
"From": email.from_,
|
| 87 |
"To": email.to,
|
| 88 |
"Date": email.date,
|
| 89 |
"Cleaned Body": further_cleaned_text,
|
| 90 |
+
"Extracted Entities": result
|
| 91 |
}
|
| 92 |
return [email_info[key] for key in email_info]
|
| 93 |
|
|
|
|
| 107 |
gr.components.Textbox(label="To"),
|
| 108 |
gr.components.Textbox(label="Date"),
|
| 109 |
gr.components.Textbox(label="Cleaned Body"),
|
| 110 |
+
gr.components.Textbox(label="Extracted Entities")
|
| 111 |
],
|
| 112 |
title="Email Info",
|
| 113 |
description="Enter the email content below to view its details and detected entities."
|
requirements.txt
CHANGED
|
@@ -2,5 +2,5 @@ gliner
|
|
| 2 |
mail-parser
|
| 3 |
gradio
|
| 4 |
beautifulsoup4
|
| 5 |
-
spacy
|
| 6 |
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
|
|
|
|
| 2 |
mail-parser
|
| 3 |
gradio
|
| 4 |
beautifulsoup4
|
| 5 |
+
spacy==3.0.6
|
| 6 |
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
|
spaces.yml
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
# spaces.yml
|
| 2 |
pip:
|
| 3 |
-
- spacy==3.
|
| 4 |
-
- https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.
|
|
|
|
| 1 |
# spaces.yml
|
| 2 |
pip:
|
| 3 |
+
- spacy==3.0.6
|
| 4 |
+
- https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
|