Spaces:

poltextlab
/

mores_pulse

Sleeping

App Files Files Community

vkovacs committed on Jan 10, 2025

Commit

cec858f

1 Parent(s): aa975e0

sentence split logic added

Browse files

Files changed (1) hide show

app.py +52 -8

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import torch
 import numpy as np
 from transformers import AutoModelForSequenceClassification
 from transformers import AutoTokenizer
@@ -16,6 +17,23 @@ HF_TOKEN = os.environ["hf_read"]
 SENTIMENT_LABEL_NAMES = {0: "Negative", 1: "No sentiment or Neutral sentiment", 2: "Positive"}  # integer id -> display name (presumably the model's class ids; confirm)
 LANGUAGES = ["Czech", "English", "French", "German", "Hungarian", "Polish", "Slovakian"]  # choices passed to the language dropdown
 def build_huggingface_path(language: str):
     if language == "Czech" or language == "Slovakian":
@@ -39,22 +57,48 @@ def predict(text, model_id, tokenizer_id):
         logits = model(**inputs).logits
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
-    output_pred = {model.config.id2label[i]: probs[i] for i in np.argsort(probs)[::-1]}
-    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id}">{model_id}</a> model.</p>'
-    return output_pred, output_info
 def predict_wrapper(text, language):  # UI callback: resolve the model for `language` and classify `text`
     model_id = build_huggingface_path(language)  # model identifier derived from the selected language
     tokenizer_id = "xlm-roberta-large"  # the same tokenizer id is used for every language
-    return predict(text, model_id, tokenizer_id)  # whole input is classified in a single call; result returned unchanged
 with gr.Blocks() as demo:  # build the app UI
-    gr.Interface(  # single Interface wiring predict_wrapper to its inputs and outputs
         fn=predict_wrapper,
-        inputs=[gr.Textbox(lines=6, label="Input"),  # free-text input
-                gr.Dropdown(LANGUAGES, label="Language")],  # language selector; passed as predict_wrapper's second argument
-        outputs=[gr.Label(num_top_classes=3, label="Output"), gr.Markdown()])  # label output plus a markdown output
 if __name__ == "__main__":  # launch only when executed as a script
     demo.launch()

 import os
 import torch
+import spacy
 import numpy as np
 from transformers import AutoModelForSequenceClassification
 from transformers import AutoTokenizer
 SENTIMENT_LABEL_NAMES = {0: "Negative", 1: "No sentiment or Neutral sentiment", 2: "Positive"}  # integer id -> display name (presumably the model's class ids; confirm)
 LANGUAGES = ["Czech", "English", "French", "German", "Hungarian", "Polish", "Slovakian"]  # choices passed to the language dropdown
+def load_spacy_model(model_name="xx_sent_ud_sm"):  # load the spaCy pipeline `model_name` and return it
+    try:
+        model = spacy.load(model_name)  # first attempt: load the pipeline as-is
+    except OSError:  # load failed (presumably the package is not installed; confirm)
+        spacy.cli.download(model_name)  # fetch the pipeline, then retry the load once
+        model = spacy.load(model_name)  # a second failure propagates to the caller
+    return model
+def split_sentences(text, model):  # split `text` into sentences with the spaCy pipeline `model`; returns a list of str
+    # disable pipeline components not necessary for splitting
+    model.disable_pipes(model.pipe_names)  # first disable all the pipes
+    model.enable_pipe("senter") # then enable the sentence splitter only
+    doc = model(text)  # NOTE(review): the two calls above appear to change the caller's pipeline in place; confirm
+    sentences = [sent.text for sent in doc.sents]  # keep only the text of each detected sentence
+    return sentences
 def build_huggingface_path(language: str):
     if language == "Czech" or language == "Slovakian":
         logits = model(**inputs).logits
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
+    label_pred = model.config.id2label[probs.argmax()]
+    probability_pred = f"{100*probs.max()}%"
+    return label_pred, probability_pred
 def predict_wrapper(text, language):  # UI callback: split `text` into sentences and classify each; returns (results, output_info)
     model_id = build_huggingface_path(language)  # model identifier derived from the selected language
     tokenizer_id = "xlm-roberta-large"  # the same tokenizer id is used for every language
+    spacy_model = load_spacy_model()  # NOTE(review): the pipeline is loaded on every call; consider loading it once
+    sentences = split_sentences(text, spacy_model)
+    results = []  # one entry per sentence, in the order they were split
+    for sentence in sentences:
+        label, probability = predict(sentence, model_id, tokenizer_id)  # predict is unpacked as a (label, probability) pair
+        results.append({  # NOTE(review): rows are dicts; confirm gr.Dataframe accepts a list of dicts
+            "Sentence": sentence,
+            "Prediction": label,
+            "Probability": probability
+        })
+    output_info = f'Prediction made using the <a href="https://huggingface.co/{model_id}">{model_id}</a> model.'  # HTML snippet linking to the model used
+    return results, output_info  # order matches outputs=[result_table, model_info] in the click handler
 with gr.Blocks() as demo:  # build the app UI
+    with gr.Row():  # one row holding two columns
+        with gr.Column():  # first column: inputs and the submit button
+            input_text = gr.Textbox(lines=6, label="Input Text", placeholder="Enter your text here...")
+            language_choice = gr.Dropdown(choices=LANGUAGES, label="Language", value="English")  # "English" is preselected
+            predict_button = gr.Button("Submit")
+        with gr.Column():  # second column: outputs
+            result_table = gr.Dataframe(headers=["Sentence", "Prediction", "Probability"],  # headers match the keys predict_wrapper puts in each result
+                                        label="Sentence-level Predictions")
+            model_info = gr.Markdown()  # receives the model-attribution text
+    predict_button.click(  # clicking the button calls predict_wrapper with the two inputs
         fn=predict_wrapper,
+        inputs=[input_text, language_choice],  # passed positionally as (text, language)
+        outputs=[result_table, model_info]  # receives predict_wrapper's (results, output_info)
+    )
 if __name__ == "__main__":  # launch only when executed as a script
     demo.launch()