Spaces:

dipannitaray
/

0423

Build error

App Files Files Community

dipannitaray committed on May 28, 2024

Commit

3e7ec50

verified ·

1 Parent(s): 8b6f8d6

Upload 4 files

Browse files

Files changed (4) hide show

sentiback2.jpg +0 -0
sentiback3.jpg +0 -0
sr.py +98 -0
tokenizer_and_sequences.pkl +3 -0

sentiback2.jpg ADDED Viewed

sentiback3.jpg ADDED Viewed

sr.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import streamlit as st
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import load_model
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import pickle
+import joblib
+# Load models and tokenizers
+# NOTE: everything in this section runs at module top level, i.e. each time the script is executed.
+# Keras model read from a local HDF5 file; process_input() calls model.predict() on it.
+model = load_model('rnn_lstm_final.h5')
+# NOTE(review): `loaded_model` is not referenced anywhere else in the visible code — confirm it is still needed.
+loaded_model = joblib.load("my_rnn_model.joblib")
+# NOTE(security): pickle.load can execute arbitrary code embedded in the file; only load a trusted artifact.
+# The pickle is unpacked into two objects; only `tokenizer` is used in the visible code (by process_input()).
+with open("tokenizer_and_sequences.pkl", "rb") as f:
+    tokenizer, data = pickle.load(f)
+# Sequence-classification model and its tokenizer; the app passes both to predict_sentiment().
+# NOTE(review): 'punjabiSentimentAnalysis' is presumably a local directory or a hub id — confirm.
+model1 = AutoModelForSequenceClassification.from_pretrained('punjabiSentimentAnalysis')
+tokenizer1 = AutoTokenizer.from_pretrained('punjabiSentimentAnalysis')
+# Seq2seq model and tokenizer used by summarize().
+model_summ = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/MultiIndicSentenceSummarizationSS")
+tokenizer_summ = AutoTokenizer.from_pretrained("ai4bharat/MultiIndicSentenceSummarizationSS",
+                                                do_lower_case=False, use_fast=False, keep_accents=True)
+# Ids of the special tokens that summarize() forwards to model_summ.generate().
+# NOTE(review): _convert_token_to_id_with_added_voc is a private tokenizer method (leading underscore).
+bos_id = tokenizer_summ._convert_token_to_id_with_added_voc("<s>")
+eos_id = tokenizer_summ._convert_token_to_id_with_added_voc("</s>")
+pad_id = tokenizer_summ._convert_token_to_id_with_added_voc("<pad>")
+# Define helper functions
+def is_valid_punjabi_text(text):
+    english_alphabet = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+    numbers = set("0123456789")
+    punctuation = set("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
+    for char in text:
+        if char in english_alphabet or char in numbers or char in punctuation:
+            return False
+    return True
+def predict_sentiment(text, model, tokenizer):
+    inputs = tokenizer(text, return_tensors="pt")
+    outputs = model(**inputs)
+    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
+    return "Negative" if predicted_class == 0 else "Positive"
+def summarize(text):
+    input_ids = tokenizer_summ(f"{text} </s> <2pa>", add_special_tokens=False, return_tensors="pt",
+                                padding=True).input_ids
+    model_output = model_summ.generate(input_ids, use_cache=True, no_repeat_ngram_size=3, num_beams=5,
+                                        length_penalty=0.8, max_length=20, min_length=1, early_stopping=True,
+                                        pad_token_id=pad_id, bos_token_id=bos_id, eos_token_id=eos_id,
+                                        decoder_start_token_id=tokenizer_summ._convert_token_to_id_with_added_voc("<2pa>"))
+    decoded_output = tokenizer_summ.decode(model_output[0], skip_special_tokens=True,
+                                            clean_up_tokenization_spaces=False)
+    return decoded_output
+def process_input(text):
+    a = [text]
+    a = tokenizer.texts_to_sequences(a)
+    a = np.array(a)
+    a = pad_sequences(a, padding='post', maxlen=100)
+    a = a.reshape((a.shape[0], a.shape[1], 1))
+    prediction = model.predict(np.array(a))
+    for row in prediction:
+        element1 = row[0]
+        element2 = row[1]
+        return "Negative" if element1 > element2 else "Positive"
+# Streamlit app
+st.title("Indic Sentence Summarization & Sentiment Analysis")
+st.header("Insightful Echoes: Crafting Summaries with Sentiments (for ਪੰਜਾਬੀ Text)")
+model_choice = st.selectbox("Select the Model", ["Indic-Bert", "RNN"])
+summarize_before_sentiment = st.checkbox("Summarize before analyzing sentiment")
+user_input = st.text_area("Enter some text here")
+if st.button("Analyze Sentiment"):
+    if not is_valid_punjabi_text(user_input):
+        st.warning("Please enter valid Punjabi text.")
+    else:
+        sentiment_output = ""
+        if summarize_before_sentiment:
+            summarized_text = summarize(user_input)
+            sentiment_bert = predict_sentiment(summarized_text, model1, tokenizer1)
+            sentiment_output = f'Sentiment (Indic-BERT): {sentiment_bert}\nSummary: {summarized_text}'
+        else:
+            sentiment_bert = predict_sentiment(user_input, model1, tokenizer1)
+            sentiment_output = f'Sentiment (Indic-BERT): {sentiment_bert}'
+        if model_choice == "RNN":
+            sentiment_rnn = process_input(user_input)
+            sentiment_output += f"\nSentiment (Bidirectional LSTM): {sentiment_rnn}"
+            if summarize_before_sentiment:
+                summarized_text_rnn = summarize(user_input)
+                sentiment_output += f"\nSummary (Bidirectional LSTM): {summarized_text_rnn}"
+        st.text_area("Sentiment Output", sentiment_output, height=200)

tokenizer_and_sequences.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8802bb9e970ab9643357f0b384773dc4a2dd7514a396a3898c5e7903a563e36f
+size 613474