Spaces:

Tachygraphy-Microtext-Normalization-IEMK25
/

Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder

Sleeping

App Files Files Community

Archisman Karmakar committed on Mar 21, 2025

Commit

2d6564f

1 Parent(s): 4822903

2025.03.21.post1

Browse files

Files changed (9) hide show

app_main_hf.py +24 -7
emotionMoodtag_analysis/emotion_analysis_main.py +2 -2
pyproject.toml +1 -1
sentimentPolarity_analysis/sentiment_analysis_main.py +2 -2
transformation_and_Normalization/config/stage3_models.json +30 -0
transformation_and_Normalization/hmv_cfg_base_stage3/model1.py +1 -1
transformation_and_Normalization/hmv_cfg_base_stage3/model2.py +117 -0
transformation_and_Normalization/hmv_cfg_base_stage3/model3.py +117 -0
transformation_and_Normalization/transformationNormalization_main.py +12 -6

app_main_hf.py CHANGED Viewed

@@ -31,6 +31,12 @@ else:
     except RuntimeError:
         asyncio.set_event_loop(asyncio.new_event_loop())
 import joblib
 import importlib
@@ -47,10 +53,6 @@ from dashboard import show_dashboard
 # from text_transformation import show_text_transformation
-st.set_page_config(
-    page_title="Tachygraphy Microtext Analysis & Normalization",
-    # layout="wide"
-)
 def free_memory():
@@ -112,10 +114,21 @@ def main():
     st.sidebar.title("Navigation")
     with st.sidebar:
         selection = option_menu(
             menu_title=None,          # No title for a sleek look
             options=["Dashboard", "Stage 1: Sentiment Polarity Analysis", "Stage 2: Emotion Mood-tag Analysis", "Stage 3: Text Transformation & Normalization"],
-            icons=None,
             menu_icon="cast",          # Main menu icon
             default_index=0,           # Highlight the first option
             orientation="vertical",
@@ -126,11 +139,11 @@ def main():
                     "font-size": "16px",
                     "text-align": "left",
                     "margin": "0px",
-                    "color": "#6c757d",
                     "transition": "0.3s",
                 },
                 "nav-link-selected": {
-                    "background-color": "#FF4B4B",
                     "color": "white",
                     "font-weight": "bold",
                     "border-radius": "8px",
@@ -160,22 +173,26 @@ def main():
         st.session_state.current_page = selection
     if selection == "Dashboard":
         # st.cache_resource.clear()
         # free_memory()
         show_dashboard()
     elif selection == "Stage 1: Sentiment Polarity Analysis":
         # st.cache_resource.clear()
         # free_memory()
         show_sentiment_analysis()
     elif selection == "Stage 2: Emotion Mood-tag Analysis":
         # st.cache_resource.clear()
         # free_memory()
         show_emotion_analysis()
         # st.write("This section is under development.")
     elif selection == "Stage 3: Text Transformation & Normalization":
         # st.cache_resource.clear()
         # free_memory()
         transform_and_normalize()

     except RuntimeError:
         asyncio.set_event_loop(asyncio.new_event_loop())
+st.set_page_config(
+    # page_title="Tachygraphy Microtext Analysis & Normalization",
+    layout="wide"
+)
 import joblib
 import importlib
 # from text_transformation import show_text_transformation
 def free_memory():
     st.sidebar.title("Navigation")
     with st.sidebar:
+        # selected = option_menu("Main Menu", ["Home", 'Settings'],
+        #         icons=['house', 'gear'], menu_icon="cast", default_index=1)
+        # selected
+        # # 2. horizontal menu
+        # selected2 = option_menu(None, ["Home", "Upload", "Tasks", 'Settings'],
+        #     icons=['house', 'cloud-upload', "list-task", 'gear'],
+        #     menu_icon="cast", default_index=0, orientation="horizontal")
+        # selected2
         selection = option_menu(
             menu_title=None,          # No title for a sleek look
             options=["Dashboard", "Stage 1: Sentiment Polarity Analysis", "Stage 2: Emotion Mood-tag Analysis", "Stage 3: Text Transformation & Normalization"],
+            icons=['house', 'diagram-3', "snow", 'activity'],
             menu_icon="cast",          # Main menu icon
             default_index=0,           # Highlight the first option
             orientation="vertical",
                     "font-size": "16px",
                     "text-align": "left",
                     "margin": "0px",
+                    "color": "#000000",
                     "transition": "0.3s",
                 },
                 "nav-link-selected": {
+                    "background-color": "#020045",
                     "color": "white",
                     "font-weight": "bold",
                     "border-radius": "8px",
         st.session_state.current_page = selection
     if selection == "Dashboard":
+        # st.title("Tachygraphy Micro-text Analysis & Normalization")
         # st.cache_resource.clear()
         # free_memory()
         show_dashboard()
     elif selection == "Stage 1: Sentiment Polarity Analysis":
+        # st.title("Sentiment Polarity Analysis")
         # st.cache_resource.clear()
         # free_memory()
         show_sentiment_analysis()
     elif selection == "Stage 2: Emotion Mood-tag Analysis":
+        # st.title("Emotion Mood-tag Analysis")
         # st.cache_resource.clear()
         # free_memory()
         show_emotion_analysis()
         # st.write("This section is under development.")
     elif selection == "Stage 3: Text Transformation & Normalization":
+        # st.title("Text Transformation & Normalization")
         # st.cache_resource.clear()
         # free_memory()
         transform_and_normalize()

emotionMoodtag_analysis/emotion_analysis_main.py CHANGED Viewed

@@ -217,12 +217,12 @@ def show_emotion_analysis():
     # Model selection with change detection
     selected_model = st.selectbox(
-        "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model", on_change=on_model_change
     )
     # Text input with change detection
     user_input = st.text_input(
-        "Enter text for emotions mood-tag analysis:", key="user_input", on_change=on_text_change
     )
     user_input_copy = user_input

     # Model selection with change detection
     selected_model = st.selectbox(
+        "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model_stage2", on_change=on_model_change
     )
     # Text input with change detection
     user_input = st.text_input(
+        "Enter text for emotions mood-tag analysis:", key="user_input_stage2", on_change=on_text_change
     )
     user_input_copy = user_input

pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.21.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "92569441+ArchismanKarmakar@users.noreply.github.com" },

 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
+version = "2025.03.22.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "92569441+ArchismanKarmakar@users.noreply.github.com" },

sentimentPolarity_analysis/sentiment_analysis_main.py CHANGED Viewed

@@ -215,12 +215,12 @@ def show_sentiment_analysis():
     # Model selection with change detection
     selected_model = st.selectbox(
-        "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model", on_change=on_model_change
     )
     # Text input with change detection
     user_input = st.text_input(
-        "Enter text for sentiment analysis:", key="user_input", on_change=on_text_change
     )
     user_input_copy = user_input

     # Model selection with change detection
     selected_model = st.selectbox(
+        "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model_stage1", on_change=on_model_change
     )
     # Text input with change detection
     user_input = st.text_input(
+        "Enter text for sentiment analysis:", key="user_input_stage1", on_change=on_text_change
     )
     user_input_copy = user_input

transformation_and_Normalization/config/stage3_models.json CHANGED Viewed

@@ -13,5 +13,35 @@
         "max_top_k": 50265,
         "load_function": "load_model",
         "predict_function": "predict"
     }
 }

         "max_top_k": 50265,
         "load_function": "load_model",
         "predict_function": "predict"
+    },
+    "2": {
+        "name": "Microsoft Prophet Net Uncased Large for Conditional Text Generation",
+        "type": "hf_automodel_finetuned_mstctg",
+        "module_path": "hmv_cfg_base_stage3.model2",
+        "hf_location": "tachygraphy-microtrext-norm-org/ProphetNet_ForCondGen_Uncased_Large_HFTSeq2Seq_Batch4_ngram3",
+        "tokenizer_class": "ProphetNetTokenizer",
+        "model_class": "ProphetNetForConditionalGeneration",
+        "problem_type": "text_transformamtion_and_normalization",
+        "base_model": "microsoft/prophetnet-large-uncased",
+        "base_model_class": "ProphetNetForConditionalGeneration",
+        "device": "cpu",
+        "max_top_k": 32128,
+        "load_function": "load_model",
+        "predict_function": "predict"
+    },
+    "3": {
+        "name": "Google T5 v1.1 Base for Conditional Text Generation",
+        "type": "hf_automodel_finetuned_gt5tctg",
+        "module_path": "hmv_cfg_base_stage3.model3",
+        "hf_location": "tachygraphy-microtrext-norm-org/T5-1.1-HF-seq2seq-Trainer-Batch4",
+        "tokenizer_class": "T5Tokenizer",
+        "model_class": "T5ForConditionalGeneration",
+        "problem_type": "text_transformamtion_and_normalization",
+        "base_model": "google/t5-v1_1-base",
+        "base_model_class": "T5ForConditionalGeneration",
+        "device": "cpu",
+        "max_top_k": 32128,
+        "load_function": "load_model",
+        "predict_function": "predict"
     }
 }

transformation_and_Normalization/hmv_cfg_base_stage3/model1.py CHANGED Viewed

@@ -9,7 +9,7 @@ import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "config", "stage2_models.json")
 MODEL_OPTIONS = {

 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "config", "stage3_models.json")
 MODEL_OPTIONS = {

transformation_and_Normalization/hmv_cfg_base_stage3/model2.py ADDED Viewed

	@@ -0,0 +1,117 @@

+from transformers import ProphetNetTokenizer, ProphetNetForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification, AutoModel
+import torch.nn.functional as F
+from imports import *
+import torch.nn as nn
+import torch
+import os
+import sys
+# Append this file's own directory to sys.path. Note that this runs after the
+# `from imports import *` line above, so it cannot help resolve that import.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+# Absolute path of the directory containing this module.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Location of ../config/stage3_models.json relative to this module.
+CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "config", "stage3_models.json")
+# Inline settings for the single model served by this module, keyed by model id.
+# This repeats the "2" entry of config/stage3_models.json; keep the two in sync.
+MODEL_OPTIONS = {
+    "2": {
+        "name": "Microsoft Prophet Net Uncased Large for Conditional Text Generation",
+        "type": "hf_automodel_finetuned_mstctg",
+        "module_path": "hmv_cfg_base_stage3.model2",
+        "hf_location": "tachygraphy-microtrext-norm-org/ProphetNet_ForCondGen_Uncased_Large_HFTSeq2Seq_Batch4_ngram3",
+        "tokenizer_class": "ProphetNetTokenizer",
+        "model_class": "ProphetNetForConditionalGeneration",
+        "problem_type": "text_transformamtion_and_normalization",
+        "base_model": "microsoft/prophetnet-large-uncased",
+        "base_model_class": "ProphetNetForConditionalGeneration",
+        "device": "cpu",
+        # NOTE(review): identical to the value used for the T5 entry in
+        # stage3_models.json; confirm it is the right bound for this model.
+        "max_top_k": 32128,
+        "load_function": "load_model",
+        "predict_function": "predict"
+    }
+}
+# Select this module's entry and pull out the values used by load_model().
+model_key = "2"
+model_info = MODEL_OPTIONS[model_key]
+hf_location = model_info["hf_location"]
+# Resolve the class names stored as strings above to the classes imported at
+# the top of this module; a name that was not imported raises KeyError here.
+tokenizer_class = globals()[model_info["tokenizer_class"]]
+model_class = globals()[model_info["model_class"]]
+@st.cache_resource
+def load_model():
+    """Load the tokenizer and model for this module from `hf_location`.
+
+    The function is wrapped in `st.cache_resource`. Progress messages are
+    printed to stdout around the model load.
+
+    Returns:
+        tuple: `(model, tokenizer)`, in that order.
+    """
+    # The tokenizer is fetched first, before the first progress message.
+    loaded_tokenizer = tokenizer_class.from_pretrained(hf_location)
+    print("Loading model 2")
+    # No device_map is passed; device placement is left to the caller.
+    loaded_model = model_class.from_pretrained(hf_location)
+    print("Model 2 loaded")
+    return loaded_model, loaded_tokenizer
+def predict(
+    model, tokenizer, text, device,
+    num_return_sequences=1,
+    beams=None,  # Beam search
+    do_sample=False,  # Sampling flag
+    temp=None,  # Temperature (only for sampling)
+    top_p=None,
+    top_k=None,
+    max_new_tokens=1024,
+    early_stopping=True
+):
+    """Generate normalized text for `text` and return the decoded sequences.
+
+    The decoding strategy follows the arguments:
+
+    * `beams` set          -> beam search (`num_beams=beams`, no sampling).
+    * `do_sample=True`     -> stochastic sampling; `temp` defaults to 0.7 and
+      `top_p` / `top_k` are forwarded only when given.
+    * neither              -> `do_sample=False` is forwarded as-is, so the
+      model's default non-sampling decoding is used.
+
+    Args:
+        model: Object exposing `generate(**kwargs)`.
+        tokenizer: Callable tokenizer that also provides `batch_decode`.
+        text: Input text (or batch of texts) passed to the tokenizer.
+        device: Device the encoded inputs are moved to.
+        num_return_sequences: Number of sequences requested from `generate`.
+        beams: Beam count; mutually exclusive with `do_sample=True`.
+        do_sample: Enable sampling.
+        temp: Sampling temperature; only valid with `do_sample=True`.
+        top_p: Nucleus sampling threshold; only valid with `do_sample=True`.
+        top_k: Top-k sampling cutoff; only valid with `do_sample=True`.
+        max_new_tokens: Upper bound on generated tokens.
+        early_stopping: Forwarded to `generate` for beam search only.
+
+    Returns:
+        The output of `tokenizer.batch_decode(..., skip_special_tokens=True)`.
+
+    Raises:
+        ValueError: If `beams` is combined with `do_sample=True`, or if
+            `temp`, `top_p` or `top_k` is given without `do_sample=True`.
+    """
+    # Validate first so an invalid combination fails before any tokenization.
+    if beams is not None and do_sample:
+        raise ValueError("Cannot use `beams` and `do_sample=True` together. Choose either beam search (`beams=5`) or sampling (`do_sample=True, temp=0.7`).")
+    if temp is not None and not do_sample:
+        raise ValueError("`temp` (temperature) can only be used in sampling mode (`do_sample=True`).")
+    if (top_p is not None or top_k is not None) and not do_sample:
+        raise ValueError("`top_p` and `top_k` can only be used in sampling mode (`do_sample=True`).")
+    # Tokenize input; `.to(device)` on the encoding moves every tensor in it,
+    # so the individual tensors do not need to be moved again.
+    encoded = tokenizer(text, return_tensors='pt', truncation=False, padding=True).to(device)
+    generate_args = {
+        "input_ids": encoded['input_ids'],
+        "attention_mask": encoded['attention_mask'],
+        "max_new_tokens": max_new_tokens,
+        "num_return_sequences": num_return_sequences,
+        # Honour the caller's flag instead of silently forcing sampling on.
+        "do_sample": do_sample,
+    }
+    if beams is not None:
+        # Beam search (deterministic)
+        generate_args["num_beams"] = beams
+        generate_args["early_stopping"] = early_stopping
+    elif do_sample:
+        # Sampling: fall back to a moderate temperature when none is passed.
+        generate_args["temperature"] = temp if temp is not None else 0.7
+        if top_p is not None:
+            generate_args["top_p"] = top_p
+        if top_k is not None:
+            generate_args["top_k"] = top_k
+    outputs = model.generate(**generate_args)
+    # Decode predictions into human-readable text
+    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

transformation_and_Normalization/hmv_cfg_base_stage3/model3.py ADDED Viewed

	@@ -0,0 +1,117 @@

+from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification, AutoModel
+import torch.nn.functional as F
+from imports import *
+import torch.nn as nn
+import torch
+import os
+import sys
+# Append this file's own directory to sys.path. Note that this runs after the
+# `from imports import *` line above, so it cannot help resolve that import.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+# Absolute path of the directory containing this module.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Location of ../config/stage3_models.json relative to this module.
+CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "config", "stage3_models.json")
+# Inline settings for the single model served by this module, keyed by model id.
+# This repeats the "3" entry of config/stage3_models.json; keep the two in sync.
+MODEL_OPTIONS = {
+    "3": {
+        "name": "Google T5 v1.1 Base for Conditional Text Generation",
+        "type": "hf_automodel_finetuned_gt5tctg",
+        "module_path": "hmv_cfg_base_stage3.model3",
+        "hf_location": "tachygraphy-microtrext-norm-org/T5-1.1-HF-seq2seq-Trainer-Batch4",
+        "tokenizer_class": "T5Tokenizer",
+        "model_class": "T5ForConditionalGeneration",
+        "problem_type": "text_transformamtion_and_normalization",
+        "base_model": "google/t5-v1_1-base",
+        "base_model_class": "T5ForConditionalGeneration",
+        "device": "cpu",
+        "max_top_k": 32128,
+        "load_function": "load_model",
+        "predict_function": "predict"
+    }
+}
+# Select this module's entry and pull out the values used by load_model().
+model_key = "3"
+model_info = MODEL_OPTIONS[model_key]
+hf_location = model_info["hf_location"]
+# Resolve the class names stored as strings above to the classes imported at
+# the top of this module; a name that was not imported raises KeyError here.
+tokenizer_class = globals()[model_info["tokenizer_class"]]
+model_class = globals()[model_info["model_class"]]
+@st.cache_resource
+def load_model():
+    """Load the tokenizer and model for this module from `hf_location`.
+
+    The function is wrapped in `st.cache_resource`. Progress messages are
+    printed to stdout around the model load.
+
+    Returns:
+        tuple: `(model, tokenizer)`, in that order.
+    """
+    # The tokenizer is fetched first, before the first progress message.
+    loaded_tokenizer = tokenizer_class.from_pretrained(hf_location)
+    print("Loading model 3")
+    # No device_map is passed; device placement is left to the caller.
+    loaded_model = model_class.from_pretrained(hf_location)
+    print("Model 3 loaded")
+    return loaded_model, loaded_tokenizer
+def predict(
+    model, tokenizer, text, device,
+    num_return_sequences=1,
+    beams=None,  # Beam search
+    do_sample=False,  # Sampling flag
+    temp=None,  # Temperature (only for sampling)
+    top_p=None,
+    top_k=None,
+    max_new_tokens=1024,
+    early_stopping=True
+):
+    """Generate normalized text for `text` and return the decoded sequences.
+
+    The decoding strategy follows the arguments:
+
+    * `beams` set          -> beam search (`num_beams=beams`, no sampling).
+    * `do_sample=True`     -> stochastic sampling; `temp` defaults to 0.7 and
+      `top_p` / `top_k` are forwarded only when given.
+    * neither              -> `do_sample=False` is forwarded as-is, so the
+      model's default non-sampling decoding is used.
+
+    Args:
+        model: Object exposing `generate(**kwargs)`.
+        tokenizer: Callable tokenizer that also provides `batch_decode`.
+        text: Input text (or batch of texts) passed to the tokenizer.
+        device: Device the encoded inputs are moved to.
+        num_return_sequences: Number of sequences requested from `generate`.
+        beams: Beam count; mutually exclusive with `do_sample=True`.
+        do_sample: Enable sampling.
+        temp: Sampling temperature; only valid with `do_sample=True`.
+        top_p: Nucleus sampling threshold; only valid with `do_sample=True`.
+        top_k: Top-k sampling cutoff; only valid with `do_sample=True`.
+        max_new_tokens: Upper bound on generated tokens.
+        early_stopping: Forwarded to `generate` for beam search only.
+
+    Returns:
+        The output of `tokenizer.batch_decode(..., skip_special_tokens=True)`.
+
+    Raises:
+        ValueError: If `beams` is combined with `do_sample=True`, or if
+            `temp`, `top_p` or `top_k` is given without `do_sample=True`.
+    """
+    # Validate first so an invalid combination fails before any tokenization.
+    if beams is not None and do_sample:
+        raise ValueError("Cannot use `beams` and `do_sample=True` together. Choose either beam search (`beams=5`) or sampling (`do_sample=True, temp=0.7`).")
+    if temp is not None and not do_sample:
+        raise ValueError("`temp` (temperature) can only be used in sampling mode (`do_sample=True`).")
+    if (top_p is not None or top_k is not None) and not do_sample:
+        raise ValueError("`top_p` and `top_k` can only be used in sampling mode (`do_sample=True`).")
+    # Tokenize input; `.to(device)` on the encoding moves every tensor in it,
+    # so the individual tensors do not need to be moved again.
+    encoded = tokenizer(text, return_tensors='pt', truncation=False, padding=True).to(device)
+    generate_args = {
+        "input_ids": encoded['input_ids'],
+        "attention_mask": encoded['attention_mask'],
+        "max_new_tokens": max_new_tokens,
+        "num_return_sequences": num_return_sequences,
+        # Honour the caller's flag instead of silently forcing sampling on.
+        "do_sample": do_sample,
+    }
+    if beams is not None:
+        # Beam search (deterministic)
+        generate_args["num_beams"] = beams
+        generate_args["early_stopping"] = early_stopping
+    elif do_sample:
+        # Sampling: fall back to a moderate temperature when none is passed.
+        generate_args["temperature"] = temp if temp is not None else 0.7
+        if top_p is not None:
+            generate_args["top_p"] = top_p
+        if top_k is not None:
+            generate_args["top_k"] = top_k
+    outputs = model.generate(**generate_args)
+    # Decode predictions into human-readable text
+    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

transformation_and_Normalization/transformationNormalization_main.py CHANGED Viewed

@@ -224,6 +224,12 @@ def transform_and_normalize():
     # No cache clearing here—only in the model change callback!
     # st.write(st.session_state)
     if "top_k" not in st.session_state:
         st.session_state.top_k = 50
@@ -242,12 +248,12 @@ def transform_and_normalize():
     # Model selection with change detection; clearing cache happens in on_model_change()
     selected_model = st.selectbox(
-        "Choose a model:", model_names, key="selected_model", on_change=on_model_change
     )
     # Text input with change detection
     user_input = st.text_input(
-        "Enter text for emotions mood-tag analysis:", key="user_input", on_change=on_text_change
     )
     st.markdown("#### Generation Parameters")
@@ -321,7 +327,7 @@ def transform_and_normalize():
         user_input_copy = user_input
     current_time = time.time()
-    if user_input.strip() and (current_time - st.session_state.last_change >= 1.5):
         st.session_state.last_processed_input = user_input
         progress_bar = st.progress(0)
@@ -348,11 +354,11 @@ def transform_and_normalize():
         update_progress(progress_bar, 10, 100)
         if len(predictions) > 1:
-            st.write("### Multiple Predictions:")
             for i, pred in enumerate(predictions, start=1):
-                st.markdown(f"**Sequence {i}:** {pred}")
         else:
-            st.write("### Prediction:")
             st.write(predictions[0])
         progress_bar.empty()
     # else:

     # No cache clearing here—only in the model change callback!
     # st.write(st.session_state)
+    if "last_change" not in st.session_state:
+        st.session_state.last_change = time.time()
+    if "auto_predict_triggered" not in st.session_state:
+        st.session_state.auto_predict_triggered = False
     if "top_k" not in st.session_state:
         st.session_state.top_k = 50
     # Model selection with change detection; clearing cache happens in on_model_change()
     selected_model = st.selectbox(
+        "Choose a model:", model_names, key="selected_model_stage3", on_change=on_model_change
     )
     # Text input with change detection
     user_input = st.text_input(
+        "Enter text for emotions mood-tag analysis:", key="user_input_stage3", on_change=on_text_change
     )
     st.markdown("#### Generation Parameters")
         user_input_copy = user_input
     current_time = time.time()
+    if user_input.strip() and (current_time - st.session_state.last_change >= 1.25):
         st.session_state.last_processed_input = user_input
         progress_bar = st.progress(0)
         update_progress(progress_bar, 10, 100)
         if len(predictions) > 1:
+            st.write("### Predictions:")
             for i, pred in enumerate(predictions, start=1):
+                st.markdown(f"**Prediction Sequence {i}:** {pred}")
         else:
+            st.write("### Predicted Sequence:")
             st.write(predictions[0])
         progress_bar.empty()
     # else: