Archisman Karmakar
commited on
Commit
·
f31213f
1
Parent(s):
986513a
2025.03.17.post1
Browse files- .github/workflows/ossar.yml +4 -0
- __pycache__/dashboard.cpython-312.pyc +0 -0
- __pycache__/emotion_analysis.cpython-312.pyc +0 -0
- __pycache__/imports.cpython-312.pyc +0 -0
- app_main_hf.py +18 -1
- imports.py +7 -1
- poetry.lock +0 -0
- pyproject.toml +3 -1
- pyprojectOLD.toml +202 -0
- requirements.txt +8 -4
- sentiment_analysis/__pycache__/__init__.cpython-312.pyc +0 -0
- sentiment_analysis/__pycache__/sentiment_analysis.cpython-312.pyc +0 -0
- sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc +0 -0
- sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc +0 -0
- sentiment_analysis/hmv_cfg_base_stage1/imports.py +4 -1
- sentiment_analysis/{sentiment_analysis.py → sentiment_analysis_main.py} +295 -295
.github/workflows/ossar.yml
CHANGED
|
@@ -31,6 +31,10 @@ jobs:
|
|
| 31 |
runs-on: windows-latest
|
| 32 |
|
| 33 |
steps:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
- name: Checkout repository
|
| 35 |
uses: actions/checkout@v4
|
| 36 |
|
|
|
|
| 31 |
runs-on: windows-latest
|
| 32 |
|
| 33 |
steps:
|
| 34 |
+
|
| 35 |
+
- name: Enable long paths in Git
|
| 36 |
+
run: git config --system core.longpaths true
|
| 37 |
+
|
| 38 |
- name: Checkout repository
|
| 39 |
uses: actions/checkout@v4
|
| 40 |
|
__pycache__/dashboard.cpython-312.pyc
ADDED
|
Binary file (844 Bytes). View file
|
|
|
__pycache__/emotion_analysis.cpython-312.pyc
ADDED
|
Binary file (668 Bytes). View file
|
|
|
__pycache__/imports.cpython-312.pyc
ADDED
|
Binary file (765 Bytes). View file
|
|
|
app_main_hf.py
CHANGED
|
@@ -1,13 +1,30 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
|
|
|
| 3 |
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import joblib
|
|
|
|
| 5 |
import importlib.util
|
| 6 |
|
| 7 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from dashboard import show_dashboard
|
| 10 |
-
from sentiment_analysis.
|
| 11 |
from emotion_analysis import show_emotion_analysis
|
| 12 |
# from text_transformation import show_text_transformation
|
| 13 |
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
+
import asyncio
|
| 4 |
import sys
|
| 5 |
+
|
| 6 |
+
if sys.platform == "win32":
|
| 7 |
+
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
| 8 |
+
else:
|
| 9 |
+
try:
|
| 10 |
+
asyncio.get_running_loop()
|
| 11 |
+
except RuntimeError:
|
| 12 |
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
| 13 |
+
|
| 14 |
+
|
| 15 |
import joblib
|
| 16 |
+
import importlib
|
| 17 |
import importlib.util
|
| 18 |
|
| 19 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
|
| 20 |
|
| 21 |
+
from imports import *
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
from dashboard import show_dashboard
|
| 27 |
+
from sentiment_analysis.sentiment_analysis_main import show_sentiment_analysis
|
| 28 |
from emotion_analysis import show_emotion_analysis
|
| 29 |
# from text_transformation import show_text_transformation
|
| 30 |
|
imports.py
CHANGED
|
@@ -9,5 +9,11 @@ import json
|
|
| 9 |
import gc
|
| 10 |
import psutil
|
| 11 |
import os
|
|
|
|
| 12 |
import importlib.util
|
| 13 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
import gc
|
| 10 |
import psutil
|
| 11 |
import os
|
| 12 |
+
import importlib
|
| 13 |
import importlib.util
|
| 14 |
+
import asyncio
|
| 15 |
+
import sys
|
| 16 |
+
import pytorch_lightning as pl
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
|
poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[project]
|
| 2 |
name = "tachygraphy-microtext-analysis-and-normalization"
|
| 3 |
-
version = "2025.03.
|
| 4 |
description = ""
|
| 5 |
authors = [
|
| 6 |
{ name = "Archisman Karmakar", email = "92569441+ArchismanKarmakar@users.noreply.github.com" },
|
|
@@ -192,6 +192,8 @@ dependencies = [
|
|
| 192 |
"zict (>=3.0.0,<4.0.0)",
|
| 193 |
"zipp (>=3.21.0,<4.0.0)",
|
| 194 |
"zstandard (>=0.23.0,<0.24.0)",
|
|
|
|
|
|
|
| 195 |
]
|
| 196 |
|
| 197 |
|
|
|
|
| 1 |
[project]
|
| 2 |
name = "tachygraphy-microtext-analysis-and-normalization"
|
| 3 |
+
version = "2025.03.17.post1"
|
| 4 |
description = ""
|
| 5 |
authors = [
|
| 6 |
{ name = "Archisman Karmakar", email = "92569441+ArchismanKarmakar@users.noreply.github.com" },
|
|
|
|
| 192 |
"zict (>=3.0.0,<4.0.0)",
|
| 193 |
"zipp (>=3.21.0,<4.0.0)",
|
| 194 |
"zstandard (>=0.23.0,<0.24.0)",
|
| 195 |
+
"asyncio (>=3.4.3,<4.0.0)",
|
| 196 |
+
"pytorch-lightning (>=2.5.0.post0,<3.0.0)",
|
| 197 |
]
|
| 198 |
|
| 199 |
|
pyprojectOLD.toml
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "tachygraphy-microtext-analysis-and-normalization"
|
| 3 |
+
version = "2025.03.16.post3"
|
| 4 |
+
description = ""
|
| 5 |
+
authors = [
|
| 6 |
+
{ name = "Archisman Karmakar", email = "92569441+ArchismanKarmakar@users.noreply.github.com" },
|
| 7 |
+
]
|
| 8 |
+
readme = "README.md"
|
| 9 |
+
requires-python = ">=3.12"
|
| 10 |
+
dependencies = [
|
| 11 |
+
"numpy (==2.1.3)",
|
| 12 |
+
"torch (>=2.6.0,<3.0.0)",
|
| 13 |
+
"torchvision (>=0.21.0,<0.22.0)",
|
| 14 |
+
"tensorflow (>=2.19.0,<3.0.0)",
|
| 15 |
+
"pandas (>=2.2.3,<3.0.0)",
|
| 16 |
+
"openpyxl (>=3.1.5,<4.0.0)",
|
| 17 |
+
"streamlit (>=1.43.2,<2.0.0)",
|
| 18 |
+
"transformers (>=4.49.0,<5.0.0)",
|
| 19 |
+
"datasets (>=3.4.0,<4.0.0)",
|
| 20 |
+
"autograd (>=1.7.0,<2.0.0)",
|
| 21 |
+
"ipykernel (>=6.29.5,<7.0.0)",
|
| 22 |
+
"matplotlib (>=3.10.1,<4.0.0)",
|
| 23 |
+
"plotly (>=6.0.0,<7.0.0)",
|
| 24 |
+
"importlib (>=1.0.4,<2.0.0)",
|
| 25 |
+
"joblib (>=1.4.2,<2.0.0)",
|
| 26 |
+
"accelerate (>=1.5.2,<2.0.0)",
|
| 27 |
+
"peft (>=0.14.0,<0.15.0)",
|
| 28 |
+
"mpi4py (>=4.0.3,<5.0.0)",
|
| 29 |
+
"tiktoken (>=0.9.0,<0.10.0)",
|
| 30 |
+
"sentencepiece (>=0.2.0,<0.3.0)",
|
| 31 |
+
"bs4 (>=0.0.2,<0.0.3)",
|
| 32 |
+
"emoji (>=2.14.1,<3.0.0)",
|
| 33 |
+
"safetensors (>=0.5.3,<0.6.0)",
|
| 34 |
+
"lxml (>=5.3.1,<6.0.0)",
|
| 35 |
+
"evaluate (>=0.4.3,<0.5.0)",
|
| 36 |
+
"scipy (>=1.15.2,<2.0.0)",
|
| 37 |
+
"sacrebleu (>=2.5.1,<3.0.0)",
|
| 38 |
+
"rouge-score (>=0.1.2,<0.2.0)",
|
| 39 |
+
"scikit-learn (>=1.6.1,<2.0.0)",
|
| 40 |
+
"regex (>=2024.11.6,<2025.0.0)",
|
| 41 |
+
"pywin32 (>=309,<310)",
|
| 42 |
+
"pycurl (>=7.45.6,<8.0.0)",
|
| 43 |
+
"pillow (>=11.1.0,<12.0.0)",
|
| 44 |
+
"diffusers (>=0.32.2,<0.33.0)",
|
| 45 |
+
"onnx (>=1.17.0,<2.0.0)",
|
| 46 |
+
"keras (>=3.9.0,<4.0.0)",
|
| 47 |
+
"h2o (>=3.46.0.6,<4.0.0.0)",
|
| 48 |
+
"distributed (>=2025.2.0,<2026.0.0)",
|
| 49 |
+
"absl-py (>=2.1.0,<3.0.0)",
|
| 50 |
+
"aiohappyeyeballs (>=2.6.1,<3.0.0)",
|
| 51 |
+
"aiohttp (>=3.11.13,<4.0.0)",
|
| 52 |
+
"aiosignal (>=1.3.2,<2.0.0)",
|
| 53 |
+
"altair (>=5.5.0,<6.0.0)",
|
| 54 |
+
"anyio (>=4.8.0,<5.0.0)",
|
| 55 |
+
"asttokens (>=3.0.0,<4.0.0)",
|
| 56 |
+
"astunparse (>=1.6.3,<2.0.0)",
|
| 57 |
+
"attrs (>=25.3.0,<26.0.0)",
|
| 58 |
+
"beautifulsoup4 (>=4.13.3,<5.0.0)",
|
| 59 |
+
"blinker (>=1.9.0,<2.0.0)",
|
| 60 |
+
"build (>=1.2.2.post1,<2.0.0)",
|
| 61 |
+
"cachecontrol (>=0.14.2,<0.15.0)",
|
| 62 |
+
"cachetools (>=5.5.2,<6.0.0)",
|
| 63 |
+
"certifi (>=2025.1.31,<2026.0.0)",
|
| 64 |
+
"charset-normalizer (>=3.4.1,<4.0.0)",
|
| 65 |
+
"click (>=8.1.8,<9.0.0)",
|
| 66 |
+
"cloudpickle (>=3.1.1,<4.0.0)",
|
| 67 |
+
"colorama (>=0.4.6,<0.5.0)",
|
| 68 |
+
"comm (>=0.2.2,<0.3.0)",
|
| 69 |
+
"contourpy (>=1.3.1,<2.0.0)",
|
| 70 |
+
"cycler (>=0.12.1,<0.13.0)",
|
| 71 |
+
"dask (>=2025.2.0,<2026.0.0)",
|
| 72 |
+
"debugpy (>=1.8.13,<2.0.0)",
|
| 73 |
+
"decorator (>=5.2.1,<6.0.0)",
|
| 74 |
+
"distlib (>=0.3.9,<0.4.0)",
|
| 75 |
+
"dulwich (>=0.22.8,<0.23.0)",
|
| 76 |
+
"et-xmlfile (>=2.0.0,<3.0.0)",
|
| 77 |
+
"executing (>=2.2.0,<3.0.0)",
|
| 78 |
+
"fastjsonschema (>=2.21.1,<3.0.0)",
|
| 79 |
+
"filelock (>=3.18.0,<4.0.0)",
|
| 80 |
+
"findpython (>=0.6.3,<0.7.0)",
|
| 81 |
+
"flatbuffers (>=25.2.10,<26.0.0)",
|
| 82 |
+
"fonttools (>=4.56.0,<5.0.0)",
|
| 83 |
+
"frozenlist (>=1.5.0,<2.0.0)",
|
| 84 |
+
"gast (>=0.6.0,<0.7.0)",
|
| 85 |
+
"gitdb (>=4.0.12,<5.0.0)",
|
| 86 |
+
"gitpython (>=3.1.44,<4.0.0)",
|
| 87 |
+
"google-pasta (>=0.2.0,<0.3.0)",
|
| 88 |
+
"grpcio (>=1.71.0,<2.0.0)",
|
| 89 |
+
"h11 (>=0.14.0,<0.15.0)",
|
| 90 |
+
"h5py (>=3.13.0,<4.0.0)",
|
| 91 |
+
"httpcore (>=1.0.7,<2.0.0)",
|
| 92 |
+
"httpx (>=0.28.1,<0.29.0)",
|
| 93 |
+
"huggingface-hub (>=0.29.3,<0.30.0)",
|
| 94 |
+
"idna (>=3.10,<4.0)",
|
| 95 |
+
"importlib-metadata (>=8.6.1,<9.0.0)",
|
| 96 |
+
"installer (>=0.7.0,<0.8.0)",
|
| 97 |
+
"ipython (>=9.0.2,<10.0.0)",
|
| 98 |
+
"ipython-pygments-lexers (>=1.1.1,<2.0.0)",
|
| 99 |
+
"jaraco-classes (>=3.4.0,<4.0.0)",
|
| 100 |
+
"jaraco-context (>=6.0.1,<7.0.0)",
|
| 101 |
+
"jaraco-functools (>=4.1.0,<5.0.0)",
|
| 102 |
+
"jedi (>=0.19.2,<0.20.0)",
|
| 103 |
+
"jinja2 (>=3.1.6,<4.0.0)",
|
| 104 |
+
"jsonschema (>=4.23.0,<5.0.0)",
|
| 105 |
+
"jsonschema-specifications (>=2024.10.1,<2025.0.0)",
|
| 106 |
+
"jupyter-client (>=8.6.3,<9.0.0)",
|
| 107 |
+
"jupyter-core (>=5.7.2,<6.0.0)",
|
| 108 |
+
"kagglehub (>=0.3.10,<0.4.0)",
|
| 109 |
+
"keyring (>=25.6.0,<26.0.0)",
|
| 110 |
+
"kiwisolver (>=1.4.8,<2.0.0)",
|
| 111 |
+
"libclang (>=18.1.1,<19.0.0)",
|
| 112 |
+
"locket (>=1.0.0,<2.0.0)",
|
| 113 |
+
"markdown (>=3.7,<4.0)",
|
| 114 |
+
"markdown-it-py (>=3.0.0,<4.0.0)",
|
| 115 |
+
"markupsafe (>=3.0.2,<4.0.0)",
|
| 116 |
+
"matplotlib-inline (>=0.1.7,<0.2.0)",
|
| 117 |
+
"mdurl (>=0.1.2,<0.2.0)",
|
| 118 |
+
"ml-dtypes (>=0.5.1,<0.6.0)",
|
| 119 |
+
"more-itertools (>=10.6.0,<11.0.0)",
|
| 120 |
+
"mpmath (>=1.3.0,<2.0.0)",
|
| 121 |
+
"msgpack (>=1.1.0,<2.0.0)",
|
| 122 |
+
"multidict (>=6.1.0,<7.0.0)",
|
| 123 |
+
"namex (>=0.0.8,<0.0.9)",
|
| 124 |
+
"narwhals (>=1.30.0,<2.0.0)",
|
| 125 |
+
"nest-asyncio (>=1.6.0,<2.0.0)",
|
| 126 |
+
"networkx (>=3.4.2,<4.0.0)",
|
| 127 |
+
"nltk (>=3.9.1,<4.0.0)",
|
| 128 |
+
"opt-einsum (>=3.4.0,<4.0.0)",
|
| 129 |
+
"optree (>=0.14.1,<0.15.0)",
|
| 130 |
+
"packaging (>=24.2,<25.0)",
|
| 131 |
+
"parso (>=0.8.4,<0.9.0)",
|
| 132 |
+
"partd (>=1.4.2,<2.0.0)",
|
| 133 |
+
"pbs-installer (>=2025.3.11,<2026.0.0)",
|
| 134 |
+
"pkginfo (>=1.12.1.2,<2.0.0.0)",
|
| 135 |
+
"platformdirs (>=4.3.6,<5.0.0)",
|
| 136 |
+
"portalocker (>=3.1.1,<4.0.0)",
|
| 137 |
+
"prompt-toolkit (>=3.0.50,<4.0.0)",
|
| 138 |
+
"propcache (>=0.3.0,<0.4.0)",
|
| 139 |
+
"psutil (>=7.0.0,<8.0.0)",
|
| 140 |
+
"pure-eval (>=0.2.3,<0.3.0)",
|
| 141 |
+
"pyarrow (>=19.0.1,<20.0.0)",
|
| 142 |
+
"pydeck (>=0.9.1,<0.10.0)",
|
| 143 |
+
"pygments (>=2.19.1,<3.0.0)",
|
| 144 |
+
"pyparsing (>=3.2.1,<4.0.0)",
|
| 145 |
+
"pyproject-hooks (>=1.2.0,<2.0.0)",
|
| 146 |
+
"python-dateutil (>=2.9.0.post0,<3.0.0)",
|
| 147 |
+
"pytz (>=2025.1,<2026.0)",
|
| 148 |
+
"pywin32-ctypes (>=0.2.3,<0.3.0)",
|
| 149 |
+
"pyyaml (>=6.0.2,<7.0.0)",
|
| 150 |
+
"pyzmq (>=26.3.0,<27.0.0)",
|
| 151 |
+
"rapidfuzz (>=3.12.2,<4.0.0)",
|
| 152 |
+
"referencing (>=0.36.2,<0.37.0)",
|
| 153 |
+
"requests (>=2.32.3,<3.0.0)",
|
| 154 |
+
"requests-toolbelt (>=1.0.0,<2.0.0)",
|
| 155 |
+
"rich (>=13.9.4,<14.0.0)",
|
| 156 |
+
"rpds-py (>=0.23.1,<0.24.0)",
|
| 157 |
+
"sentence-transformers (>=3.4.1,<4.0.0)",
|
| 158 |
+
"setuptools (>=76.0.0,<77.0.0)",
|
| 159 |
+
"shellingham (>=1.5.4,<2.0.0)",
|
| 160 |
+
"six (>=1.17.0,<2.0.0)",
|
| 161 |
+
"smmap (>=5.0.2,<6.0.0)",
|
| 162 |
+
"sniffio (>=1.3.1,<2.0.0)",
|
| 163 |
+
"sortedcontainers (>=2.4.0,<3.0.0)",
|
| 164 |
+
"soupsieve (>=2.6,<3.0)",
|
| 165 |
+
"stack-data (>=0.6.3,<0.7.0)",
|
| 166 |
+
"tabulate (>=0.9.0,<0.10.0)",
|
| 167 |
+
"tblib (>=3.0.0,<4.0.0)",
|
| 168 |
+
"tenacity (>=9.0.0,<10.0.0)",
|
| 169 |
+
"tensorboard (>=2.19.0,<3.0.0)",
|
| 170 |
+
"tensorboard-data-server (>=0.7.2,<0.8.0)",
|
| 171 |
+
"termcolor (>=2.5.0,<3.0.0)",
|
| 172 |
+
"threadpoolctl (>=3.6.0,<4.0.0)",
|
| 173 |
+
"tokenizers (>=0.21.1,<0.22.0)",
|
| 174 |
+
"toml (>=0.10.2,<0.11.0)",
|
| 175 |
+
"tomlkit (>=0.13.2,<0.14.0)",
|
| 176 |
+
"toolz (>=1.0.0,<2.0.0)",
|
| 177 |
+
"tornado (>=6.4.2,<7.0.0)",
|
| 178 |
+
"tqdm (>=4.67.1,<5.0.0)",
|
| 179 |
+
"traitlets (>=5.14.3,<6.0.0)",
|
| 180 |
+
"trove-classifiers (>=2025.3.13.13,<2026.0.0.0)",
|
| 181 |
+
"typing-extensions (>=4.12.2,<5.0.0)",
|
| 182 |
+
"tzdata (>=2025.1,<2026.0)",
|
| 183 |
+
"urllib3 (>=2.3.0,<3.0.0)",
|
| 184 |
+
"virtualenv (>=20.29.3,<21.0.0)",
|
| 185 |
+
"watchdog (>=6.0.0,<7.0.0)",
|
| 186 |
+
"wcwidth (>=0.2.13,<0.3.0)",
|
| 187 |
+
"werkzeug (>=3.1.3,<4.0.0)",
|
| 188 |
+
"wheel (>=0.45.1,<0.46.0)",
|
| 189 |
+
"wrapt (>=1.17.2,<2.0.0)",
|
| 190 |
+
"xxhash (>=3.5.0,<4.0.0)",
|
| 191 |
+
"yarl (>=1.18.3,<2.0.0)",
|
| 192 |
+
"zict (>=3.0.0,<4.0.0)",
|
| 193 |
+
"zipp (>=3.21.0,<4.0.0)",
|
| 194 |
+
"zstandard (>=0.23.0,<0.24.0)",
|
| 195 |
+
"asyncio (>=3.4.3,<4.0.0)",
|
| 196 |
+
"pytorch-lightning (>=2.5.0.post0,<3.0.0)",
|
| 197 |
+
]
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
[build-system]
|
| 201 |
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
| 202 |
+
build-backend = "poetry.core.masonry.api"
|
requirements.txt
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
absl-py==2.1.0 ; python_version >= "3.12"
|
| 2 |
accelerate==1.5.2 ; python_version >= "3.12"
|
| 3 |
aiohappyeyeballs==2.6.1 ; python_version >= "3.12"
|
| 4 |
-
aiohttp==3.11.
|
| 5 |
aiosignal==1.3.2 ; python_version >= "3.12"
|
| 6 |
altair==5.5.0 ; python_version >= "3.12"
|
| 7 |
-
anyio==4.
|
| 8 |
appnope==0.1.4 ; python_version >= "3.12" and platform_system == "Darwin"
|
| 9 |
asttokens==3.0.0 ; python_version >= "3.12"
|
| 10 |
astunparse==1.6.3 ; python_version >= "3.12"
|
|
|
|
| 11 |
attrs==25.3.0 ; python_version >= "3.12"
|
| 12 |
autograd==1.7.0 ; python_version >= "3.12"
|
| 13 |
beautifulsoup4==4.13.3 ; python_version >= "3.12"
|
|
@@ -80,6 +81,7 @@ keras==3.9.0 ; python_version >= "3.12"
|
|
| 80 |
keyring==25.6.0 ; python_version >= "3.12"
|
| 81 |
kiwisolver==1.4.8 ; python_version >= "3.12"
|
| 82 |
libclang==18.1.1 ; python_version >= "3.12"
|
|
|
|
| 83 |
locket==1.0.0 ; python_version >= "3.12"
|
| 84 |
lxml==5.3.1 ; python_version >= "3.12"
|
| 85 |
markdown-it-py==3.0.0 ; python_version >= "3.12"
|
|
@@ -144,9 +146,10 @@ pygments==2.19.1 ; python_version >= "3.12"
|
|
| 144 |
pyparsing==3.2.1 ; python_version >= "3.12"
|
| 145 |
pyproject-hooks==1.2.0 ; python_version >= "3.12"
|
| 146 |
python-dateutil==2.9.0.post0 ; python_version >= "3.12"
|
|
|
|
| 147 |
pytz==2025.1 ; python_version >= "3.12"
|
| 148 |
-
|
| 149 |
-
|
| 150 |
pyyaml==6.0.2 ; python_version >= "3.12"
|
| 151 |
pyzmq==26.3.0 ; python_version >= "3.12"
|
| 152 |
rapidfuzz==3.12.2 ; python_version >= "3.12"
|
|
@@ -188,6 +191,7 @@ toml==0.10.2 ; python_version >= "3.12"
|
|
| 188 |
tomlkit==0.13.2 ; python_version >= "3.12"
|
| 189 |
toolz==1.0.0 ; python_version >= "3.12"
|
| 190 |
torch==2.6.0 ; python_version >= "3.12"
|
|
|
|
| 191 |
torchvision==0.21.0 ; python_version >= "3.12"
|
| 192 |
tornado==6.4.2 ; python_version >= "3.12"
|
| 193 |
tqdm==4.67.1 ; python_version >= "3.12"
|
|
|
|
| 1 |
absl-py==2.1.0 ; python_version >= "3.12"
|
| 2 |
accelerate==1.5.2 ; python_version >= "3.12"
|
| 3 |
aiohappyeyeballs==2.6.1 ; python_version >= "3.12"
|
| 4 |
+
aiohttp==3.11.14 ; python_version >= "3.12"
|
| 5 |
aiosignal==1.3.2 ; python_version >= "3.12"
|
| 6 |
altair==5.5.0 ; python_version >= "3.12"
|
| 7 |
+
anyio==4.9.0 ; python_version >= "3.12"
|
| 8 |
appnope==0.1.4 ; python_version >= "3.12" and platform_system == "Darwin"
|
| 9 |
asttokens==3.0.0 ; python_version >= "3.12"
|
| 10 |
astunparse==1.6.3 ; python_version >= "3.12"
|
| 11 |
+
asyncio==3.4.3 ; python_version >= "3.12"
|
| 12 |
attrs==25.3.0 ; python_version >= "3.12"
|
| 13 |
autograd==1.7.0 ; python_version >= "3.12"
|
| 14 |
beautifulsoup4==4.13.3 ; python_version >= "3.12"
|
|
|
|
| 81 |
keyring==25.6.0 ; python_version >= "3.12"
|
| 82 |
kiwisolver==1.4.8 ; python_version >= "3.12"
|
| 83 |
libclang==18.1.1 ; python_version >= "3.12"
|
| 84 |
+
lightning-utilities==0.14.1 ; python_version >= "3.12"
|
| 85 |
locket==1.0.0 ; python_version >= "3.12"
|
| 86 |
lxml==5.3.1 ; python_version >= "3.12"
|
| 87 |
markdown-it-py==3.0.0 ; python_version >= "3.12"
|
|
|
|
| 146 |
pyparsing==3.2.1 ; python_version >= "3.12"
|
| 147 |
pyproject-hooks==1.2.0 ; python_version >= "3.12"
|
| 148 |
python-dateutil==2.9.0.post0 ; python_version >= "3.12"
|
| 149 |
+
pytorch-lightning==2.5.0.post0 ; python_version >= "3.12"
|
| 150 |
pytz==2025.1 ; python_version >= "3.12"
|
| 151 |
+
pywin32-ctypes==0.2.3 ; python_version >= "3.12"
|
| 152 |
+
pywin32==309 ; python_version >= "3.12"
|
| 153 |
pyyaml==6.0.2 ; python_version >= "3.12"
|
| 154 |
pyzmq==26.3.0 ; python_version >= "3.12"
|
| 155 |
rapidfuzz==3.12.2 ; python_version >= "3.12"
|
|
|
|
| 191 |
tomlkit==0.13.2 ; python_version >= "3.12"
|
| 192 |
toolz==1.0.0 ; python_version >= "3.12"
|
| 193 |
torch==2.6.0 ; python_version >= "3.12"
|
| 194 |
+
torchmetrics==1.6.3 ; python_version >= "3.12"
|
| 195 |
torchvision==0.21.0 ; python_version >= "3.12"
|
| 196 |
tornado==6.4.2 ; python_version >= "3.12"
|
| 197 |
tqdm==4.67.1 ; python_version >= "3.12"
|
sentiment_analysis/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (264 Bytes). View file
|
|
|
sentiment_analysis/__pycache__/sentiment_analysis.cpython-312.pyc
ADDED
|
Binary file (8.94 kB). View file
|
|
|
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (284 Bytes). View file
|
|
|
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc
ADDED
|
Binary file (2.97 kB). View file
|
|
|
sentiment_analysis/hmv_cfg_base_stage1/imports.py
CHANGED
|
@@ -9,5 +9,8 @@ import json
|
|
| 9 |
import gc
|
| 10 |
import psutil
|
| 11 |
import os
|
|
|
|
| 12 |
import importlib.util
|
| 13 |
-
import
|
|
|
|
|
|
|
|
|
| 9 |
import gc
|
| 10 |
import psutil
|
| 11 |
import os
|
| 12 |
+
import importlib
|
| 13 |
import importlib.util
|
| 14 |
+
import asyncio
|
| 15 |
+
import sys
|
| 16 |
+
import pytorch_lightning as pl
|
sentiment_analysis/{sentiment_analysis.py → sentiment_analysis_main.py}
RENAMED
|
@@ -1,296 +1,296 @@
|
|
| 1 |
-
from imports import *
|
| 2 |
-
import importlib.util
|
| 3 |
-
import os
|
| 4 |
-
import sys
|
| 5 |
-
import joblib
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
|
| 8 |
-
|
| 9 |
-
# from hmv_cfg_base_stage1.model1 import load_model as load_model1
|
| 10 |
-
# from hmv_cfg_base_stage1.model1 import predict as predict1
|
| 11 |
-
|
| 12 |
-
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 13 |
-
CONFIG_STAGE1 = os.path.join(BASE_DIR, "config", "stage1_models.json")
|
| 14 |
-
LOADERS_STAGE1 = os.path.join(BASE_DIR, "hmv-cfg-base-stage1")
|
| 15 |
-
|
| 16 |
-
# Load the model and tokenizer
|
| 17 |
-
# model_name = "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8"
|
| 18 |
-
# tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 19 |
-
# model = AutoModel.from_pretrained(model_name)
|
| 20 |
-
|
| 21 |
-
SENTIMENT_POLARITY_LABELS = [
|
| 22 |
-
"negative", "neutral", "positive"
|
| 23 |
-
]
|
| 24 |
-
|
| 25 |
-
current_model = None
|
| 26 |
-
current_tokenizer = None
|
| 27 |
-
|
| 28 |
-
# Enabling Resource caching
|
| 29 |
-
@st.cache_resource
|
| 30 |
-
|
| 31 |
-
def load_model_config():
|
| 32 |
-
with open(CONFIG_STAGE1, "r") as f:
|
| 33 |
-
model_data = json.load(f)
|
| 34 |
-
|
| 35 |
-
model_options = {v["name"]: v for v in model_data.values()} # Extract names for dropdown
|
| 36 |
-
return model_data, model_options
|
| 37 |
-
|
| 38 |
-
MODEL_DATA, MODEL_OPTIONS = load_model_config()
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
# def load_model():
|
| 43 |
-
# model = DebertaV2ForSequenceClassification.from_pretrained(model_name)
|
| 44 |
-
# tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
|
| 45 |
-
# return model, tokenizer
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
# ✅ Dynamically Import Model Functions
|
| 49 |
-
def import_from_module(module_name, function_name):
|
| 50 |
-
try:
|
| 51 |
-
module = importlib.import_module(module_name)
|
| 52 |
-
return getattr(module, function_name)
|
| 53 |
-
except (ModuleNotFoundError, AttributeError) as e:
|
| 54 |
-
st.error(f"❌ Import Error: {e}")
|
| 55 |
-
return None
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def free_memory():
|
| 59 |
-
# """Free up CPU & GPU memory before loading a new model."""
|
| 60 |
-
global current_model, current_tokenizer
|
| 61 |
-
|
| 62 |
-
if current_model is not None:
|
| 63 |
-
del current_model # Delete the existing model
|
| 64 |
-
current_model = None # Reset reference
|
| 65 |
-
|
| 66 |
-
if current_tokenizer is not None:
|
| 67 |
-
del current_tokenizer # Delete the tokenizer
|
| 68 |
-
current_tokenizer = None
|
| 69 |
-
|
| 70 |
-
gc.collect() # Force garbage collection for CPU memory
|
| 71 |
-
|
| 72 |
-
if torch.cuda.is_available():
|
| 73 |
-
torch.cuda.empty_cache() # Free GPU memory
|
| 74 |
-
torch.cuda.ipc_collect() # Clean up PyTorch GPU cache
|
| 75 |
-
|
| 76 |
-
# If running on CPU, reclaim memory using OS-level commands
|
| 77 |
-
try:
|
| 78 |
-
if torch.cuda.is_available() is False:
|
| 79 |
-
psutil.virtual_memory() # Refresh memory stats
|
| 80 |
-
except Exception as e:
|
| 81 |
-
print(f"Memory cleanup error: {e}")
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
def load_selected_model(model_name):
|
| 85 |
-
global current_model, current_tokenizer
|
| 86 |
-
|
| 87 |
-
free_memory()
|
| 88 |
-
|
| 89 |
-
# st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
|
| 90 |
-
# st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
|
| 91 |
-
# st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
|
| 92 |
-
|
| 93 |
-
if model_name not in MODEL_OPTIONS:
|
| 94 |
-
st.error(f"⚠️ Model '{model_name}' not found in config!")
|
| 95 |
-
return None, None, None
|
| 96 |
-
|
| 97 |
-
model_info = MODEL_OPTIONS[model_name]
|
| 98 |
-
hf_location = model_info["hf_location"]
|
| 99 |
-
|
| 100 |
-
model_module = model_info["module_path"]
|
| 101 |
-
load_function = model_info["load_function"]
|
| 102 |
-
predict_function = model_info["predict_function"]
|
| 103 |
-
|
| 104 |
-
load_model_func = import_from_module(model_module, load_function)
|
| 105 |
-
predict_func = import_from_module(model_module, predict_function)
|
| 106 |
-
|
| 107 |
-
if load_model_func is None or predict_func is None:
|
| 108 |
-
st.error("❌ Model functions could not be loaded!")
|
| 109 |
-
return None, None, None
|
| 110 |
-
|
| 111 |
-
model, tokenizer = load_model_func()
|
| 112 |
-
|
| 113 |
-
current_model, current_tokenizer = model, tokenizer
|
| 114 |
-
return model, tokenizer, predict_func
|
| 115 |
-
|
| 116 |
-
# def load_selected_model(model_name):
|
| 117 |
-
# # """Load model and tokenizer based on user selection."""
|
| 118 |
-
# global current_model, current_tokenizer
|
| 119 |
-
|
| 120 |
-
# # Free memory before loading a new model
|
| 121 |
-
# free_memory()
|
| 122 |
-
|
| 123 |
-
# if model_name not in MODEL_OPTIONS:
|
| 124 |
-
# st.error(f"⚠️ Model '{model_name}' not found in config!")
|
| 125 |
-
# return None, None
|
| 126 |
-
|
| 127 |
-
# model_info = MODEL_OPTIONS[model_name]
|
| 128 |
-
# hf_location = model_info["hf_location"]
|
| 129 |
-
|
| 130 |
-
# model_module = model_info["module_path"]
|
| 131 |
-
# # load_function = "load_model"
|
| 132 |
-
# # predict_function = "predict"
|
| 133 |
-
|
| 134 |
-
# load_function = model_info["load_function"]
|
| 135 |
-
# predict_function = model_info["predict_function"]
|
| 136 |
-
|
| 137 |
-
# # tokenizer_class = globals()[model_info["tokenizer_class"]]
|
| 138 |
-
# # model_class = globals()[model_info["model_class"]]
|
| 139 |
-
|
| 140 |
-
# # tokenizer = tokenizer_class.from_pretrained(hf_location)
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
# load_model_func = import_from_module(model_module, load_function)
|
| 144 |
-
# predict_func = import_from_module(model_module, predict_function)
|
| 145 |
-
|
| 146 |
-
# # # Load model
|
| 147 |
-
# # if model_info["type"] == "custom_checkpoint" or model_info["type"] == "custom_model":
|
| 148 |
-
# # model = torch.load(hf_location, map_location="cpu") # Load PyTorch model
|
| 149 |
-
# # elif model_info["type"] == "hf_automodel_finetuned_dbt3":
|
| 150 |
-
# # tokenizer_class = globals()[model_info["tokenizer_class"]]
|
| 151 |
-
# # model_class = globals()[model_info["model_class"]]
|
| 152 |
-
# # tokenizer = tokenizer_class.from_pretrained(hf_location)
|
| 153 |
-
# # model = model_class.from_pretrained(hf_location,
|
| 154 |
-
# # problem_type=model_info["problem_type"],
|
| 155 |
-
# # num_labels=model_info["num_labels"]
|
| 156 |
-
# # )
|
| 157 |
-
# # else:
|
| 158 |
-
# # st.error("Invalid model selection")
|
| 159 |
-
# # return None, None
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
# if load_model_func is None or predict_func is None:
|
| 163 |
-
# st.error("❌ Model functions could not be loaded!")
|
| 164 |
-
# return None, None
|
| 165 |
-
|
| 166 |
-
# # current_model, current_tokenizer = model, tokenizer # Store references
|
| 167 |
-
# # return model, tokenizer
|
| 168 |
-
|
| 169 |
-
# model, tokenizer = load_model_func(hf_location)
|
| 170 |
-
|
| 171 |
-
# current_model, current_tokenizer = model, tokenizer
|
| 172 |
-
# return model, tokenizer, predict_func
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
def predict(text, model, tokenizer, device, max_len=128):
|
| 177 |
-
# Tokenize and pad the input text
|
| 178 |
-
inputs = tokenizer(
|
| 179 |
-
text,
|
| 180 |
-
add_special_tokens=True,
|
| 181 |
-
padding=True,
|
| 182 |
-
truncation=False,
|
| 183 |
-
return_tensors="pt",
|
| 184 |
-
return_token_type_ids=False,
|
| 185 |
-
).to(device) # Move input tensors to the correct device
|
| 186 |
-
|
| 187 |
-
with torch.no_grad():
|
| 188 |
-
outputs = model(**inputs)
|
| 189 |
-
|
| 190 |
-
# Apply sigmoid activation (for BCEWithLogitsLoss)
|
| 191 |
-
probabilities = outputs.logits.cpu().numpy()
|
| 192 |
-
|
| 193 |
-
return probabilities
|
| 194 |
-
|
| 195 |
-
# def show_sentiment_analysis():
|
| 196 |
-
|
| 197 |
-
# Add your sentiment analysis code here
|
| 198 |
-
|
| 199 |
-
# user_input = st.text_input("Enter text for sentiment analysis:")
|
| 200 |
-
# user_input = st.text_area("Enter text for sentiment analysis:", height=200)
|
| 201 |
-
# user_input = st.text_area("Enter text for sentiment analysis:", max_chars=500)
|
| 202 |
-
|
| 203 |
-
def show_sentiment_analysis():
|
| 204 |
-
st.title("Stage 1: Sentiment Polarity Analysis")
|
| 205 |
-
st.write("This section will handle sentiment analysis.")
|
| 206 |
-
|
| 207 |
-
if "selected_model" not in st.session_state:
|
| 208 |
-
st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0] # Default selection
|
| 209 |
-
|
| 210 |
-
if "clear_output" not in st.session_state:
|
| 211 |
-
st.session_state.clear_output = False
|
| 212 |
-
|
| 213 |
-
st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")
|
| 214 |
-
|
| 215 |
-
selected_model = st.session_state.selected_model
|
| 216 |
-
|
| 217 |
-
if selected_model not in MODEL_OPTIONS:
|
| 218 |
-
st.error(f"❌ Selected model '{selected_model}' not found!")
|
| 219 |
-
st.stop()
|
| 220 |
-
|
| 221 |
-
st.session_state.clear_output = True # Reset output when model changes
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
# st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
|
| 225 |
-
# st.write("DEBUG: Selected Model:", MODEL_OPTIONS[selected_model]) # ✅ Check selected model
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
user_input = st.text_input("Enter text for sentiment analysis:")
|
| 229 |
-
|
| 230 |
-
if user_input:
|
| 231 |
-
# Make prediction
|
| 232 |
-
|
| 233 |
-
# model, tokenizer = load_model()
|
| 234 |
-
# model, tokenizer = load_selected_model(selected_model)
|
| 235 |
-
|
| 236 |
-
model, tokenizer, predict_func = load_selected_model(selected_model)
|
| 237 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 238 |
-
|
| 239 |
-
if model is None:
|
| 240 |
-
st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
|
| 241 |
-
st.stop()
|
| 242 |
-
|
| 243 |
-
model.to(device)
|
| 244 |
-
|
| 245 |
-
# predictions = predict(user_input, model, tokenizer, device)
|
| 246 |
-
|
| 247 |
-
predictions = predict_func(user_input, model, tokenizer, device)
|
| 248 |
-
|
| 249 |
-
# Squeeze predictions to remove extra dimensions
|
| 250 |
-
predictions_array = predictions.squeeze()
|
| 251 |
-
|
| 252 |
-
# Convert to binary predictions (argmax)
|
| 253 |
-
binary_predictions = np.zeros_like(predictions_array)
|
| 254 |
-
max_indices = np.argmax(predictions_array)
|
| 255 |
-
binary_predictions[max_indices] = 1
|
| 256 |
-
|
| 257 |
-
# Display raw predictions
|
| 258 |
-
st.write(f"**Predicted Sentiment Scores:** {predictions_array}")
|
| 259 |
-
|
| 260 |
-
# Display binary classification result
|
| 261 |
-
st.write(f"**Predicted Sentiment:**")
|
| 262 |
-
st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
|
| 263 |
-
# st.write(f"**NEUTRAL:** {binary_predictions[1]}")
|
| 264 |
-
# st.write(f"**POSITIVE:** {binary_predictions[2]}")
|
| 265 |
-
|
| 266 |
-
# 1️⃣ **Polar Plot (Plotly)**
|
| 267 |
-
sentiment_polarities = predictions_array.tolist()
|
| 268 |
-
fig_polar = px.line_polar(
|
| 269 |
-
pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)),
|
| 270 |
-
r='r', theta='theta', line_close=True
|
| 271 |
-
)
|
| 272 |
-
st.plotly_chart(fig_polar)
|
| 273 |
-
|
| 274 |
-
# 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
|
| 275 |
-
normalized_predictions = predictions_array / predictions_array.sum()
|
| 276 |
-
|
| 277 |
-
fig, ax = plt.subplots(figsize=(8, 2))
|
| 278 |
-
left = 0
|
| 279 |
-
for i in range(len(normalized_predictions)):
|
| 280 |
-
ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
|
| 281 |
-
left += normalized_predictions[i]
|
| 282 |
-
|
| 283 |
-
# Configure the chart
|
| 284 |
-
ax.set_xlim(0, 1)
|
| 285 |
-
ax.set_yticks([])
|
| 286 |
-
ax.set_xticks(np.arange(0, 1.1, 0.1))
|
| 287 |
-
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
|
| 288 |
-
plt.title("Sentiment Polarity Prediction Distribution")
|
| 289 |
-
|
| 290 |
-
# Display in Streamlit
|
| 291 |
-
st.pyplot(fig)
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
if __name__ == "__main__":
|
| 296 |
show_sentiment_analysis()
|
|
|
|
| 1 |
+
from imports import *
|
| 2 |
+
import importlib.util
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
import joblib
|
| 6 |
+
|
| 7 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
|
| 8 |
+
|
| 9 |
+
# from hmv_cfg_base_stage1.model1 import load_model as load_model1
|
| 10 |
+
# from hmv_cfg_base_stage1.model1 import predict as predict1
|
| 11 |
+
|
| 12 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 13 |
+
CONFIG_STAGE1 = os.path.join(BASE_DIR, "config", "stage1_models.json")
|
| 14 |
+
LOADERS_STAGE1 = os.path.join(BASE_DIR, "hmv-cfg-base-stage1")
|
| 15 |
+
|
| 16 |
+
# Load the model and tokenizer
|
| 17 |
+
# model_name = "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8"
|
| 18 |
+
# tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 19 |
+
# model = AutoModel.from_pretrained(model_name)
|
| 20 |
+
|
| 21 |
+
# Output labels for stage-1 polarity classification. Index order must match
# the model's logit order (0 = negative, 1 = neutral, 2 = positive) — the
# display code in show_sentiment_analysis() indexes predictions this way.
SENTIMENT_POLARITY_LABELS = [
    "negative", "neutral", "positive"
]

# Currently loaded model/tokenizer. Kept at module level so free_memory()
# can drop the references when the user switches models.
current_model = None
current_tokenizer = None
|
| 27 |
+
|
| 28 |
+
# Enabling resource caching: Streamlit reruns the script on every widget
# interaction, so cache the parsed registry instead of re-reading the file.
@st.cache_resource
def load_model_config():
    """Load the stage-1 model registry from CONFIG_STAGE1.

    Returns:
        tuple: ``(model_data, model_options)`` where ``model_data`` maps model
        ids to their full config dicts, and ``model_options`` maps each
        model's display name to the same dict (used to populate the
        model-selection dropdown).
    """
    # Explicit encoding: the JSON may contain non-ASCII model names and the
    # OS default codec (e.g. cp1252 on Windows) could misdecode it.
    with open(CONFIG_STAGE1, "r", encoding="utf-8") as f:
        model_data = json.load(f)

    model_options = {v["name"]: v for v in model_data.values()}  # Extract names for dropdown
    return model_data, model_options
|
| 37 |
+
|
| 38 |
+
# Load the registry once at import time; MODEL_OPTIONS keys feed the dropdown.
MODEL_DATA, MODEL_OPTIONS = load_model_config()
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# def load_model():
|
| 43 |
+
# model = DebertaV2ForSequenceClassification.from_pretrained(model_name)
|
| 44 |
+
# tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
|
| 45 |
+
# return model, tokenizer
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ✅ Dynamically Import Model Functions
def import_from_module(module_name, function_name):
    """Resolve ``function_name`` from ``module_name`` at runtime.

    Returns the callable on success; on a missing module or attribute,
    reports the problem in the Streamlit UI and returns None.
    """
    try:
        target_module = importlib.import_module(module_name)
        return getattr(target_module, function_name)
    except (ModuleNotFoundError, AttributeError) as err:
        st.error(f"❌ Import Error: {err}")
        return None
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def free_memory():
    """Release CPU and GPU memory held by the previously loaded model."""
    global current_model, current_tokenizer

    # Drop the strong references so the objects become collectable.
    if current_model is not None:
        del current_model
        current_model = None

    if current_tokenizer is not None:
        del current_tokenizer
        current_tokenizer = None

    gc.collect()  # reclaim CPU-side objects immediately

    if torch.cuda.is_available():
        # Return cached CUDA blocks to the driver and clean up IPC handles.
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

    # On CPU-only hosts, refresh memory stats (best effort — never fatal).
    try:
        if not torch.cuda.is_available():
            psutil.virtual_memory()
    except Exception as exc:
        print(f"Memory cleanup error: {exc}")
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def load_selected_model(model_name):
    """Load the model, tokenizer and predict function configured for `model_name`.

    Frees any previously loaded model first, then dynamically imports the
    loader and predictor functions named in the stage-1 config entry.

    Args:
        model_name: Display name of the model; must be a key of MODEL_OPTIONS.

    Returns:
        tuple: (model, tokenizer, predict_func), or (None, None, None) when
        the name is unknown or the configured functions cannot be imported.
    """
    global current_model, current_tokenizer

    # Evict the previously loaded model from CPU/GPU memory before loading.
    free_memory()

    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
    # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model

    if model_name not in MODEL_OPTIONS:
        st.error(f"⚠️ Model '{model_name}' not found in config!")
        return None, None, None

    model_info = MODEL_OPTIONS[model_name]
    # NOTE(review): hf_location is read here but never passed to
    # load_model_func() below — presumably the loader module hard-codes its
    # checkpoint location; confirm against the loader implementations.
    hf_location = model_info["hf_location"]

    # Dotted module path plus the loader/predictor function names to import.
    model_module = model_info["module_path"]
    load_function = model_info["load_function"]
    predict_function = model_info["predict_function"]

    load_model_func = import_from_module(model_module, load_function)
    predict_func = import_from_module(model_module, predict_function)

    if load_model_func is None or predict_func is None:
        st.error("❌ Model functions could not be loaded!")
        return None, None, None

    model, tokenizer = load_model_func()

    # Remember the live objects so free_memory() can release them later.
    current_model, current_tokenizer = model, tokenizer
    return model, tokenizer, predict_func
|
| 115 |
+
|
| 116 |
+
# def load_selected_model(model_name):
|
| 117 |
+
# # """Load model and tokenizer based on user selection."""
|
| 118 |
+
# global current_model, current_tokenizer
|
| 119 |
+
|
| 120 |
+
# # Free memory before loading a new model
|
| 121 |
+
# free_memory()
|
| 122 |
+
|
| 123 |
+
# if model_name not in MODEL_OPTIONS:
|
| 124 |
+
# st.error(f"⚠️ Model '{model_name}' not found in config!")
|
| 125 |
+
# return None, None
|
| 126 |
+
|
| 127 |
+
# model_info = MODEL_OPTIONS[model_name]
|
| 128 |
+
# hf_location = model_info["hf_location"]
|
| 129 |
+
|
| 130 |
+
# model_module = model_info["module_path"]
|
| 131 |
+
# # load_function = "load_model"
|
| 132 |
+
# # predict_function = "predict"
|
| 133 |
+
|
| 134 |
+
# load_function = model_info["load_function"]
|
| 135 |
+
# predict_function = model_info["predict_function"]
|
| 136 |
+
|
| 137 |
+
# # tokenizer_class = globals()[model_info["tokenizer_class"]]
|
| 138 |
+
# # model_class = globals()[model_info["model_class"]]
|
| 139 |
+
|
| 140 |
+
# # tokenizer = tokenizer_class.from_pretrained(hf_location)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# load_model_func = import_from_module(model_module, load_function)
|
| 144 |
+
# predict_func = import_from_module(model_module, predict_function)
|
| 145 |
+
|
| 146 |
+
# # # Load model
|
| 147 |
+
# # if model_info["type"] == "custom_checkpoint" or model_info["type"] == "custom_model":
|
| 148 |
+
# # model = torch.load(hf_location, map_location="cpu") # Load PyTorch model
|
| 149 |
+
# # elif model_info["type"] == "hf_automodel_finetuned_dbt3":
|
| 150 |
+
# # tokenizer_class = globals()[model_info["tokenizer_class"]]
|
| 151 |
+
# # model_class = globals()[model_info["model_class"]]
|
| 152 |
+
# # tokenizer = tokenizer_class.from_pretrained(hf_location)
|
| 153 |
+
# # model = model_class.from_pretrained(hf_location,
|
| 154 |
+
# # problem_type=model_info["problem_type"],
|
| 155 |
+
# # num_labels=model_info["num_labels"]
|
| 156 |
+
# # )
|
| 157 |
+
# # else:
|
| 158 |
+
# # st.error("Invalid model selection")
|
| 159 |
+
# # return None, None
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
# if load_model_func is None or predict_func is None:
|
| 163 |
+
# st.error("❌ Model functions could not be loaded!")
|
| 164 |
+
# return None, None
|
| 165 |
+
|
| 166 |
+
# # current_model, current_tokenizer = model, tokenizer # Store references
|
| 167 |
+
# # return model, tokenizer
|
| 168 |
+
|
| 169 |
+
# model, tokenizer = load_model_func(hf_location)
|
| 170 |
+
|
| 171 |
+
# current_model, current_tokenizer = model, tokenizer
|
| 172 |
+
# return model, tokenizer, predict_func
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def predict(text, model, tokenizer, device, max_len=128):
    """Run the sentiment model on `text` and return its raw logits.

    Args:
        text: Input string (or list of strings) to classify.
        model: Sequence-classification model, already moved to `device`.
        tokenizer: Matching Hugging Face tokenizer.
        device: torch.device to which the tokenized inputs are moved.
        max_len: Maximum token length; longer inputs are truncated.

    Returns:
        numpy.ndarray of shape (batch, num_labels) containing unnormalized
        logits — no sigmoid/softmax is applied here; downstream code
        normalizes or argmaxes as needed.
    """
    # Tokenize and pad the input text. Truncation is enabled so max_len is
    # actually honoured (previously truncation=False silently ignored it and
    # over-long inputs could exceed the model's positional limit).
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        padding=True,
        truncation=True,
        max_length=max_len,
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)  # Move input tensors to the correct device

    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = model(**inputs)

    probabilities = outputs.logits.cpu().numpy()

    return probabilities
|
| 194 |
+
|
| 195 |
+
# def show_sentiment_analysis():
|
| 196 |
+
|
| 197 |
+
# Add your sentiment analysis code here
|
| 198 |
+
|
| 199 |
+
# user_input = st.text_input("Enter text for sentiment analysis:")
|
| 200 |
+
# user_input = st.text_area("Enter text for sentiment analysis:", height=200)
|
| 201 |
+
# user_input = st.text_area("Enter text for sentiment analysis:", max_chars=500)
|
| 202 |
+
|
| 203 |
+
def show_sentiment_analysis():
    """Render the stage-1 sentiment-polarity page.

    Lets the user pick a model and enter a text, then shows the raw
    prediction scores, an argmax one-hot classification, a Plotly polar
    plot, and a normalized Matplotlib bar chart of the three polarities.
    """
    st.title("Stage 1: Sentiment Polarity Analysis")
    st.write("This section will handle sentiment analysis.")

    # Initialise session state on first run so widget state survives reruns.
    if "selected_model" not in st.session_state:
        st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default selection

    if "clear_output" not in st.session_state:
        st.session_state.clear_output = False

    # The selectbox writes its value into st.session_state["selected_model"].
    st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")

    selected_model = st.session_state.selected_model

    if selected_model not in MODEL_OPTIONS:
        st.error(f"❌ Selected model '{selected_model}' not found!")
        st.stop()

    st.session_state.clear_output = True  # Reset output when model changes

    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[selected_model]) # ✅ Check selected model

    user_input = st.text_input("Enter text for sentiment analysis:")

    if user_input:
        # Make prediction

        # model, tokenizer = load_model()
        # model, tokenizer = load_selected_model(selected_model)

        # Loads model + tokenizer and the matching predict function
        # dynamically based on the stage-1 config.
        model, tokenizer, predict_func = load_selected_model(selected_model)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if model is None:
            st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
            st.stop()

        model.to(device)

        # predictions = predict(user_input, model, tokenizer, device)

        predictions = predict_func(user_input, model, tokenizer, device)

        # Squeeze predictions to remove extra dimensions — assumes a single
        # input, giving a 1-D array of per-class scores. TODO confirm shape.
        predictions_array = predictions.squeeze()

        # Convert to binary predictions (argmax): one-hot of the top class.
        binary_predictions = np.zeros_like(predictions_array)
        max_indices = np.argmax(predictions_array)
        binary_predictions[max_indices] = 1

        # Display raw predictions
        st.write(f"**Predicted Sentiment Scores:** {predictions_array}")

        # Display binary classification result; index order follows
        # SENTIMENT_POLARITY_LABELS (negative, neutral, positive).
        st.write(f"**Predicted Sentiment:**")
        st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
        # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
        # st.write(f"**POSITIVE:** {binary_predictions[2]}")

        # 1️⃣ **Polar Plot (Plotly)** — one radial axis per polarity label.
        sentiment_polarities = predictions_array.tolist()
        fig_polar = px.line_polar(
            pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)),
            r='r', theta='theta', line_close=True
        )
        st.plotly_chart(fig_polar)

        # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)** — scores
        # rescaled to sum to 1 and stacked as one horizontal bar.
        normalized_predictions = predictions_array / predictions_array.sum()

        fig, ax = plt.subplots(figsize=(8, 2))
        left = 0
        for i in range(len(normalized_predictions)):
            ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
            left += normalized_predictions[i]

        # Configure the chart
        ax.set_xlim(0, 1)
        ax.set_yticks([])
        ax.set_xticks(np.arange(0, 1.1, 0.1))
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
        plt.title("Sentiment Polarity Prediction Distribution")

        # Display in Streamlit
        st.pyplot(fig)
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
# Allow running this page on its own (e.g. `streamlit run` on this file)
# instead of through the multi-page app entry point.
if __name__ == "__main__":
    show_sentiment_analysis()
|