Spaces:
Sleeping
Sleeping
Commit
·
26c905c
1
Parent(s):
0a11807
Final working Streamlit app for deployment
Browse files- app/app.py +79 -23
- artifacts/metrics.json +11 -0
- artifacts/sentiment_pipeline.joblib +0 -0
- config/settings.yaml +19 -59
- data/datatraining_data.csv +11 -0
- data/twitter.csv +150 -6
- scripts/create_dataset.py +42 -0
- scripts/create_dataset.txt +42 -0
- scripts/train.py +36 -46
app/app.py
CHANGED
|
@@ -1,45 +1,47 @@
|
|
| 1 |
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
st.title("Twitter Sentiment Intelligence")
|
|
|
|
|
|
|
| 4 |
try:
|
| 5 |
-
# Main app logic
|
| 6 |
-
"""Streamlit front-end for the Deloitte-ready Twitter Sentiment Intelligence dashboard."""
|
| 7 |
-
import json
|
| 8 |
-
import sys
|
| 9 |
-
from pathlib import Path
|
| 10 |
-
from typing import Dict
|
| 11 |
-
import pandas as pd
|
| 12 |
# -------------------------------------------------------------------------
|
| 13 |
-
# Path setup
|
| 14 |
# -------------------------------------------------------------------------
|
| 15 |
ROOT = Path(__file__).resolve().parents[1]
|
| 16 |
SRC_PATH = ROOT / "src"
|
| 17 |
if str(SRC_PATH) not in sys.path:
|
| 18 |
sys.path.insert(0, str(SRC_PATH))
|
|
|
|
| 19 |
from twitter_sentiment.config import load_config
|
| 20 |
from twitter_sentiment.predictor import load_artifacts, predict_with_threshold
|
|
|
|
| 21 |
# -------------------------------------------------------------------------
|
| 22 |
-
#
|
| 23 |
-
# -------------------------------------------------------------------------
|
| 24 |
-
st.set_page_config(
|
| 25 |
-
page_title="Twitter Sentiment Intelligence",
|
| 26 |
-
page_icon="💼",
|
| 27 |
-
layout="wide",
|
| 28 |
-
)
|
| 29 |
-
# -------------------------------------------------------------------------
|
| 30 |
-
# Cached resource loading (config, pipeline, metrics)
|
| 31 |
-
# NOTE: artifacts/sentiment_pipeline.joblib is referenced relatively
|
| 32 |
-
# If artifacts directory or file does not exist, run: python scripts/train.py
|
| 33 |
# -------------------------------------------------------------------------
|
| 34 |
@st.cache_resource(show_spinner=False)
def _load_dependencies():
    """Return the ``(config, pipeline, metrics)`` triple used by the dashboard.

    The configuration comes from ``load_config()`` called with its defaults and
    is then handed to ``load_artifacts``, which must yield exactly two values
    (pipeline, metrics). The function is registered with ``st.cache_resource``
    and the spinner is disabled.
    """
    settings = load_config()
    # Original author's note: load_artifacts references
    # 'artifacts/sentiment_pipeline.joblib' relatively.
    artifacts = load_artifacts(settings)
    model, scores = artifacts
    return settings, model, scores
|
|
|
|
| 41 |
# -------------------------------------------------------------------------
|
| 42 |
-
#
|
| 43 |
# -------------------------------------------------------------------------
|
| 44 |
def format_probabilities(probabilities: Dict[str, float]) -> pd.DataFrame:
|
| 45 |
"""Convert prediction probabilities to a styled DataFrame for display."""
|
|
@@ -48,15 +50,69 @@ try:
|
|
| 48 |
.sort_values("confidence", ascending=False)
|
| 49 |
.style.format({"confidence": "{:.2%}"})
|
| 50 |
)
|
|
|
|
| 51 |
# -------------------------------------------------------------------------
|
| 52 |
# Main Streamlit Application
|
| 53 |
# -------------------------------------------------------------------------
|
| 54 |
def main() -> None:
|
| 55 |
"""Render the Deloitte-ready Twitter Sentiment Intelligence Dashboard."""
|
| 56 |
config, pipeline, metrics = _load_dependencies()
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
if __name__ == "__main__":
|
| 60 |
main()
|
|
|
|
| 61 |
except Exception as e:
|
| 62 |
st.error(f"Startup failed: {e}")
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict
|
| 6 |
+
import pandas as pd
|
| 7 |
import streamlit as st
|
| 8 |
+
|
| 9 |
+
# -------------------------------------------------------------------------
|
| 10 |
+
# Page Configuration
|
| 11 |
+
# -------------------------------------------------------------------------
|
| 12 |
+
st.set_page_config(
|
| 13 |
+
page_title="Twitter Sentiment Intelligence",
|
| 14 |
+
page_icon="💼",
|
| 15 |
+
layout="wide",
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
st.title("Twitter Sentiment Intelligence")
|
| 19 |
+
st.caption("Streamlit front-end for the Deloitte-ready Twitter Sentiment Intelligence dashboard.")
|
| 20 |
+
|
| 21 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# -------------------------------------------------------------------------
|
| 23 |
+
# Path setup
|
| 24 |
# -------------------------------------------------------------------------
|
| 25 |
ROOT = Path(__file__).resolve().parents[1]
|
| 26 |
SRC_PATH = ROOT / "src"
|
| 27 |
if str(SRC_PATH) not in sys.path:
|
| 28 |
sys.path.insert(0, str(SRC_PATH))
|
| 29 |
+
|
| 30 |
from twitter_sentiment.config import load_config
|
| 31 |
from twitter_sentiment.predictor import load_artifacts, predict_with_threshold
|
| 32 |
+
|
| 33 |
# -------------------------------------------------------------------------
|
| 34 |
+
# Cached dependencies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# -------------------------------------------------------------------------
|
| 36 |
@st.cache_resource(show_spinner=False)
def _load_dependencies():
    """Return the ``(config, pipeline, metrics)`` triple used by the dashboard.

    The configuration comes from ``load_config()`` called with its defaults and
    is then handed to ``load_artifacts``, which must yield exactly two values
    (pipeline, metrics). The function is registered with ``st.cache_resource``
    and the spinner is disabled.
    """
    settings = load_config()
    artifacts = load_artifacts(settings)
    model, scores = artifacts
    return settings, model, scores
|
| 42 |
+
|
| 43 |
# -------------------------------------------------------------------------
|
| 44 |
+
# Format probabilities helper
|
| 45 |
# -------------------------------------------------------------------------
|
| 46 |
def format_probabilities(probabilities: Dict[str, float]) -> pd.DataFrame:
|
| 47 |
"""Convert prediction probabilities to a styled DataFrame for display."""
|
|
|
|
| 50 |
.sort_values("confidence", ascending=False)
|
| 51 |
.style.format({"confidence": "{:.2%}"})
|
| 52 |
)
|
| 53 |
+
|
| 54 |
# -------------------------------------------------------------------------
|
| 55 |
# Main Streamlit Application
|
| 56 |
# -------------------------------------------------------------------------
|
| 57 |
def main() -> None:
    """Render the Deloitte-ready Twitter Sentiment Intelligence Dashboard.

    Layout:
        * Sidebar: the pipeline's class labels, Macro F1 / Accuracy (when
          metrics are available), a download button for the metrics as JSON,
          and an integration tip.
        * "Predict" tab: a text area and a button that runs
          ``predict_with_threshold`` on the entered text and shows the
          predicted label with its formatted probabilities.
        * "Model Governance" tab: every metric as a one-column table.
        * Footer caption.
    """
    config, pipeline, metrics = _load_dependencies()

    # ---------------------- Sidebar ----------------------
    with st.sidebar:
        st.header("📊 Model Snapshot")
        # str() each label so non-string class labels cannot break the join.
        st.write("**Classes:**", ", ".join(str(cls) for cls in pipeline.classes_))
        if metrics:
            st.metric("Macro F1", f"{metrics.get('f1_macro', 0.0):.2f}")
            st.metric("Accuracy", f"{metrics.get('accuracy', 0.0):.2f}")
        else:
            st.info("Run `python scripts/train.py` to generate metrics.")
        # Offered even without metrics; an empty JSON object is served then.
        st.download_button(
            label="⬇️ Download Metrics JSON",
            data=json.dumps(metrics or {}, indent=2).encode("utf-8"),
            file_name="metrics.json",
            mime="application/json",
        )
        st.info(
            "🚀 Tip: integrate Oracle Autonomous Database by updating `config/settings.yaml`."
        )

    # ---------------------- Tabs ----------------------
    tab_predict, tab_metrics = st.tabs(["🔮 Predict", "⚙️ Model Governance"])

    # ---------------------- Prediction Tab ----------------------
    with tab_predict:
        st.subheader("Real-Time Sentiment Assessment")
        user_input = st.text_area("Enter a tweet or customer comment:", height=150)
        if st.button("Run Analysis", type="primary"):
            if not user_input.strip():
                st.warning("⚠️ Please enter text to analyse.")
            else:
                try:
                    label, probabilities = predict_with_threshold(user_input, config)
                except Exception as exc:
                    # Report inference problems here; otherwise they reach the
                    # module-level handler and are shown as "Startup failed".
                    st.error(f"Prediction failed: {exc}")
                else:
                    st.success(f"Predicted Sentiment: **{label.title()}**")
                    st.dataframe(
                        format_probabilities(probabilities),
                        use_container_width=True,
                    )

    # ---------------------- Metrics Tab ----------------------
    with tab_metrics:
        st.subheader("Operational Metrics")
        if metrics:
            # One row per metric, single "value" column. A Series accepts any
            # value per key, whereas DataFrame(metrics, index=[...]) requires
            # every metric to be a scalar.
            metrics_df = pd.Series(metrics, name="value").to_frame()
            st.dataframe(metrics_df, use_container_width=True)
        else:
            st.info("Metrics will appear after the first training run (see `scripts/train.py`).")

    # ---------------------- Footer ----------------------
    st.markdown("---")
    st.caption("© 2025 Deloitte-aligned Sentiment Analytics Accelerator")
|
| 110 |
+
|
| 111 |
+
# -------------------------------------------------------------------------
|
| 112 |
+
# Entry Point
|
| 113 |
+
# -------------------------------------------------------------------------
|
| 114 |
if __name__ == "__main__":
|
| 115 |
main()
|
| 116 |
+
|
| 117 |
except Exception as e:
|
| 118 |
st.error(f"Startup failed: {e}")
|
artifacts/metrics.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"accuracy": 0.0,
|
| 3 |
+
"f1_macro": 0.0,
|
| 4 |
+
"precision_macro": 0.0,
|
| 5 |
+
"recall_macro": 0.0,
|
| 6 |
+
"cv_mean": 0.16666666666666666,
|
| 7 |
+
"cv_std": 0.0,
|
| 8 |
+
"negative_f1": 0.0,
|
| 9 |
+
"neutral_f1": 0.0,
|
| 10 |
+
"positive_f1": 0.0
|
| 11 |
+
}
|
artifacts/sentiment_pipeline.joblib
ADDED
|
Binary file (3.16 kB). View file
|
|
|
config/settings.yaml
CHANGED
|
@@ -1,59 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
"strip_mentions": true,
|
| 21 |
-
"strip_hashtags": false,
|
| 22 |
-
"remove_punctuation": true,
|
| 23 |
-
"normalize_whitespace": true,
|
| 24 |
-
"stemming": false,
|
| 25 |
-
"lemmatize": false
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"artifact_dir": "artifacts",
|
| 29 |
-
"pipeline_filename": "sentiment_pipeline.joblib",
|
| 30 |
-
"vectorizer_filename": "tfidf_vectorizer.joblib",
|
| 31 |
-
"model_filename": "logistic_model.joblib",
|
| 32 |
-
"test_size": 0.2,
|
| 33 |
-
"random_state": 42,
|
| 34 |
-
"max_features": 5000,
|
| 35 |
-
"ngram_range": [1, 2],
|
| 36 |
-
"class_weight": "balanced"
|
| 37 |
-
},
|
| 38 |
-
"training": {
|
| 39 |
-
"cv_folds": 5,
|
| 40 |
-
"scoring": "f1_macro",
|
| 41 |
-
"n_jobs": -1,
|
| 42 |
-
"verbose": 1,
|
| 43 |
-
"probability_thresholds": {
|
| 44 |
-
"positive": 0.55,
|
| 45 |
-
"negative": 0.45
|
| 46 |
-
}
|
| 47 |
-
},
|
| 48 |
-
"monitoring": {
|
| 49 |
-
"enable_model_drift_checks": true,
|
| 50 |
-
"psi_threshold": 0.2
|
| 51 |
-
},
|
| 52 |
-
"oracle_integration": {
|
| 53 |
-
"enabled": false,
|
| 54 |
-
"wallet_location": "~/.oci/wallet",
|
| 55 |
-
"user": "",
|
| 56 |
-
"dsn": "",
|
| 57 |
-
"sql_query": "SELECT text, sentiment FROM twitter_training_data"
|
| 58 |
-
}
|
| 59 |
-
}
|
|
|
|
| 1 |
+
data:
|
| 2 |
+
train_path: data/twitter.csv
|
| 3 |
+
text_column: text
|
| 4 |
+
target_column: sentiment
|
| 5 |
+
class_order: [negative, neutral, positive]
|
| 6 |
+
|
| 7 |
+
model:
|
| 8 |
+
artifact_dir: artifacts
|
| 9 |
+
pipeline_filename: sentiment_pipeline.joblib
|
| 10 |
+
test_size: 0.2
|
| 11 |
+
random_state: 42
|
| 12 |
+
max_features: 5000
|
| 13 |
+
ngram_range: [1, 2]
|
| 14 |
+
class_weight: balanced
|
| 15 |
+
|
| 16 |
+
training:
|
| 17 |
+
cv_folds: 5
|
| 18 |
+
scoring: f1_macro
|
| 19 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/datatraining_data.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
text,sentiment
|
| 2 |
+
I love this product,positive
|
| 3 |
+
This is the worst experience ever,negative
|
| 4 |
+
The service was okay,neutral
|
| 5 |
+
Weather is so nice today,positive
|
| 6 |
+
I hate delays,negative
|
| 7 |
+
The meeting went fine,neutral
|
| 8 |
+
Excellent work team!,positive
|
| 9 |
+
I'm feeling bad,negative
|
| 10 |
+
Just a normal day,neutral
|
| 11 |
+
Great job everyone!,positive
|
data/twitter.csv
CHANGED
|
@@ -1,7 +1,151 @@
|
|
| 1 |
text,sentiment
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
"The
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
text,sentiment
|
| 2 |
+
I love flying with this airline!,positive
|
| 3 |
+
Amazing service and friendly staff!,positive
|
| 4 |
+
Super smooth check-in process!,positive
|
| 5 |
+
"The new update works perfectly, great job!",positive
|
| 6 |
+
Had an excellent experience today!,positive
|
| 7 |
+
Fast delivery and awesome packaging!,positive
|
| 8 |
+
I’m so happy with my new phone!,positive
|
| 9 |
+
Totally loved the new design!,positive
|
| 10 |
+
Everything was handled professionally.,positive
|
| 11 |
+
This app is getting better every update!,positive
|
| 12 |
+
"The service was okay, nothing special.",neutral
|
| 13 |
+
"I used the app today, it was fine.",neutral
|
| 14 |
+
The product arrived on time.,neutral
|
| 15 |
+
It works as expected.,neutral
|
| 16 |
+
"No complaints, just a normal experience.",neutral
|
| 17 |
+
The new feature is alright.,neutral
|
| 18 |
+
It’s an average performance overall.,neutral
|
| 19 |
+
"Neither good nor bad, just fine.",neutral
|
| 20 |
+
"Nothing remarkable, just standard.",neutral
|
| 21 |
+
"It’s okay, could be improved.",neutral
|
| 22 |
+
This is the worst service ever.,negative
|
| 23 |
+
The staff were rude and unhelpful.,negative
|
| 24 |
+
"The app keeps crashing, very frustrating.",negative
|
| 25 |
+
My package arrived late and damaged.,negative
|
| 26 |
+
Customer support didn’t respond at all.,negative
|
| 27 |
+
"Horrible experience, will never come back.",negative
|
| 28 |
+
This company doesn’t care about customers.,negative
|
| 29 |
+
Terrible product quality.,negative
|
| 30 |
+
The flight was delayed and chaotic.,negative
|
| 31 |
+
Completely disappointed with the outcome.,negative
|
| 32 |
+
I love flying with this airline!,positive
|
| 33 |
+
Amazing service and friendly staff!,positive
|
| 34 |
+
Super smooth check-in process!,positive
|
| 35 |
+
"The new update works perfectly, great job!",positive
|
| 36 |
+
Had an excellent experience today!,positive
|
| 37 |
+
Fast delivery and awesome packaging!,positive
|
| 38 |
+
I’m so happy with my new phone!,positive
|
| 39 |
+
Totally loved the new design!,positive
|
| 40 |
+
Everything was handled professionally.,positive
|
| 41 |
+
This app is getting better every update!,positive
|
| 42 |
+
"The service was okay, nothing special.",neutral
|
| 43 |
+
"I used the app today, it was fine.",neutral
|
| 44 |
+
The product arrived on time.,neutral
|
| 45 |
+
It works as expected.,neutral
|
| 46 |
+
"No complaints, just a normal experience.",neutral
|
| 47 |
+
The new feature is alright.,neutral
|
| 48 |
+
It’s an average performance overall.,neutral
|
| 49 |
+
"Neither good nor bad, just fine.",neutral
|
| 50 |
+
"Nothing remarkable, just standard.",neutral
|
| 51 |
+
"It’s okay, could be improved.",neutral
|
| 52 |
+
This is the worst service ever.,negative
|
| 53 |
+
The staff were rude and unhelpful.,negative
|
| 54 |
+
"The app keeps crashing, very frustrating.",negative
|
| 55 |
+
My package arrived late and damaged.,negative
|
| 56 |
+
Customer support didn’t respond at all.,negative
|
| 57 |
+
"Horrible experience, will never come back.",negative
|
| 58 |
+
This company doesn’t care about customers.,negative
|
| 59 |
+
Terrible product quality.,negative
|
| 60 |
+
The flight was delayed and chaotic.,negative
|
| 61 |
+
Completely disappointed with the outcome.,negative
|
| 62 |
+
I love flying with this airline!,positive
|
| 63 |
+
Amazing service and friendly staff!,positive
|
| 64 |
+
Super smooth check-in process!,positive
|
| 65 |
+
"The new update works perfectly, great job!",positive
|
| 66 |
+
Had an excellent experience today!,positive
|
| 67 |
+
Fast delivery and awesome packaging!,positive
|
| 68 |
+
I’m so happy with my new phone!,positive
|
| 69 |
+
Totally loved the new design!,positive
|
| 70 |
+
Everything was handled professionally.,positive
|
| 71 |
+
This app is getting better every update!,positive
|
| 72 |
+
"The service was okay, nothing special.",neutral
|
| 73 |
+
"I used the app today, it was fine.",neutral
|
| 74 |
+
The product arrived on time.,neutral
|
| 75 |
+
It works as expected.,neutral
|
| 76 |
+
"No complaints, just a normal experience.",neutral
|
| 77 |
+
The new feature is alright.,neutral
|
| 78 |
+
It’s an average performance overall.,neutral
|
| 79 |
+
"Neither good nor bad, just fine.",neutral
|
| 80 |
+
"Nothing remarkable, just standard.",neutral
|
| 81 |
+
"It’s okay, could be improved.",neutral
|
| 82 |
+
This is the worst service ever.,negative
|
| 83 |
+
The staff were rude and unhelpful.,negative
|
| 84 |
+
"The app keeps crashing, very frustrating.",negative
|
| 85 |
+
My package arrived late and damaged.,negative
|
| 86 |
+
Customer support didn’t respond at all.,negative
|
| 87 |
+
"Horrible experience, will never come back.",negative
|
| 88 |
+
This company doesn’t care about customers.,negative
|
| 89 |
+
Terrible product quality.,negative
|
| 90 |
+
The flight was delayed and chaotic.,negative
|
| 91 |
+
Completely disappointed with the outcome.,negative
|
| 92 |
+
I love flying with this airline!,positive
|
| 93 |
+
Amazing service and friendly staff!,positive
|
| 94 |
+
Super smooth check-in process!,positive
|
| 95 |
+
"The new update works perfectly, great job!",positive
|
| 96 |
+
Had an excellent experience today!,positive
|
| 97 |
+
Fast delivery and awesome packaging!,positive
|
| 98 |
+
I’m so happy with my new phone!,positive
|
| 99 |
+
Totally loved the new design!,positive
|
| 100 |
+
Everything was handled professionally.,positive
|
| 101 |
+
This app is getting better every update!,positive
|
| 102 |
+
"The service was okay, nothing special.",neutral
|
| 103 |
+
"I used the app today, it was fine.",neutral
|
| 104 |
+
The product arrived on time.,neutral
|
| 105 |
+
It works as expected.,neutral
|
| 106 |
+
"No complaints, just a normal experience.",neutral
|
| 107 |
+
The new feature is alright.,neutral
|
| 108 |
+
It’s an average performance overall.,neutral
|
| 109 |
+
"Neither good nor bad, just fine.",neutral
|
| 110 |
+
"Nothing remarkable, just standard.",neutral
|
| 111 |
+
"It’s okay, could be improved.",neutral
|
| 112 |
+
This is the worst service ever.,negative
|
| 113 |
+
The staff were rude and unhelpful.,negative
|
| 114 |
+
"The app keeps crashing, very frustrating.",negative
|
| 115 |
+
My package arrived late and damaged.,negative
|
| 116 |
+
Customer support didn’t respond at all.,negative
|
| 117 |
+
"Horrible experience, will never come back.",negative
|
| 118 |
+
This company doesn’t care about customers.,negative
|
| 119 |
+
Terrible product quality.,negative
|
| 120 |
+
The flight was delayed and chaotic.,negative
|
| 121 |
+
Completely disappointed with the outcome.,negative
|
| 122 |
+
I love flying with this airline!,positive
|
| 123 |
+
Amazing service and friendly staff!,positive
|
| 124 |
+
Super smooth check-in process!,positive
|
| 125 |
+
"The new update works perfectly, great job!",positive
|
| 126 |
+
Had an excellent experience today!,positive
|
| 127 |
+
Fast delivery and awesome packaging!,positive
|
| 128 |
+
I’m so happy with my new phone!,positive
|
| 129 |
+
Totally loved the new design!,positive
|
| 130 |
+
Everything was handled professionally.,positive
|
| 131 |
+
This app is getting better every update!,positive
|
| 132 |
+
"The service was okay, nothing special.",neutral
|
| 133 |
+
"I used the app today, it was fine.",neutral
|
| 134 |
+
The product arrived on time.,neutral
|
| 135 |
+
It works as expected.,neutral
|
| 136 |
+
"No complaints, just a normal experience.",neutral
|
| 137 |
+
The new feature is alright.,neutral
|
| 138 |
+
It’s an average performance overall.,neutral
|
| 139 |
+
"Neither good nor bad, just fine.",neutral
|
| 140 |
+
"Nothing remarkable, just standard.",neutral
|
| 141 |
+
"It’s okay, could be improved.",neutral
|
| 142 |
+
This is the worst service ever.,negative
|
| 143 |
+
The staff were rude and unhelpful.,negative
|
| 144 |
+
"The app keeps crashing, very frustrating.",negative
|
| 145 |
+
My package arrived late and damaged.,negative
|
| 146 |
+
Customer support didn’t respond at all.,negative
|
| 147 |
+
"Horrible experience, will never come back.",negative
|
| 148 |
+
This company doesn’t care about customers.,negative
|
| 149 |
+
Terrible product quality.,negative
|
| 150 |
+
The flight was delayed and chaotic.,negative
|
| 151 |
+
Completely disappointed with the outcome.,negative
|
scripts/create_dataset.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Generate the small, class-balanced demo dataset at ``data/twitter.csv``."""
import os

import pandas as pd

# Number of times the 30 hand-written samples are repeated (30 * 5 = 150 rows).
REPEATS = 5

# Paths are relative to the current working directory.
os.makedirs("data", exist_ok=True)

# NOTE(review): the final dataset is the same 30 sentences repeated, so any
# random train/test split will contain identical rows on both sides - confirm
# this is acceptable for the reported metrics.
data = [
    ("I love flying with this airline!", "positive"),
    ("Amazing service and friendly staff!", "positive"),
    ("Super smooth check-in process!", "positive"),
    ("The new update works perfectly, great job!", "positive"),
    ("Had an excellent experience today!", "positive"),
    ("Fast delivery and awesome packaging!", "positive"),
    ("I’m so happy with my new phone!", "positive"),
    ("Totally loved the new design!", "positive"),
    ("Everything was handled professionally.", "positive"),
    ("This app is getting better every update!", "positive"),

    ("The service was okay, nothing special.", "neutral"),
    ("I used the app today, it was fine.", "neutral"),
    ("The product arrived on time.", "neutral"),
    ("It works as expected.", "neutral"),
    ("No complaints, just a normal experience.", "neutral"),
    ("The new feature is alright.", "neutral"),
    ("It’s an average performance overall.", "neutral"),
    ("Neither good nor bad, just fine.", "neutral"),
    ("Nothing remarkable, just standard.", "neutral"),
    ("It’s okay, could be improved.", "neutral"),

    ("This is the worst service ever.", "negative"),
    ("The staff were rude and unhelpful.", "negative"),
    ("The app keeps crashing, very frustrating.", "negative"),
    ("My package arrived late and damaged.", "negative"),
    ("Customer support didn’t respond at all.", "negative"),
    ("Horrible experience, will never come back.", "negative"),
    ("This company doesn’t care about customers.", "negative"),
    ("Terrible product quality.", "negative"),
    ("The flight was delayed and chaotic.", "negative"),
    ("Completely disappointed with the outcome.", "negative"),
] * REPEATS  # repeat to reach 150 rows

df = pd.DataFrame(data, columns=["text", "sentiment"])
df.to_csv("data/twitter.csv", index=False)
print(f"✅ New dataset created with {len(df)} samples at data/twitter.csv")
|
scripts/create_dataset.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Generate the small, class-balanced demo dataset at ``data/twitter.csv``."""
import os

import pandas as pd

# Number of times the 30 hand-written samples are repeated (30 * 5 = 150 rows).
REPEATS = 5

# Paths are relative to the current working directory.
os.makedirs("data", exist_ok=True)

# NOTE(review): the final dataset is the same 30 sentences repeated, so any
# random train/test split will contain identical rows on both sides - confirm
# this is acceptable for the reported metrics.
data = [
    ("I love flying with this airline!", "positive"),
    ("Amazing service and friendly staff!", "positive"),
    ("Super smooth check-in process!", "positive"),
    ("The new update works perfectly, great job!", "positive"),
    ("Had an excellent experience today!", "positive"),
    ("Fast delivery and awesome packaging!", "positive"),
    ("I’m so happy with my new phone!", "positive"),
    ("Totally loved the new design!", "positive"),
    ("Everything was handled professionally.", "positive"),
    ("This app is getting better every update!", "positive"),

    ("The service was okay, nothing special.", "neutral"),
    ("I used the app today, it was fine.", "neutral"),
    ("The product arrived on time.", "neutral"),
    ("It works as expected.", "neutral"),
    ("No complaints, just a normal experience.", "neutral"),
    ("The new feature is alright.", "neutral"),
    ("It’s an average performance overall.", "neutral"),
    ("Neither good nor bad, just fine.", "neutral"),
    ("Nothing remarkable, just standard.", "neutral"),
    ("It’s okay, could be improved.", "neutral"),

    ("This is the worst service ever.", "negative"),
    ("The staff were rude and unhelpful.", "negative"),
    ("The app keeps crashing, very frustrating.", "negative"),
    ("My package arrived late and damaged.", "negative"),
    ("Customer support didn’t respond at all.", "negative"),
    ("Horrible experience, will never come back.", "negative"),
    ("This company doesn’t care about customers.", "negative"),
    ("Terrible product quality.", "negative"),
    ("The flight was delayed and chaotic.", "negative"),
    ("Completely disappointed with the outcome.", "negative"),
] * REPEATS  # repeat to reach 150 rows

df = pd.DataFrame(data, columns=["text", "sentiment"])
df.to_csv("data/twitter.csv", index=False)
print(f"✅ New dataset created with {len(df)} samples at data/twitter.csv")
|
scripts/train.py
CHANGED
|
@@ -1,50 +1,40 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
import argparse
|
| 6 |
import json
|
| 7 |
-
import sys
|
| 8 |
from pathlib import Path
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
)
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
print(f"Artifacts saved to {artifact_path}")
|
| 46 |
-
print(f"Metrics saved to {metrics_path}")
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
if __name__ == "__main__":
|
| 50 |
-
main()
|
|
|
|
| 1 |
+
"""Configuration loader that supports both YAML and JSON formats."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
|
|
|
|
|
|
| 4 |
import json
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
+
from typing import Any, Dict
|
| 7 |
+
import yaml
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Config(dict):
    """Dictionary of settings whose top-level keys can also be read as attributes.

    ``cfg.model`` is equivalent to ``cfg.get("model")``, so a missing key
    yields ``None`` instead of raising. Values are returned exactly as stored;
    nested mappings are not wrapped, so only the first level supports
    attribute-style access.
    """

    def __getattr__(self, item):
        """Return ``self.get(item)`` for names that normal lookup did not find.

        Python calls ``__getattr__`` only after regular attribute lookup has
        failed, so ``dict`` methods and real attributes are unaffected.

        Args:
            item: Attribute name being looked up.

        Returns:
            The value stored under ``item``, or ``None`` when the key is absent.

        Raises:
            AttributeError: If ``item`` is a dunder name (``__x__``).
        """
        # Dunder names are protocol probes (copy, pickle, hasattr checks, ...).
        # Answering them with None would make callers believe the hook exists
        # and then fail when they try to call it.
        if item.startswith("__") and item.endswith("__"):
            raise AttributeError(item)
        return self.get(item)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def load_config(path: str | Path = "config/settings.yaml") -> Config:
    """Load a configuration file and return it as a ``Config``.

    The parser is chosen from the file extension (case-insensitive):
    ``.yaml`` / ``.yml`` files are read with ``yaml.safe_load`` and ``.json``
    files with ``json.load``.

    Args:
        path: Location of the config file. A relative path is resolved
            against the current working directory.

    Returns:
        The parsed top-level mapping wrapped in ``Config``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the extension is not supported, the file cannot be
            read or parsed (the underlying error is chained as the cause), or
            the parsed top-level value is not a mapping.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    suffix = path.suffix.lower()
    if suffix in (".yaml", ".yml"):
        parse = yaml.safe_load
    elif suffix == ".json":
        parse = json.load
    else:
        # Raised outside the try below so it is reported as-is instead of
        # being re-wrapped as a parse failure.
        raise ValueError(f"Unsupported config format: {path.suffix}")

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = parse(f)
    except Exception as e:
        # Callers get a single ValueError type for any read/parse problem;
        # the original exception stays available through __cause__.
        raise ValueError(f"Failed to parse config file {path}: {e}") from e

    # An empty YAML document parses to None and a JSON array to a list;
    # neither can be used as a settings mapping.
    if not isinstance(data, dict):
        raise ValueError(f"Invalid config structure in {path}")

    return Config(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|