vishnu-coder committed on
Commit
26c905c
·
1 Parent(s): 0a11807

Final working Streamlit app for deployment

Browse files
app/app.py CHANGED
@@ -1,45 +1,47 @@
1
  from __future__ import annotations
 
 
 
 
 
2
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
3
  st.title("Twitter Sentiment Intelligence")
 
 
4
  try:
5
- # Main app logic
6
- """Streamlit front-end for the Deloitte-ready Twitter Sentiment Intelligence dashboard."""
7
- import json
8
- import sys
9
- from pathlib import Path
10
- from typing import Dict
11
- import pandas as pd
12
  # -------------------------------------------------------------------------
13
- # Path setup to include the local src/ package for imports
14
  # -------------------------------------------------------------------------
15
  ROOT = Path(__file__).resolve().parents[1]
16
  SRC_PATH = ROOT / "src"
17
  if str(SRC_PATH) not in sys.path:
18
  sys.path.insert(0, str(SRC_PATH))
 
19
  from twitter_sentiment.config import load_config
20
  from twitter_sentiment.predictor import load_artifacts, predict_with_threshold
 
21
  # -------------------------------------------------------------------------
22
- # Streamlit App Configuration
23
- # -------------------------------------------------------------------------
24
- st.set_page_config(
25
- page_title="Twitter Sentiment Intelligence",
26
- page_icon="💼",
27
- layout="wide",
28
- )
29
- # -------------------------------------------------------------------------
30
- # Cached resource loading (config, pipeline, metrics)
31
- # NOTE: artifacts/sentiment_pipeline.joblib is referenced relatively
32
- # If artifacts directory or file does not exist, run: python scripts/train.py
33
  # -------------------------------------------------------------------------
34
  @st.cache_resource(show_spinner=False)
35
  def _load_dependencies():
36
  """Load configuration, trained pipeline, and metrics from artifacts."""
37
  config = load_config()
38
- # The load_artifacts function references 'artifacts/sentiment_pipeline.joblib' relatively
39
  pipeline, metrics = load_artifacts(config)
40
  return config, pipeline, metrics
 
41
  # -------------------------------------------------------------------------
42
- # Helper function to format prediction probabilities
43
  # -------------------------------------------------------------------------
44
  def format_probabilities(probabilities: Dict[str, float]) -> pd.DataFrame:
45
  """Convert prediction probabilities to a styled DataFrame for display."""
@@ -48,15 +50,69 @@ try:
48
  .sort_values("confidence", ascending=False)
49
  .style.format({"confidence": "{:.2%}"})
50
  )
 
51
  # -------------------------------------------------------------------------
52
  # Main Streamlit Application
53
  # -------------------------------------------------------------------------
54
  def main() -> None:
55
  """Render the Deloitte-ready Twitter Sentiment Intelligence Dashboard."""
56
  config, pipeline, metrics = _load_dependencies()
57
- # Main application logic continues here
58
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  if __name__ == "__main__":
60
  main()
 
61
  except Exception as e:
62
  st.error(f"Startup failed: {e}")
 
1
  from __future__ import annotations
2
+ import json
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Dict
6
+ import pandas as pd
7
  import streamlit as st
8
+
9
+ # -------------------------------------------------------------------------
10
+ # Page Configuration
11
+ # -------------------------------------------------------------------------
12
+ st.set_page_config(
13
+ page_title="Twitter Sentiment Intelligence",
14
+ page_icon="💼",
15
+ layout="wide",
16
+ )
17
+
18
  st.title("Twitter Sentiment Intelligence")
19
+ st.caption("Streamlit front-end for the Deloitte-ready Twitter Sentiment Intelligence dashboard.")
20
+
21
  try:
 
 
 
 
 
 
 
22
  # -------------------------------------------------------------------------
23
+ # Path setup
24
  # -------------------------------------------------------------------------
25
  ROOT = Path(__file__).resolve().parents[1]
26
  SRC_PATH = ROOT / "src"
27
  if str(SRC_PATH) not in sys.path:
28
  sys.path.insert(0, str(SRC_PATH))
29
+
30
  from twitter_sentiment.config import load_config
31
  from twitter_sentiment.predictor import load_artifacts, predict_with_threshold
32
+
33
  # -------------------------------------------------------------------------
34
+ # Cached dependencies
 
 
 
 
 
 
 
 
 
 
35
  # -------------------------------------------------------------------------
36
  @st.cache_resource(show_spinner=False)
37
  def _load_dependencies():
38
  """Load configuration, trained pipeline, and metrics from artifacts."""
39
  config = load_config()
 
40
  pipeline, metrics = load_artifacts(config)
41
  return config, pipeline, metrics
42
+
43
  # -------------------------------------------------------------------------
44
+ # Format probabilities helper
45
  # -------------------------------------------------------------------------
46
  def format_probabilities(probabilities: Dict[str, float]) -> pd.DataFrame:
47
  """Convert prediction probabilities to a styled DataFrame for display."""
 
50
  .sort_values("confidence", ascending=False)
51
  .style.format({"confidence": "{:.2%}"})
52
  )
53
+
54
  # -------------------------------------------------------------------------
55
  # Main Streamlit Application
56
  # -------------------------------------------------------------------------
57
  def main() -> None:
58
  """Render the Deloitte-ready Twitter Sentiment Intelligence Dashboard."""
59
  config, pipeline, metrics = _load_dependencies()
60
+
61
+ # ---------------------- Sidebar ----------------------
62
+ with st.sidebar:
63
+ st.header("📊 Model Snapshot")
64
+ st.write("**Classes:**", ", ".join(pipeline.classes_))
65
+ if metrics:
66
+ st.metric("Macro F1", f"{metrics.get('f1_macro', 0.0):.2f}")
67
+ st.metric("Accuracy", f"{metrics.get('accuracy', 0.0):.2f}")
68
+ else:
69
+ st.info("Run `python scripts/train.py` to generate metrics.")
70
+ st.download_button(
71
+ label="⬇️ Download Metrics JSON",
72
+ data=json.dumps(metrics or {}, indent=2).encode("utf-8"),
73
+ file_name="metrics.json",
74
+ mime="application/json",
75
+ )
76
+ st.info(
77
+ "🚀 Tip: integrate Oracle Autonomous Database by updating `config/settings.yaml`."
78
+ )
79
+
80
+ # ---------------------- Tabs ----------------------
81
+ tab_predict, tab_metrics = st.tabs(["🔮 Predict", "⚙️ Model Governance"])
82
+
83
+ # ---------------------- Prediction Tab ----------------------
84
+ with tab_predict:
85
+ st.subheader("Real-Time Sentiment Assessment")
86
+ user_input = st.text_area("Enter a tweet or customer comment:", height=150)
87
+ if st.button("Run Analysis", type="primary"):
88
+ if not user_input.strip():
89
+ st.warning("⚠️ Please enter text to analyse.")
90
+ else:
91
+ label, probabilities = predict_with_threshold(user_input, config)
92
+ st.success(f"Predicted Sentiment: **{label.title()}**")
93
+ st.dataframe(format_probabilities(probabilities), use_container_width=True)
94
+
95
+ # ---------------------- Metrics Tab ----------------------
96
+ with tab_metrics:
97
+ st.subheader("Operational Metrics")
98
+ if metrics:
99
+ metrics_df = (
100
+ pd.DataFrame(metrics, index=["score"])
101
+ .T.rename(columns={"score": "value"})
102
+ )
103
+ st.dataframe(metrics_df, use_container_width=True)
104
+ else:
105
+ st.info("Metrics will appear after the first training run (see `scripts/train.py`).")
106
+
107
+ # ---------------------- Footer ----------------------
108
+ st.markdown("---")
109
+ st.caption("© 2025 Deloitte-aligned Sentiment Analytics Accelerator")
110
+
111
+ # -------------------------------------------------------------------------
112
+ # Entry Point
113
+ # -------------------------------------------------------------------------
114
  if __name__ == "__main__":
115
  main()
116
+
117
  except Exception as e:
118
  st.error(f"Startup failed: {e}")
artifacts/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.0,
3
+ "f1_macro": 0.0,
4
+ "precision_macro": 0.0,
5
+ "recall_macro": 0.0,
6
+ "cv_mean": 0.16666666666666666,
7
+ "cv_std": 0.0,
8
+ "negative_f1": 0.0,
9
+ "neutral_f1": 0.0,
10
+ "positive_f1": 0.0
11
+ }
artifacts/sentiment_pipeline.joblib ADDED
Binary file (3.16 kB). View file
 
config/settings.yaml CHANGED
@@ -1,59 +1,19 @@
1
- {
2
- "project": {
3
- "name": "Twitter Sentiment Analysis",
4
- "description": "End-to-end sentiment analysis workflow aligned with Deloitte India Oracle Analyst capabilities.",
5
- "author": "Your Name"
6
- },
7
- "live_app": {
8
- "streamlit_url": "https://<your-app>.streamlit.app",
9
- "vercel_redirect_domain": "https://<your-project>.vercel.app"
10
- },
11
- "data": {
12
- "path": "data/twitter.csv",
13
- "text_column": "text",
14
- "target_column": "sentiment",
15
- "class_order": ["negative", "neutral", "positive"]
16
- },
17
- "preprocessing": {
18
- "lowercase": true,
19
- "strip_urls": true,
20
- "strip_mentions": true,
21
- "strip_hashtags": false,
22
- "remove_punctuation": true,
23
- "normalize_whitespace": true,
24
- "stemming": false,
25
- "lemmatize": false
26
- },
27
- "model": {
28
- "artifact_dir": "artifacts",
29
- "pipeline_filename": "sentiment_pipeline.joblib",
30
- "vectorizer_filename": "tfidf_vectorizer.joblib",
31
- "model_filename": "logistic_model.joblib",
32
- "test_size": 0.2,
33
- "random_state": 42,
34
- "max_features": 5000,
35
- "ngram_range": [1, 2],
36
- "class_weight": "balanced"
37
- },
38
- "training": {
39
- "cv_folds": 5,
40
- "scoring": "f1_macro",
41
- "n_jobs": -1,
42
- "verbose": 1,
43
- "probability_thresholds": {
44
- "positive": 0.55,
45
- "negative": 0.45
46
- }
47
- },
48
- "monitoring": {
49
- "enable_model_drift_checks": true,
50
- "psi_threshold": 0.2
51
- },
52
- "oracle_integration": {
53
- "enabled": false,
54
- "wallet_location": "~/.oci/wallet",
55
- "user": "",
56
- "dsn": "",
57
- "sql_query": "SELECT text, sentiment FROM twitter_training_data"
58
- }
59
- }
 
1
+ data:
2
+ train_path: data/twitter.csv
3
+ text_column: text
4
+ target_column: sentiment
5
+ class_order: [negative, neutral, positive]
6
+
7
+ model:
8
+ artifact_dir: artifacts
9
+ pipeline_filename: sentiment_pipeline.joblib
10
+ test_size: 0.2
11
+ random_state: 42
12
+ max_features: 5000
13
+ ngram_range: [1, 2]
14
+ class_weight: balanced
15
+
16
+ training:
17
+ cv_folds: 5
18
+ scoring: f1_macro
19
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datatraining_data.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,sentiment
2
+ I love this product,positive
3
+ This is the worst experience ever,negative
4
+ The service was okay,neutral
5
+ Weather is so nice today,positive
6
+ I hate delays,negative
7
+ The meeting went fine,neutral
8
+ Excellent work team!,positive
9
+ I'm feeling bad,negative
10
+ Just a normal day,neutral
11
+ Great job everyone!,positive
data/twitter.csv CHANGED
@@ -1,7 +1,151 @@
1
  text,sentiment
2
- "I love flying with this airline!",positive
3
- "This is the worst service ever",negative
4
- "I had a neutral experience",neutral
5
- "The flight was delayed and staff were rude",negative
6
- "Amazing in-flight entertainment and service",positive
7
- "Nothing special, just an average flight",neutral
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  text,sentiment
2
+ I love flying with this airline!,positive
3
+ Amazing service and friendly staff!,positive
4
+ Super smooth check-in process!,positive
5
+ "The new update works perfectly, great job!",positive
6
+ Had an excellent experience today!,positive
7
+ Fast delivery and awesome packaging!,positive
8
+ I’m so happy with my new phone!,positive
9
+ Totally loved the new design!,positive
10
+ Everything was handled professionally.,positive
11
+ This app is getting better every update!,positive
12
+ "The service was okay, nothing special.",neutral
13
+ "I used the app today, it was fine.",neutral
14
+ The product arrived on time.,neutral
15
+ It works as expected.,neutral
16
+ "No complaints, just a normal experience.",neutral
17
+ The new feature is alright.,neutral
18
+ It’s an average performance overall.,neutral
19
+ "Neither good nor bad, just fine.",neutral
20
+ "Nothing remarkable, just standard.",neutral
21
+ "It’s okay, could be improved.",neutral
22
+ This is the worst service ever.,negative
23
+ The staff were rude and unhelpful.,negative
24
+ "The app keeps crashing, very frustrating.",negative
25
+ My package arrived late and damaged.,negative
26
+ Customer support didn’t respond at all.,negative
27
+ "Horrible experience, will never come back.",negative
28
+ This company doesn’t care about customers.,negative
29
+ Terrible product quality.,negative
30
+ The flight was delayed and chaotic.,negative
31
+ Completely disappointed with the outcome.,negative
32
+ I love flying with this airline!,positive
33
+ Amazing service and friendly staff!,positive
34
+ Super smooth check-in process!,positive
35
+ "The new update works perfectly, great job!",positive
36
+ Had an excellent experience today!,positive
37
+ Fast delivery and awesome packaging!,positive
38
+ I’m so happy with my new phone!,positive
39
+ Totally loved the new design!,positive
40
+ Everything was handled professionally.,positive
41
+ This app is getting better every update!,positive
42
+ "The service was okay, nothing special.",neutral
43
+ "I used the app today, it was fine.",neutral
44
+ The product arrived on time.,neutral
45
+ It works as expected.,neutral
46
+ "No complaints, just a normal experience.",neutral
47
+ The new feature is alright.,neutral
48
+ It’s an average performance overall.,neutral
49
+ "Neither good nor bad, just fine.",neutral
50
+ "Nothing remarkable, just standard.",neutral
51
+ "It’s okay, could be improved.",neutral
52
+ This is the worst service ever.,negative
53
+ The staff were rude and unhelpful.,negative
54
+ "The app keeps crashing, very frustrating.",negative
55
+ My package arrived late and damaged.,negative
56
+ Customer support didn’t respond at all.,negative
57
+ "Horrible experience, will never come back.",negative
58
+ This company doesn’t care about customers.,negative
59
+ Terrible product quality.,negative
60
+ The flight was delayed and chaotic.,negative
61
+ Completely disappointed with the outcome.,negative
62
+ I love flying with this airline!,positive
63
+ Amazing service and friendly staff!,positive
64
+ Super smooth check-in process!,positive
65
+ "The new update works perfectly, great job!",positive
66
+ Had an excellent experience today!,positive
67
+ Fast delivery and awesome packaging!,positive
68
+ I’m so happy with my new phone!,positive
69
+ Totally loved the new design!,positive
70
+ Everything was handled professionally.,positive
71
+ This app is getting better every update!,positive
72
+ "The service was okay, nothing special.",neutral
73
+ "I used the app today, it was fine.",neutral
74
+ The product arrived on time.,neutral
75
+ It works as expected.,neutral
76
+ "No complaints, just a normal experience.",neutral
77
+ The new feature is alright.,neutral
78
+ It’s an average performance overall.,neutral
79
+ "Neither good nor bad, just fine.",neutral
80
+ "Nothing remarkable, just standard.",neutral
81
+ "It’s okay, could be improved.",neutral
82
+ This is the worst service ever.,negative
83
+ The staff were rude and unhelpful.,negative
84
+ "The app keeps crashing, very frustrating.",negative
85
+ My package arrived late and damaged.,negative
86
+ Customer support didn’t respond at all.,negative
87
+ "Horrible experience, will never come back.",negative
88
+ This company doesn’t care about customers.,negative
89
+ Terrible product quality.,negative
90
+ The flight was delayed and chaotic.,negative
91
+ Completely disappointed with the outcome.,negative
92
+ I love flying with this airline!,positive
93
+ Amazing service and friendly staff!,positive
94
+ Super smooth check-in process!,positive
95
+ "The new update works perfectly, great job!",positive
96
+ Had an excellent experience today!,positive
97
+ Fast delivery and awesome packaging!,positive
98
+ I’m so happy with my new phone!,positive
99
+ Totally loved the new design!,positive
100
+ Everything was handled professionally.,positive
101
+ This app is getting better every update!,positive
102
+ "The service was okay, nothing special.",neutral
103
+ "I used the app today, it was fine.",neutral
104
+ The product arrived on time.,neutral
105
+ It works as expected.,neutral
106
+ "No complaints, just a normal experience.",neutral
107
+ The new feature is alright.,neutral
108
+ It’s an average performance overall.,neutral
109
+ "Neither good nor bad, just fine.",neutral
110
+ "Nothing remarkable, just standard.",neutral
111
+ "It’s okay, could be improved.",neutral
112
+ This is the worst service ever.,negative
113
+ The staff were rude and unhelpful.,negative
114
+ "The app keeps crashing, very frustrating.",negative
115
+ My package arrived late and damaged.,negative
116
+ Customer support didn’t respond at all.,negative
117
+ "Horrible experience, will never come back.",negative
118
+ This company doesn’t care about customers.,negative
119
+ Terrible product quality.,negative
120
+ The flight was delayed and chaotic.,negative
121
+ Completely disappointed with the outcome.,negative
122
+ I love flying with this airline!,positive
123
+ Amazing service and friendly staff!,positive
124
+ Super smooth check-in process!,positive
125
+ "The new update works perfectly, great job!",positive
126
+ Had an excellent experience today!,positive
127
+ Fast delivery and awesome packaging!,positive
128
+ I’m so happy with my new phone!,positive
129
+ Totally loved the new design!,positive
130
+ Everything was handled professionally.,positive
131
+ This app is getting better every update!,positive
132
+ "The service was okay, nothing special.",neutral
133
+ "I used the app today, it was fine.",neutral
134
+ The product arrived on time.,neutral
135
+ It works as expected.,neutral
136
+ "No complaints, just a normal experience.",neutral
137
+ The new feature is alright.,neutral
138
+ It’s an average performance overall.,neutral
139
+ "Neither good nor bad, just fine.",neutral
140
+ "Nothing remarkable, just standard.",neutral
141
+ "It’s okay, could be improved.",neutral
142
+ This is the worst service ever.,negative
143
+ The staff were rude and unhelpful.,negative
144
+ "The app keeps crashing, very frustrating.",negative
145
+ My package arrived late and damaged.,negative
146
+ Customer support didn’t respond at all.,negative
147
+ "Horrible experience, will never come back.",negative
148
+ This company doesn’t care about customers.,negative
149
+ Terrible product quality.,negative
150
+ The flight was delayed and chaotic.,negative
151
+ Completely disappointed with the outcome.,negative
scripts/create_dataset.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Generate the small demo sentiment dataset at ``data/twitter.csv``.

Thirty hand-written samples (ten per sentiment class) are repeated five
times, giving 150 rows with the columns ``text`` and ``sentiment``.
"""
import os

import pandas as pd

# Number of times the 30 base samples are repeated (30 * 5 = 150 rows).
REPEATS = 5

# Base samples grouped by label; insertion order fixes the row order.
SAMPLES_BY_LABEL = {
    "positive": [
        "I love flying with this airline!",
        "Amazing service and friendly staff!",
        "Super smooth check-in process!",
        "The new update works perfectly, great job!",
        "Had an excellent experience today!",
        "Fast delivery and awesome packaging!",
        "I’m so happy with my new phone!",
        "Totally loved the new design!",
        "Everything was handled professionally.",
        "This app is getting better every update!",
    ],
    "neutral": [
        "The service was okay, nothing special.",
        "I used the app today, it was fine.",
        "The product arrived on time.",
        "It works as expected.",
        "No complaints, just a normal experience.",
        "The new feature is alright.",
        "It’s an average performance overall.",
        "Neither good nor bad, just fine.",
        "Nothing remarkable, just standard.",
        "It’s okay, could be improved.",
    ],
    "negative": [
        "This is the worst service ever.",
        "The staff were rude and unhelpful.",
        "The app keeps crashing, very frustrating.",
        "My package arrived late and damaged.",
        "Customer support didn’t respond at all.",
        "Horrible experience, will never come back.",
        "This company doesn’t care about customers.",
        "Terrible product quality.",
        "The flight was delayed and chaotic.",
        "Completely disappointed with the outcome.",
    ],
}

os.makedirs("data", exist_ok=True)

# Flatten to (text, label) pairs, then repeat the whole sequence.
base_rows = [
    (text, label)
    for label, texts in SAMPLES_BY_LABEL.items()
    for text in texts
]
data = base_rows * REPEATS

df = pd.DataFrame(data, columns=["text", "sentiment"])
df.to_csv("data/twitter.csv", index=False)
print(f"✅ New dataset created with {len(df)} samples at data/twitter.csv")
scripts/create_dataset.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd, os
2
+
3
+ os.makedirs("data", exist_ok=True)
4
+
5
+ data = [
6
+ ("I love flying with this airline!", "positive"),
7
+ ("Amazing service and friendly staff!", "positive"),
8
+ ("Super smooth check-in process!", "positive"),
9
+ ("The new update works perfectly, great job!", "positive"),
10
+ ("Had an excellent experience today!", "positive"),
11
+ ("Fast delivery and awesome packaging!", "positive"),
12
+ ("I’m so happy with my new phone!", "positive"),
13
+ ("Totally loved the new design!", "positive"),
14
+ ("Everything was handled professionally.", "positive"),
15
+ ("This app is getting better every update!", "positive"),
16
+
17
+ ("The service was okay, nothing special.", "neutral"),
18
+ ("I used the app today, it was fine.", "neutral"),
19
+ ("The product arrived on time.", "neutral"),
20
+ ("It works as expected.", "neutral"),
21
+ ("No complaints, just a normal experience.", "neutral"),
22
+ ("The new feature is alright.", "neutral"),
23
+ ("It’s an average performance overall.", "neutral"),
24
+ ("Neither good nor bad, just fine.", "neutral"),
25
+ ("Nothing remarkable, just standard.", "neutral"),
26
+ ("It’s okay, could be improved.", "neutral"),
27
+
28
+ ("This is the worst service ever.", "negative"),
29
+ ("The staff were rude and unhelpful.", "negative"),
30
+ ("The app keeps crashing, very frustrating.", "negative"),
31
+ ("My package arrived late and damaged.", "negative"),
32
+ ("Customer support didn’t respond at all.", "negative"),
33
+ ("Horrible experience, will never come back.", "negative"),
34
+ ("This company doesn’t care about customers.", "negative"),
35
+ ("Terrible product quality.", "negative"),
36
+ ("The flight was delayed and chaotic.", "negative"),
37
+ ("Completely disappointed with the outcome.", "negative"),
38
+ ] * 5 # repeat to reach 150 rows
39
+
40
+ df = pd.DataFrame(data, columns=["text", "sentiment"])
41
+ df.to_csv("data/twitter.csv", index=False)
42
+ print(f"✅ New dataset created with {len(df)} samples at data/twitter.csv")
scripts/train.py CHANGED
@@ -1,50 +1,40 @@
1
- """Command line utility for training the sentiment analysis pipeline."""
2
 
3
  from __future__ import annotations
4
-
5
- import argparse
6
  import json
7
- import sys
8
  from pathlib import Path
9
-
10
- ROOT = Path(__file__).resolve().parents[1]
11
- SRC = ROOT / "src"
12
- if str(SRC) not in sys.path:
13
- sys.path.insert(0, str(SRC))
14
-
15
- from twitter_sentiment.config import load_config
16
- from twitter_sentiment.modeling import persist_artifacts, train_and_evaluate
17
-
18
-
19
- def parse_args() -> argparse.Namespace:
20
- parser = argparse.ArgumentParser(description="Train the Deloitte-aligned Twitter sentiment model")
21
- parser.add_argument(
22
- "--config",
23
- default="config/settings.yaml",
24
- help="Path to the YAML configuration file",
25
- )
26
- parser.add_argument(
27
- "--metrics",
28
- default="artifacts/metrics.json",
29
- help="Optional path to persist evaluation metrics as JSON",
30
- )
31
- return parser.parse_args()
32
-
33
-
34
- def main() -> None:
35
- args = parse_args()
36
- config = load_config(args.config)
37
-
38
- pipeline, metrics_summary = train_and_evaluate(config)
39
- artifact_path = persist_artifacts(pipeline, config, metrics_summary)
40
-
41
- metrics_path = Path(args.metrics)
42
- metrics_path.parent.mkdir(parents=True, exist_ok=True)
43
- metrics_path.write_text(json.dumps(metrics_summary, indent=2), encoding="utf-8")
44
-
45
- print(f"Artifacts saved to {artifact_path}")
46
- print(f"Metrics saved to {metrics_path}")
47
-
48
-
49
- if __name__ == "__main__":
50
- main()
 
1
+ """Configuration loader that supports both YAML and JSON formats."""
2
 
3
  from __future__ import annotations
 
 
4
  import json
 
5
  from pathlib import Path
6
+ from typing import Any, Dict
7
+ import yaml
8
+
9
+
10
class Config(dict):
    """Dictionary whose keys can also be read as attributes.

    ``cfg.model`` is equivalent to ``cfg.get("model")``: a missing key yields
    ``None`` instead of raising. Nested mappings are converted to ``Config``
    on construction so chained access such as ``cfg.model.artifact_dir``
    works; they remain ``dict`` instances and compare equal to plain dicts.

    Real ``dict`` attributes (``items``, ``get``, ``keys``, ...) always win
    over keys of the same name; use ``cfg["items"]`` for such keys.
    """

    def __init__(self, *args, **kwargs):
        """Build the mapping like ``dict`` and wrap nested dict values."""
        super().__init__(*args, **kwargs)
        # Snapshot the items so values can be replaced while looping.
        for key, value in list(self.items()):
            if isinstance(value, dict) and not isinstance(value, Config):
                self[key] = Config(value)

    def __getattr__(self, item):
        """Return ``self.get(item)`` for names not found by normal lookup.

        Raises:
            AttributeError: for dunder names, so protocol probes such as
                ``hasattr(cfg, "__deepcopy__")`` are not answered with
                ``None``.
        """
        if item.startswith("__") and item.endswith("__"):
            raise AttributeError(item)
        return self.get(item)
14
+
15
+
16
def load_config(path: str | Path = "config/settings.yaml") -> Config:
    """Load a configuration mapping from a YAML or JSON file.

    The format is chosen from the file extension (case-insensitive):
    ``.yaml``/``.yml`` is read with ``yaml.safe_load`` and ``.json`` with
    ``json.load``.

    Args:
        path: Location of the configuration file.

    Returns:
        The parsed top-level mapping wrapped in ``Config``.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        ValueError: if the extension is unsupported, the file cannot be
            read or parsed (the underlying error is chained as
            ``__cause__``), or the top-level value is not a mapping
            (for example an empty YAML file, which parses to ``None``).
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    # Pick the parser before opening the file so an unsupported extension is
    # reported as such rather than being re-wrapped as a parse failure.
    suffix = path.suffix.lower()
    if suffix in (".yaml", ".yml"):
        parse = yaml.safe_load
    elif suffix == ".json":
        parse = json.load
    else:
        raise ValueError(f"Unsupported config format: {path.suffix}")

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = parse(f)
    except Exception as e:
        # Deliberately broad: callers rely on every read/parse problem
        # surfacing as ValueError. The original error is kept via chaining.
        raise ValueError(f"Failed to parse config file {path}: {e}") from e

    if not isinstance(data, dict):
        raise ValueError(f"Invalid config structure in {path}")

    return Config(data)