Yousif22 committed on
Commit
fe63ba2
·
verified ·
1 Parent(s): ead70ba

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,39 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ **/*.h5 filter=lfs diff=lfs merge=lfs -text
37
+ *.preproc filter=lfs diff=lfs merge=lfs -text
38
+ Models/tf_model/* filter=lfs diff=lfs merge=lfs -text
39
+ Models/Naive_Bayes_model filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys this repository to a Hugging Face Space on every push to main.
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Model artifacts are tracked with Git LFS; without this the checkout
          # (and therefore the deployed Space) only contains LFS pointer files.
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        env:
          # Pass the secret through the environment instead of interpolating it
          # into the command line, where quoting issues could break or leak it.
          HF_TOKEN: ${{ secrets.hf_token }}
        run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
.gradio/flagged/dataset1.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ text,output,timestamp
2
+ Testing store data,📊 Sentiment: **Neutral**,2025-05-30 17:21:18.740457
3
+ Apple reports record-breaking profits in Q4,📊 Sentiment: **Negative**,2025-05-30 17:23:05.719872
4
+ Markets crash amid global economic fears,📊 Sentiment: **Negative**,2025-05-30 17:24:25.995378
Models/Naive_Bayes_model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2cae70244cc7ae96eb54a427d3adf1eb007be3c3dec1af65f3d3f0e40bd1316
3
+ size 3098902
README.md CHANGED
@@ -1,12 +1,41 @@
1
  ---
2
- title: Financial Sentiment Analyzer
3
- emoji: 🐠
4
- colorFrom: yellow
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.33.1
8
  app_file: app.py
9
- pinned: false
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: financial-sentiment-analyzer
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 5.31.0
6
+ ---
7
+ # 🧠 Financial News Sentiment Analyzer
8
+
9
+ This is a modern AI-powered application that analyzes the sentiment of financial news headlines or articles using either:
10
+ - ✅ Naive Bayes (fast & lightweight)
11
+ - 🤖 BERT transformer model (powerful & context-aware)
12
+
13
+ Built with **Gradio**, **Hugging Face Transformers**, and deployable on **Hugging Face Spaces**.
14
+
15
  ---
16
 
17
+ ## 🚀 Features
18
+
19
+ - 🔍 Text sentiment classification (positive / neutral / negative)
20
+ - 📈 Live usage statistics (locally or via Google Sheets)
21
+ - 🧪 Preloaded examples
22
+ - 🌙 Responsive UI with light/dark mode support
23
+
24
+ ---
25
+
26
+ ## 🧰 Tech Stack
27
+
28
+ - `Gradio`
29
+ - `Transformers` + `TensorFlow`
30
+ - `Joblib` for Naive Bayes model
31
+ - `Google Sheets` for logging (via Apps Script endpoint)
32
+
33
+ ---
34
+
35
+ ## 🛠 How to Run Locally
36
+
37
+ ```bash
38
+ git clone https://github.com/yourusername/financial-sentiment-analyzer.git
39
+ cd financial-sentiment-analyzer
40
+ pip install -r requirements.txt
41
+ python app.py
42
+ ```
__pycache__/bert_model_handler.cpython-310.pyc ADDED
Binary file (7.3 kB). View file
 
__pycache__/config.cpython-310.pyc ADDED
Binary file (989 Bytes). View file
 
__pycache__/css.cpython-310.pyc ADDED
Binary file (9.9 kB). View file
 
__pycache__/examples.cpython-310.pyc ADDED
Binary file (557 Bytes). View file
 
__pycache__/interface.cpython-310.pyc ADDED
Binary file (2.87 kB). View file
 
__pycache__/interface.cpython-311.pyc ADDED
Binary file (3.45 kB). View file
 
__pycache__/models.cpython-310.pyc ADDED
Binary file (2.19 kB). View file
 
__pycache__/prediction.cpython-310.pyc ADDED
Binary file (3.7 kB). View file
 
__pycache__/stats.cpython-310.pyc ADDED
Binary file (1.49 kB). View file
 
__pycache__/utils.cpython-310.pyc ADDED
Binary file (4.35 kB). View file
 
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from interface import create_interface
2
+
3
if __name__ == "__main__":
    # Build the Gradio UI together with the manager that owns the loaded models.
    demo, manager = create_interface()

    # The UI is still started when no model loaded; this only warns on the console.
    if not manager.models_available:
        print("⚠️ No models were loaded successfully. Please check your configuration.")

    launch_options = {
        "share": True,
        "show_error": True,
    }
    demo.launch(**launch_options)
config.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Config:
    """Static application settings, exposed as class attributes."""

    # Maps a predicted class index to its sentiment label.
    LABEL_MAP = dict(enumerate(("negative", "neutral", "positive")))

    # Where the two models are loaded from.
    BERT_MODEL_REPO_ID = "Yousif22/financial-sentiment-analyzerv2"
    NAIVE_BAYES_MODEL_PATH = "Models/Naive_Bayes_model"

    # Google Sheets logging settings.
    # NOTE(review): each value equals its own name, which looks like a placeholder
    # for a real URL/token — confirm how these are meant to be supplied.
    GOOGLE_SHEET_ENDPOINT = "GOOGLE_SHEET_ENDPOINT"
    GOOGLE_SHEET_TOKEN = "GOOGLE_SHEET_TOKEN"
    GOOGLE_SHEET_CSV_URL = "GOOGLE_SHEET_CSV_URL"
css.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_custom_css():
    """Return the application's custom CSS (blue theme, light and dark palettes).

    The stylesheet defines its palette as CSS custom properties on ``:root`` and
    overrides them for dark mode in two ways: automatically through the
    ``prefers-color-scheme: dark`` media query, and manually through a class.
    The manual override is bound to both ``.dark`` (the class Gradio applies
    when its dark theme is active) and the legacy ``.dark-mode`` name, so the
    palette follows the Gradio theme toggle instead of never being applied.

    Returns:
        str: CSS text suitable for the ``css`` argument of ``gr.Blocks``.
    """
    return """
    /* Modern Blue Theme with Dark/Light Mode Support */
    :root {
        /* Light mode colors */
        --primary-blue: #2563eb;
        --primary-blue-light: #3b82f6;
        --primary-blue-dark: #1d4ed8;
        --secondary-blue: #60a5fa;
        --accent-blue: #93c5fd;
        --light-blue: #dbeafe;

        --bg-primary: linear-gradient(135deg, #2563eb 0%, #1e40af 50%, #1d4ed8 100%);
        --bg-secondary: #ffffff;
        --bg-card: #f8fafc;
        --bg-hover: #f1f5f9;

        --text-primary: #1e293b;
        --text-secondary: #475569;
        --text-muted: #64748b;
        --text-inverse: #ffffff;

        --border-color: #e2e8f0;
        --border-hover: #cbd5e1;
        --shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 0.05);
        --shadow-md: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
        --shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1);
        --shadow-xl: 0 20px 25px -5px rgb(0 0 0 / 0.1), 0 8px 10px -6px rgb(0 0 0 / 0.1);

        --success-color: #22c55e;
        --warning-color: #f59e0b;
        --error-color: #ef4444;
        --info-color: #3b82f6;
    }

    /* Dark mode */
    @media (prefers-color-scheme: dark) {
        :root {
            --bg-primary: linear-gradient(135deg, #1e3a8a 0%, #1e40af 50%, #1d4ed8 100%);
            --bg-secondary: #0f172a;
            --bg-card: #1e293b;
            --bg-hover: #334155;

            --text-primary: #f1f5f9;
            --text-secondary: #cbd5e1;
            --text-muted: #94a3b8;
            --text-inverse: #0f172a;

            --border-color: #334155;
            --border-hover: #475569;
            --shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 0.25);
            --shadow-md: 0 4px 6px -1px rgb(0 0 0 / 0.3), 0 2px 4px -2px rgb(0 0 0 / 0.3);
            --shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 0.3), 0 4px 6px -4px rgb(0 0 0 / 0.3);
            --shadow-xl: 0 20px 25px -5px rgb(0 0 0 / 0.3), 0 8px 10px -6px rgb(0 0 0 / 0.3);
        }
    }

    /* Manual dark mode toggle (.dark is the class Gradio sets for its dark theme) */
    .dark,
    .dark-mode {
        --bg-primary: linear-gradient(135deg, #1e3a8a 0%, #1e40af 50%, #1d4ed8 100%);
        --bg-secondary: #0f172a;
        --bg-card: #1e293b;
        --bg-hover: #334155;

        --text-primary: #f1f5f9;
        --text-secondary: #cbd5e1;
        --text-muted: #94a3b8;
        --text-inverse: #0f172a;

        --border-color: #334155;
        --border-hover: #475569;
    }

    /* Global styles */
    .gradio-container {
        background: var(--bg-primary);
        font-family: 'Inter', 'Segoe UI', system-ui, sans-serif;
        color: var(--text-primary);
        min-height: 100vh;
    }

    /* Header */
    .header {
        text-align: center;
        background: rgba(255, 255, 255, 0.1);
        backdrop-filter: blur(20px);
        border-radius: 24px;
        padding: 3rem 2rem;
        margin: 2rem;
        border: 1px solid rgba(255, 255, 255, 0.2);
        box-shadow: var(--shadow-xl);
    }

    .header h1 {
        color: var(--text-inverse) !important;
        font-size: 3rem;
        font-weight: 800;
        margin: 0;
        text-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
        letter-spacing: -0.025em;
    }

    .header p {
        color: rgba(255, 255, 255, 0.9);
        font-size: 1.25rem;
        margin: 1rem 0 0;
        font-weight: 400;
    }

    /* Input container */
    .input-container {
        background: var(--bg-card);
        border-radius: 20px;
        padding: 2rem;
        margin: 1rem;
        box-shadow: var(--shadow-lg);
        border: 1px solid var(--border-color);
        transition: all 0.3s ease;
    }

    .input-container:hover {
        box-shadow: var(--shadow-xl);
        border-color: var(--border-hover);
    }

    /* Sentiment results */
    .sentiment-result {
        text-align: center;
        padding: 2rem;
        border-radius: 20px;
        margin: 1rem 0;
        background: var(--bg-card);
        border: 2px solid var(--border-color);
        box-shadow: var(--shadow-lg);
        transition: all 0.3s ease;
    }

    .sentiment-result[data-sentiment="positive"] {
        border-color: var(--success-color);
        background: linear-gradient(135deg, rgba(34, 197, 94, 0.1), rgba(34, 197, 94, 0.05));
    }

    .sentiment-result[data-sentiment="negative"] {
        border-color: var(--error-color);
        background: linear-gradient(135deg, rgba(239, 68, 68, 0.1), rgba(239, 68, 68, 0.05));
    }

    .sentiment-result[data-sentiment="neutral"] {
        border-color: var(--info-color);
        background: linear-gradient(135deg, rgba(59, 130, 246, 0.1), rgba(59, 130, 246, 0.05));
    }

    .sentiment-result.error {
        border-color: var(--error-color);
        background: linear-gradient(135deg, rgba(239, 68, 68, 0.1), rgba(239, 68, 68, 0.05));
        color: var(--error-color);
    }

    .sentiment-result.warning {
        border-color: var(--warning-color);
        background: linear-gradient(135deg, rgba(245, 158, 11, 0.1), rgba(245, 158, 11, 0.05));
        color: var(--warning-color);
    }

    .result-title {
        margin: 0 0 1rem;
        color: var(--text-primary);
        font-size: 1.75rem;
        font-weight: 700;
    }

    .sentiment-label {
        font-size: 2rem;
        font-weight: 800;
        margin: 1rem 0;
        color: var(--text-inverse);
        letter-spacing: 0.05em;
    }

    .model-info, .confidence-info {
        color: var(--text-inverse);
        font-size: 1rem;
        margin: 0.5rem 0;
        font-weight: 500;
    }

    .confidence-info {
        font-size: 0.875rem;
    }

    /* Statistics */
    .stats-container {
        background: var(--bg-card);
        padding: 2rem;
        border-radius: 20px;
        color: var(--text-primary);
        text-align: center;
        box-shadow: var(--shadow-lg);
        border: 1px solid var(--border-color);
    }

    .stats-title {
        color: var(--text-primary);
        margin-bottom: 1.5rem;
        font-size: 1.5rem;
        font-weight: 700;
    }

    .stats-total {
        font-weight: 600;
        margin: 1rem 0;
        color: var(--text-primary);
        font-size: 1.125rem;
    }

    .stats-section {
        margin: 1.5rem 0;
        text-align: left;
    }

    .stats-section p {
        font-weight: 600;
        color: var(--text-primary);
        margin-bottom: 0.75rem;
    }

    .stats-list {
        list-style: none;
        padding: 0;
        margin: 0;
    }

    .stats-item {
        padding: 0.5rem 0;
        color: var(--text-secondary);
        font-weight: 500;
        border-bottom: 1px solid var(--border-color);
    }

    .stats-item:last-child {
        border-bottom: none;
    }

    .stats-item.stats-positive {
        color: var(--success-color);
    }

    .stats-item.stats-negative {
        color: var(--error-color);
    }

    .stats-item.stats-neutral {
        color: var(--info-color);
    }

    .no-stats, .error-stats {
        text-align: center;
        padding: 2rem;
        color: var(--text-muted);
        background: var(--bg-card);
        border-radius: 16px;
        border: 1px solid var(--border-color);
    }

    /* Buttons */
    .submit-btn {
        background: linear-gradient(135deg, var(--primary-blue), var(--primary-blue-dark)) !important;
        border: none !important;
        border-radius: 16px !important;
        padding: 1rem 2rem !important;
        color: var(--text-inverse) !important;
        font-weight: 600 !important;
        font-size: 1rem !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        box-shadow: var(--shadow-md) !important;
    }

    .submit-btn:hover {
        transform: translateY(-2px) !important;
        box-shadow: var(--shadow-lg) !important;
        background: linear-gradient(135deg, var(--primary-blue-light), var(--primary-blue)) !important;
    }

    /* Footer */
    .footer {
        text-align: center;
        background: var(--block-background-fill);
        color: var(--text-inverse);
        padding: 2rem;
        border-radius: 20px;
        margin: 2rem;
        backdrop-filter: blur(10px);
    }

    .footer h3 {
        margin: 0 0 1rem;
        font-weight: 700;
    }

    .social-links {
        margin: 1rem 0;
    }

    .social-links a {
        color: var(--secondary-blue);
        text-decoration: none;
        margin: 0 1rem;
        font-weight: 600;
        transition: color 0.3s ease;
    }

    .social-links a:hover {
        color: var(--accent-blue);
        text-decoration: underline;
    }

    /* Responsive design */
    @media (max-width: 768px) {
        .header {
            margin: 1rem;
            padding: 2rem 1rem;
        }

        .header h1 {
            font-size: 2rem !important;
        }

        .header p {
            font-size: 1rem !important;
        }

        .input-container {
            margin: 0.5rem;
            padding: 1.5rem;
        }

        .sentiment-result {
            padding: 1.5rem;
        }

        .stats-container {
            padding: 1.5rem;
        }
    }

    /* Smooth transitions */
    * {
        transition: background-color 0.3s ease, border-color 0.3s ease, color 0.3s ease;
    }
    """
examples.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Sample inputs for the UI: each row is [news text, model name].
EXAMPLE_DATA = [
    [headline, model_name]
    for headline, model_name in (
        ("Fed raises interest rates amid inflation concerns", "Naive Bayes"),
        ("Apple reports record-breaking quarterly profits", "BERT"),
        ("Global markets crash as recession fears mount", "Naive Bayes"),
        ("Tesla announces breakthrough in battery technology", "BERT"),
        ("Banking sector shows mixed results this quarter", "Naive Bayes"),
    )
]
interface.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from models import ModelManager
3
+ from prediction import PredictionEngine
4
+ from stats import StatsManager
5
+ from css import get_custom_css
6
+ from examples import EXAMPLE_DATA
7
+
8
def create_interface():
    """Create and configure the Gradio interface.

    Builds the model manager and prediction engine, lays out the input column
    (text box, model selector, analyze button, examples) and the output column
    (result panel, usage statistics, refresh button), and wires the buttons to
    their handlers.

    Returns:
        tuple: ``(interface, model_manager)`` — the assembled ``gr.Blocks`` app
        and the ``ModelManager`` instance backing it.
    """
    # Initialize components
    model_manager = ModelManager()
    prediction_engine = PredictionEngine(model_manager)

    # Create interface
    with gr.Blocks(css=get_custom_css(), title="🧠 Financial Sentiment Analyzer", theme=gr.themes.Base()) as interface:
        # Header
        gr.HTML("""
        <div class="header">
            <h1>🧠 Financial News Sentiment Analyzer</h1>
            <p>Powered by AI • Analyze financial news sentiment with advanced ML models</p>
        </div>
        """)

        # Main content
        with gr.Row():
            # Input column
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    lines=4,
                    placeholder="💼 Enter financial news headline or text...\n\nExample: 'Apple stock surges after strong earnings report'",
                    label="📝 Financial News Text",
                    elem_classes=["input-text"]
                )

                model_choice = gr.Radio(
                    choices=["Naive Bayes", "BERT"],
                    value=model_manager.default_model,
                    label="🤖 Select AI Model",
                    info="Choose between Naive Bayes (fast) or BERT (advanced)"
                )

                predict_btn = gr.Button(
                    "🔍 Analyze Sentiment",
                    variant="primary",
                    elem_classes=["submit-btn"]
                )

                # Examples
                gr.Examples(
                    examples=EXAMPLE_DATA,
                    inputs=[text_input, model_choice],
                    label="💡 Try these examples:"
                )

            # Output column
            with gr.Column(scale=1):
                output = gr.HTML(
                    value="<div style='text-align: center; padding: 3rem; color: var(--text-inverse);'>👆 Enter text and click analyze to see results</div>",
                    label="📊 Analysis Result"
                )

                stats_display = gr.HTML(
                    label="📈 Usage Statistics"
                )

                refresh_stats_btn = gr.Button("🔄 Refresh Stats", variant="secondary")

        # Event handlers
        predict_btn.click(
            fn=prediction_engine.predict_sentiment,
            inputs=[text_input, model_choice],
            outputs=output
        )

        refresh_stats_btn.click(
            fn=StatsManager.get_stats,
            inputs=None,
            outputs=stats_display
        )

        # Load initial stats
        interface.load(StatsManager.get_stats, None, stats_display)

        # Footer
        gr.HTML("""
        <div class="footer">
            <h3>👨‍💻 Developed by Yousif Al Nasser</h3>
            <div class="social-links">
                <a href="https://yousif.engineer" target="_blank">🌐 Portfolio Website</a>
                <a href="https://linkedin.com/in/yalnasser" target="_blank">💼 LinkedIn Profile</a>
            </div>
        </div>
        """)

    return interface, model_manager
models.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
4
+ from config import Config
5
+
6
class ModelManager:
    """Load and hold the BERT and Naive Bayes sentiment models.

    Both models are loaded on construction. A model that fails to load is left
    as ``None`` and the failure is printed rather than raised, so the
    application can still start with whichever model is available.
    """

    def __init__(self):
        self.bert_model = None
        self.bert_tokenizer = None
        self.naive_bayes_model = None
        self._load_models()

    def _load_models(self):
        """Attempt to load every supported model."""
        self._load_bert_model()
        self._load_naive_bayes_model()

    def _load_bert_model(self):
        """Load the BERT tokenizer and classifier from ``Config.BERT_MODEL_REPO_ID``."""
        try:
            print(f"Loading BERT model from {Config.BERT_MODEL_REPO_ID}...")
            tokenizer = AutoTokenizer.from_pretrained(Config.BERT_MODEL_REPO_ID)
            model = TFAutoModelForSequenceClassification.from_pretrained(Config.BERT_MODEL_REPO_ID)
        except Exception as e:
            print(f"❌ Error loading BERT model: {e}")
            return
        # Assign both together so a tokenizer is never kept without its model.
        self.bert_tokenizer = tokenizer
        self.bert_model = model
        print("✅ BERT model loaded successfully!")

    def _load_naive_bayes_model(self):
        """Load the Naive Bayes model from ``Config.NAIVE_BAYES_MODEL_PATH`` if it exists."""
        try:
            if os.path.exists(Config.NAIVE_BAYES_MODEL_PATH):
                self.naive_bayes_model = joblib.load(Config.NAIVE_BAYES_MODEL_PATH)
                print("✅ Naive Bayes model loaded successfully")
            else:
                print(f"⚠️ Naive Bayes model not found at {Config.NAIVE_BAYES_MODEL_PATH}")
        except Exception as e:
            print(f"❌ Error loading Naive Bayes model: {e}")

    @property
    def models_available(self):
        """``True`` when at least one model is loaded.

        Compares against ``None`` instead of relying on the model object's
        truthiness, which a model class may define arbitrarily (for example
        through ``__len__``).
        """
        return self.bert_model is not None or self.naive_bayes_model is not None

    @property
    def default_model(self):
        """Name of the model to preselect: Naive Bayes when loaded, otherwise BERT."""
        return "Naive Bayes" if self.naive_bayes_model is not None else "BERT"
prediction.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import requests
3
+ from config import Config
4
+ from utils import preprocess
5
+ from models import ModelManager
6
+
7
class PredictionEngine:
    """Run sentiment predictions with the loaded models and render them as HTML."""

    # Upper bound (seconds) on the usage-logging HTTP call, so a slow or
    # unreachable endpoint cannot stall the response shown to the user.
    LOG_TIMEOUT_SECONDS = 5

    def __init__(self, model_manager: "ModelManager"):
        self.model_manager = model_manager

    def predict_with_bert(self, text: str):
        """Classify ``text`` with the BERT model.

        Returns:
            tuple: ``(class_index, label, confidence)``. If anything fails, the
            error is printed and ``(1, "neutral", 0.5)`` is returned.
        """
        try:
            inputs = self.model_manager.bert_tokenizer(
                text, return_tensors="tf", truncation=True, padding=True
            )
            outputs = self.model_manager.bert_model(**inputs)
            logits = outputs.logits.numpy()[0]
            prediction = int(tf.math.argmax(logits).numpy())
            # Softmax probability of the predicted class.
            confidence = float(tf.nn.softmax(logits)[prediction].numpy())
            label = Config.LABEL_MAP.get(prediction, "neutral")
            return prediction, label, confidence
        except Exception as e:
            print(f"❌ BERT prediction error: {e}")
            return 1, "neutral", 0.5

    def predict_with_naive_bayes(self, text: str):
        """Classify ``text`` with the Naive Bayes model.

        Returns:
            tuple: ``(prediction, label, confidence)``. If anything fails, the
            error is printed and ``(1, "neutral", 0.5)`` is returned.
        """
        try:
            model = self.model_manager.naive_bayes_model
            cleaned = preprocess(text, model_type="naive_bayes")
            prediction = model.predict([cleaned])[0]
            label = Config.LABEL_MAP.get(prediction, "unknown")
            return prediction, label, self._naive_bayes_confidence(model, cleaned)
        except Exception as e:
            print(f"❌ Naive Bayes prediction error: {e}")
            return 1, "neutral", 0.5

    @staticmethod
    def _naive_bayes_confidence(model, cleaned, default=0.85):
        """Return the highest class probability for ``cleaned``.

        Falls back to ``default`` (the previous static value) when the model
        has no ``predict_proba`` or the call fails.
        """
        predict_proba = getattr(model, "predict_proba", None)
        if not callable(predict_proba):
            return default
        try:
            return float(max(predict_proba([cleaned])[0]))
        except Exception as e:
            print(f"⚠️ Could not compute Naive Bayes confidence: {e}")
            return default

    def predict_sentiment(self, text: str, model_choice: str):
        """Validate the input, run the chosen model, log the result, return HTML.

        Args:
            text: Text to analyze. ``None``, non-string and blank input yield a
                warning message instead of a prediction.
            model_choice: ``"Naive Bayes"`` or ``"BERT"``.

        Returns:
            str: HTML snippet with the result, or a warning/error message.
        """
        # isinstance guard: calling .strip() on None would raise AttributeError.
        if not isinstance(text, str) or not text.strip():
            return self._html_message("⚠️ Please enter some text to analyze.", "warning")

        if model_choice == "Naive Bayes":
            if self.model_manager.naive_bayes_model is None:
                return self._html_message("Naive Bayes model not available.", "error")
            _, label, conf = self.predict_with_naive_bayes(text)
        elif model_choice == "BERT":
            if self.model_manager.bert_model is None:
                return self._html_message("BERT model not available.", "error")
            _, label, conf = self.predict_with_bert(text)
        else:
            return self._html_message("Invalid model selection.", "error")

        self._log_to_sheet(text, model_choice, label, conf)
        return self._render_result(label, model_choice, conf)

    def _log_to_sheet(self, text, model, sentiment, confidence):
        """Best-effort POST of the prediction to the configured endpoint.

        Failures are printed and never propagate to the caller.
        """
        try:
            requests.post(
                Config.GOOGLE_SHEET_ENDPOINT,
                json={
                    "token": Config.GOOGLE_SHEET_TOKEN,
                    "text": text,
                    "model_used": model,
                    "sentiment": sentiment,
                    "confidence": confidence
                },
                # requests has no default timeout; without one a hung endpoint
                # would block this call indefinitely.
                timeout=self.LOG_TIMEOUT_SECONDS,
            )
        except Exception as e:
            print(f"⚠️ Logging failed: {e}")

    def _render_result(self, label, model, confidence):
        """Render a prediction as the result card HTML."""
        emoji = {"positive": "📈", "negative": "📉", "neutral": "📊"}.get(label, "📊")
        return f"""
        <div class="sentiment-result" data-sentiment="{label}">
            <h2 style="color: white;">{emoji} Sentiment Result</h2>
            <p class="sentiment-label">{label.upper()}</p>
            <p class="model-info">Model: {model}</p>
            <p class="confidence-info">Confidence: {confidence:.2%}</p>
        </div>
        """

    def _html_message(self, msg, level):
        """Wrap ``msg`` in a result box styled by ``level`` (e.g. warning, error)."""
        return f"<div class='sentiment-result {level}'>{msg}</div>"
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core libraries
2
+ pandas
3
+ numpy
4
+ joblib
5
+
6
+ # Gradio interface
7
+ gradio
8
+
9
+ # Hugging Face Transformers
10
+ transformers
11
+ tensorflow # for TFAutoModelForSequenceClassification
12
+
13
+ # Requests for API logging
14
+ requests
15
+
16
+ # Optional: required by utils.py for advanced preprocessing
17
+ regex
18
+
19
+ # Optional: not needed to run locally
20
+ gunicorn # if deploying on platforms like Heroku or similar
stats.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+
4
class StatsManager:
    """Render usage statistics read from ``usage_stats.csv`` as an HTML snippet."""

    @staticmethod
    def get_stats():
        """Return the statistics HTML for ``usage_stats.csv`` in the working directory.

        A missing file, a completely empty file (no header row) and a file with
        no data rows all produce the "no usage data" message. Any other failure
        is reported in an error box instead of being raised.

        Returns:
            str: HTML snippet.
        """
        from html import escape

        try:
            if os.path.exists("usage_stats.csv"):
                df = pd.read_csv("usage_stats.csv")
                return StatsManager._render_html(df)
            return "<div class='no-stats'>📊 No usage data yet.</div>"
        except pd.errors.EmptyDataError:
            # read_csv raises this for a zero-byte file; that is "no data", not an error.
            return "<div class='no-stats'>📊 No usage data yet.</div>"
        except Exception as e:
            return f"<div class='error-stats'>❌ Error: {escape(str(e))}</div>"

    @staticmethod
    def _render_html(df: pd.DataFrame):
        """Build the statistics HTML from a frame with ``sentiment`` and ``model_used`` columns.

        Args:
            df: Usage log, one row per prediction.

        Returns:
            str: The total row count followed by per-sentiment and per-model
            counts with percentages, or the "no usage data" message when
            ``df`` is empty.

        Raises:
            KeyError: If a required column is missing.
        """
        from html import escape

        if df.empty:
            return "<div class='no-stats'>📊 No usage data yet.</div>"

        total = len(df)
        sentiments = df['sentiment'].value_counts()
        models = df['model_used'].value_counts()

        # Cell values come from logged data, so they are escaped before being
        # placed in markup; str() keeps non-string cells from breaking .title().
        parts = [f"<div class='stats-container'><h3>📈 Stats</h3><p>Total: {total}</p><ul>"]
        parts.extend(
            f"<li>{escape(str(s).title())}: {c} ({(c/total)*100:.1f}%)</li>"
            for s, c in sentiments.items()
        )
        parts.append("</ul><ul>")
        parts.extend(
            f"<li>{escape(str(m))}: {c} ({(c/total)*100:.1f}%)</li>"
            for m, c in models.items()
        )
        parts.append("</ul></div>")
        return "".join(parts)
usage_stats.csv ADDED
File without changes
utils.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+ import re
3
+ import string
4
+ from typing import Optional
5
+
6
def preprocess(text: str, model_type: str = "naive_bayes") -> str:
    """
    Clean raw text before it is passed to a sentiment model.

    For ``"bert"`` the cleaning is light: URLs and e-mail addresses are
    removed and runs of ``.``, ``!`` and ``?`` are shortened, while case and
    punctuation are kept. For any other model type the text is lowercased,
    URLs/e-mails are removed, characters other than word characters,
    whitespace and ``$ % . -`` become spaces, and percentages / dollar
    amounts are replaced by the placeholder tokens ``PERCENTAGE`` and
    ``DOLLAR_AMOUNT``.

    NOTE(review): the placeholder patterns previously almost never matched
    (see inline comments). If the Naive Bayes model was fitted on text
    produced by the old patterns, re-validate it against this output.

    Args:
        text (str): Input text to preprocess
        model_type (str): Type of model ("naive_bayes" or "bert")

    Returns:
        str: Preprocessed text, or "" for empty or non-string input
    """
    if not text or not isinstance(text, str):
        return ""

    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    # TLD class is [A-Za-z]; the former [A-Z|a-z] also accepted a literal '|'.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

    text = text.strip()
    is_bert = model_type.lower() == "bert"

    if not is_bert:
        # Naive Bayes path works on lowercased text.
        text = text.lower()

    text = re.sub(url_pattern, '', text)
    text = re.sub(email_pattern, '', text)

    if is_bert:
        # Shorten repeated punctuation: 3+ dots -> '...', 2+ '!' -> '!',
        # 2+ '?' -> '?'.
        text = re.sub(r'[.]{3,}', '...', text)
        text = re.sub(r'[!]{2,}', '!', text)
        text = re.sub(r'[?]{2,}', '?', text)
    else:
        # Drop special symbols but keep dollar signs, percent signs, dots
        # and hyphens.
        text = re.sub(r'[^\w\s$%.-]', ' ', text)

        # No trailing \b after '%': '%' is a non-word character, so '%\b'
        # only matched when a word character followed it ("10%x") and missed
        # the usual "10% " / "10%." cases.
        text = re.sub(r'\b\d+(?:\.\d+)?%', 'PERCENTAGE', text)

        # The text is already lowercase here, so the magnitude suffix must be
        # matched as k/m/b rather than K/M/B.
        text = re.sub(r'\$\d+\.?\d*[kmb]?\b', 'DOLLAR_AMOUNT', text)

    # Collapse whitespace last so gaps left by removed URLs/e-mails vanish.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
67
+
68
def clean_financial_text(text: str) -> str:
    """
    Lowercase financial text and expand common abbreviations.

    Only whole-word occurrences are expanded, so words that merely contain
    an abbreviation (e.g. "federal", "netflix") are left untouched.

    Args:
        text (str): Financial text to clean

    Returns:
        str: Lowercased text with abbreviations expanded, or "" for empty input
    """
    if not text:
        return ""

    # Common financial abbreviations and their expansions
    financial_terms = {
        'q1': 'first quarter',
        'q2': 'second quarter',
        'q3': 'third quarter',
        'q4': 'fourth quarter',
        'yoy': 'year over year',
        'qoq': 'quarter over quarter',
        'ipo': 'initial public offering',
        'ceo': 'chief executive officer',
        'cfo': 'chief financial officer',
        'fed': 'federal reserve',
        'gdp': 'gross domestic product',
        'etf': 'exchange traded fund',
    }

    # One pass with word boundaries: avoids rewriting substrings of longer
    # words and avoids re-expanding text produced by an earlier replacement.
    abbreviation_pattern = (
        r'\b(?:' + '|'.join(re.escape(abbrev) for abbrev in financial_terms) + r')\b'
    )
    return re.sub(
        abbreviation_pattern,
        lambda match: financial_terms[match.group(0)],
        text.lower(),
    )
102
+
103
def extract_financial_entities(text: str) -> dict:
    """
    Extract financial entities from text

    Args:
        text (str): Input text

    Returns:
        dict: Lists of matched substrings under the keys 'percentages',
        'dollar_amounts', 'stock_symbols', 'quarters' and 'years'. All lists
        are empty for empty or non-string input.
    """
    entities = {
        'percentages': [],
        'dollar_amounts': [],
        'stock_symbols': [],
        'quarters': [],
        'years': []
    }

    if not text or not isinstance(text, str):
        return entities

    # No trailing \b after '%': '%' is a non-word character, so '%\b' would
    # require a word character right after it and miss "10% " or "10%.".
    entities['percentages'] = re.findall(r'\b\d+\.?\d*%', text)

    # Dollar amounts, allowing thousands separators ("$1,000") and an
    # optional K/M/B magnitude suffix.
    entities['dollar_amounts'] = re.findall(
        r'\$\d[\d,]*(?:\.\d+)?[KMB]?\b', text
    )

    # Potential stock symbols: any standalone run of 2-5 uppercase letters,
    # so ordinary acronyms are reported as well.
    entities['stock_symbols'] = re.findall(r'\b[A-Z]{2,5}\b', text)

    # Quarters written as Q1..Q4 or 1Q..4Q, case-insensitive
    entities['quarters'] = re.findall(
        r'\bQ[1-4]\b|\b[1-4]Q\b', text, re.IGNORECASE
    )

    # Four-digit years starting with 20
    entities['years'] = re.findall(r'\b20\d{2}\b', text)

    return entities
142
+
143
def get_text_stats(text: str) -> dict:
    """
    Compute simple size metrics for a piece of text

    Words are whitespace-separated tokens; sentences are the non-blank
    fragments left after splitting on runs of '.', '!' or '?'.

    Args:
        text (str): Input text

    Returns:
        dict: 'word_count', 'char_count', 'sentence_count' and
        'avg_word_length' (all zero for empty input)
    """
    if not text:
        return dict.fromkeys(
            ('word_count', 'char_count', 'sentence_count', 'avg_word_length'),
            0,
        )

    tokens = text.split()
    fragments = re.split(r'[.!?]+', text)
    non_blank_fragments = sum(1 for fragment in fragments if fragment.strip())

    if tokens:
        mean_token_length = sum(map(len, tokens)) / len(tokens)
    else:
        # Whitespace-only text has no words to average over.
        mean_token_length = 0

    return {
        'word_count': len(tokens),
        'char_count': len(text),
        'sentence_count': non_blank_fragments,
        'avg_word_length': mean_token_length,
    }
172
+
173
def validate_input(text: str, min_length: int = 5, max_length: int = 1000) -> tuple[bool, str]:
    """
    Validate user input

    Both length limits are applied to the text with surrounding whitespace
    removed, so padding neither helps reach the minimum nor counts against
    the maximum.

    Args:
        text (str): Input text to validate
        min_length (int): Minimum required length
        max_length (int): Maximum allowed length

    Returns:
        tuple: (is_valid, error_message); error_message is "" when valid
    """
    # Non-string input (e.g. None from an empty form field) counts as empty.
    if not isinstance(text, str):
        return False, "Text cannot be empty"

    stripped = text.strip()
    if not stripped:
        return False, "Text cannot be empty"

    if len(stripped) < min_length:
        return False, f"Text must be at least {min_length} characters long"

    if len(stripped) > max_length:
        return False, f"Text cannot exceed {max_length} characters"

    # Require at least one letter or digit anywhere in the text. The earlier
    # whole-string pattern was defeated by any whitespace ("!!! ???" passed).
    if not any(char.isalnum() for char in stripped):
        return False, "Text must contain alphanumeric characters"

    return True, ""