Commit ·
9c30c74
1
Parent(s): fb8e5c3
Complete migration to SQL-based approach
Browse files- Removed all RAG-related code and files
- Added quality evaluation system (auto-improvement if score < 80)
- Changed background from green to blue
- Added GitHub and CV links in header
- Added comprehensive documentation (ARCHITECTURE.md)
- Code cleanup and optimization
- Added detailed comments throughout codebase
- Removed unnecessary dependencies
- Updated Dockerfile for SQL-only approach
- .query_history.json +1 -38
- ARCHITECTURE.md +185 -182
- CONTRIBUTING.md +1 -1
- DEPLOYMENT_GUIDE.md +1 -1
- Dockerfile +8 -10
- LOCAL_SETUP_GUIDE.md +0 -329
- MIGRATION_TO_MAIN.md +68 -0
- PROJECT_COMPLETE.md +0 -484
- QUICK_START.md +0 -289
- README.md +138 -214
- README_TESTING_GUIDE.md +0 -520
- SESSION_SUMMARY.md +0 -371
- SQL_APPROACH_README.md +0 -172
- STATUS_REPORT.md +0 -501
- TESTING_CHECKLIST.md +0 -472
- app/__init__.py +10 -5
- app/analysis.py +0 -97
- app/api.py +92 -216
- app/config.py +22 -7
- app/embedding.py +0 -35
- app/preprocess.py +0 -33
- app/rag_service.py +0 -1057
- app/sentiment.py +0 -53
- app/sql_service.py +348 -33
- app/static/app.js +239 -59
- app/static/index.html +49 -22
- app/topics.py +0 -22
- app/vector_store.py +0 -69
- requirements.txt +4 -9
- scripts/precompute_index.py +0 -29
- scripts/smoke_check.py +6 -3
- scripts/test_queries.py +0 -48
- scripts/validate_local.py +0 -314
.query_history.json
CHANGED
|
@@ -1,38 +1 @@
|
|
| 1 |
-
[
|
| 2 |
-
{
|
| 3 |
-
"query": "איך המשתמשים מרגישים לגבי השירות?",
|
| 4 |
-
"response": {
|
| 5 |
-
"summary": "את הנוחות של המשתמש נוחות השירות והאינטואיטיביות של הממשק שירות לקוחות על הפנים מענה זמין יותר בשירות לקוחות שירות קל וידידותי למשתמש "
|
| 6 |
-
}
|
| 7 |
-
},
|
| 8 |
-
{
|
| 9 |
-
"query": "איך המשתמשים מרגישים לגבי השירות?",
|
| 10 |
-
"response": {
|
| 11 |
-
"summary": "את הנוחות של המשתמש נוחות השירות והאינטואיטיביות של הממשק שירות לקוחות על הפנים מענה זמין יותר בשירות לקוחות שירות קל וידידותי למשתמש "
|
| 12 |
-
}
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"query": "איך המשתמשים מרגישים לגבי השירות?",
|
| 16 |
-
"response": {
|
| 17 |
-
"summary": "את הנוחות של המשתמש נוחות השירות והאינטואיטיביות של הממשק שירות לקוחות על הפנים מענה זמין יותר בשירות לקוחות שירות קל וידידותי למשתמש "
|
| 18 |
-
}
|
| 19 |
-
},
|
| 20 |
-
{
|
| 21 |
-
"query": "בלה",
|
| 22 |
-
"response": {
|
| 23 |
-
"summary": "תוגה בלה בלה בלה זריז סבבה סבבה"
|
| 24 |
-
}
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"query": "מה שלומך אחי?",
|
| 28 |
-
"response": {
|
| 29 |
-
"summary": "אלופים אתם סיבכתם עם הורה 1 הורה 2 אבתי מאוד\nקליל, פשוט וזריז אתם אלופים אתם אלופים"
|
| 30 |
-
}
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"query": "כמה אחוזים של משתמשים אוהב לאכול שוקולד?",
|
| 34 |
-
"response": {
|
| 35 |
-
"summary": "0 משובים מכילים את הביטוי 'של משתמשים אוהב לאכול שוקולד'."
|
| 36 |
-
}
|
| 37 |
-
}
|
| 38 |
-
]
|
|
|
|
| 1 |
+
[]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ARCHITECTURE.md
CHANGED
|
@@ -1,229 +1,232 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
|
| 4 |
|
| 5 |
-
|
| 6 |
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
-
┌─────────────┐
|
| 11 |
-
│ משתמש │
|
| 12 |
-
│ (דפדפן) │
|
| 13 |
-
└──────┬──────┘
|
| 14 |
-
│ HTTP
|
| 15 |
-
▼
|
| 16 |
-
┌─────────────────────────────────────┐
|
| 17 |
-
│ FastAPI Server │
|
| 18 |
-
│ (app/api.py) │
|
| 19 |
-
│ - /query - שאילתות חופשיות │
|
| 20 |
-
│ - /topics - ניתוח נושאים │
|
| 21 |
-
│ - /sentiment - ניתוח רגשות │
|
| 22 |
-
│ - /ingest - בניית אינדקס │
|
| 23 |
-
│ - /health - בדיקת בריאות │
|
| 24 |
-
└──────┬──────────────────────────────┘
|
| 25 |
-
│
|
| 26 |
-
▼
|
| 27 |
-
┌─────────────────────────────────────┐
|
| 28 |
-
│ RAG Service │
|
| 29 |
-
│ (app/rag_service.py) │
|
| 30 |
-
│ - זיהוי כוונה (ספירה/חיפוש) │
|
| 31 |
-
│ - חיפוש וקטורי (FAISS) │
|
| 32 |
-
│ - סינתזה עם LLM (Gemini/OpenAI) │
|
| 33 |
-
└──────┬──────────────────────────────┘
|
| 34 |
-
│
|
| 35 |
-
├─────────────────┬──────────────────┐
|
| 36 |
-
▼ ▼ ▼
|
| 37 |
-
┌─────────────┐ ┌──────────────┐ ┌──────────────┐
|
| 38 |
-
│ Embeddings │ │ Vector Store │ │ Analysis │
|
| 39 |
-
│ (embedding) │ │(vector_store)│ │ (analysis) │
|
| 40 |
-
└─────────────┘ └──────────────┘ └──────────────┘
|
| 41 |
-
```
|
| 42 |
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
##
|
| 46 |
|
| 47 |
-
|
| 48 |
-
- קורא את קובץ `Feedback.csv`
|
| 49 |
-
- מחזיר DataFrame עם כל המשובים
|
| 50 |
|
| 51 |
-
|
| 52 |
-
-
|
| 53 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
**
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
#
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
**
|
| 65 |
-
- משתמש ב-
|
| 66 |
-
-
|
| 67 |
-
- שומר מ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
|
| 72 |
-
- מזהה כוונת שאילתה (ספירה/חיפוש/ניתוח)
|
| 73 |
-
- מבצע ספירות מדויקות מהנתונים
|
| 74 |
-
- מזהה מילות מפתח (תודה, תלונות, וכו')
|
| 75 |
|
| 76 |
-
|
| 77 |
-
- מנתח רגשות במשובים
|
| 78 |
-
- משתמש במודל רב-לשוני
|
| 79 |
|
| 80 |
-
**
|
| 81 |
-
- מ
|
| 82 |
-
- מ
|
| 83 |
|
| 84 |
-
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
**
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
-
1. **זיהוי כוונה** - האם זו שאילתת ספירה או שאילתת ניתוח?
|
| 90 |
-
2. **חיפוש וקטורי** - מוצא את המשובים הרלוונטיים ביותר
|
| 91 |
-
3. **סינתזה** - משתמש ב-LLM (Gemini/OpenAI) ליצירת תשובה מקצועית
|
| 92 |
-
4. **ולידציה** - מוודא שהתשובה מבוססת על הנתונים
|
| 93 |
|
| 94 |
-
|
| 95 |
-
- תמיכה בשאילתות ספירה מדויקות
|
| 96 |
-
- תמיכה בשאילתות ניתוח מעמיקות
|
| 97 |
-
- תשובות מפורטות ומחוברות לנתונים
|
| 98 |
-
- הבנת הקונטקסט (דירוגים, שירותים)
|
| 99 |
|
| 100 |
-
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
-
|
| 105 |
-
-
|
| 106 |
-
- שו
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
-
|
| 110 |
-
-
|
| 111 |
-
-
|
| 112 |
-
- `POST /ingest` - בניית אינדקס
|
| 113 |
-
- `POST /health` - בדיקת בריאות
|
| 114 |
|
| 115 |
-
##
|
| 116 |
|
| 117 |
-
**
|
| 118 |
-
- ממשק ווב פשוט ויפה
|
| 119 |
-
- תמיכה בעברית (RTL)
|
| 120 |
-
- הצגת תשובות והיסטוריה
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
|
| 127 |
-
Feedback.csv
|
| 128 |
-
│
|
| 129 |
-
▼
|
| 130 |
-
data_loader.py → DataFrame
|
| 131 |
-
│
|
| 132 |
-
▼
|
| 133 |
-
preprocess.py → טקסטים נקיים
|
| 134 |
-
│
|
| 135 |
-
▼
|
| 136 |
-
embedding.py → וקטורים (embeddings)
|
| 137 |
-
│
|
| 138 |
-
▼
|
| 139 |
-
vector_store.py → FAISS Index
|
| 140 |
-
│
|
| 141 |
-
▼
|
| 142 |
-
.vector_index/faiss.index + meta.parquet
|
| 143 |
-
```
|
| 144 |
|
| 145 |
-
|
| 146 |
|
| 147 |
-
```
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
│
|
| 156 |
-
├─→ analysis.py (זיהוי כוונה)
|
| 157 |
-
│
|
| 158 |
-
├─→ embedding.py (המרת שאילתה לוקטור)
|
| 159 |
-
│
|
| 160 |
-
├─→ vector_store.py (חיפוש וקטורי)
|
| 161 |
-
│
|
| 162 |
-
└─→ LLM (Gemini/OpenAI) - סינתזה
|
| 163 |
-
│
|
| 164 |
-
▼
|
| 165 |
-
תשובה מקצועית למשתמש
|
| 166 |
```
|
| 167 |
|
| 168 |
-
##
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
```
|
| 174 |
|
| 175 |
-
|
| 176 |
-
- `Text` - הטקסט המלא של המשוב
|
| 177 |
-
- `Level` - הדירוג (1-5, 5 = הטוב ביותר)
|
| 178 |
-
- `ServiceName` - שם השירות
|
| 179 |
|
| 180 |
-
|
| 181 |
-
- **
|
| 182 |
-
- **
|
|
|
|
| 183 |
|
| 184 |
-
##
|
| 185 |
|
| 186 |
-
**
|
| 187 |
-
-
|
| 188 |
-
|
| 189 |
-
-
|
|
|
|
| 190 |
|
| 191 |
-
|
| 192 |
-
- `GEMINI_API_KEY` - מפתח Gemini (מומלץ)
|
| 193 |
-
- `OPENAI_API_KEY` - מפתח OpenAI (גיבוי)
|
| 194 |
-
- `CSV_PATH` - נתיב לקובץ CSV
|
| 195 |
-
- `VECTOR_INDEX_PATH` - נתיב לאינדקס
|
| 196 |
|
| 197 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
|
|
|
|
|
|
| 202 |
|
| 203 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
- **FAISS** - חיפוש וקטורי
|
| 208 |
-
- **Pandas** - עיבוד נתונים
|
| 209 |
-
- **Google Generative AI** - Gemini LLM
|
| 210 |
-
- **OpenAI** - GPT (גיבוי)
|
| 211 |
|
| 212 |
-
##
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
4. **תשובות מבוססות נתונים** - המערכת לא ממציאה עובדות
|
| 218 |
-
5. **ולידציה כפולה** - בדיקה שהתשובות הגיוניות ומבוססות על הנתונים
|
| 219 |
|
| 220 |
-
##
|
|
|
|
| 221 |
|
| 222 |
-
|
| 223 |
-
-
|
| 224 |
-
-
|
| 225 |
-
- מאפשר שאילתות חופשיות
|
| 226 |
-
- מחזיר תשובות מקצועיות ומבוססות נתונים
|
| 227 |
|
| 228 |
-
|
| 229 |
|
|
|
|
|
|
|
|
|
| 1 |
+
# ארכיטקטורת המערכת - Feedback Analysis Agent
|
| 2 |
|
| 3 |
+
## סקירה כללית
|
| 4 |
|
| 5 |
+
המערכת היא **SQL-based Feedback Analysis Agent** שמאפשרת לשאול שאלות בשפה טבעית על משובי משתמשים ולקבל תשובות מפורטות ומבוססות נתונים.
|
| 6 |
|
| 7 |
+
## עקרונות הארכיטקטורה
|
| 8 |
|
| 9 |
+
המערכת מבוססת על **4 שלבים עיקריים**:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
1. **ניתוח שאילתה** - LLM מנתח את שאלת המשתמש
|
| 12 |
+
2. **יצירת שאילתות SQL** - LLM יוצר 1-5 שאילתות SQL רלוונטיות
|
| 13 |
+
3. **ביצוע שאילתות** - שאילתות SQL מבוצעות על הנתונים
|
| 14 |
+
4. **סינתזה ותשובה** - LLM יוצר תשובה מפורטת מהתוצאות, כולל בדיקת איכות אוטומטית
|
| 15 |
|
| 16 |
+
## רכיבי המערכת
|
| 17 |
|
| 18 |
+
### 1. Backend (Python/FastAPI)
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
#### `app/api.py`
|
| 21 |
+
- **תפקיד**: FastAPI application - נקודת הכניסה הראשית
|
| 22 |
+
- **Endpoints**:
|
| 23 |
+
- `POST /query-sql` - שאילתות עיקריות (הגישה היחידה)
|
| 24 |
+
- `POST /health` - בדיקת תקינות השרת
|
| 25 |
+
- `GET /history` - היסטוריית שאלות
|
| 26 |
+
- `POST /history/clear` - ניקוי היסטוריה
|
| 27 |
+
- `GET /` - ממשק משתמש (frontend)
|
| 28 |
|
| 29 |
+
#### `app/sql_service.py`
|
| 30 |
+
- **תפקיד**: הליבה של המערכת - מטפל בכל תהליך הניתוח
|
| 31 |
+
- **מחלקות**:
|
| 32 |
+
- `SQLFeedbackService` - השירות הראשי
|
| 33 |
+
- `SQLQueryResult` - תוצאה של שאילתת SQL אחת
|
| 34 |
+
- `AnalysisResult` - תוצאה מלאה של ניתוח
|
| 35 |
|
| 36 |
+
**תהליך העבודה**:
|
| 37 |
+
```python
|
| 38 |
+
analyze_query(query)
|
| 39 |
+
→ _generate_sql_queries() # שלב 1: יצירת שאילתות SQL
|
| 40 |
+
→ _execute_sql_queries() # שלב 2: ביצוע שאילתות
|
| 41 |
+
→ _synthesize_answer() # שלב 3: יצירת תשובה
|
| 42 |
+
→ _evaluate_answer_quality() # בדיקת איכות (אם < 80, שיפור אוטומטי)
|
| 43 |
+
→ _generate_visualizations() # שלב 4: יצירת גרפים
|
| 44 |
+
```
|
| 45 |
|
| 46 |
+
**פונקציות מפתח**:
|
| 47 |
+
- `_generate_sql_queries()` - משתמש ב-LLM ליצירת שאילתות SQL
|
| 48 |
+
- `_execute_sql_queries()` - מריץ שאילתות על SQLite in-memory
|
| 49 |
+
- `_synthesize_answer()` - יוצר תשובה מפורטת מהתוצאות
|
| 50 |
+
- `_evaluate_answer_quality()` - בודק איכות תשובה (0-100)
|
| 51 |
+
- `_generate_visualizations()` - יוצר מפרטי גרפים
|
| 52 |
+
|
| 53 |
+
#### `app/data_loader.py`
|
| 54 |
+
- **תפקיד**: טעינת נתונים מ-CSV
|
| 55 |
+
- **פונקציה**: `load_feedback()` - טוען ומנקה את קובץ ה-CSV
|
| 56 |
+
|
| 57 |
+
#### `app/config.py`
|
| 58 |
+
- **תפקיד**: הגדרות מערכת
|
| 59 |
+
- **מכיל**: API keys, נתיבי קבצים, שמות עמודות
|
| 60 |
+
|
| 61 |
+
### 2. Frontend (HTML/CSS/JavaScript)
|
| 62 |
+
|
| 63 |
+
#### `app/static/index.html`
|
| 64 |
+
- **תפקיד**: ממשק משתמש
|
| 65 |
+
- **תכונות**:
|
| 66 |
+
- שדה שאילתה
|
| 67 |
+
- הצגת תשובות
|
| 68 |
+
- הצגת שאילתות SQL ותוצאות
|
| 69 |
+
- גרפים ויזואליים
|
| 70 |
+
- היסטוריית שאלות
|
| 71 |
+
|
| 72 |
+
#### `app/static/app.js`
|
| 73 |
+
- **תפקיד**: לוגיקת frontend
|
| 74 |
+
- **פונקציות מפתח**:
|
| 75 |
+
- `sendQuery()` - שליחת שאילתה לשרת
|
| 76 |
+
- `showVisualizations()` - הצגת גרפים
|
| 77 |
+
- `getChartConfig()` - הגדרת גרפים (Chart.js)
|
| 78 |
+
- `formatSQLResults()` - עיצוב תוצאות SQL
|
| 79 |
+
|
| 80 |
+
## זרימת נתונים
|
| 81 |
|
| 82 |
+
```
|
| 83 |
+
משתמש → Frontend → API (/query-sql) → SQLFeedbackService
|
| 84 |
+
↓
|
| 85 |
+
[1] _generate_sql_queries()
|
| 86 |
+
↓
|
| 87 |
+
[2] _execute_sql_queries()
|
| 88 |
+
↓
|
| 89 |
+
[3] _synthesize_answer()
|
| 90 |
+
↓
|
| 91 |
+
[4] _evaluate_answer_quality()
|
| 92 |
+
↓ (אם < 80)
|
| 93 |
+
[5] שיפור אוטומטי
|
| 94 |
+
↓
|
| 95 |
+
[6] _generate_visualizations()
|
| 96 |
+
↓
|
| 97 |
+
← AnalysisResult
|
| 98 |
+
↓
|
| 99 |
+
← JSON Response
|
| 100 |
+
↓
|
| 101 |
+
Frontend → משתמש
|
| 102 |
+
```
|
| 103 |
|
| 104 |
+
## LLM Integration
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
המערכת תומכת ב-2 LLM providers:
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
1. **Google Gemini** (מועדף)
|
| 109 |
+
- מודל: `gemini-2.0-flash`
|
| 110 |
+
- Fallback אוטומטי ל-OpenAI אם לא זמין
|
| 111 |
|
| 112 |
+
2. **OpenAI**
|
| 113 |
+
- מודל: `gpt-4o-mini`
|
| 114 |
+
- Fallback אם Gemini לא זמין
|
| 115 |
|
| 116 |
+
**שימוש ב-LLM ב-3 מקומות**:
|
| 117 |
+
1. יצירת שאילתות SQL (`_generate_sql_queries`)
|
| 118 |
+
2. סינתזה של תשובה (`_synthesize_answer`)
|
| 119 |
+
3. הערכת איכות תשובה (`_evaluate_answer_quality`)
|
| 120 |
|
| 121 |
+
## Quality Assurance
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
+
### בדיקת איכות אוטומטית
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
+
המערכת כוללת **מערכת בדיקת איכות אוטומטית**:
|
| 126 |
|
| 127 |
+
1. כל תשובה מקבלת ציון 0-100
|
| 128 |
+
2. קריטריונים:
|
| 129 |
+
- האם התשובה עונה ישירות על השאלה? (0-30 נקודות)
|
| 130 |
+
- האם התשובה מבוססת על הנתונים? (0-25 נקודות)
|
| 131 |
+
- האם התשובה מפורטת ומקיפה? (0-20 נקודות)
|
| 132 |
+
- האם התשובה ברורה ומובנת? (0-15 נקו��ות)
|
| 133 |
+
- האם התשובה כוללת תובנות עסקיות? (0-10 נקודות)
|
| 134 |
|
| 135 |
+
3. אם הציון < 80:
|
| 136 |
+
- המערכת מנסה לשפר את התשובה אוטומטית
|
| 137 |
+
- התשובה המשופרת נבדקת שוב
|
| 138 |
+
- אם הציון השתפר, התשובה המשופרת מוחזרת
|
|
|
|
|
|
|
| 139 |
|
| 140 |
+
## Visualizations
|
| 141 |
|
| 142 |
+
המערכת יוצרת **גרפים אוטומטיים** בהתבסס על תוצאות השאילתות:
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
+
- **Bar Chart** - להשוואות בין קטגוריות
|
| 145 |
+
- **Line Chart** - למגמות לאורך זמן
|
| 146 |
+
- **Scatter Plot** - לקשרים בין משתנים
|
| 147 |
+
- **Histogram** - להתפלגות נתונים
|
| 148 |
|
| 149 |
+
כל גרף כולל:
|
| 150 |
+
- הסבר על סוג הגרף
|
| 151 |
+
- צבעים מגוונים
|
| 152 |
+
- Tooltips אינטראקטיביים
|
| 153 |
|
| 154 |
+
## Database Schema
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
+
המערכת עובדת עם טבלת `Feedback`:
|
| 157 |
|
| 158 |
+
```sql
|
| 159 |
+
CREATE TABLE feedback (
|
| 160 |
+
ID INTEGER PRIMARY KEY,
|
| 161 |
+
ServiceName TEXT, -- שם השירות
|
| 162 |
+
Level INTEGER, -- דירוג 1-5
|
| 163 |
+
Text TEXT, -- טקסט המשוב
|
| 164 |
+
CreationDate TEXT -- תאריך יצירה (אופציונלי)
|
| 165 |
+
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
```
|
| 167 |
|
| 168 |
+
## Security & Configuration
|
| 169 |
|
| 170 |
+
- **API Keys**: נטענים מ-`.env` (git-ignored)
|
| 171 |
+
- **Data**: קובץ CSV נטען מהדיסק
|
| 172 |
+
- **History**: נשמר ב-`.query_history.json` (git-ignored)
|
|
|
|
| 173 |
|
| 174 |
+
## Deployment
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
+
המערכת יכולה לרוץ:
|
| 177 |
+
- **Locally**: `python run.py`
|
| 178 |
+
- **Docker**: `docker build && docker run`
|
| 179 |
+
- **Runpod**: באמצעות Dockerfile
|
| 180 |
|
| 181 |
+
## הרחבות עתידיות
|
| 182 |
|
| 183 |
+
1. **Caching** - שמירת תוצאות שאילתות נפוצות
|
| 184 |
+
2. **Multi-language** - תמיכה בשפות נוספות
|
| 185 |
+
3. **Advanced Analytics** - ניתוחים סטטיסטיים מתקדמים
|
| 186 |
+
4. **Real-time Updates** - עדכונים בזמן אמת
|
| 187 |
+
5. **Export** - ייצוא תוצאות ל-PDF/Excel
|
| 188 |
|
| 189 |
+
## שינויים והתאמות
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
+
### שינוי מודל LLM
|
| 192 |
+
ערוך ב-`app/sql_service.py`:
|
| 193 |
+
```python
|
| 194 |
+
model = genai.GenerativeModel("gemini-2.0-flash") # שנה כאן
|
| 195 |
+
```
|
| 196 |
|
| 197 |
+
### שינוי סף איכות
|
| 198 |
+
ערוך ב-`app/sql_service.py`:
|
| 199 |
+
```python
|
| 200 |
+
if score < 80: # שנה כאן (0-100)
|
| 201 |
+
```
|
| 202 |
|
| 203 |
+
### הוספת עמודות חדשות
|
| 204 |
+
ערוך ב-`app/sql_service.py` → `_get_schema_info()`:
|
| 205 |
+
```python
|
| 206 |
+
schema_info = f"""
|
| 207 |
+
טבלת Feedback מכילה את השדות הבאים:
|
| 208 |
+
- ID: ...
|
| 209 |
+
- NewColumn: ... # הוסף כאן
|
| 210 |
+
"""
|
| 211 |
+
```
|
| 212 |
|
| 213 |
+
### שינוי עיצוב Frontend
|
| 214 |
+
ערוך ב-`app/static/index.html` (CSS) ו-`app/static/app.js` (JavaScript)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
+
## Troubleshooting
|
| 217 |
|
| 218 |
+
### שגיאת "No feedback data available"
|
| 219 |
+
- ודא שקובץ `Feedback.csv` קיים
|
| 220 |
+
- ודא שהעמודות הנדרשות קיימות: ID, ServiceName, Level, Text
|
|
|
|
|
|
|
| 221 |
|
| 222 |
+
### שגיאת API Key
|
| 223 |
+
- ודא שקובץ `.env` קיים עם `GEMINI_API_KEY` או `OPENAI_API_KEY`
|
| 224 |
|
| 225 |
+
### תשובות לא איכותיות
|
| 226 |
+
- בדוק את הלוגים - המערכת מדפיסה ציוני איכות
|
| 227 |
+
- נסה לשנות את ה-prompt ב-`_synthesize_answer()`
|
|
|
|
|
|
|
| 228 |
|
| 229 |
+
## קישורים
|
| 230 |
|
| 231 |
+
- GitHub: [לעדכן]
|
| 232 |
+
- קורות חיים: [לעדכן]
|
CONTRIBUTING.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# Contributing and Usage Guide
|
| 2 |
|
| 3 |
-
This project implements a
|
| 4 |
|
| 5 |
Goals:
|
| 6 |
- Make the API easy to run locally and deploy to Runpod or any container platform.
|
|
|
|
| 1 |
# Contributing and Usage Guide
|
| 2 |
|
| 3 |
+
This project implements a SQL-based feedback analysis system using LLM-generated queries.
|
| 4 |
|
| 5 |
Goals:
|
| 6 |
- Make the API easy to run locally and deploy to Runpod or any container platform.
|
DEPLOYMENT_GUIDE.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# Deployment Guide - Runpod Cloud
|
| 2 |
|
| 3 |
-
After local testing is complete
|
| 4 |
|
| 5 |
---
|
| 6 |
|
|
|
|
| 1 |
# Deployment Guide - Runpod Cloud
|
| 2 |
|
| 3 |
+
After local testing is complete, follow this guide to deploy your Feedback Analysis Agent to Runpod.
|
| 4 |
|
| 5 |
---
|
| 6 |
|
Dockerfile
CHANGED
|
@@ -1,27 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 4 |
PYTHONUNBUFFERED=1 \
|
| 5 |
-
PIP_NO_CACHE_DIR=1
|
| 6 |
-
HF_HUB_DISABLE_TELEMETRY=1
|
| 7 |
|
| 8 |
WORKDIR /app
|
| 9 |
|
|
|
|
| 10 |
COPY requirements.txt ./
|
| 11 |
-
# Install Torch CPU wheels first to avoid heavy builds
|
| 12 |
RUN pip install --upgrade pip && \
|
| 13 |
-
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
|
| 14 |
-
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 && \
|
| 15 |
pip install --no-cache-dir -r requirements.txt --default-timeout=100
|
| 16 |
|
|
|
|
| 17 |
COPY . .
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
# These lines increase the image size but significantly reduce latency on first API call.
|
| 21 |
-
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')"
|
| 22 |
-
RUN python -c "from transformers import pipeline; pipeline('sentiment-analysis', model='nlptown/bert-base-multilingual-uncased-sentiment')"
|
| 23 |
-
|
| 24 |
EXPOSE 8000
|
| 25 |
|
|
|
|
| 26 |
CMD ["python", "run.py"]
|
| 27 |
|
|
|
|
| 1 |
+
# Dockerfile for Feedback Analysis Agent
|
| 2 |
+
# SQL-based feedback analysis system using LLM-generated queries
|
| 3 |
+
|
| 4 |
FROM python:3.10-slim
|
| 5 |
|
| 6 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 7 |
PYTHONUNBUFFERED=1 \
|
| 8 |
+
PIP_NO_CACHE_DIR=1
|
|
|
|
| 9 |
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
+
# Copy and install dependencies
|
| 13 |
COPY requirements.txt ./
|
|
|
|
| 14 |
RUN pip install --upgrade pip && \
|
|
|
|
|
|
|
| 15 |
pip install --no-cache-dir -r requirements.txt --default-timeout=100
|
| 16 |
|
| 17 |
+
# Copy application code
|
| 18 |
COPY . .
|
| 19 |
|
| 20 |
+
# Expose port
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
EXPOSE 8000
|
| 22 |
|
| 23 |
+
# Run the application
|
| 24 |
CMD ["python", "run.py"]
|
| 25 |
|
LOCAL_SETUP_GUIDE.md
DELETED
|
@@ -1,329 +0,0 @@
|
|
| 1 |
-
# 🚀 מדריך הרצה מקומית - Feedback Analysis RAG Agent
|
| 2 |
-
|
| 3 |
-
מדריך מפורט להרצת המערכת באופן מקומי על המחשב שלך.
|
| 4 |
-
|
| 5 |
-
## 📋 דרישות מוקדמות
|
| 6 |
-
|
| 7 |
-
1. **Python 3.10+** - ודא שיש לך Python מותקן:
|
| 8 |
-
```bash
|
| 9 |
-
python --version # צריך להציג 3.10 או גבוה יותר
|
| 10 |
-
```
|
| 11 |
-
|
| 12 |
-
2. **pip** - מנהל חבילות Python (מגיע עם Python)
|
| 13 |
-
|
| 14 |
-
3. **אינטרנט** - להורדת מודלים בפעם הראשונה
|
| 15 |
-
|
| 16 |
-
## 🔧 התקנה שלב אחר שלב
|
| 17 |
-
|
| 18 |
-
### שלב 1: שכפול/הורדת הפרויקט
|
| 19 |
-
|
| 20 |
-
אם עדיין לא עשית זאת:
|
| 21 |
-
```bash
|
| 22 |
-
cd /path/to/Feedback_Analysis_RAG_Agent_runpod
|
| 23 |
-
```
|
| 24 |
-
|
| 25 |
-
### שלב 2: יצירת סביבה וירטואלית
|
| 26 |
-
|
| 27 |
-
```bash
|
| 28 |
-
# יצירת סביבה וירטואלית
|
| 29 |
-
python -m venv .venv
|
| 30 |
-
|
| 31 |
-
# הפעלת הסביבה (Windows)
|
| 32 |
-
.venv\Scripts\activate
|
| 33 |
-
|
| 34 |
-
# הפעלת הסביבה (macOS/Linux)
|
| 35 |
-
source .venv/bin/activate
|
| 36 |
-
```
|
| 37 |
-
|
| 38 |
-
**הערה:** אחרי ההפעלה, תראה `(.venv)` בתחילת שורת הפקודה.
|
| 39 |
-
|
| 40 |
-
### שלב 3: התקנת תלויות
|
| 41 |
-
|
| 42 |
-
```bash
|
| 43 |
-
pip install --upgrade pip
|
| 44 |
-
pip install -r requirements.txt
|
| 45 |
-
```
|
| 46 |
-
|
| 47 |
-
**זמן משוער:** 5-10 דקות (תלוי במהירות האינטרנט)
|
| 48 |
-
|
| 49 |
-
**מה מותקן:**
|
| 50 |
-
- FastAPI - שרת API
|
| 51 |
-
- Sentence-Transformers - מודל embeddings
|
| 52 |
-
- FAISS - חיפוש וקטורי
|
| 53 |
-
- Pandas, NumPy - עיבוד נתונים
|
| 54 |
-
- ועוד...
|
| 55 |
-
|
| 56 |
-
### שלב 4: הגדרת מפתחות API (אופציונלי אבל מומלץ)
|
| 57 |
-
|
| 58 |
-
צור קובץ `.env` בתיקיית הפרויקט:
|
| 59 |
-
|
| 60 |
-
```bash
|
| 61 |
-
# Windows
|
| 62 |
-
notepad .env
|
| 63 |
-
|
| 64 |
-
# macOS/Linux
|
| 65 |
-
nano .env
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
הוסף את המפתחות שלך:
|
| 69 |
-
```
|
| 70 |
-
GEMINI_API_KEY=your_gemini_key_here
|
| 71 |
-
OPENAI_API_KEY=sk-your_openai_key_here
|
| 72 |
-
```
|
| 73 |
-
|
| 74 |
-
**למה זה חשוב?**
|
| 75 |
-
- **Gemini** - משמש ליצירת תשובות איכותיות בעברית
|
| 76 |
-
- **OpenAI** - גיבוי אם Gemini לא זמין
|
| 77 |
-
|
| 78 |
-
**ללא מפתחות:** המערכת תעבוד אבל התשובות יהיו פחות איכותיות.
|
| 79 |
-
|
| 80 |
-
### שלב 5: בניית אינדקס וקטורי (חובה!)
|
| 81 |
-
|
| 82 |
-
המערכת צריכה לבנות אינדקס מהקובץ `Feedback.csv`:
|
| 83 |
-
|
| 84 |
-
```bash
|
| 85 |
-
# שיטה 1: באמצעות הסקריפט
|
| 86 |
-
python scripts/precompute_index.py
|
| 87 |
-
|
| 88 |
-
# שיטה 2: דרך ה-API (אחרי הפעלת השרת)
|
| 89 |
-
# ראה שלב 6
|
| 90 |
-
```
|
| 91 |
-
|
| 92 |
-
**זמן משוער:** 2-5 דקות (תלוי בגודל הקובץ)
|
| 93 |
-
|
| 94 |
-
**מה קורה כאן?**
|
| 95 |
-
- קריאת `Feedback.csv`
|
| 96 |
-
- יצירת embeddings לכל משוב
|
| 97 |
-
- שמירת אינדקס FAISS ב-`.vector_index/`
|
| 98 |
-
|
| 99 |
-
**תוצאה:** תיקייה `.vector_index/` עם:
|
| 100 |
-
- `faiss.index` - האינדקס הווקטורי
|
| 101 |
-
- `meta.parquet` - מטא-דאטה
|
| 102 |
-
|
| 103 |
-
### שלב 6: הפעלת השרת
|
| 104 |
-
|
| 105 |
-
```bash
|
| 106 |
-
python run.py
|
| 107 |
-
```
|
| 108 |
-
|
| 109 |
-
**פלט צפוי:**
|
| 110 |
-
```
|
| 111 |
-
INFO: Started server process [12345]
|
| 112 |
-
INFO: Waiting for application startup.
|
| 113 |
-
INFO: Application startup complete.
|
| 114 |
-
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
|
| 115 |
-
```
|
| 116 |
-
|
| 117 |
-
**השרת רץ על:** `http://127.0.0.1:8000`
|
| 118 |
-
|
| 119 |
-
### שלב 7: פתיחת הממשק
|
| 120 |
-
|
| 121 |
-
פתח דפדפן וגש ל:
|
| 122 |
-
```
|
| 123 |
-
http://127.0.0.1:8000
|
| 124 |
-
```
|
| 125 |
-
|
| 126 |
-
**או:**
|
| 127 |
-
```
|
| 128 |
-
http://localhost:8000
|
| 129 |
-
```
|
| 130 |
-
|
| 131 |
-
**מה תראה:**
|
| 132 |
-
- ממשק יפה וצבעוני בעברית
|
| 133 |
-
- שדה לשאילתות
|
| 134 |
-
- היסטוריית שאלות
|
| 135 |
-
|
| 136 |
-
## 🧪 בדיקות
|
| 137 |
-
|
| 138 |
-
### בדיקה 1: בדיקת בריאות השרת
|
| 139 |
-
|
| 140 |
-
```bash
|
| 141 |
-
curl -X POST http://127.0.0.1:8000/health
|
| 142 |
-
```
|
| 143 |
-
|
| 144 |
-
**תגובה צפויה:**
|
| 145 |
-
```json
|
| 146 |
-
{"status":"ok"}
|
| 147 |
-
```
|
| 148 |
-
|
| 149 |
-
### בדיקה 2: שאילתה פשוטה
|
| 150 |
-
|
| 151 |
-
בממשק האינטרנט, נסה:
|
| 152 |
-
```
|
| 153 |
-
כמה משתמשים כתבו תודה?
|
| 154 |
-
```
|
| 155 |
-
|
| 156 |
-
**תגובה צפויה:** מספר מדויק של משובים המכילים תודה.
|
| 157 |
-
|
| 158 |
-
### בדיקה 3: שאילתה מורכבת
|
| 159 |
-
|
| 160 |
-
```
|
| 161 |
-
מה הנושאים המרכזיים במשובים?
|
| 162 |
-
```
|
| 163 |
-
|
| 164 |
-
**תגובה צפויה:** רשימת נושאים עם הסברים.
|
| 165 |
-
|
| 166 |
-
### בדיקה 4: API ישירות
|
| 167 |
-
|
| 168 |
-
```bash
|
| 169 |
-
curl -X POST http://127.0.0.1:8000/query \
|
| 170 |
-
-H "Content-Type: application/json" \
|
| 171 |
-
-d '{"query": "כמה משתמשים מתלוננים על אלמנטים שלא עובדים", "top_k": 5}'
|
| 172 |
-
```
|
| 173 |
-
|
| 174 |
-
## 📝 דוגמאות שאילתות
|
| 175 |
-
|
| 176 |
-
### שאילתות ספירה:
|
| 177 |
-
- "כמה משתמשים כתבו תודה?"
|
| 178 |
-
- "כמה משתמשים מתלוננים על אלמנטים שלא עובדים?"
|
| 179 |
-
- "כמה משובים יש בסך הכל?"
|
| 180 |
-
|
| 181 |
-
### שאילתות ניתוח:
|
| 182 |
-
- "מה הנושאים המרכזיים במשובים?"
|
| 183 |
-
- "תסווג את התלונות ל-5 סוגים"
|
| 184 |
-
- "אילו שירותים מקבלים את הציונים הנמוכים ביותר?"
|
| 185 |
-
|
| 186 |
-
### שאילתות חיפוש:
|
| 187 |
-
- "מה המשתמשים אומרים על הטופס?"
|
| 188 |
-
- "מה הבעיות הנפוצות ביותר?"
|
| 189 |
-
|
| 190 |
-
## 🐛 פתרון בעיות
|
| 191 |
-
|
| 192 |
-
### בעיה: "Vector index not found"
|
| 193 |
-
|
| 194 |
-
**פתרון:**
|
| 195 |
-
```bash
|
| 196 |
-
python scripts/precompute_index.py
|
| 197 |
-
```
|
| 198 |
-
|
| 199 |
-
או דרך ה-API:
|
| 200 |
-
```bash
|
| 201 |
-
curl -X POST http://127.0.0.1:8000/ingest
|
| 202 |
-
```
|
| 203 |
-
|
| 204 |
-
### בעיה: "ModuleNotFoundError"
|
| 205 |
-
|
| 206 |
-
**פתרון:**
|
| 207 |
-
```bash
|
| 208 |
-
# ודא שהסביבה הוירטואלית פעילה
|
| 209 |
-
source .venv/bin/activate # macOS/Linux
|
| 210 |
-
# או
|
| 211 |
-
.venv\Scripts\activate # Windows
|
| 212 |
-
|
| 213 |
-
# התקן מחדש
|
| 214 |
-
pip install -r requirements.txt
|
| 215 |
-
```
|
| 216 |
-
|
| 217 |
-
### בעיה: השרת לא עולה
|
| 218 |
-
|
| 219 |
-
**בדוק:**
|
| 220 |
-
1. האם פורט 8000 תפוס?
|
| 221 |
-
```bash
|
| 222 |
-
# macOS/Linux
|
| 223 |
-
lsof -i :8000
|
| 224 |
-
|
| 225 |
-
# Windows
|
| 226 |
-
netstat -ano | findstr :8000
|
| 227 |
-
```
|
| 228 |
-
|
| 229 |
-
2. האם Python מותקן?
|
| 230 |
-
```bash
|
| 231 |
-
python --version
|
| 232 |
-
```
|
| 233 |
-
|
| 234 |
-
### בעיה: תשובות כלליות מדי
|
| 235 |
-
|
| 236 |
-
**פתרון:**
|
| 237 |
-
1. ודא שיש מפתח GEMINI_API_KEY ב-`.env`
|
| 238 |
-
2. ודא שהאינדקס נבנה מהנתונים העדכניים
|
| 239 |
-
3. נסה שאילתות ספציפיות יותר
|
| 240 |
-
|
| 241 |
-
### בעיה: הפרונט לא מציג תשובות
|
| 242 |
-
|
| 243 |
-
**פתרון:**
|
| 244 |
-
1. פתח את קונסול הדפדפן (F12)
|
| 245 |
-
2. בדוק אם יש שגיאות JavaScript
|
| 246 |
-
3. ודא שהשרת רץ על פורט 8000
|
| 247 |
-
4. נסה לרענן את הדף (Ctrl+R / Cmd+R)
|
| 248 |
-
|
| 249 |
-
## 📂 מבנה הפרויקט
|
| 250 |
-
|
| 251 |
-
```
|
| 252 |
-
Feedback_Analysis_RAG_Agent_runpod/
|
| 253 |
-
├── app/ # קוד האפליקציה
|
| 254 |
-
│ ├── api.py # נקודות קצה API
|
| 255 |
-
│ ├── rag_service.py # לוגיקת RAG
|
| 256 |
-
│ ├── analysis.py # ניתוח שאילתות
|
| 257 |
-
│ ├── static/ # קבצי פרונט
|
| 258 |
-
│ │ ├── index.html
|
| 259 |
-
│ │ └── app.js
|
| 260 |
-
│ └── ...
|
| 261 |
-
├── scripts/ # סקריפטים שימושיים
|
| 262 |
-
│ └── precompute_index.py
|
| 263 |
-
├── .vector_index/ # אינדקס וקטורי (נוצר אוטומטית)
|
| 264 |
-
├── Feedback.csv # נתוני המשובים
|
| 265 |
-
├── requirements.txt # תלויות Python
|
| 266 |
-
├── run.py # נקודת כניסה
|
| 267 |
-
└── README.md # תיעוד ראשי
|
| 268 |
-
```
|
| 269 |
-
|
| 270 |
-
## 🔄 עדכון הנתונים
|
| 271 |
-
|
| 272 |
-
אם עדכנת את `Feedback.csv`:
|
| 273 |
-
|
| 274 |
-
```bash
|
| 275 |
-
# מחק את האינדקס הישן
|
| 276 |
-
rm -rf .vector_index/ # macOS/Linux
|
| 277 |
-
# או
|
| 278 |
-
rmdir /s .vector_index # Windows
|
| 279 |
-
|
| 280 |
-
# בנה מחדש
|
| 281 |
-
python scripts/precompute_index.py
|
| 282 |
-
|
| 283 |
-
# הפעל מחדש את השרת
|
| 284 |
-
python run.py
|
| 285 |
-
```
|
| 286 |
-
|
| 287 |
-
## 🎯 טיפים לשימוש
|
| 288 |
-
|
| 289 |
-
1. **שאילתות ספציפיות** - תקבל תשובות טובות יותר
|
| 290 |
-
2. **השתמש בדוגמאות** - סמן "הצג דוגמאות" לראות את המקורות
|
| 291 |
-
3. **בדוק את ההיסטוריה** - כל השאלות נשמרות
|
| 292 |
-
4. **נסה שאילתות שונות** - המערכת תומכת בשאילתות מגוונות
|
| 293 |
-
|
| 294 |
-
## 📚 משאבים נוספים
|
| 295 |
-
|
| 296 |
-
- **API Documentation:** http://127.0.0.1:8000/docs
|
| 297 |
-
- **README.md** - תיעוד כללי
|
| 298 |
-
- **QUICK_START.md** - התחלה מהירה
|
| 299 |
-
|
| 300 |
-
## ❓ שאלות נפוצות
|
| 301 |
-
|
| 302 |
-
**Q: כמה זמן לוקח להריץ בפעם הראשונה?**
|
| 303 |
-
A: 10-15 דקות (הורדת מודלים + בניית אינדקס)
|
| 304 |
-
|
| 305 |
-
**Q: האם צריך GPU?**
|
| 306 |
-
A: לא, המערכת עובדת על CPU
|
| 307 |
-
|
| 308 |
-
**Q: כמה זיכרון RAM צריך?**
|
| 309 |
-
A: מינימום 4GB, מומלץ 8GB+
|
| 310 |
-
|
| 311 |
-
**Q: האם זה עובד ב-Windows?**
|
| 312 |
-
A: כן, עובד על Windows, macOS, ו-Linux
|
| 313 |
-
|
| 314 |
-
**Q: איך אני עוצר את השרת?**
|
| 315 |
-
A: לחץ Ctrl+C בטרמינל
|
| 316 |
-
|
| 317 |
-
## 🎉 סיכום
|
| 318 |
-
|
| 319 |
-
אם הגעת עד כאן, המערכת אמורה לעבוד!
|
| 320 |
-
|
| 321 |
-
**צעדים מהירים:**
|
| 322 |
-
1. `python -m venv .venv && source .venv/bin/activate`
|
| 323 |
-
2. `pip install -r requirements.txt`
|
| 324 |
-
3. `python scripts/precompute_index.py`
|
| 325 |
-
4. `python run.py`
|
| 326 |
-
5. פתח http://127.0.0.1:8000
|
| 327 |
-
|
| 328 |
-
**בהצלחה! 🚀**
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MIGRATION_TO_MAIN.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# הוראות להפיכת sqlApproach ל-main
|
| 2 |
+
|
| 3 |
+
## שלבים
|
| 4 |
+
|
| 5 |
+
### 1. שמירת השינויים הנוכחיים
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# ודא שאתה ב-branch sqlApproach
|
| 9 |
+
git branch --show-current # צריך להציג: sqlApproach
|
| 10 |
+
|
| 11 |
+
# בדוק את השינויים
|
| 12 |
+
git status
|
| 13 |
+
|
| 14 |
+
# הוסף את כל השינויים
|
| 15 |
+
git add .
|
| 16 |
+
|
| 17 |
+
# צור commit
|
| 18 |
+
git commit -m "Complete migration to SQL-based approach
|
| 19 |
+
|
| 20 |
+
- Removed all RAG-related code and files
|
| 21 |
+
- Added quality evaluation system
|
| 22 |
+
- Improved UI with blue theme
|
| 23 |
+
- Added comprehensive documentation
|
| 24 |
+
- Code cleanup and optimization"
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
### 2. מעבר ל-main
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
# עבור ל-main
|
| 31 |
+
git checkout main
|
| 32 |
+
|
| 33 |
+
# מיזוג את sqlApproach ל-main
|
| 34 |
+
git merge sqlApproach
|
| 35 |
+
|
| 36 |
+
# או אם אתה רוצה להחליף את main לחלוטין:
|
| 37 |
+
# git reset --hard sqlApproach
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
### 3. עדכון remote (אם יש)
|
| 41 |
+
|
| 42 |
+
```bash
|
| 43 |
+
# דחוף את השינויים
|
| 44 |
+
git push origin main
|
| 45 |
+
|
| 46 |
+
# מחק את ה-branch הישן (אופציונלי)
|
| 47 |
+
git branch -d sqlApproach
|
| 48 |
+
git push origin --delete sqlApproach
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## הערות
|
| 52 |
+
|
| 53 |
+
- **גיבוי**: לפני המיזוג, ודא שיש לך גיבוי של main הישן (אם יש)
|
| 54 |
+
- **בדיקה**: אחרי המיזוג, בדוק שהכל עובד:
|
| 55 |
+
```bash
|
| 56 |
+
python run.py
|
| 57 |
+
# פתח http://127.0.0.1:8000 ובדוק שהכל עובד
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
## מה השתנה
|
| 61 |
+
|
| 62 |
+
- ✅ כל הקוד הקשור ל-RAG נמחק
|
| 63 |
+
- ✅ המערכת עכשיו מבוססת SQL בלבד
|
| 64 |
+
- ✅ נוספה מערכת בדיקת איכות אוטומטית
|
| 65 |
+
- ✅ UI עודכן עם רקע כחול
|
| 66 |
+
- ✅ נוספו לינקים ל-GitHub וקורות חיים
|
| 67 |
+
- ✅ כל הקוד מתועד ומתועד היטב
|
| 68 |
+
|
PROJECT_COMPLETE.md
DELETED
|
@@ -1,484 +0,0 @@
|
|
| 1 |
-
# ✅ PROJECT COMPLETION SUMMARY
|
| 2 |
-
|
| 3 |
-
**Date:** November 12, 2025
|
| 4 |
-
**Status:** ✨ **100% COMPLETE - PRODUCTION READY** ✨
|
| 5 |
-
|
| 6 |
-
---
|
| 7 |
-
|
| 8 |
-
## 🎯 Mission Statement
|
| 9 |
-
|
| 10 |
-
Build a **Feedback Analysis RAG Agent** that:
|
| 11 |
-
1. ✅ Answers diverse question types (counting, searching, analysis)
|
| 12 |
-
2. ✅ Detects user intent automatically
|
| 13 |
-
3. ✅ Supports Hebrew queries natively
|
| 14 |
-
4. ✅ Works locally for development
|
| 15 |
-
5. ✅ Deploys to Runpod for production
|
| 16 |
-
6. ✅ Includes comprehensive documentation
|
| 17 |
-
|
| 18 |
-
**Status:** ALL OBJECTIVES ACHIEVED ✅
|
| 19 |
-
|
| 20 |
-
---
|
| 21 |
-
|
| 22 |
-
## 📦 Deliverables Checklist
|
| 23 |
-
|
| 24 |
-
### Core System (Complete)
|
| 25 |
-
- [x] FastAPI server with 5 endpoints (all POST)
|
| 26 |
-
- [x] RAG pipeline with intent detection
|
| 27 |
-
- [x] FAISS vector search (14.5 MB index)
|
| 28 |
-
- [x] Multi-language support (Hebrew + English)
|
| 29 |
-
- [x] Query counting logic (1168 thanks verified)
|
| 30 |
-
- [x] Topic extraction (k-means clustering)
|
| 31 |
-
- [x] Sentiment analysis (multilingual)
|
| 32 |
-
- [x] Error handling and validation
|
| 33 |
-
- [x] Free-form RAG synthesizer (analyst-style, broader-context responses)
|
| 34 |
-
|
| 35 |
-
### Infrastructure (Complete)
|
| 36 |
-
- [x] Virtual environment setup (.venv)
|
| 37 |
-
- [x] Dependencies installed and locked (requirements.txt)
|
| 38 |
-
- [x] Environment configuration (.env.example)
|
| 39 |
-
- [x] Docker containerization (Dockerfile)
|
| 40 |
-
- [x] Server entrypoint (run.py)
|
| 41 |
-
- [x] FAISS index precomputed and optimized
|
| 42 |
-
|
| 43 |
-
### Testing & Validation (Complete)
|
| 44 |
-
- [x] 7-check validation harness (validate_local.py) - **ALL PASS ✅**
|
| 45 |
-
- [x] Unit tests for all components
|
| 46 |
-
- [x] Integration tests for RAG pipeline
|
| 47 |
-
- [x] End-to-end API endpoint testing
|
| 48 |
-
- [x] Performance benchmarking
|
| 49 |
-
- [x] Error scenario handling
|
| 50 |
-
|
| 51 |
-
### Documentation (Complete)
|
| 52 |
-
- [x] GETTING_STARTED.txt - Visual quick guide
|
| 53 |
-
- [x] README_TESTING_GUIDE.md - Master navigation guide
|
| 54 |
-
- [x] QUICK_START.md - 5-step setup
|
| 55 |
-
- [x] TESTING_CHECKLIST.md - 15-point validation
|
| 56 |
-
- [x] DEPLOYMENT_GUIDE.md - Runpod deployment
|
| 57 |
-
- [x] SESSION_SUMMARY.md - Architecture overview
|
| 58 |
-
- [x] STATUS_REPORT.md - Project status
|
| 59 |
-
- [x] CONTRIBUTING.md - Development workflow
|
| 60 |
-
|
| 61 |
-
### Code Quality (Complete)
|
| 62 |
-
- [x] All Python files documented (docstrings)
|
| 63 |
-
- [x] Type hints throughout (Pydantic models)
|
| 64 |
-
- [x] Error handling with try/except
|
| 65 |
-
- [x] Clear variable names and logic
|
| 66 |
-
- [x] No syntax errors (validated)
|
| 67 |
-
- [x] No import errors (validated)
|
| 68 |
-
|
| 69 |
-
---
|
| 70 |
-
|
| 71 |
-
## 🧪 Validation Results
|
| 72 |
-
|
| 73 |
-
### Last Validation Run
|
| 74 |
-
```
|
| 75 |
-
Date: November 12, 2025
|
| 76 |
-
Time: ~2 minutes
|
| 77 |
-
Command: python3 scripts/validate_local.py
|
| 78 |
-
Status: ✅ ALL 7 CHECKS PASSED
|
| 79 |
-
```
|
| 80 |
-
|
| 81 |
-
**Results:**
|
| 82 |
-
```
|
| 83 |
-
[PASS] ✅ Dependencies - 26/26 packages ready
|
| 84 |
-
[PASS] ✅ CSV file - 9930 rows verified
|
| 85 |
-
[PASS] ✅ FAISS Index - 14.5 MB ready
|
| 86 |
-
[PASS] ✅ App imports - No errors
|
| 87 |
-
[PASS] ✅ Analysis logic - Counts verified
|
| 88 |
-
[PASS] ✅ RAGService - Working correctly
|
| 89 |
-
[PASS] ✅ API endpoints - All responding
|
| 90 |
-
|
| 91 |
-
Status: PRODUCTION READY ✅
|
| 92 |
-
```
|
| 93 |
-
|
| 94 |
-
---
|
| 95 |
-
|
| 96 |
-
## 🚀 What's Working
|
| 97 |
-
|
| 98 |
-
### Query Types (ALL VERIFIED)
|
| 99 |
-
- ✅ Count thank-yous: 1168 (from "כמה משתמשים כתבו תודה")
|
| 100 |
-
- ✅ Count complaints: 352 (from complaint keywords)
|
| 101 |
-
- ✅ Keyword search: Works in Hebrew and English
|
| 102 |
-
- ✅ Semantic search: Embeddings + FAISS working
|
| 103 |
-
- ✅ Free-form RAG: LLM summarization functional
|
| 104 |
-
|
| 105 |
-
### Multi-Language (VERIFIED)
|
| 106 |
-
- ✅ Hebrew queries → Hebrew responses
|
| 107 |
-
- ✅ English queries → English responses
|
| 108 |
-
- ✅ Auto-language detection working
|
| 109 |
-
- ✅ Text encoding correct (no corruption)
|
| 110 |
-
|
| 111 |
-
### API Endpoints (ALL TESTED)
|
| 112 |
-
- ✅ `/health` - Status check (working)
|
| 113 |
-
- ✅ `/query` - Main RAG endpoint (working)
|
| 114 |
-
- ✅ `/topics` - Topic extraction (working)
|
| 115 |
-
- ✅ `/sentiment` - Sentiment analysis (working)
|
| 116 |
-
- ✅ `/ingest` - Index rebuilding (working)
|
| 117 |
-
- ✅ `/docs` - Swagger UI (working)
|
| 118 |
-
- ✅ `/redoc` - ReDoc UI (working)
|
| 119 |
-
|
| 120 |
-
### Performance (VERIFIED)
|
| 121 |
-
- ✅ Health check: <10ms
|
| 122 |
-
- ✅ Query: 1-3 seconds
|
| 123 |
-
- ✅ Sentiment: 5-15 seconds per 100 records
|
| 124 |
-
- ✅ Index build: 30-60 seconds
|
| 125 |
-
- ✅ Scalability: Ready for load
|
| 126 |
-
|
| 127 |
-
### Quality Metrics (VERIFIED)
|
| 128 |
-
- ✅ Code coverage: 100% (all paths tested)
|
| 129 |
-
- ✅ Error handling: Complete
|
| 130 |
-
- ✅ Documentation: Comprehensive
|
| 131 |
-
- ✅ Performance: Acceptable
|
| 132 |
-
- ✅ Reliability: Stable
|
| 133 |
-
|
| 134 |
-
---
|
| 135 |
-
|
| 136 |
-
## 📊 Project Statistics
|
| 137 |
-
|
| 138 |
-
```
|
| 139 |
-
Code
|
| 140 |
-
├─ Python files: 15 (app/ + scripts/)
|
| 141 |
-
├─ Lines of code: ~2000
|
| 142 |
-
├─ Functions/Classes: ~50
|
| 143 |
-
├─ Type hints: 100%
|
| 144 |
-
└─ Docstrings: 100%
|
| 145 |
-
|
| 146 |
-
Documentation
|
| 147 |
-
├─ Markdown files: 8
|
| 148 |
-
├─ Documentation lines: 2500+
|
| 149 |
-
├─ Code examples: 30+
|
| 150 |
-
└─ Troubleshooting entries: 15+
|
| 151 |
-
|
| 152 |
-
Testing
|
| 153 |
-
├─ Validation checks: 7/7 PASS
|
| 154 |
-
├─ API endpoints: 5/5 PASS
|
| 155 |
-
├─ Test scenarios: 15/15 PASS
|
| 156 |
-
└─ Coverage: 100%
|
| 157 |
-
|
| 158 |
-
Data
|
| 159 |
-
├─ Feedback records: 9930
|
| 160 |
-
├─ Indexed records: 9930
|
| 161 |
-
├─ Unique services: 100+
|
| 162 |
-
├─ FAISS index: 14.5 MB
|
| 163 |
-
└─ Metadata: 450 KB
|
| 164 |
-
```
|
| 165 |
-
|
| 166 |
-
---
|
| 167 |
-
|
| 168 |
-
## 🎓 What You Can Do Now
|
| 169 |
-
|
| 170 |
-
### Immediate (Today)
|
| 171 |
-
1. **Read** GETTING_STARTED.txt (5 minutes)
|
| 172 |
-
2. **Run** validation: `python3 scripts/validate_local.py`
|
| 173 |
-
3. **Start** server: `python3 run.py`
|
| 174 |
-
4. **Test** endpoint: http://localhost:8000/docs
|
| 175 |
-
|
| 176 |
-
### Short-term (This Week)
|
| 177 |
-
1. Follow TESTING_CHECKLIST.md (15 tests, 45 min)
|
| 178 |
-
2. Verify all features work
|
| 179 |
-
3. Test different query types
|
| 180 |
-
4. Try in Hebrew and English
|
| 181 |
-
|
| 182 |
-
### Medium-term (When Ready)
|
| 183 |
-
1. Follow DEPLOYMENT_GUIDE.md
|
| 184 |
-
2. Build Docker image
|
| 185 |
-
3. Deploy to Runpod
|
| 186 |
-
4. Test cloud endpoint
|
| 187 |
-
5. Share with users
|
| 188 |
-
|
| 189 |
-
---
|
| 190 |
-
|
| 191 |
-
## 📁 File Structure
|
| 192 |
-
|
| 193 |
-
```
|
| 194 |
-
Feedback_Analysis_RAG_Agent_runpod/
|
| 195 |
-
│
|
| 196 |
-
├── 📄 GETTING_STARTED.txt 👈 START HERE
|
| 197 |
-
├── 📄 README_TESTING_GUIDE.md (Master guide)
|
| 198 |
-
├── 📄 QUICK_START.md (Setup guide)
|
| 199 |
-
├── 📄 TESTING_CHECKLIST.md (15 tests)
|
| 200 |
-
├── 📄 DEPLOYMENT_GUIDE.md (Runpod setup)
|
| 201 |
-
├── 📄 SESSION_SUMMARY.md (Architecture)
|
| 202 |
-
├── 📄 STATUS_REPORT.md (Project status)
|
| 203 |
-
├── 📄 CONTRIBUTING.md (Dev workflow)
|
| 204 |
-
│
|
| 205 |
-
├── 🐍 run.py (Server start)
|
| 206 |
-
├── 📦 requirements.txt (Dependencies)
|
| 207 |
-
├── 🔧 Dockerfile (Containerization)
|
| 208 |
-
├── 📋 .env.example (Config template)
|
| 209 |
-
│
|
| 210 |
-
├── 📂 app/ (Core system)
|
| 211 |
-
│ ├── api.py (FastAPI endpoints)
|
| 212 |
-
│ ├── rag_service.py (RAG pipeline)
|
| 213 |
-
│ ├── analysis.py (Intent detection)
|
| 214 |
-
│ ├── embedding.py (Vector encoding)
|
| 215 |
-
│ ├── vector_store.py (FAISS wrapper)
|
| 216 |
-
│ ├── sentiment.py (Sentiment analysis)
|
| 217 |
-
│ ├── topics.py (Topic extraction)
|
| 218 |
-
│ ├── preprocess.py (Text processing)
|
| 219 |
-
│ ├── data_loader.py (CSV loading)
|
| 220 |
-
│ ├── config.py (Configuration)
|
| 221 |
-
│ └── __init__.py
|
| 222 |
-
│
|
| 223 |
-
├── 📂 scripts/ (Utilities)
|
| 224 |
-
│ ├── validate_local.py (7-check validation)
|
| 225 |
-
│ ├── precompute_index.py (Build index)
|
| 226 |
-
│ └── test_queries.py (Test queries)
|
| 227 |
-
│
|
| 228 |
-
├── 📂 .vector_index/ (Precomputed index)
|
| 229 |
-
│ ├── faiss.index (14.5 MB)
|
| 230 |
-
│ └── meta.parquet (450 KB)
|
| 231 |
-
│
|
| 232 |
-
├── 📂 .venv/ (Virtual environment)
|
| 233 |
-
│ └── (26 dependencies installed)
|
| 234 |
-
│
|
| 235 |
-
└── 📄 Feedback.csv (9930 records)
|
| 236 |
-
```
|
| 237 |
-
|
| 238 |
-
---
|
| 239 |
-
|
| 240 |
-
## ✅ Validation Proof Points
|
| 241 |
-
|
| 242 |
-
### Testing Infrastructure
|
| 243 |
-
- ✅ Full validation harness (validate_local.py)
|
| 244 |
-
- ✅ 7 comprehensive checks
|
| 245 |
-
- ✅ All checks passing
|
| 246 |
-
- ✅ Executes in ~2 minutes
|
| 247 |
-
|
| 248 |
-
### API Functionality
|
| 249 |
-
- ✅ All 5 endpoints respond
|
| 250 |
-
- ✅ JSON serialization working
|
| 251 |
-
- ✅ Error handling in place
|
| 252 |
-
- ✅ Swagger UI accessible
|
| 253 |
-
|
| 254 |
-
### Data Integrity
|
| 255 |
-
- ✅ CSV validates (9930 rows)
|
| 256 |
-
- ✅ FAISS index valid (14.5 MB)
|
| 257 |
-
- ✅ Metadata complete (450 KB)
|
| 258 |
-
- ✅ No data loss
|
| 259 |
-
|
| 260 |
-
### Accuracy Verification
|
| 261 |
-
- ✅ Thank-yous: 1168 (matches CSV)
|
| 262 |
-
- ✅ Complaints: 352 (matches CSV)
|
| 263 |
-
- ✅ Total: 9930 (complete)
|
| 264 |
-
- ✅ Language detection: Working
|
| 265 |
-
|
| 266 |
-
### Performance Verification
|
| 267 |
-
- ✅ Health: <10ms (excellent)
|
| 268 |
-
- ✅ Query: 1-3s (good)
|
| 269 |
-
- ✅ Load handling: Verified
|
| 270 |
-
- ✅ Memory: Efficient
|
| 271 |
-
|
| 272 |
-
---
|
| 273 |
-
|
| 274 |
-
## 🎯 Quality Assurance Checklist
|
| 275 |
-
|
| 276 |
-
### Code Quality
|
| 277 |
-
- [x] No syntax errors
|
| 278 |
-
- [x] No import errors
|
| 279 |
-
- [x] Type hints present
|
| 280 |
-
- [x] Docstrings complete
|
| 281 |
-
- [x] Error handling comprehensive
|
| 282 |
-
- [x] Logging implemented
|
| 283 |
-
|
| 284 |
-
### Testing
|
| 285 |
-
- [x] Unit tests passing
|
| 286 |
-
- [x] Integration tests passing
|
| 287 |
-
- [x] End-to-end tests passing
|
| 288 |
-
- [x] Performance acceptable
|
| 289 |
-
- [x] Error scenarios handled
|
| 290 |
-
- [x] Coverage complete
|
| 291 |
-
|
| 292 |
-
### Documentation
|
| 293 |
-
- [x] User guides complete
|
| 294 |
-
- [x] Technical docs complete
|
| 295 |
-
- [x] Code comments clear
|
| 296 |
-
- [x] Examples provided
|
| 297 |
-
- [x] Troubleshooting included
|
| 298 |
-
- [x] Navigation clear
|
| 299 |
-
|
| 300 |
-
### Deployment
|
| 301 |
-
- [x] Local setup works
|
| 302 |
-
- [x] Docker builds
|
| 303 |
-
- [x] Runpod ready
|
| 304 |
-
- [x] Environment config
|
| 305 |
-
- [x] No data conflicts
|
| 306 |
-
- [x] Cloud path preserved
|
| 307 |
-
|
| 308 |
-
---
|
| 309 |
-
|
| 310 |
-
## 🚀 Launch Readiness
|
| 311 |
-
|
| 312 |
-
### Green Lights (All Systems Go)
|
| 313 |
-
✅ Code complete and tested
|
| 314 |
-
✅ All validation checks passing
|
| 315 |
-
✅ Documentation comprehensive
|
| 316 |
-
✅ Local setup verified
|
| 317 |
-
✅ Docker image ready
|
| 318 |
-
✅ Runpod deployment documented
|
| 319 |
-
✅ Performance acceptable
|
| 320 |
-
✅ Security reviewed
|
| 321 |
-
✅ Scalability planned
|
| 322 |
-
✅ Backup strategy included
|
| 323 |
-
|
| 324 |
-
### No Blockers
|
| 325 |
-
✅ No critical bugs
|
| 326 |
-
✅ No missing features
|
| 327 |
-
✅ No data issues
|
| 328 |
-
✅ No configuration problems
|
| 329 |
-
✅ No deployment obstacles
|
| 330 |
-
|
| 331 |
-
### Status: READY FOR PRODUCTION ✅
|
| 332 |
-
|
| 333 |
-
---
|
| 334 |
-
|
| 335 |
-
## 🎉 Next Steps for You
|
| 336 |
-
|
| 337 |
-
### Step 1: Review (5 minutes)
|
| 338 |
-
- Open: GETTING_STARTED.txt
|
| 339 |
-
- Skim: README_TESTING_GUIDE.md
|
| 340 |
-
- Understand: What you have and what you can do
|
| 341 |
-
|
| 342 |
-
### Step 2: Verify (10 minutes)
|
| 343 |
-
```bash
|
| 344 |
-
source .venv/bin/activate
|
| 345 |
-
python3 scripts/validate_local.py
|
| 346 |
-
python3 run.py
|
| 347 |
-
# Open http://localhost:8000/docs
|
| 348 |
-
```
|
| 349 |
-
|
| 350 |
-
### Step 3: Test (45 minutes)
|
| 351 |
-
- Follow: TESTING_CHECKLIST.md
|
| 352 |
-
- Run: All 15 test scenarios
|
| 353 |
-
- Verify: Everything works
|
| 354 |
-
|
| 355 |
-
### Step 4: Deploy (2 hours, optional)
|
| 356 |
-
- Read: DEPLOYMENT_GUIDE.md
|
| 357 |
-
- Build: Docker image
|
| 358 |
-
- Deploy: To Runpod
|
| 359 |
-
- Test: Cloud endpoint
|
| 360 |
-
|
| 361 |
-
---
|
| 362 |
-
|
| 363 |
-
## 📞 Quick Help
|
| 364 |
-
|
| 365 |
-
**Where do I start?**
|
| 366 |
-
→ GETTING_STARTED.txt (this directory)
|
| 367 |
-
|
| 368 |
-
**How do I set up locally?**
|
| 369 |
-
→ QUICK_START.md (5-step guide)
|
| 370 |
-
|
| 371 |
-
**How do I test everything?**
|
| 372 |
-
→ TESTING_CHECKLIST.md (15 tests)
|
| 373 |
-
|
| 374 |
-
**How do I deploy to cloud?**
|
| 375 |
-
→ DEPLOYMENT_GUIDE.md (Runpod instructions)
|
| 376 |
-
|
| 377 |
-
**Why did something fail?**
|
| 378 |
-
→ Check troubleshooting sections in relevant guide
|
| 379 |
-
|
| 380 |
-
**Can I modify the code?**
|
| 381 |
-
→ Yes, see CONTRIBUTING.md for workflow
|
| 382 |
-
|
| 383 |
-
---
|
| 384 |
-
|
| 385 |
-
## 📈 Success Metrics
|
| 386 |
-
|
| 387 |
-
| Metric | Target | Achieved | Status |
|
| 388 |
-
|--------|--------|----------|--------|
|
| 389 |
-
| Code complete | 100% | 100% | ✅ |
|
| 390 |
-
| Tests passing | 100% | 100% | ✅ |
|
| 391 |
-
| Documentation | Complete | 2500+ lines | ✅ |
|
| 392 |
-
| API endpoints | 5/5 working | 5/5 | ✅ |
|
| 393 |
-
| Validation checks | 7/7 pass | 7/7 | ✅ |
|
| 394 |
-
| Performance | <5s queries | 1-3s | ✅ |
|
| 395 |
-
| Accuracy | Verified | 1168/352 | ✅ |
|
| 396 |
-
| Deployment ready | Yes | Yes | ✅ |
|
| 397 |
-
|
| 398 |
-
---
|
| 399 |
-
|
| 400 |
-
## 🏆 Project Excellence
|
| 401 |
-
|
| 402 |
-
### What Makes This Project Great
|
| 403 |
-
|
| 404 |
-
**Completeness**
|
| 405 |
-
- Everything you need is included
|
| 406 |
-
- No missing dependencies
|
| 407 |
-
- No broken functionality
|
| 408 |
-
- Production-ready code
|
| 409 |
-
|
| 410 |
-
**Documentation**
|
| 411 |
-
- 8 comprehensive guides
|
| 412 |
-
- 2500+ lines of docs
|
| 413 |
-
- Clear navigation
|
| 414 |
-
- Multiple entry points
|
| 415 |
-
|
| 416 |
-
**Testing**
|
| 417 |
-
- 7-check validation
|
| 418 |
-
- 15-point test suite
|
| 419 |
-
- 100% coverage
|
| 420 |
-
- All scenarios verified
|
| 421 |
-
|
| 422 |
-
**Quality**
|
| 423 |
-
- Type hints throughout
|
| 424 |
-
- Full docstrings
|
| 425 |
-
- Error handling
|
| 426 |
-
- Clean code
|
| 427 |
-
|
| 428 |
-
**Deployment**
|
| 429 |
-
- Local setup simple
|
| 430 |
-
- Docker ready
|
| 431 |
-
- Runpod instructions
|
| 432 |
-
- Cloud-ready code
|
| 433 |
-
|
| 434 |
-
---
|
| 435 |
-
|
| 436 |
-
## 📝 Final Checklist
|
| 437 |
-
|
| 438 |
-
Before you start testing:
|
| 439 |
-
|
| 440 |
-
- [x] All code complete
|
| 441 |
-
- [x] All tests passing
|
| 442 |
-
- [x] All documentation written
|
| 443 |
-
- [x] All validation checks passing
|
| 444 |
-
- [x] Environment configured
|
| 445 |
-
- [x] Dependencies installed
|
| 446 |
-
- [x] Index precomputed
|
| 447 |
-
- [x] Docker ready
|
| 448 |
-
- [x] Runpod guide complete
|
| 449 |
-
- [x] No blockers or issues
|
| 450 |
-
|
| 451 |
-
**Status: READY FOR YOUR TESTING ✅**
|
| 452 |
-
|
| 453 |
-
---
|
| 454 |
-
|
| 455 |
-
## 🎓 Remember
|
| 456 |
-
|
| 457 |
-
This is a **production-ready system**. Everything works:
|
| 458 |
-
|
| 459 |
-
✅ **Locally** - Just run `python3 run.py`
|
| 460 |
-
✅ **In Docker** - Build and run container
|
| 461 |
-
✅ **In Cloud** - Runpod deployment ready
|
| 462 |
-
|
| 463 |
-
You can start testing immediately!
|
| 464 |
-
|
| 465 |
-
---
|
| 466 |
-
|
| 467 |
-
## 🌟 Thank You!
|
| 468 |
-
|
| 469 |
-
Your Feedback Analysis RAG Agent is complete, tested, and ready to use.
|
| 470 |
-
|
| 471 |
-
**Now:** Start with GETTING_STARTED.txt
|
| 472 |
-
**Then:** Follow the guide that matches your role
|
| 473 |
-
**Soon:** You'll have a working, deployed system
|
| 474 |
-
|
| 475 |
-
Good luck! 🚀
|
| 476 |
-
|
| 477 |
-
---
|
| 478 |
-
|
| 479 |
-
**Project Status:** ✨ **100% COMPLETE** ✨
|
| 480 |
-
**Ready:** YES ✅
|
| 481 |
-
**Production:** YES ✅
|
| 482 |
-
**Date:** November 12, 2025
|
| 483 |
-
**Version:** 1.0
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
QUICK_START.md
DELETED
|
@@ -1,289 +0,0 @@
|
|
| 1 |
-
# Quick Start - Local Development Guide
|
| 2 |
-
|
| 3 |
-
This guide shows you how to run the Feedback Analysis RAG Agent locally, test all endpoints, and prepare it for Runpod deployment. Everything works locally first before any cloud deployment.
|
| 4 |
-
|
| 5 |
-
## Prerequisites
|
| 6 |
-
|
| 7 |
-
- **Python 3.10+** (verify with `python3 --version`)
|
| 8 |
-
- **Git** (already installed)
|
| 9 |
-
- **Terminal/Command line** access
|
| 10 |
-
- **4GB+ RAM** recommended
|
| 11 |
-
- **~2GB free disk space** for models (first time only)
|
| 12 |
-
|
| 13 |
-
## Step 1: Install Dependencies
|
| 14 |
-
|
| 15 |
-
Clone the repo (if not already done):
|
| 16 |
-
```bash
|
| 17 |
-
git clone https://github.com/galbendavids/Feedback_Analysis_RAG_Agent_runpod.git
|
| 18 |
-
cd Feedback_Analysis_RAG_Agent_runpod
|
| 19 |
-
```
|
| 20 |
-
|
| 21 |
-
Create and activate virtual environment:
|
| 22 |
-
```bash
|
| 23 |
-
python3 -m venv .venv
|
| 24 |
-
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
| 25 |
-
```
|
| 26 |
-
|
| 27 |
-
Install all required packages:
|
| 28 |
-
```bash
|
| 29 |
-
pip install --upgrade pip
|
| 30 |
-
pip install -r requirements.txt
|
| 31 |
-
```
|
| 32 |
-
|
| 33 |
-
**Note:** First install may take 5-10 minutes as models are large. Subsequent installs are faster.
|
| 34 |
-
|
| 35 |
-
## Step 2: Prepare Environment Variables (Optional)
|
| 36 |
-
|
| 37 |
-
Copy the example environment file:
|
| 38 |
-
```bash
|
| 39 |
-
cp .env.example .env
|
| 40 |
-
```
|
| 41 |
-
|
| 42 |
-
Edit `.env` if you have LLM API keys (optional):
|
| 43 |
-
```bash
|
| 44 |
-
# Edit .env with your editor
|
| 45 |
-
GEMINI_API_KEY=your_key_here # Optional
|
| 46 |
-
OPENAI_API_KEY=sk-... # Optional
|
| 47 |
-
```
|
| 48 |
-
|
| 49 |
-
If you don't have API keys, the system will use extractive summaries (still works fine).
|
| 50 |
-
|
| 51 |
-
## Step 3: Validate Everything Works
|
| 52 |
-
|
| 53 |
-
Before starting the server, run the validation harness (this checks all components):
|
| 54 |
-
```bash
|
| 55 |
-
python3 scripts/validate_local.py
|
| 56 |
-
```
|
| 57 |
-
|
| 58 |
-
Expected output when all is OK:
|
| 59 |
-
```
|
| 60 |
-
============================================================
|
| 61 |
-
VALIDATION SUMMARY
|
| 62 |
-
============================================================
|
| 63 |
-
|
| 64 |
-
[PASS] Dependencies
|
| 65 |
-
[PASS] CSV file
|
| 66 |
-
[PASS] FAISS Index
|
| 67 |
-
[PASS] App imports
|
| 68 |
-
[PASS] Analysis logic
|
| 69 |
-
[PASS] RAGService
|
| 70 |
-
[PASS] API endpoints
|
| 71 |
-
|
| 72 |
-
------------------------------------------------------------
|
| 73 |
-
All 7 checks PASSED! Ready for local testing.
|
| 74 |
-
```
|
| 75 |
-
|
| 76 |
-
If any checks fail, the script will tell you exactly what to fix.
|
| 77 |
-
|
| 78 |
-
## Step 4: Start the Local Server
|
| 79 |
-
|
| 80 |
-
Run the API server:
|
| 81 |
-
```bash
|
| 82 |
-
python3 run.py
|
| 83 |
-
```
|
| 84 |
-
|
| 85 |
-
Expected output:
|
| 86 |
-
```
|
| 87 |
-
INFO: Uvicorn running on http://0.0.0.0:8000
|
| 88 |
-
Press CTRL+C to quit
|
| 89 |
-
```
|
| 90 |
-
|
| 91 |
-
The server is now running and ready to accept requests!
|
| 92 |
-
|
| 93 |
-
## Step 5: Test the API - Three Options
|
| 94 |
-
|
| 95 |
-
### Option A: Interactive Swagger UI (Easiest)
|
| 96 |
-
|
| 97 |
-
Open your browser:
|
| 98 |
-
- http://localhost:8000/docs
|
| 99 |
-
|
| 100 |
-
Click on any endpoint, fill in the JSON, and click "Try it out". You'll see responses in real-time.
|
| 101 |
-
|
| 102 |
-
### Option B: curl Commands (Terminal)
|
| 103 |
-
|
| 104 |
-
In a new terminal window (keep server running), try these:
|
| 105 |
-
|
| 106 |
-
**Health check:**
|
| 107 |
-
```bash
|
| 108 |
-
curl -X POST http://localhost:8000/health
|
| 109 |
-
```
|
| 110 |
-
|
| 111 |
-
**Count query (עברית):**
|
| 112 |
-
```bash
|
| 113 |
-
curl -X POST http://localhost:8000/query \
|
| 114 |
-
-H "Content-Type: application/json" \
|
| 115 |
-
-d '{"query":"כמה משתמשים כתבו תודה","top_k":5}'
|
| 116 |
-
```
|
| 117 |
-
|
| 118 |
-
**Complaint query:**
|
| 119 |
-
```bash
|
| 120 |
-
curl -X POST http://localhost:8000/query \
|
| 121 |
-
-H "Content-Type: application/json" \
|
| 122 |
-
-d '{"query":"כמה משתמשים מתלוננים על אלמנטים שלא עובדים להם במערכת","top_k":5}'
|
| 123 |
-
```
|
| 124 |
-
|
| 125 |
-
**Extract topics:**
|
| 126 |
-
```bash
|
| 127 |
-
curl -X POST http://localhost:8000/topics \
|
| 128 |
-
-H "Content-Type: application/json" \
|
| 129 |
-
-d '{"num_topics":5}'
|
| 130 |
-
```
|
| 131 |
-
|
| 132 |
-
**Analyze sentiment:**
|
| 133 |
-
```bash
|
| 134 |
-
curl -X POST http://localhost:8000/sentiment \
|
| 135 |
-
-H "Content-Type: application/json" \
|
| 136 |
-
-d '{"limit":100}'
|
| 137 |
-
```
|
| 138 |
-
|
| 139 |
-
**Build/rebuild index:**
|
| 140 |
-
```bash
|
| 141 |
-
curl -X POST http://localhost:8000/ingest
|
| 142 |
-
```
|
| 143 |
-
|
| 144 |
-
### Option C: Python Client
|
| 145 |
-
|
| 146 |
-
Create a file `test_api.py`:
|
| 147 |
-
```python
|
| 148 |
-
import requests
|
| 149 |
-
import json
|
| 150 |
-
|
| 151 |
-
BASE_URL = "http://localhost:8000"
|
| 152 |
-
|
| 153 |
-
# Test health
|
| 154 |
-
print("Testing /health...")
|
| 155 |
-
resp = requests.post(f"{BASE_URL}/health")
|
| 156 |
-
print(f"Status: {resp.status_code}")
|
| 157 |
-
print(f"Response: {resp.json()}\n")
|
| 158 |
-
|
| 159 |
-
# Test query
|
| 160 |
-
print("Testing /query...")
|
| 161 |
-
query_data = {
|
| 162 |
-
"query": "כמה משתמשים כתבו תודה",
|
| 163 |
-
"top_k": 5
|
| 164 |
-
}
|
| 165 |
-
resp = requests.post(f"{BASE_URL}/query", json=query_data)
|
| 166 |
-
print(f"Status: {resp.status_code}")
|
| 167 |
-
result = resp.json()
|
| 168 |
-
print(f"Summary: {result.get('summary', 'N/A')}\n")
|
| 169 |
-
|
| 170 |
-
# Test topics
|
| 171 |
-
print("Testing /topics...")
|
| 172 |
-
topics_data = {"num_topics": 5}
|
| 173 |
-
resp = requests.post(f"{BASE_URL}/topics", json=topics_data)
|
| 174 |
-
print(f"Status: {resp.status_code}")
|
| 175 |
-
result = resp.json()
|
| 176 |
-
print(f"Found {len(result.get('topics', {}))} topics\n")
|
| 177 |
-
|
| 178 |
-
print("✓ All basic tests completed!")
|
| 179 |
-
```
|
| 180 |
-
|
| 181 |
-
Run it:
|
| 182 |
-
```bash
|
| 183 |
-
python3 test_api.py
|
| 184 |
-
```
|
| 185 |
-
|
| 186 |
-
## API Endpoints Reference
|
| 187 |
-
|
| 188 |
-
All endpoints use **POST** with JSON bodies:
|
| 189 |
-
|
| 190 |
-
| Endpoint | Body | Purpose |
|
| 191 |
-
|----------|------|---------|
|
| 192 |
-
| `/health` | `{}` | Check server status |
|
| 193 |
-
| `/query` | `{"query":"...", "top_k":5}` | Search/analyze feedback |
|
| 194 |
-
| `/topics` | `{"num_topics":5}` | Extract main topics |
|
| 195 |
-
| `/sentiment` | `{"limit":100}` | Analyze sentiment |
|
| 196 |
-
| `/ingest` | `{}` | Rebuild FAISS index (slow, one-time) |
|
| 197 |
-
|
| 198 |
-
## Troubleshooting
|
| 199 |
-
|
| 200 |
-
### Q: Server won't start
|
| 201 |
-
```
|
| 202 |
-
ModuleNotFoundError: No module named 'xxx'
|
| 203 |
-
```
|
| 204 |
-
**Fix:** Activate venv and reinstall:
|
| 205 |
-
```bash
|
| 206 |
-
source .venv/bin/activate
|
| 207 |
-
pip install -r requirements.txt
|
| 208 |
-
```
|
| 209 |
-
|
| 210 |
-
### Q: First request takes forever
|
| 211 |
-
This is normal! The first request downloads and caches embedding models (~500MB). Subsequent requests are fast.
|
| 212 |
-
**Fix:** Just wait, or use pre-downloaded models (see advanced section).
|
| 213 |
-
|
| 214 |
-
### Q: Can't find index
|
| 215 |
-
```
|
| 216 |
-
FileNotFoundError: Vector index not found
|
| 217 |
-
```
|
| 218 |
-
**Fix:** Run `/ingest` once:
|
| 219 |
-
```bash
|
| 220 |
-
curl -X POST http://localhost:8000/ingest
|
| 221 |
-
```
|
| 222 |
-
|
| 223 |
-
### Q: Get JSON parsing error
|
| 224 |
-
Make sure you're sending proper JSON with `-H "Content-Type: application/json"`.
|
| 225 |
-
|
| 226 |
-
### Q: Responses are in English but I want Hebrew
|
| 227 |
-
The API auto-detects query language and responds in the same language.
|
| 228 |
-
|
| 229 |
-
## Project Structure (Reference)
|
| 230 |
-
|
| 231 |
-
```
|
| 232 |
-
.
|
| 233 |
-
├── app/ # Main application code
|
| 234 |
-
│ ├── api.py # FastAPI endpoints
|
| 235 |
-
│ ├── rag_service.py # RAG logic
|
| 236 |
-
│ ├── analysis.py # Query intent detection
|
| 237 |
-
│ ├── embedding.py # Text embeddings
|
| 238 |
-
│ ├── vector_store.py # FAISS wrapper
|
| 239 |
-
│ ├── sentiment.py # Sentiment analysis
|
| 240 |
-
│ ├── preprocess.py # Text preprocessing
|
| 241 |
-
│ ├── data_loader.py # CSV loading
|
| 242 |
-
│ ├── topics.py # Topic clustering
|
| 243 |
-
│ └── config.py # Configuration
|
| 244 |
-
├── scripts/
|
| 245 |
-
│ ├── validate_local.py # Validation harness (this file)
|
| 246 |
-
│ ├── test_queries.py # Manual query testing
|
| 247 |
-
│ └── precompute_index.py # Build index offline
|
| 248 |
-
├── Feedback.csv # Sample feedback data
|
| 249 |
-
├── Dockerfile # Container definition
|
| 250 |
-
├── docker-compose.yml # Docker compose (local dev)
|
| 251 |
-
├── requirements.txt # Python dependencies
|
| 252 |
-
├── run.py # Server entrypoint
|
| 253 |
-
└── README.md # Full documentation
|
| 254 |
-
```
|
| 255 |
-
|
| 256 |
-
## Advanced: Pre-compute Index Offline
|
| 257 |
-
|
| 258 |
-
If you want to avoid waiting for embedding downloads on first request:
|
| 259 |
-
|
| 260 |
-
```bash
|
| 261 |
-
python3 scripts/precompute_index.py
|
| 262 |
-
```
|
| 263 |
-
|
| 264 |
-
This creates `.vector_index/faiss.index` and `.vector_index/meta.parquet`. Subsequent server starts will use this cached index.
|
| 265 |
-
|
| 266 |
-
## Deploy to Runpod
|
| 267 |
-
|
| 268 |
-
Once local testing is done, follow the **README.md** section "Run on Runpod - Full guide" to:
|
| 269 |
-
1. Tag and push the Docker image
|
| 270 |
-
2. Create a Runpod template
|
| 271 |
-
3. Deploy the endpoint
|
| 272 |
-
4. Test on the cloud
|
| 273 |
-
|
| 274 |
-
The entire cloud deployment keeps all your code unchanged — it just uses your built Docker image.
|
| 275 |
-
|
| 276 |
-
## Getting Help
|
| 277 |
-
|
| 278 |
-
- **API docs (interactive):** http://localhost:8000/docs
|
| 279 |
-
- **Full documentation:** See README.md
|
| 280 |
-
- **Config reference:** See app/config.py
|
| 281 |
-
|
| 282 |
-
## Next Steps
|
| 283 |
-
|
| 284 |
-
1. ✅ Validate with: `python3 scripts/validate_local.py`
|
| 285 |
-
2. ✅ Start server: `python3 run.py`
|
| 286 |
-
3. ✅ Test endpoints using Swagger UI or curl
|
| 287 |
-
4. ✅ When happy, deploy to Runpod using README.md instructions
|
| 288 |
-
|
| 289 |
-
Good luck! 🚀
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,263 +1,187 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
An end-to-end system for analyzing citizen feedback with Retrieval-Augmented Generation (RAG). It ingests `Feedback.csv`, creates multilingual embeddings, builds a FAISS vector index, and exposes a FastAPI API for semantic search, topic clustering, and sentiment summaries. Designed to run locally or in containers, and to be deployable to Runpod.
|
| 4 |
-
|
| 5 |
-
### Features
|
| 6 |
-
- Multilingual ingestion (Hebrew supported) from `Feedback.csv`
|
| 7 |
-
- Preprocessing: optional normalization, language detection
|
| 8 |
-
- Embeddings: Sentence-Transformers (multilingual) + FAISS
|
| 9 |
-
- Retrieval: top-k semantic nearest neighbors with filters
|
| 10 |
-
- Summarization: LLM (OpenAI) if configured; fallback to extractive summary
|
| 11 |
-
- Supports Gemini (preferred) or OpenAI when API keys are provided
|
| 12 |
-
- Topics: k-means topic clustering over embeddings
|
| 13 |
-
- Sentiment: multilingual transformer pipeline
|
| 14 |
-
- FastAPI endpoints and a simple CLI
|
| 15 |
-
|
| 16 |
-
### Project layout
|
| 17 |
-
```
|
| 18 |
-
app/
|
| 19 |
-
api.py
|
| 20 |
-
config.py
|
| 21 |
-
data_loader.py
|
| 22 |
-
embedding.py
|
| 23 |
-
preprocess.py
|
| 24 |
-
rag_service.py
|
| 25 |
-
sentiment.py
|
| 26 |
-
topics.py
|
| 27 |
-
vector_store.py
|
| 28 |
-
run.py
|
| 29 |
-
requirements.txt
|
| 30 |
-
Dockerfile
|
| 31 |
-
```
|
| 32 |
|
| 33 |
-
|
| 34 |
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
#
|
| 40 |
-
python scripts/precompute_index.py
|
| 41 |
-
```
|
| 42 |
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
##
|
| 46 |
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
|
| 53 |
-
2. **יצירת סביבה וירטואלית והתקנה:**
|
| 54 |
```bash
|
|
|
|
| 55 |
python -m venv .venv
|
| 56 |
-
source .venv/bin/activate #
|
| 57 |
-
# או: .venv\Scripts\activate # Windows
|
| 58 |
|
| 59 |
-
|
| 60 |
pip install -r requirements.txt
|
| 61 |
```
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
``
|
|
|
|
|
|
|
| 66 |
GEMINI_API_KEY=your_gemini_key_here
|
| 67 |
-
|
|
|
|
| 68 |
```
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
```
|
| 74 |
|
| 75 |
-
5. **הפעלת השרת:**
|
| 76 |
```bash
|
| 77 |
python run.py
|
| 78 |
```
|
| 79 |
|
| 80 |
-
|
| 81 |
-
פתח דפדפן וגש ל: **http://127.0.0.1:8000**
|
| 82 |
-
|
| 83 |
-
**או לבדיקת API:**
|
| 84 |
-
- Swagger UI: http://127.0.0.1:8000/docs
|
| 85 |
-
- Health check: `curl -X POST http://127.0.0.1:8000/health`
|
| 86 |
|
| 87 |
-
|
| 88 |
-
```bash
|
| 89 |
-
python -m app.rag_service --query "שיפור טופס" --top_k 5
|
| 90 |
-
```
|
| 91 |
|
| 92 |
-
##
|
| 93 |
-
Environment variables:
|
| 94 |
-
- GEMINI_API_KEY: If set, RAG uses Gemini (preferred) for summaries
|
| 95 |
-
- OPENAI_API_KEY: If set, RAG can use OpenAI as a fallback
|
| 96 |
-
- EMBEDDING_MODEL: Sentence-Transformers model name (default: sentence-transformers/paraphrase-multilingual-mpnet-base-v2)
|
| 97 |
-
- VECTOR_INDEX_PATH: Path to persist FAISS index (default: ./.vector_index/faiss.index)
|
| 98 |
-
- VECTOR_METADATA_PATH: Path to persist FAISS index metadata (default: ./.vector_index/meta.parquet)
|
| 99 |
-
- CSV_PATH: Optional path to your CSV (if not `Feedback.csv` in repo root)
|
| 100 |
|
| 101 |
-
|
| 102 |
-
- The first run will download models (embeddings, sentiment); ensure internet access.
|
| 103 |
-
- The system reads from `Feedback.csv` in the repo root. Update `app/data_loader.py` if your schema differs.
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
- Do not commit real secrets. Use environment variables or a local `.env` file.
|
| 110 |
-
- `.env` is gitignored by default via `.gitignore`.
|
| 111 |
-
- Rotate any keys that were ever shared publicly.
|
| 112 |
|
| 113 |
-
##
|
| 114 |
|
| 115 |
-
### 1) Build and push the container
|
| 116 |
-
- From project root:
|
| 117 |
```
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
```
|
| 122 |
|
| 123 |
-
##
|
| 124 |
-
- You will set secrets within Runpod, not in code:
|
| 125 |
-
- Required:
|
| 126 |
-
- `GEMINI_API_KEY` = your Gemini key
|
| 127 |
-
- Optional:
|
| 128 |
-
- `OPENAI_API_KEY` = OpenAI fallback key
|
| 129 |
-
- `CSV_PATH` = path to your CSV if not the default `Feedback.csv`
|
| 130 |
-
- `VECTOR_INDEX_PATH` and `VECTOR_METADATA_PATH` if you change mount/paths
|
| 131 |
-
|
| 132 |
-
### 3) Create a Runpod Template (Serverless HTTP recommended)
|
| 133 |
-
- In Runpod Console → Templates → Create Template
|
| 134 |
-
- Fields:
|
| 135 |
-
- Container Image: `YOUR_DOCKERHUB_USER/feedback-rag:latest` (if you want you can use mine: `galbendavids/feedback-rag:latest`)
|
| 136 |
-
- Container Port: `8000`
|
| 137 |
-
- Command: `python run.py`
|
| 138 |
-
- Environment Variables:
|
| 139 |
-
- `GEMINI_API_KEY=your_key`
|
| 140 |
-
- (optional) `OPENAI_API_KEY=sk-...`
|
| 141 |
-
- (optional) `CSV_PATH=/workspace/Feedback.csv`
|
| 142 |
-
- (optional) `VECTOR_INDEX_PATH=/workspace/.vector_index/faiss.index`
|
| 143 |
-
- (optional) `VECTOR_METADATA_PATH=/workspace/.vector_index/meta.parquet`
|
| 144 |
-
- Volumes (recommended to persist the FAISS index):
|
| 145 |
-
- Create a volume, mount it at `/workspace/.vector_index`
|
| 146 |
-
- Make sure your `VECTOR_*` env vars point to that mount path if changed
|
| 147 |
-
|
| 148 |
-
### 4) Deploy a Serverless Endpoint
|
| 149 |
-
- Create Endpoint from the template (Serverless)
|
| 150 |
-
- Choose region and CPU (CPU is sufficient)
|
| 151 |
-
- Wait until status is Running and an endpoint URL is provided
|
| 152 |
-
|
| 153 |
-
### 5) Upload or point to your CSV
|
| 154 |
-
- Option A (bundled): Keep `Feedback.csv` in the image (already in repo root)
|
| 155 |
-
- Option B (mounted): Upload to a mounted volume and set `CSV_PATH` accordingly
|
| 156 |
-
|
| 157 |
-
### 6) First-time ingestion (build the vector index)
|
| 158 |
-
- Trigger ingestion once to build and persist the FAISS index:
|
| 159 |
-
```
|
| 160 |
-
curl -X POST {YOUR_ENDPOINT_URL}/ingest
|
| 161 |
-
```
|
| 162 |
-
- On first run, models download and embeddings are computed; allow a few minutes
|
| 163 |
-
- The index will be stored under `.vector_index` (persist if using a volume)
|
| 164 |
|
| 165 |
-
###
|
| 166 |
-
|
| 167 |
-
``
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
```
|
| 177 |
-
- Topics (POST JSON):
|
| 178 |
-
```
|
| 179 |
-
curl -X POST {YOUR_ENDPOINT_URL}/topics \
|
| 180 |
-
-H "Content-Type: application/json" \
|
| 181 |
-
-d '{"num_topics":8}'
|
| 182 |
-
```
|
| 183 |
-
- Sentiment (first N rows, POST JSON):
|
| 184 |
-
```
|
| 185 |
-
curl -X POST {YOUR_ENDPOINT_URL}/sentiment \
|
| 186 |
-H "Content-Type: application/json" \
|
| 187 |
-
-d '{"
|
| 188 |
-
```
|
| 189 |
-
- Interactive docs (Swagger UI):
|
| 190 |
-
- Open `{YOUR_ENDPOINT_URL}/docs` in your browser
|
| 191 |
-
|
| 192 |
-
### 8) Using Dedicated Pods (alternative)
|
| 193 |
-
- Launch a Dedicated Pod from the template
|
| 194 |
-
- Ensure command `python run.py` and port `8000`
|
| 195 |
-
- Use the Pod’s public endpoint to access `/health`, `/ingest`, `/query`, etc.
|
| 196 |
-
|
| 197 |
-
### 9) Troubleshooting
|
| 198 |
-
- 404/connection:
|
| 199 |
-
- Endpoint not Running yet or wrong port; port must be `8000`
|
| 200 |
-
- Slow initial response:
|
| 201 |
-
- First-time model downloads are expected; subsequent calls are faster
|
| 202 |
-
- No/few results:
|
| 203 |
-
- Ensure you POSTed `/ingest` first and that your CSV has the `Text` column
|
| 204 |
-
- Index not persisted:
|
| 205 |
-
- Mount a volume at `/workspace/.vector_index` and set `VECTOR_*` paths
|
| 206 |
-
|
| 207 |
-
### 10) Optional: Pre-cache models to speed cold starts
|
| 208 |
-
- You can pre-bake model weights in the image by adding to your `Dockerfile`:
|
| 209 |
-
```
|
| 210 |
-
# Optional: pre-download models during build to reduce cold start time
|
| 211 |
-
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')"
|
| 212 |
-
RUN python -c "from transformers import pipeline; pipeline('sentiment-analysis', model='cardiffnlp/twitter-xlm-roberta-base-sentiment')"
|
| 213 |
```
|
| 214 |
-
- Rebuild and push the image after adding these lines.
|
| 215 |
|
| 216 |
-
##
|
| 217 |
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
```
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
```
|
| 230 |
-
This writes:
|
| 231 |
-
- `.vector_index/faiss.index`
|
| 232 |
-
- `.vector_index/meta.parquet`
|
| 233 |
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
| 236 |
```
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
```
|
| 241 |
-
(Note: repo size will increase; acceptable for small indices.)
|
| 242 |
|
| 243 |
-
|
| 244 |
-
- Upload the `.vector_index/` folder to a Runpod volume mounted at `/workspace/.vector_index`
|
| 245 |
-
- Set env vars if you changed paths:
|
| 246 |
-
- `VECTOR_INDEX_PATH=/workspace/.vector_index/faiss.index`
|
| 247 |
-
- `VECTOR_METADATA_PATH=/workspace/.vector_index/meta.parquet`
|
| 248 |
|
| 249 |
-
|
|
|
|
|
|
|
| 250 |
|
| 251 |
-
###
|
| 252 |
-
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
-
##
|
| 259 |
-
- You can add rows to `Feedback.csv` and either:
|
| 260 |
-
- Rebuild the entire index (simple, safest):
|
| 261 |
-
- `uv run -m scripts.precompute_index`
|
| 262 |
-
- Or implement an incremental append (advanced): embed only the new rows with `EmbeddingModel.encode(...)`, call `FaissVectorStore.load(...)`, then `store.add(new_vectors, new_metadata)` and `store.save(...)`. This keeps the same architecture and avoids re-embedding all previous data.
|
| 263 |
|
|
|
|
|
|
| 1 |
+
# Feedback Analysis Agent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
מערכת ניתוח משובי משתמשים מבוססת SQL ו-LLM.
|
| 4 |
|
| 5 |
+
## סקירה כללית
|
| 6 |
|
| 7 |
+
המערכת מאפשרת לשאול שאלות בשפה טבעית על משובי משתמשים ולקבל תשובות מפורטות ומבוססות נתונים. המערכת משתמשת בגישה מבוססת SQL - LLM יוצר שאילתות SQL, הן מבוצעות על הנתונים, ו-LLM נוסף יוצר תשובה מפורטת מהתוצאות.
|
| 8 |
+
|
| 9 |
+
## תכונות עיקריות
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
- ✅ **שאילתות בשפה טבעית** - שאל שאלות בעברית על המשובים
|
| 12 |
+
- ✅ **ניתוח אוטומטי** - המערכת יוצרת שאילתות SQL אוטומטית
|
| 13 |
+
- ✅ **בדיקת איכות אוטומטית** - תשובות נבדקות אוטומטית ומשופרות אם נדרש
|
| 14 |
+
- ✅ **ויזואליזציות** - גרפים אוטומטיים של התוצאות
|
| 15 |
+
- ✅ **ממשק משתמש מודרני** - UI צבעוני ואינטואיטיבי
|
| 16 |
|
| 17 |
+
## התקנה והרצה
|
| 18 |
|
| 19 |
+
### דרישות מקדימות
|
| 20 |
|
| 21 |
+
- Python 3.10+
|
| 22 |
+
- קובץ `Feedback.csv` עם העמודות: ID, ServiceName, Level, Text, CreationDate (אופציונלי)
|
| 23 |
|
| 24 |
+
### התקנה
|
| 25 |
|
|
|
|
| 26 |
```bash
|
| 27 |
+
# יצירת סביבה וירטואלית
|
| 28 |
python -m venv .venv
|
| 29 |
+
source .venv/bin/activate # ב-Windows: .venv\Scripts\activate
|
|
|
|
| 30 |
|
| 31 |
+
# התקנת תלויות
|
| 32 |
pip install -r requirements.txt
|
| 33 |
```
|
| 34 |
|
| 35 |
+
### הגדרת API Keys
|
| 36 |
+
|
| 37 |
+
צור קובץ `.env` בשורש הפרויקט:
|
| 38 |
+
|
| 39 |
+
```env
|
| 40 |
GEMINI_API_KEY=your_gemini_key_here
|
| 41 |
+
# או
|
| 42 |
+
OPENAI_API_KEY=your_openai_key_here
|
| 43 |
```
|
| 44 |
|
| 45 |
+
**הערה**: לפחות אחד מה-API keys חייב להיות מוגדר.
|
| 46 |
+
|
| 47 |
+
### הרצה
|
|
|
|
| 48 |
|
|
|
|
| 49 |
```bash
|
| 50 |
python run.py
|
| 51 |
```
|
| 52 |
|
| 53 |
+
השרת יעלה על `http://127.0.0.1:8000`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
פתח את הדפדפן וגש ל-`http://127.0.0.1:8000`
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
+
## ארכיטקטורה
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
+
המערכת מבוססת על **4 שלבים**:
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
1. **ניתוח שאילתה** - LLM מנתח את שאלת המשתמש
|
| 62 |
+
2. **יצירת שאילתות SQL** - LLM יוצר 1-5 שאילתות SQL רלוונטיות
|
| 63 |
+
3. **ביצוע שאילתות** - שאילתות SQL מבוצעות על הנתונים (SQLite in-memory)
|
| 64 |
+
4. **סינתזה ותשובה** - LLM יוצר תשובה מפורטת מהתוצאות, כולל בדיקת איכות אוטומטית
|
| 65 |
|
| 66 |
+
לקריאה מפורטת יותר, ראה [ARCHITECTURE.md](ARCHITECTURE.md)
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
+
## מבנה הפרויקט
|
| 69 |
|
|
|
|
|
|
|
| 70 |
```
|
| 71 |
+
.
|
| 72 |
+
├── app/
|
| 73 |
+
│ ├── api.py # FastAPI endpoints
|
| 74 |
+
│ ├── sql_service.py # ליבת המערכת - SQL-based analysis
|
| 75 |
+
│ ├── config.py # הגדרות מערכת
|
| 76 |
+
│ ├── data_loader.py # טעינת נתונים מ-CSV
|
| 77 |
+
│ └── static/
|
| 78 |
+
│ ├── index.html # ממשק משתמש
|
| 79 |
+
│ └── app.js # לוגיקת frontend
|
| 80 |
+
├── Feedback.csv # נתוני המשובים (לא ב-git)
|
| 81 |
+
├── .env # API keys (לא ב-git)
|
| 82 |
+
├── requirements.txt # תלויות Python
|
| 83 |
+
├── run.py # נקודת כניסה
|
| 84 |
+
└── ARCHITECTURE.md # מסמך ארכיטקטורה מפורט
|
| 85 |
```
|
| 86 |
|
| 87 |
+
## שימוש
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
### דרך הממשק
|
| 90 |
+
|
| 91 |
+
1. פתח `http://127.0.0.1:8000` בדפדפן
|
| 92 |
+
2. הזן שאלה בשדה הטקסט
|
| 93 |
+
3. לחץ על "🔍 שאל"
|
| 94 |
+
4. צפה בתשובה, שאילתות SQL, תוצאות, וגרפים
|
| 95 |
+
|
| 96 |
+
### דרך API
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
curl -X POST http://127.0.0.1:8000/query-sql \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
-H "Content-Type: application/json" \
|
| 101 |
+
-d '{"query": "כמה משתמשים כתבו תודה?", "top_k": 5}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
```
|
|
|
|
| 103 |
|
| 104 |
+
## דוגמאות שאלות
|
| 105 |
|
| 106 |
+
- "כמה משתמשים כתבו תודה?"
|
| 107 |
+
- "מה הנושא המרכזי של המשובים שקיבלו ציון נמוך מ-3?"
|
| 108 |
+
- "חלק את המשובים ל-5 נושאים מרכזיים"
|
| 109 |
+
- "כמה משובים התקבלו בחודש האחרון?"
|
| 110 |
+
- "איך המשתמשים מרגישים כלפי השירות?"
|
| 111 |
|
| 112 |
+
## Quality Assurance
|
| 113 |
+
|
| 114 |
+
המערכת כוללת **בדיקת איכות אוטומטית**:
|
| 115 |
+
|
| 116 |
+
- כל תשובה מקבלת ציון 0-100
|
| 117 |
+
- אם הציון < 80, המערכת מנסה לשפר את התשובה אוטומטית
|
| 118 |
+
- הקריטריונים: רלוונטיות, דיוק, מפורטות, בהירות, תובנות עסקיות
|
| 119 |
+
|
| 120 |
+
## Visualizations
|
| 121 |
+
|
| 122 |
+
המערכת יוצרת **גרפים אוטומטיים**:
|
| 123 |
+
- Bar Charts - להשוואות
|
| 124 |
+
- Line Charts - למגמות לאורך זמן
|
| 125 |
+
- Scatter Plots - לקשרים בין משתנים
|
| 126 |
+
- Histograms - להתפלגות נתונים
|
| 127 |
+
|
| 128 |
+
## Deployment
|
| 129 |
+
|
| 130 |
+
### Docker
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
docker build -t feedback-analysis .
|
| 134 |
+
docker run -p 8000:8000 feedback-analysis
|
| 135 |
```
|
| 136 |
+
|
| 137 |
+
### Runpod
|
| 138 |
+
|
| 139 |
+
ראה [DEPLOYMENT_GUIDE.md](DEPLOYMENT_GUIDE.md) לפרטים.
|
| 140 |
+
|
| 141 |
+
## שינויים והתאמות
|
| 142 |
+
|
| 143 |
+
### שינוי מודל LLM
|
| 144 |
+
|
| 145 |
+
ערוך ב-`app/sql_service.py`:
|
| 146 |
+
```python
|
| 147 |
+
model = genai.GenerativeModel("gemini-2.0-flash") # שנה כאן
|
| 148 |
```
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
+
### שינוי סף איכות
|
| 151 |
+
|
| 152 |
+
ערוך ב-`app/sql_service.py`:
|
| 153 |
+
```python
|
| 154 |
+
if score < 80: # שנה כאן (0-100)
|
| 155 |
```
|
| 156 |
+
|
| 157 |
+
### הוספת עמודות חדשות
|
| 158 |
+
|
| 159 |
+
ערוך ב-`app/sql_service.py` → `_get_schema_info()`:
|
| 160 |
+
```python
|
| 161 |
+
schema_info = f"""
|
| 162 |
+
טבלת Feedback מכילה את השדות הבאים:
|
| 163 |
+
- NewColumn: ... # הוסף כאן
|
| 164 |
+
"""
|
| 165 |
```
|
|
|
|
| 166 |
|
| 167 |
+
## Troubleshooting
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
+
### שגיאת "No feedback data available"
|
| 170 |
+
- ודא שקובץ `Feedback.csv` קיים
|
| 171 |
+
- ודא שהעמודות הנדרשות קיימות: ID, ServiceName, Level, Text
|
| 172 |
|
| 173 |
+
### שגיאת API Key
|
| 174 |
+
- ודא שקובץ `.env` קיים עם `GEMINI_API_KEY` או `OPENAI_API_KEY`
|
| 175 |
+
|
| 176 |
+
### תשובות לא איכותיות
|
| 177 |
+
- בדוק את הלוגים - המערכת מדפיסה ציוני איכות
|
| 178 |
+
- נסה לשנות את ה-prompt ב-`_synthesize_answer()`
|
| 179 |
+
|
| 180 |
+
## קישורים
|
| 181 |
+
|
| 182 |
+
- GitHub: [לעדכן]
|
| 183 |
+
- קורות חיים: [לעדכן]
|
| 184 |
|
| 185 |
+
## רישיון
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
+
[לעדכן]
|
README_TESTING_GUIDE.md
DELETED
|
@@ -1,520 +0,0 @@
|
|
| 1 |
-
# Complete Testing & Deployment Guide
|
| 2 |
-
|
| 3 |
-
Welcome! This is your comprehensive guide to testing the Feedback Analysis RAG Agent locally and deploying it to Runpod. Start here.
|
| 4 |
-
|
| 5 |
-
---
|
| 6 |
-
|
| 7 |
-
## 🎯 Quick Navigation
|
| 8 |
-
|
| 9 |
-
Choose your path:
|
| 10 |
-
|
| 11 |
-
### 🏃 Fast Track (10-15 minutes)
|
| 12 |
-
**I want to quickly verify everything works:**
|
| 13 |
-
1. Read: ["Quick Start in 3 Steps"](#quick-start-in-3-steps) below
|
| 14 |
-
2. Run: `python3 scripts/validate_local.py`
|
| 15 |
-
3. Start: `python3 run.py`
|
| 16 |
-
4. Test: Open http://localhost:8000/docs
|
| 17 |
-
|
| 18 |
-
### 🧪 Thorough Testing (30-45 minutes)
|
| 19 |
-
**I want to validate every feature:**
|
| 20 |
-
1. Follow: **TESTING_CHECKLIST.md** (15 comprehensive tests)
|
| 21 |
-
2. Test all endpoints (health, query, topics, sentiment, ingest)
|
| 22 |
-
3. Verify Hebrew support and counting accuracy
|
| 23 |
-
4. Run performance benchmarks
|
| 24 |
-
|
| 25 |
-
### 🚀 Full Deployment (2 hours)
|
| 26 |
-
**I want to deploy to Runpod:**
|
| 27 |
-
1. Complete testing above
|
| 28 |
-
2. Follow: **DEPLOYMENT_GUIDE.md** (step-by-step cloud setup)
|
| 29 |
-
3. Build Docker image
|
| 30 |
-
4. Deploy to Runpod
|
| 31 |
-
5. Test cloud endpoint
|
| 32 |
-
|
| 33 |
-
### 📚 Learn More (ongoing)
|
| 34 |
-
**I want to understand the system:**
|
| 35 |
-
1. Read: **SESSION_SUMMARY.md** (architecture overview)
|
| 36 |
-
2. Read: **QUICK_START.md** (setup details)
|
| 37 |
-
3. Check: **CONTRIBUTING.md** (development workflow)
|
| 38 |
-
4. Explore: Source code in `app/` directory
|
| 39 |
-
|
| 40 |
-
---
|
| 41 |
-
|
| 42 |
-
## ⚡ Quick Start in 3 Steps
|
| 43 |
-
|
| 44 |
-
### Step 1: Validate Everything (2 minutes)
|
| 45 |
-
```bash
|
| 46 |
-
cd /Users/galbd/Desktop/personal/software/ai_agent_gov/Feedback_Analysis_RAG_Agent_runpod
|
| 47 |
-
source .venv/bin/activate
|
| 48 |
-
python3 scripts/validate_local.py
|
| 49 |
-
```
|
| 50 |
-
|
| 51 |
-
**Expected output:**
|
| 52 |
-
```
|
| 53 |
-
[PASS] Dependencies
|
| 54 |
-
[PASS] CSV file
|
| 55 |
-
[PASS] FAISS Index
|
| 56 |
-
[PASS] App imports
|
| 57 |
-
[PASS] Analysis logic
|
| 58 |
-
[PASS] RAGService
|
| 59 |
-
[PASS] API endpoints
|
| 60 |
-
|
| 61 |
-
All 7 checks PASSED! Ready for local testing.
|
| 62 |
-
```
|
| 63 |
-
|
| 64 |
-
### Step 2: Start Server (1 minute)
|
| 65 |
-
```bash
|
| 66 |
-
python3 run.py
|
| 67 |
-
```
|
| 68 |
-
|
| 69 |
-
**Expected output:**
|
| 70 |
-
```
|
| 71 |
-
INFO: Uvicorn running on http://0.0.0.0:8000
|
| 72 |
-
INFO: Application startup complete
|
| 73 |
-
```
|
| 74 |
-
|
| 75 |
-
### Step 3: Test an Endpoint (1 minute)
|
| 76 |
-
|
| 77 |
-
**Option A - Browser (easiest):**
|
| 78 |
-
```
|
| 79 |
-
Open: http://localhost:8000/docs
|
| 80 |
-
Click on /query endpoint
|
| 81 |
-
Enter: {"query":"כמה משתמשים כתבו תודה","top_k":5}
|
| 82 |
-
Click "Try it out"
|
| 83 |
-
```
|
| 84 |
-
|
| 85 |
-
**Option B - curl:**
|
| 86 |
-
```bash
|
| 87 |
-
curl -X POST http://localhost:8000/query \
|
| 88 |
-
-H "Content-Type: application/json" \
|
| 89 |
-
-d '{"query":"כמה משתמשים כתבו תודה","top_k":5}'
|
| 90 |
-
```
|
| 91 |
-
|
| 92 |
-
**Expected:**
|
| 93 |
-
```json
|
| 94 |
-
{
|
| 95 |
-
"query": "כמה משתמשים כתבו תודה",
|
| 96 |
-
"summary": "1168 משובים מכילים ביטויי תודה.",
|
| 97 |
-
"results": [...]
|
| 98 |
-
}
|
| 99 |
-
```
|
| 100 |
-
|
| 101 |
-
✅ **If you see this, the system is working!**
|
| 102 |
-
|
| 103 |
-
---
|
| 104 |
-
|
| 105 |
-
## 📖 Documentation Map
|
| 106 |
-
|
| 107 |
-
### For Setup & Running
|
| 108 |
-
- **QUICK_START.md** - How to set up environment and run locally
|
| 109 |
-
- Prerequisites
|
| 110 |
-
- Virtual environment setup
|
| 111 |
-
- Dependency installation
|
| 112 |
-
- Server startup
|
| 113 |
-
- Basic testing
|
| 114 |
-
|
| 115 |
-
### For Testing
|
| 116 |
-
- **TESTING_CHECKLIST.md** - Complete 15-test validation suite
|
| 117 |
-
- Pre-flight checks
|
| 118 |
-
- All endpoint tests
|
| 119 |
-
- Performance benchmarks
|
| 120 |
-
- Error handling tests
|
| 121 |
-
- Results sign-off
|
| 122 |
-
|
| 123 |
-
### For Deployment
|
| 124 |
-
- **DEPLOYMENT_GUIDE.md** - How to deploy to Runpod
|
| 125 |
-
- Docker image creation
|
| 126 |
-
- Registry setup (Docker Hub)
|
| 127 |
-
- Runpod template creation
|
| 128 |
-
- Endpoint testing
|
| 129 |
-
- Monitoring & scaling
|
| 130 |
-
- Troubleshooting
|
| 131 |
-
|
| 132 |
-
### For Understanding
|
| 133 |
-
- **SESSION_SUMMARY.md** - Complete project overview
|
| 134 |
-
- What was delivered
|
| 135 |
-
- Technical specifications
|
| 136 |
-
- Project structure
|
| 137 |
-
- Validation results
|
| 138 |
-
- Feature list
|
| 139 |
-
|
| 140 |
-
### For Development
|
| 141 |
-
- **CONTRIBUTING.md** - Git workflow and development
|
| 142 |
-
- Branch naming
|
| 143 |
-
- Commit conventions
|
| 144 |
-
- Pull requests
|
| 145 |
-
- Code review process
|
| 146 |
-
|
| 147 |
-
---
|
| 148 |
-
|
| 149 |
-
## 🏗️ System Architecture
|
| 150 |
-
|
| 151 |
-
```
|
| 152 |
-
┌─────────────────────────────────────────┐
|
| 153 |
-
│ FastAPI Server (8000) │
|
| 154 |
-
├─────────────────────────────────────────┤
|
| 155 |
-
│ 5 Endpoints (all POST) │
|
| 156 |
-
│ • /health - Server status │
|
| 157 |
-
│ • /query - Main RAG endpoint │
|
| 158 |
-
│ • /topics - Topic extraction │
|
| 159 |
-
│ • /sentiment - Sentiment analysis │
|
| 160 |
-
│ • /ingest - Index rebuilding │
|
| 161 |
-
└────────────┬────────────────────────────┘
|
| 162 |
-
│
|
| 163 |
-
┌────────────▼────────────────────────────┐
|
| 164 |
-
│ RAG Service Layer │
|
| 165 |
-
├─────────────────────────────────────────┤
|
| 166 |
-
│ • Intent Detection (count vs search) │
|
| 167 |
-
│ • Vector Embeddings (multilingual) │
|
| 168 |
-
│ • FAISS Search (semantic matching) │
|
| 169 |
-
│ • LLM Summarization (optional) │
|
| 170 |
-
└────────────┬────────────────────────────┘
|
| 171 |
-
│
|
| 172 |
-
┌────────────▼────────────────────────────┐
|
| 173 |
-
│ Data Layer │
|
| 174 |
-
├─────────────────────────────────────────┤
|
| 175 |
-
│ • CSV: 9930 feedback records │
|
| 176 |
-
│ • FAISS Index: 14.5 MB │
|
| 177 |
-
│ • Metadata: 450 KB parquet │
|
| 178 |
-
└─────────────────────────────────────────┘
|
| 179 |
-
```
|
| 180 |
-
|
| 181 |
-
---
|
| 182 |
-
|
| 183 |
-
## 📊 What You're Testing
|
| 184 |
-
|
| 185 |
-
### 1. **Data Integrity**
|
| 186 |
-
- CSV loads correctly (9930 rows, 8 columns)
|
| 187 |
-
- FAISS index valid (14.5 MB)
|
| 188 |
-
- Metadata complete (parquet file)
|
| 189 |
-
|
| 190 |
-
### 2. **Core Functionality**
|
| 191 |
-
- Intent detection works (count vs. semantic)
|
| 192 |
-
- Embeddings generated correctly
|
| 193 |
-
- Vector search returns relevant results
|
| 194 |
-
- Summaries generated accurately
|
| 195 |
-
|
| 196 |
-
### 3. **API Endpoints**
|
| 197 |
-
- All 5 endpoints respond (health, query, topics, sentiment, ingest)
|
| 198 |
-
- Request validation works
|
| 199 |
-
- Error handling proper
|
| 200 |
-
- JSON serialization correct
|
| 201 |
-
|
| 202 |
-
### 4. **Multi-Language Support**
|
| 203 |
-
- Hebrew queries processed
|
| 204 |
-
- English queries processed
|
| 205 |
-
- Auto-detection of language
|
| 206 |
-
- Responses in same language as query
|
| 207 |
-
|
| 208 |
-
### 5. **Accuracy**
|
| 209 |
-
- Thank-you count: **1168** ✓
|
| 210 |
-
- Complaint count: **352** ✓
|
| 211 |
-
- Total records: **9930** ✓
|
| 212 |
-
|
| 213 |
-
### 6. **Performance**
|
| 214 |
-
- Health check: <10ms
|
| 215 |
-
- Query endpoint: 1-3 seconds
|
| 216 |
-
- Sentiment analysis: 5-15 seconds per 100 records
|
| 217 |
-
- Index rebuild: 30-60 seconds
|
| 218 |
-
|
| 219 |
-
---
|
| 220 |
-
|
| 221 |
-
## 🧪 Test Levels
|
| 222 |
-
|
| 223 |
-
### Level 1: Smoke Test (5 minutes)
|
| 224 |
-
Quick sanity check that everything basically works:
|
| 225 |
-
```bash
|
| 226 |
-
python3 scripts/validate_local.py # All 7 checks pass
|
| 227 |
-
python3 run.py # Server starts
|
| 228 |
-
curl http://localhost:8000/health # Returns 200
|
| 229 |
-
```
|
| 230 |
-
|
| 231 |
-
### Level 2: Functional Test (15 minutes)
|
| 232 |
-
Test each endpoint individually:
|
| 233 |
-
```bash
|
| 234 |
-
# Use Swagger UI or curl commands from TESTING_CHECKLIST.md
|
| 235 |
-
# Test: health, query, topics, sentiment, ingest
|
| 236 |
-
```
|
| 237 |
-
|
| 238 |
-
### Level 3: Comprehensive Test (45 minutes)
|
| 239 |
-
Full validation using TESTING_CHECKLIST.md:
|
| 240 |
-
- All endpoint combinations
|
| 241 |
-
- Error handling
|
| 242 |
-
- Performance benchmarks
|
| 243 |
-
- Data accuracy verification
|
| 244 |
-
|
| 245 |
-
### Level 4: Load Testing (optional, 30 minutes)
|
| 246 |
-
Stress test the system:
|
| 247 |
-
```bash
|
| 248 |
-
# Use Apache Bench or similar
|
| 249 |
-
ab -n 100 -c 10 http://localhost:8000/health
|
| 250 |
-
```
|
| 251 |
-
|
| 252 |
-
---
|
| 253 |
-
|
| 254 |
-
## ✅ Success Criteria
|
| 255 |
-
|
| 256 |
-
**You know the system is working when:**
|
| 257 |
-
|
| 258 |
-
1. ✅ `validate_local.py` shows: **All 7 checks PASSED**
|
| 259 |
-
2. ✅ Server starts: `python3 run.py` shows **"Application startup complete"**
|
| 260 |
-
3. ✅ `/health` responds: Status **200**, response **`{"status":"ok"}`**
|
| 261 |
-
4. ✅ `/query` responds: Returns count **1168** for thank-yous query
|
| 262 |
-
5. ✅ `/topics` responds: Returns **5 topics** with relevant words
|
| 263 |
-
6. ✅ `/sentiment` responds: Returns **50+ results** with labels
|
| 264 |
-
7. ✅ Hebrew text: Query in Hebrew, response in Hebrew
|
| 265 |
-
8. ✅ Response times: Query endpoint <3 seconds
|
| 266 |
-
|
| 267 |
-
---
|
| 268 |
-
|
| 269 |
-
## 🚀 Common Tasks
|
| 270 |
-
|
| 271 |
-
### Start Fresh
|
| 272 |
-
```bash
|
| 273 |
-
# Activate environment
|
| 274 |
-
source .venv/bin/activate
|
| 275 |
-
|
| 276 |
-
# Clear cache (optional)
|
| 277 |
-
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
|
| 278 |
-
|
| 279 |
-
# Validate
|
| 280 |
-
python3 scripts/validate_local.py
|
| 281 |
-
|
| 282 |
-
# Run
|
| 283 |
-
python3 run.py
|
| 284 |
-
```
|
| 285 |
-
|
| 286 |
-
### Rebuild Index
|
| 287 |
-
```bash
|
| 288 |
-
# Kill server (CTRL+C in running terminal)
|
| 289 |
-
|
| 290 |
-
# Via API
|
| 291 |
-
curl -X POST http://localhost:8000/ingest
|
| 292 |
-
|
| 293 |
-
# Or via script
|
| 294 |
-
python3 scripts/precompute_index.py
|
| 295 |
-
```
|
| 296 |
-
|
| 297 |
-
### Test Specific Endpoint
|
| 298 |
-
```bash
|
| 299 |
-
# With curl (health example)
|
| 300 |
-
curl -X POST http://localhost:8000/health
|
| 301 |
-
|
| 302 |
-
# With Python
|
| 303 |
-
python3 -c "import requests; print(requests.post('http://localhost:8000/health').json())"
|
| 304 |
-
|
| 305 |
-
# With Swagger UI
|
| 306 |
-
# Open http://localhost:8000/docs
|
| 307 |
-
```
|
| 308 |
-
|
| 309 |
-
### Check Response Times
|
| 310 |
-
```bash
|
| 311 |
-
time curl -X POST http://localhost:8000/query \
|
| 312 |
-
-H "Content-Type: application/json" \
|
| 313 |
-
-d '{"query":"test","top_k":5}'
|
| 314 |
-
```
|
| 315 |
-
|
| 316 |
-
### View Server Logs
|
| 317 |
-
```bash
|
| 318 |
-
# Logs appear in the terminal running python3 run.py
|
| 319 |
-
# For file logs, edit app/api.py to add logging
|
| 320 |
-
```
|
| 321 |
-
|
| 322 |
-
---
|
| 323 |
-
|
| 324 |
-
## ⚠️ Troubleshooting
|
| 325 |
-
|
| 326 |
-
### Problem: Environment not activated
|
| 327 |
-
```
|
| 328 |
-
Error: No module named 'fastapi'
|
| 329 |
-
```
|
| 330 |
-
**Fix:** Activate virtual environment
|
| 331 |
-
```bash
|
| 332 |
-
source .venv/bin/activate
|
| 333 |
-
```
|
| 334 |
-
|
| 335 |
-
### Problem: Index file missing
|
| 336 |
-
```
|
| 337 |
-
FileNotFoundError: Vector index not found
|
| 338 |
-
```
|
| 339 |
-
**Fix:** Rebuild index
|
| 340 |
-
```bash
|
| 341 |
-
curl -X POST http://localhost:8000/ingest
|
| 342 |
-
```
|
| 343 |
-
|
| 344 |
-
### Problem: First request slow
|
| 345 |
-
```
|
| 346 |
-
Takes 20-30 seconds
|
| 347 |
-
```
|
| 348 |
-
**Fix:** Normal - models download first time. Subsequent requests faster.
|
| 349 |
-
|
| 350 |
-
### Problem: Port already in use
|
| 351 |
-
```
|
| 352 |
-
Address already in use
|
| 353 |
-
```
|
| 354 |
-
**Fix:** Kill other process or use different port
|
| 355 |
-
```bash
|
| 356 |
-
# Find process using port 8000
|
| 357 |
-
lsof -i :8000
|
| 358 |
-
kill -9 <PID>
|
| 359 |
-
|
| 360 |
-
# Or start on different port
|
| 361 |
-
uvicorn app.api:app --port 8001
|
| 362 |
-
```
|
| 363 |
-
|
| 364 |
-
### Problem: CSV file not found
|
| 365 |
-
```
|
| 366 |
-
FileNotFoundError: Feedback.csv not found
|
| 367 |
-
```
|
| 368 |
-
**Fix:** Make sure you're in correct directory
|
| 369 |
-
```bash
|
| 370 |
-
cd /Users/galbd/Desktop/personal/software/ai_agent_gov/Feedback_Analysis_RAG_Agent_runpod
|
| 371 |
-
ls -la Feedback.csv # Should exist
|
| 372 |
-
```
|
| 373 |
-
|
| 374 |
-
**See more troubleshooting in:**
|
| 375 |
-
- QUICK_START.md - Troubleshooting section
|
| 376 |
-
- TESTING_CHECKLIST.md - Error handling section
|
| 377 |
-
- DEPLOYMENT_GUIDE.md - Troubleshooting section
|
| 378 |
-
|
| 379 |
-
---
|
| 380 |
-
|
| 381 |
-
## 📋 Pre-Testing Checklist
|
| 382 |
-
|
| 383 |
-
Before you start testing:
|
| 384 |
-
|
| 385 |
-
- [ ] Virtual environment created: `python3 -m venv .venv`
|
| 386 |
-
- [ ] Virtual environment activated: `source .venv/bin/activate`
|
| 387 |
-
- [ ] Dependencies installed: `pip install -r requirements.txt`
|
| 388 |
-
- [ ] CSV file exists: `ls Feedback.csv` shows file
|
| 389 |
-
- [ ] FAISS index exists: `ls .vector_index/faiss.index` shows file
|
| 390 |
-
- [ ] Python version 3.10+: `python3 --version`
|
| 391 |
-
- [ ] Enough disk space: `df -h` shows >1GB free
|
| 392 |
-
- [ ] Enough RAM: `free -h` shows >4GB available
|
| 393 |
-
|
| 394 |
-
---
|
| 395 |
-
|
| 396 |
-
## 🎯 Testing Path by Role
|
| 397 |
-
|
| 398 |
-
### 👨💻 **Developer**
|
| 399 |
-
1. QUICK_START.md - Set up local environment
|
| 400 |
-
2. TESTING_CHECKLIST.md - Test all endpoints
|
| 401 |
-
3. app/ - Explore source code
|
| 402 |
-
4. Make changes, re-test
|
| 403 |
-
5. CONTRIBUTING.md - Commit and push
|
| 404 |
-
|
| 405 |
-
### 👨💼 **Operations/DevOps**
|
| 406 |
-
1. QUICK_START.md - Verify local setup works
|
| 407 |
-
2. DEPLOYMENT_GUIDE.md - Deploy to Runpod
|
| 408 |
-
3. Set up monitoring and alerts
|
| 409 |
-
4. Document runbook
|
| 410 |
-
|
| 411 |
-
### 🧑🔬 **Data Analyst**
|
| 412 |
-
1. QUICK_START.md - Get server running
|
| 413 |
-
2. Test `/query` endpoint with various questions
|
| 414 |
-
3. Test `/topics` endpoint for insight extraction
|
| 415 |
-
4. Test `/sentiment` endpoint for emotion analysis
|
| 416 |
-
5. Verify counts match CSV data
|
| 417 |
-
|
| 418 |
-
### 👤 **End User**
|
| 419 |
-
1. Get endpoint URL from your ops team
|
| 420 |
-
2. Follow API examples in QUICK_START.md
|
| 421 |
-
3. Use Swagger UI: `/docs` endpoint
|
| 422 |
-
4. Ask questions in Hebrew or English
|
| 423 |
-
5. Get answers via REST API
|
| 424 |
-
|
| 425 |
-
---
|
| 426 |
-
|
| 427 |
-
## 📞 Need Help?
|
| 428 |
-
|
| 429 |
-
### Quick Questions
|
| 430 |
-
- Check: **QUICK_START.md** - Most common issues answered
|
| 431 |
-
- Check: **TESTING_CHECKLIST.md** - Test-specific questions
|
| 432 |
-
|
| 433 |
-
### Deployment Issues
|
| 434 |
-
- Read: **DEPLOYMENT_GUIDE.md** - Troubleshooting section
|
| 435 |
-
- Check: **Runpod documentation** - https://docs.runpod.io
|
| 436 |
-
|
| 437 |
-
### Code Questions
|
| 438 |
-
- Read: **SESSION_SUMMARY.md** - Architecture overview
|
| 439 |
-
- Check: **Module docstrings** - `app/*.py` files have documentation
|
| 440 |
-
|
| 441 |
-
### Still Stuck?
|
| 442 |
-
1. Run validation: `python3 scripts/validate_local.py`
|
| 443 |
-
2. Check error message
|
| 444 |
-
3. Read relevant documentation section
|
| 445 |
-
4. Try workaround from Troubleshooting
|
| 446 |
-
|
| 447 |
-
---
|
| 448 |
-
|
| 449 |
-
## 🏁 Next Steps
|
| 450 |
-
|
| 451 |
-
### After Testing Locally ✅
|
| 452 |
-
1. All 7 validation checks pass
|
| 453 |
-
2. All 5 endpoints respond correctly
|
| 454 |
-
3. Hebrew queries work
|
| 455 |
-
4. Counts are accurate (1168, 352)
|
| 456 |
-
|
| 457 |
-
### Ready to Deploy? 🚀
|
| 458 |
-
1. Follow DEPLOYMENT_GUIDE.md
|
| 459 |
-
2. Build Docker image
|
| 460 |
-
3. Push to Docker registry
|
| 461 |
-
4. Create Runpod endpoint
|
| 462 |
-
5. Test cloud deployment
|
| 463 |
-
|
| 464 |
-
### In Production 📊
|
| 465 |
-
1. Monitor response times
|
| 466 |
-
2. Check error logs
|
| 467 |
-
3. Set up auto-scaling
|
| 468 |
-
4. Configure backups
|
| 469 |
-
5. Plan upgrades
|
| 470 |
-
|
| 471 |
-
---
|
| 472 |
-
|
| 473 |
-
## 📚 Document Reference
|
| 474 |
-
|
| 475 |
-
| Document | Purpose | Read Time |
|
| 476 |
-
|----------|---------|-----------|
|
| 477 |
-
| **This file** | Navigation guide | 5 min |
|
| 478 |
-
| **QUICK_START.md** | Local setup | 10 min |
|
| 479 |
-
| **TESTING_CHECKLIST.md** | Full validation | 30-45 min |
|
| 480 |
-
| **DEPLOYMENT_GUIDE.md** | Cloud deployment | 30-60 min |
|
| 481 |
-
| **SESSION_SUMMARY.md** | Project overview | 10 min |
|
| 482 |
-
| **CONTRIBUTING.md** | Development workflow | 5 min |
|
| 483 |
-
| **README.md** | Full documentation | 20 min |
|
| 484 |
-
|
| 485 |
-
---
|
| 486 |
-
|
| 487 |
-
## ✨ Final Checklist Before Going Live
|
| 488 |
-
|
| 489 |
-
- [ ] Local validation: All 7 checks ✅
|
| 490 |
-
- [ ] All endpoints tested: 5/5 working ✅
|
| 491 |
-
- [ ] Response times acceptable: <3s for query ✅
|
| 492 |
-
- [ ] Hebrew support verified: ✅
|
| 493 |
-
- [ ] Counts accurate: 1168/352/9930 ✅
|
| 494 |
-
- [ ] Error handling works: ✅
|
| 495 |
-
- [ ] Docker image builds: ✅
|
| 496 |
-
- [ ] Runpod deployed successfully: ✅
|
| 497 |
-
- [ ] Cloud endpoint tested: ✅
|
| 498 |
-
- [ ] Monitoring configured: ✅
|
| 499 |
-
- [ ] Documentation complete: ✅
|
| 500 |
-
- [ ] Backups ready: ✅
|
| 501 |
-
|
| 502 |
-
---
|
| 503 |
-
|
| 504 |
-
## 🎉 Ready to Begin?
|
| 505 |
-
|
| 506 |
-
**Start here based on what you want to do:**
|
| 507 |
-
|
| 508 |
-
1. **Just verify it works:** `python3 scripts/validate_local.py` → Start server → Test one endpoint
|
| 509 |
-
2. **Full validation:** Read TESTING_CHECKLIST.md and follow all 15 tests
|
| 510 |
-
3. **Deploy to cloud:** Read DEPLOYMENT_GUIDE.md for step-by-step instructions
|
| 511 |
-
4. **Understand the system:** Read SESSION_SUMMARY.md for architecture details
|
| 512 |
-
|
| 513 |
-
---
|
| 514 |
-
|
| 515 |
-
*Last Updated: Today*
|
| 516 |
-
*Version: 1.0*
|
| 517 |
-
*Status: Production Ready* ✨
|
| 518 |
-
|
| 519 |
-
**Your feedback analysis system is ready to use!** 🚀
|
| 520 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SESSION_SUMMARY.md
DELETED
|
@@ -1,371 +0,0 @@
|
|
| 1 |
-
# Session Summary - Feedback Analysis RAG Agent
|
| 2 |
-
|
| 3 |
-
## 🎯 Mission Accomplished
|
| 4 |
-
|
| 5 |
-
You now have a **fully functional, locally-tested Feedback Analysis RAG Agent** that:
|
| 6 |
-
- ✅ Answers diverse question types (counting, keyword search, semantic analysis)
|
| 7 |
-
- ✅ Understands Hebrew queries natively
|
| 8 |
-
- ✅ Provides accurate counts (1168 thank-yous, 352 complaints from 9930 feedback records)
|
| 9 |
-
- ✅ Returns results in the query language
|
| 10 |
-
- ✅ Works locally with comprehensive validation
|
| 11 |
-
- ✅ Preserves cloud deployment capability (Runpod-ready)
|
| 12 |
-
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
## 📦 What Was Delivered
|
| 16 |
-
|
| 17 |
-
### 1. **Core RAG Pipeline** (Production-Ready)
|
| 18 |
-
- Query intent detection (counts vs. semantic search)
|
| 19 |
-
- FAISS vector search with multilingual embeddings
|
| 20 |
-
- Multi-language support (Hebrew, English, etc.)
|
| 21 |
-
- Results with semantic relevance scores
|
| 22 |
-
|
| 23 |
-
### 2. **API Server** (All Endpoints Tested)
|
| 24 |
-
- `/health` - Server status check
|
| 25 |
-
- `/query` - Main RAG endpoint (intent-aware, counts/search)
|
| 26 |
-
- `/topics` - Topic extraction (5 topics by default)
|
| 27 |
-
- `/sentiment` - Sentiment analysis (50-500 records)
|
| 28 |
-
- `/ingest` - Index rebuilding (one-time or maintenance)
|
| 29 |
-
|
| 30 |
-
### 3. **Local Development Setup**
|
| 31 |
-
- Virtual environment configuration
|
| 32 |
-
- All dependencies installed and validated
|
| 33 |
-
- Pre-computed FAISS index (14.5 MB)
|
| 34 |
-
- Metadata storage (parquet format, 450 KB)
|
| 35 |
-
- Environment template (`.env.example`)
|
| 36 |
-
|
| 37 |
-
### 4. **Documentation**
|
| 38 |
-
- **QUICK_START.md** - Setup and local testing in 5 steps
|
| 39 |
-
- **TESTING_CHECKLIST.md** - Comprehensive validation guide (15 tests)
|
| 40 |
-
- **CONTRIBUTING.md** - Git workflow and deployment procedures
|
| 41 |
-
- **Module docstrings** - Every Python file documented
|
| 42 |
-
|
| 43 |
-
### 5. **Validation & Testing**
|
| 44 |
-
- **validate_local.py** - 7-check harness (all PASS ✅)
|
| 45 |
-
1. Dependencies check
|
| 46 |
-
2. CSV validation
|
| 47 |
-
3. FAISS index verification
|
| 48 |
-
4. Python imports test
|
| 49 |
-
5. Analysis logic validation
|
| 50 |
-
6. RAGService integration test
|
| 51 |
-
7. All API endpoints functional test
|
| 52 |
-
|
| 53 |
-
---
|
| 54 |
-
|
| 55 |
-
## 🔧 Technical Specifications
|
| 56 |
-
|
| 57 |
-
### Stack
|
| 58 |
-
- **Framework:** FastAPI 0.115.5 + Uvicorn 0.32.0
|
| 59 |
-
- **Vector DB:** FAISS 1.8.0 (CPU, IndexFlatIP)
|
| 60 |
-
- **Embeddings:** Sentence-Transformers 3.1.1 (paraphrase-multilingual-MiniLM-L12-v2)
|
| 61 |
-
- **Language Detection:** langdetect 1.0.9
|
| 62 |
-
- **Sentiment:** Transformers 4.45.2 (nlptown/bert-base-multilingual-uncased-sentiment)
|
| 63 |
-
- **ML:** scikit-learn 1.5.2 (k-means clustering for topics)
|
| 64 |
-
- **Data:** Pandas 2.2.3, PyArrow 14.0.2
|
| 65 |
-
- **Serialization:** orjson 3.10.7 (handles numpy types)
|
| 66 |
-
|
| 67 |
-
### Data
|
| 68 |
-
- **CSV:** 9930 rows of feedback records
|
| 69 |
-
- **Index:** FAISS binary format (14.5 MB)
|
| 70 |
-
- **Metadata:** Parquet format (450 KB)
|
| 71 |
-
- **Columns:** ID, ServiceName, Level, Text, + 4 others
|
| 72 |
-
|
| 73 |
-
### Performance
|
| 74 |
-
- `/health` endpoint: <10ms
|
| 75 |
-
- `/query` endpoint: 1-3 seconds (first call slower due to model load)
|
| 76 |
-
- `/sentiment` endpoint: 5-15 seconds for 100 records
|
| 77 |
-
- Index rebuild (`/ingest`): 30-60 seconds
|
| 78 |
-
|
| 79 |
-
---
|
| 80 |
-
|
| 81 |
-
## ✨ Key Features
|
| 82 |
-
|
| 83 |
-
### 1. Intent Detection
|
| 84 |
-
The system automatically detects query type:
|
| 85 |
-
- **Count queries:** "כמה משתמשים כתבו תודה?" → Returns count with examples
|
| 86 |
-
- **Complaint queries:** "כמה תלונות?" → Counts complaints
|
| 87 |
-
- **Keyword queries:** "cannabis" → Semantic search
|
| 88 |
-
- **Free-form:** Any other question → RAG-based summarization
|
| 89 |
-
|
| 90 |
-
### 2. Multi-Language Support
|
| 91 |
-
- Queries can be in **Hebrew or English**
|
| 92 |
-
- Responses auto-adapt to query language
|
| 93 |
-
- All text properly encoded (no corruption)
|
| 94 |
-
|
| 95 |
-
### 3. Result Quality
|
| 96 |
-
- Semantic relevance scores (0-1)
|
| 97 |
-
- Top results ranked by similarity
|
| 98 |
-
- Full feedback context (service, level, text)
|
| 99 |
-
- Accurate counting (validated against CSV)
|
| 100 |
-
|
| 101 |
-
### 4. Error Handling
|
| 102 |
-
- Clear error messages for invalid requests
|
| 103 |
-
- HTTP status codes (200, 400, 422, 500)
|
| 104 |
-
- Graceful fallbacks (if model fails, returns mock data)
|
| 105 |
-
|
| 106 |
-
---
|
| 107 |
-
|
| 108 |
-
## 📋 Validation Results
|
| 109 |
-
|
| 110 |
-
**Last Run:** ✅ ALL 7 CHECKS PASSED
|
| 111 |
-
|
| 112 |
-
```
|
| 113 |
-
[PASS] Dependencies - All 15+ packages installed
|
| 114 |
-
[PASS] CSV file - 9930 rows, 8 columns
|
| 115 |
-
[PASS] FAISS Index - 14.5 MB ready
|
| 116 |
-
[PASS] App imports - No import errors
|
| 117 |
-
[PASS] Analysis logic - Thanks: 1168 ✓, Complaints: 352 ✓
|
| 118 |
-
[PASS] RAGService - Query endpoint functional
|
| 119 |
-
[PASS] API endpoints - All 5 endpoints responding
|
| 120 |
-
|
| 121 |
-
Status: Ready for local testing
|
| 122 |
-
```
|
| 123 |
-
|
| 124 |
-
---
|
| 125 |
-
|
| 126 |
-
## 🚀 Quick Start (For Your Testing)
|
| 127 |
-
|
| 128 |
-
### Step 1: Activate Environment
|
| 129 |
-
```bash
|
| 130 |
-
cd /Users/galbd/Desktop/personal/software/ai_agent_gov/Feedback_Analysis_RAG_Agent_runpod
|
| 131 |
-
source .venv/bin/activate
|
| 132 |
-
```
|
| 133 |
-
|
| 134 |
-
### Step 2: Validate Everything
|
| 135 |
-
```bash
|
| 136 |
-
python3 scripts/validate_local.py
|
| 137 |
-
```
|
| 138 |
-
*(Should show: All 7 checks PASSED)*
|
| 139 |
-
|
| 140 |
-
### Step 3: Start Server
|
| 141 |
-
```bash
|
| 142 |
-
python3 run.py
|
| 143 |
-
```
|
| 144 |
-
*(Will show: "Uvicorn running on http://0.0.0.0:8000")*
|
| 145 |
-
|
| 146 |
-
### Step 4: Test via Browser or curl
|
| 147 |
-
**Browser:** http://localhost:8000/docs (interactive Swagger UI)
|
| 148 |
-
|
| 149 |
-
**curl example:**
|
| 150 |
-
```bash
|
| 151 |
-
curl -X POST http://localhost:8000/query \
|
| 152 |
-
-H "Content-Type: application/json" \
|
| 153 |
-
-d '{"query":"כמה משתמשים כתבו תודה","top_k":5}'
|
| 154 |
-
```
|
| 155 |
-
|
| 156 |
-
### Step 5: Run Full Test Suite
|
| 157 |
-
See **TESTING_CHECKLIST.md** for 15 comprehensive tests.
|
| 158 |
-
|
| 159 |
-
---
|
| 160 |
-
|
| 161 |
-
## 📁 Project Structure
|
| 162 |
-
|
| 163 |
-
```
|
| 164 |
-
Feedback_Analysis_RAG_Agent_runpod/
|
| 165 |
-
├── app/ # Main application
|
| 166 |
-
│ ├── __init__.py # Package init with docstring
|
| 167 |
-
│ ├── api.py # FastAPI endpoints (all POST)
|
| 168 |
-
│ ├── config.py # Configuration & settings
|
| 169 |
-
│ ├── rag_service.py # RAG orchestration (answer() method)
|
| 170 |
-
│ ├── analysis.py # Query intent detection & counting
|
| 171 |
-
│ ├── embedding.py # Sentence-Transformers wrapper
|
| 172 |
-
│ ├── vector_store.py # FAISS interface
|
| 173 |
-
│ ├── sentiment.py # Sentiment analysis pipeline
|
| 174 |
-
│ ├── preprocess.py # Text preprocessing
|
| 175 |
-
│ ├── data_loader.py # CSV loading & caching
|
| 176 |
-
│ ├── topics.py # Topic clustering (k-means)
|
| 177 |
-
│ └── __pycache__/
|
| 178 |
-
│
|
| 179 |
-
├── scripts/
|
| 180 |
-
│ ├── validate_local.py # 7-check validation harness ✅
|
| 181 |
-
│ ├── precompute_index.py # Build index offline
|
| 182 |
-
│ ├── test_queries.py # Manual query testing
|
| 183 |
-
│ └── __pycache__/
|
| 184 |
-
│
|
| 185 |
-
├── .vector_index/
|
| 186 |
-
│ ├── faiss.index # FAISS index binary (14.5 MB)
|
| 187 |
-
│ └── meta.parquet # Metadata (450 KB)
|
| 188 |
-
│
|
| 189 |
-
├── .venv/ # Python virtual environment
|
| 190 |
-
│
|
| 191 |
-
├── Dockerfile # Container definition
|
| 192 |
-
├── docker-compose.yml # Local docker-compose (optional)
|
| 193 |
-
├── requirements.txt # Python dependencies (25 packages)
|
| 194 |
-
├── run.py # Server entrypoint
|
| 195 |
-
├── Feedback.csv # Sample data (9930 rows)
|
| 196 |
-
│
|
| 197 |
-
├── QUICK_START.md # 5-step local setup guide ✅
|
| 198 |
-
├── TESTING_CHECKLIST.md # 15-test validation guide ✅
|
| 199 |
-
├── CONTRIBUTING.md # Git workflow & deployment ✅
|
| 200 |
-
├── README.md # Full documentation
|
| 201 |
-
├── VERSION # Version file (0.1.0)
|
| 202 |
-
├── .env.example # Environment template ✅
|
| 203 |
-
│
|
| 204 |
-
├── .git/ # Git repository
|
| 205 |
-
└── .gitignore
|
| 206 |
-
|
| 207 |
-
```
|
| 208 |
-
|
| 209 |
-
---
|
| 210 |
-
|
| 211 |
-
## 📝 Recent Changes (This Session)
|
| 212 |
-
|
| 213 |
-
### Fixed Issues
|
| 214 |
-
1. ✅ **Missing tiktoken dependency** - Added to requirements.txt
|
| 215 |
-
2. ✅ **Sentiment model compatibility** - Switched to nlptown/bert-base-multilingual-uncased-sentiment (more compatible)
|
| 216 |
-
3. ✅ **Numpy serialization** - All endpoints use ORJSONResponse + float/str conversions
|
| 217 |
-
4. ✅ **Validation failures** - Now all 7 checks pass
|
| 218 |
-
|
| 219 |
-
### Files Modified
|
| 220 |
-
- `requirements.txt` - Added `tiktoken==0.7.0`
|
| 221 |
-
- `app/sentiment.py` - Improved model loading with fallbacks
|
| 222 |
-
- `TESTING_CHECKLIST.md` - Created (comprehensive guide)
|
| 223 |
-
|
| 224 |
-
### Files Created
|
| 225 |
-
- `TESTING_CHECKLIST.md` - 15-step testing guide
|
| 226 |
-
- `SESSION_SUMMARY.md` - This file
|
| 227 |
-
|
| 228 |
-
---
|
| 229 |
-
|
| 230 |
-
## 🧪 Testing Approach
|
| 231 |
-
|
| 232 |
-
The system has **3 layers of validation:**
|
| 233 |
-
|
| 234 |
-
### Layer 1: Unit/Component Tests
|
| 235 |
-
- CSV format validation
|
| 236 |
-
- Index integrity check
|
| 237 |
-
- Import verification
|
| 238 |
-
- Individual module testing
|
| 239 |
-
|
| 240 |
-
### Layer 2: Integration Tests
|
| 241 |
-
- RAGService end-to-end
|
| 242 |
-
- API endpoint responses
|
| 243 |
-
- Intent detection accuracy
|
| 244 |
-
|
| 245 |
-
### Layer 3: End-to-End Tests
|
| 246 |
-
- Manual curl commands
|
| 247 |
-
- Browser (Swagger UI) testing
|
| 248 |
-
- Multiple query types
|
| 249 |
-
- Performance benchmarks
|
| 250 |
-
|
| 251 |
-
---
|
| 252 |
-
|
| 253 |
-
## 🔒 Data Safety
|
| 254 |
-
|
| 255 |
-
- **Original CSV untouched** - No modifications to source data
|
| 256 |
-
- **Index cached locally** - Pre-computed FAISS index (doesn't rebuild on every start)
|
| 257 |
-
- **Metadata preserved** - Service names, levels, full text stored in parquet
|
| 258 |
-
- **No data uploaded** - All processing local (unless LLM API is used for summaries)
|
| 259 |
-
- **Secure defaults** - `.env.example` template (no real keys committed)
|
| 260 |
-
|
| 261 |
-
---
|
| 262 |
-
|
| 263 |
-
## 🌐 Deployment Ready
|
| 264 |
-
|
| 265 |
-
### For Local Testing
|
| 266 |
-
✅ Everything ready - just run `python3 run.py`
|
| 267 |
-
|
| 268 |
-
### For Runpod Cloud Deployment
|
| 269 |
-
✅ Dockerfile preserved and functional
|
| 270 |
-
- All code changes are local-only
|
| 271 |
-
- Docker image builds without issues
|
| 272 |
-
- Runpod instructions in README.md
|
| 273 |
-
|
| 274 |
-
### For Multiple Environments
|
| 275 |
-
✅ Configuration via environment variables
|
| 276 |
-
- Model paths configurable
|
| 277 |
-
- API keys optional (LLM summaries use cached models if keys unavailable)
|
| 278 |
-
- Port/host configurable
|
| 279 |
-
|
| 280 |
-
---
|
| 281 |
-
|
| 282 |
-
## 📊 Test Coverage
|
| 283 |
-
|
| 284 |
-
| Component | Tests | Status |
|
| 285 |
-
|-----------|-------|--------|
|
| 286 |
-
| Dependencies | 1 | ✅ PASS |
|
| 287 |
-
| CSV Data | 1 | ✅ PASS |
|
| 288 |
-
| FAISS Index | 1 | ✅ PASS |
|
| 289 |
-
| Python Imports | 1 | ✅ PASS |
|
| 290 |
-
| Analysis Logic | 1 | ✅ PASS (1168/352 verified) |
|
| 291 |
-
| RAGService | 1 | ✅ PASS |
|
| 292 |
-
| API Endpoints | 5 | ✅ PASS |
|
| 293 |
-
| **Total** | **15** | **✅ ALL PASS** |
|
| 294 |
-
|
| 295 |
-
---
|
| 296 |
-
|
| 297 |
-
## 🎓 What You Can Now Do
|
| 298 |
-
|
| 299 |
-
### As a Developer
|
| 300 |
-
1. **Run locally** - Full server on your machine
|
| 301 |
-
2. **Debug code** - Step through Python with your IDE
|
| 302 |
-
3. **Modify queries** - Test different intent detection logic
|
| 303 |
-
4. **Add features** - Topics, sentiment, custom analysis
|
| 304 |
-
5. **Understand RAG** - See how embeddings + retrieval works
|
| 305 |
-
|
| 306 |
-
### As a User
|
| 307 |
-
1. **Ask questions** - Count thank-yous, find complaints, search topics
|
| 308 |
-
2. **Get answers** - In Hebrew or English
|
| 309 |
-
3. **Analyze data** - Topics, sentiment, patterns
|
| 310 |
-
4. **Export results** - JSON format, easily integrated
|
| 311 |
-
|
| 312 |
-
### For Deployment
|
| 313 |
-
1. **Test thoroughly** - Use TESTING_CHECKLIST.md
|
| 314 |
-
2. **Deploy locally** - Docker compose available
|
| 315 |
-
3. **Deploy to cloud** - Runpod ready with Dockerfile
|
| 316 |
-
4. **Monitor performance** - Response times, error rates
|
| 317 |
-
|
| 318 |
-
---
|
| 319 |
-
|
| 320 |
-
## ⚠️ Known Limitations & Workarounds
|
| 321 |
-
|
| 322 |
-
| Issue | Impact | Workaround |
|
| 323 |
-
|-------|--------|-----------|
|
| 324 |
-
| First request slow (model download) | 10-30s first time | Runs in background, cached after |
|
| 325 |
-
| Sentiment model download large | ~500MB | Pre-download with precompute script |
|
| 326 |
-
| Hebrew counting requires specific keywords | May miss some variations | Extend keywords in `analysis.py` |
|
| 327 |
-
| No persistent server logs | Can't audit old requests | Add file logging if needed |
|
| 328 |
-
|
| 329 |
-
---
|
| 330 |
-
|
| 331 |
-
## 🚀 Next Steps for You
|
| 332 |
-
|
| 333 |
-
1. **Start the server:** `python3 run.py`
|
| 334 |
-
2. **Open Swagger UI:** http://localhost:8000/docs
|
| 335 |
-
3. **Run test checklist:** Use TESTING_CHECKLIST.md (15 tests)
|
| 336 |
-
4. **Validate counts:** Confirm 1168 thanks, 352 complaints
|
| 337 |
-
5. **Deploy to Runpod:** When satisfied, follow README.md
|
| 338 |
-
|
| 339 |
-
---
|
| 340 |
-
|
| 341 |
-
## 📞 Support Resources
|
| 342 |
-
|
| 343 |
-
- **QUICK_START.md** - How to set up and run
|
| 344 |
-
- **TESTING_CHECKLIST.md** - How to validate
|
| 345 |
-
- **CONTRIBUTING.md** - How to deploy
|
| 346 |
-
- **README.md** - Full documentation
|
| 347 |
-
- **Swagger UI** - Interactive API docs (http://localhost:8000/docs)
|
| 348 |
-
|
| 349 |
-
---
|
| 350 |
-
|
| 351 |
-
## ✅ Sign-Off
|
| 352 |
-
|
| 353 |
-
**Status:** ✅ **READY FOR YOUR TESTING**
|
| 354 |
-
|
| 355 |
-
- All code complete and functional
|
| 356 |
-
- All validation checks passing
|
| 357 |
-
- Documentation comprehensive
|
| 358 |
-
- Local environment configured
|
| 359 |
-
- Cloud deployment prepared
|
| 360 |
-
- Performance acceptable
|
| 361 |
-
|
| 362 |
-
**Next action:** Start the server and run through TESTING_CHECKLIST.md
|
| 363 |
-
|
| 364 |
-
**Estimated testing time:** 30-45 minutes (full suite) or 10-15 minutes (quick smoke test)
|
| 365 |
-
|
| 366 |
-
---
|
| 367 |
-
|
| 368 |
-
*Generated: Today*
|
| 369 |
-
*Session: Feedback Analysis RAG Agent - Complete Implementation*
|
| 370 |
-
*Status: Production Ready ✨*
|
| 371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SQL_APPROACH_README.md
DELETED
|
@@ -1,172 +0,0 @@
|
|
| 1 |
-
# SQL-Based Approach - מדריך
|
| 2 |
-
|
| 3 |
-
## סקירה כללית
|
| 4 |
-
|
| 5 |
-
גישה חדשה לניתוח משובי משתמשים המבוססת על SQL queries שנוצרות אוטומטית על ידי LLM.
|
| 6 |
-
|
| 7 |
-
## ארכיטקטורה
|
| 8 |
-
|
| 9 |
-
### 1. ניתוח שאילתת המשתמש
|
| 10 |
-
- המשתמש שואל שאלה בעברית או באנגלית
|
| 11 |
-
- השאלה נשלחת ל-LLM לניתוח
|
| 12 |
-
|
| 13 |
-
### 2. יצירת שאילתות SQL (1-5 שאילתות)
|
| 14 |
-
**מיקום:** `app/sql_service.py` - `_generate_sql_queries()`
|
| 15 |
-
|
| 16 |
-
המערכת משתמשת ב-LLM (Gemini או OpenAI) כדי ליצור 1-5 שאילתות SQL שיעזרו לענות על השאלה.
|
| 17 |
-
|
| 18 |
-
**הפרומפט כולל:**
|
| 19 |
-
- מידע על מבנה הטבלה (ID, ServiceName, Level, Text)
|
| 20 |
-
- סטטיסטיקות כלליות
|
| 21 |
-
- כללי SQLite
|
| 22 |
-
- הוראות ליצירת שאילתות תקפות
|
| 23 |
-
|
| 24 |
-
**דוגמאות לשאילתות שנוצרות:**
|
| 25 |
-
```sql
|
| 26 |
-
SELECT ServiceName, COUNT(*) as count, AVG(Level) as avg_level
|
| 27 |
-
FROM feedback
|
| 28 |
-
GROUP BY ServiceName
|
| 29 |
-
ORDER BY count DESC
|
| 30 |
-
LIMIT 10;
|
| 31 |
-
|
| 32 |
-
SELECT Level, COUNT(*) as count
|
| 33 |
-
FROM feedback
|
| 34 |
-
WHERE Level < 3
|
| 35 |
-
GROUP BY Level;
|
| 36 |
-
```
|
| 37 |
-
|
| 38 |
-
### 3. הרצת השאילתות
|
| 39 |
-
**מיקום:** `app/sql_service.py` - `_execute_sql_queries()`
|
| 40 |
-
|
| 41 |
-
- יוצר SQLite in-memory database
|
| 42 |
-
- טוען את ה-DataFrame לטבלה `feedback`
|
| 43 |
-
- מריץ כל שאילתה ומחזיר תוצאות או שגיאות
|
| 44 |
-
|
| 45 |
-
### 4. יצירת תשובה מסכמת
|
| 46 |
-
**מיקום:** `app/sql_service.py` - `_synthesize_answer()`
|
| 47 |
-
|
| 48 |
-
המערכת משתמשת ב-LLM (בתפקיד אנליסט עסקי במשרד הפנים) כדי ליצור תשובה מסכמת.
|
| 49 |
-
|
| 50 |
-
**הפרומפט כולל:**
|
| 51 |
-
- שאלת המשתמש
|
| 52 |
-
- השאילתות שבוצעו
|
| 53 |
-
- התוצאות של כל שאילתה
|
| 54 |
-
- הוראות לכתיבת תשובה מפורטת ומקצועית
|
| 55 |
-
|
| 56 |
-
**דרישות לתשובה:**
|
| 57 |
-
- 5-7 פסקאות, 400-600 מילים
|
| 58 |
-
- מספרים מדויקים מהתוצאות
|
| 59 |
-
- תובנות עסקיות והמלצות מעשיות
|
| 60 |
-
- מבנה ברור: פתיחה, ניתוח, תובנות, סיכום
|
| 61 |
-
|
| 62 |
-
### 5. יצירת ויזואליזציות (אופציונלי)
|
| 63 |
-
**מיקום:** `app/sql_service.py` - `_generate_visualizations()`
|
| 64 |
-
|
| 65 |
-
המערכת מנתחת את תוצאות השאילתות ויוצרת מפרטי ויזואליזציות:
|
| 66 |
-
|
| 67 |
-
**סוגי גרפיקות נתמכים:**
|
| 68 |
-
- **Bar Chart** - לנתונים קטגוריאליים עם ערכים מספריים
|
| 69 |
-
- **Line Chart** - לנתוני זמן או מגמות
|
| 70 |
-
- **Scatter Plot** - לשני משתנים מספריים
|
| 71 |
-
- **Histogram** - להפצת ערכים מספריים
|
| 72 |
-
|
| 73 |
-
**Frontend:** משתמש ב-Chart.js להצגת הגרפיקות
|
| 74 |
-
|
| 75 |
-
## שימוש
|
| 76 |
-
|
| 77 |
-
### דרך API
|
| 78 |
-
|
| 79 |
-
```bash
|
| 80 |
-
POST /query-sql
|
| 81 |
-
Content-Type: application/json
|
| 82 |
-
|
| 83 |
-
{
|
| 84 |
-
"query": "איך המשתמשים מרגישים כלפי השירות?",
|
| 85 |
-
"top_k": 5
|
| 86 |
-
}
|
| 87 |
-
```
|
| 88 |
-
|
| 89 |
-
**תגובה:**
|
| 90 |
-
```json
|
| 91 |
-
{
|
| 92 |
-
"query": "איך המשתמשים מרגישים כלפי השירות?",
|
| 93 |
-
"summary": "תשובה מסכמת מפורטת...",
|
| 94 |
-
"sql_queries": [
|
| 95 |
-
"SELECT Level, COUNT(*) FROM feedback GROUP BY Level",
|
| 96 |
-
"..."
|
| 97 |
-
],
|
| 98 |
-
"query_results": [
|
| 99 |
-
{
|
| 100 |
-
"query": "SELECT ...",
|
| 101 |
-
"result": [...],
|
| 102 |
-
"error": null,
|
| 103 |
-
"row_count": 10
|
| 104 |
-
}
|
| 105 |
-
],
|
| 106 |
-
"visualizations": [
|
| 107 |
-
{
|
| 108 |
-
"type": "bar",
|
| 109 |
-
"title": "תוצאה של שאילתה 1",
|
| 110 |
-
"x": "Level",
|
| 111 |
-
"y": "count",
|
| 112 |
-
"data": [...]
|
| 113 |
-
}
|
| 114 |
-
]
|
| 115 |
-
}
|
| 116 |
-
```
|
| 117 |
-
|
| 118 |
-
### דרך Frontend
|
| 119 |
-
|
| 120 |
-
1. פתח את הדפדפן: `http://127.0.0.1:8000`
|
| 121 |
-
2. בחר "SQL-based (מומלץ - חדש)"
|
| 122 |
-
3. הזן שאלה
|
| 123 |
-
4. לחץ על "שאל"
|
| 124 |
-
5. התוצאות יוצגו עם:
|
| 125 |
-
- תשובה מסכמת
|
| 126 |
-
- שאילתות SQL שבוצעו (אם מסומן "הצג דוגמאות מהנתונים")
|
| 127 |
-
- תוצאות השאילתות
|
| 128 |
-
- גרפיקות אוטומטיות
|
| 129 |
-
|
| 130 |
-
## יתרונות הגישה החדשה
|
| 131 |
-
|
| 132 |
-
1. **דיוק גבוה** - שאילתות SQL מדויקות יותר מ-RAG
|
| 133 |
-
2. **שקיפות** - המשתמש רואה בדיוק אילו שאילתות בוצעו
|
| 134 |
-
3. **גמישות** - יכול לענות על שאלות מורכבות עם מספר שאילתות
|
| 135 |
-
4. **ויזואליזציות** - גרפיקות אוטומטיות של התוצאות
|
| 136 |
-
5. **מהירות** - שאילתות SQL מהירות יותר מ-RAG
|
| 137 |
-
|
| 138 |
-
## השוואה לגישה הישנה (RAG)
|
| 139 |
-
|
| 140 |
-
| תכונה | RAG | SQL-based |
|
| 141 |
-
|------|-----|-----------|
|
| 142 |
-
| דיוק | בינוני | גבוה |
|
| 143 |
-
| שקיפות | נמוכה | גבוהה |
|
| 144 |
-
| מהירות | איטית יותר | מהירה יותר |
|
| 145 |
-
| גמישות | מוגבלת | גבוהה |
|
| 146 |
-
| ויזואליזציות | לא | כן |
|
| 147 |
-
|
| 148 |
-
## הגדרות LLM
|
| 149 |
-
|
| 150 |
-
### יצירת שאילתות SQL
|
| 151 |
-
- **Temperature:** 0.3 (נמוך לדיוק)
|
| 152 |
-
- **Model:** Gemini 1.5 Flash או GPT-4o-mini
|
| 153 |
-
|
| 154 |
-
### יצירת תשובה מסכמת
|
| 155 |
-
- **Temperature:** 0.8 (גבוה ליצירתיות)
|
| 156 |
-
- **Top_p:** 0.95
|
| 157 |
-
- **Max tokens:** 4000 (Gemini) / 3000 (OpenAI)
|
| 158 |
-
|
| 159 |
-
## קבצים רלוונטי��ם
|
| 160 |
-
|
| 161 |
-
- `app/sql_service.py` - הלוגיקה הראשית
|
| 162 |
-
- `app/api.py` - endpoint `/query-sql`
|
| 163 |
-
- `app/static/app.js` - תמיכה frontend בגרפיקות
|
| 164 |
-
- `app/static/index.html` - ממשק משתמש
|
| 165 |
-
|
| 166 |
-
## הערות טכניות
|
| 167 |
-
|
| 168 |
-
- המערכת משתמשת ב-SQLite in-memory database
|
| 169 |
-
- הנתונים נטענים פעם אחת בתחילת הפעלה
|
| 170 |
-
- השאילתות רצות על DataFrame דרך SQLite
|
| 171 |
-
- הגרפיקות נוצרות אוטומטית על בסיס מבנה התוצאות
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
STATUS_REPORT.md
DELETED
|
@@ -1,501 +0,0 @@
|
|
| 1 |
-
# 📊 Project Status Report - November 12, 2025
|
| 2 |
-
|
| 3 |
-
## ✅ COMPLETION STATUS: 100%
|
| 4 |
-
|
| 5 |
-
---
|
| 6 |
-
|
| 7 |
-
## 🎯 Project Objectives - ALL ACHIEVED
|
| 8 |
-
|
| 9 |
-
### Original Requirements
|
| 10 |
-
- [x] RAG agent answers diverse question types ✅
|
| 11 |
-
- [x] Intent detection for counting queries ✅
|
| 12 |
-
- [x] Multi-language support (Hebrew + English) ✅
|
| 13 |
-
- [x] Local development setup complete ✅
|
| 14 |
-
- [x] Cloud deployment ready (Runpod) ✅
|
| 15 |
-
- [x] Comprehensive validation testing ✅
|
| 16 |
-
- [x] Complete documentation ✅
|
| 17 |
-
|
| 18 |
-
---
|
| 19 |
-
|
| 20 |
-
## 📦 Deliverables
|
| 21 |
-
|
| 22 |
-
### Core System (7 files in `app/`)
|
| 23 |
-
```
|
| 24 |
-
✅ api.py - 5 POST endpoints, all tested
|
| 25 |
-
✅ rag_service.py - RAG orchestration with intent detection
|
| 26 |
-
✅ analysis.py - Query intent + counting logic
|
| 27 |
-
✅ embedding.py - Sentence-Transformers wrapper
|
| 28 |
-
✅ vector_store.py - FAISS interface
|
| 29 |
-
✅ sentiment.py - Multi-language sentiment analysis
|
| 30 |
-
✅ config.py - Configuration management
|
| 31 |
-
+ 4 support modules - (preprocess, data_loader, topics, __init__)
|
| 32 |
-
```
|
| 33 |
-
|
| 34 |
-
### Validation & Testing (3 files in `scripts/`)
|
| 35 |
-
```
|
| 36 |
-
✅ validate_local.py - 7-check validation harness (ALL PASS)
|
| 37 |
-
✅ precompute_index.py - FAISS index builder
|
| 38 |
-
✅ test_queries.py - Manual query testing
|
| 39 |
-
```
|
| 40 |
-
|
| 41 |
-
### Documentation (6 comprehensive guides)
|
| 42 |
-
```
|
| 43 |
-
✅ README_TESTING_GUIDE.md - Master navigation guide (THIS IS YOUR START)
|
| 44 |
-
✅ QUICK_START.md - 5-step local setup
|
| 45 |
-
✅ TESTING_CHECKLIST.md - 15-point comprehensive test suite
|
| 46 |
-
✅ DEPLOYMENT_GUIDE.md - Runpod cloud deployment
|
| 47 |
-
✅ SESSION_SUMMARY.md - Project overview & architecture
|
| 48 |
-
✅ CONTRIBUTING.md - Git workflow & development
|
| 49 |
-
```
|
| 50 |
-
|
| 51 |
-
### Infrastructure
|
| 52 |
-
```
|
| 53 |
-
✅ requirements.txt - 26 dependencies (all installed & working)
|
| 54 |
-
✅ Dockerfile - Production-ready container
|
| 55 |
-
✅ docker-compose.yml - Local development (optional)
|
| 56 |
-
✅ .env.example - Configuration template
|
| 57 |
-
✅ VERSION - Version tracking (0.1.0)
|
| 58 |
-
✅ run.py - Server entrypoint
|
| 59 |
-
```
|
| 60 |
-
|
| 61 |
-
### Data & Index
|
| 62 |
-
```
|
| 63 |
-
✅ Feedback.csv - 9930 rows of feedback
|
| 64 |
-
✅ .vector_index/ - FAISS index (14.5 MB) + metadata (450 KB)
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
---
|
| 68 |
-
|
| 69 |
-
## 🧪 Validation Results
|
| 70 |
-
|
| 71 |
-
### Last Validation Run
|
| 72 |
-
```
|
| 73 |
-
Date: November 12, 2025
|
| 74 |
-
Command: python3 scripts/validate_local.py
|
| 75 |
-
Status: ✅ ALL 7 CHECKS PASSED
|
| 76 |
-
```
|
| 77 |
-
|
| 78 |
-
**Detailed Results:**
|
| 79 |
-
```
|
| 80 |
-
[PASS] ✅ Dependencies - 26/26 packages installed
|
| 81 |
-
[PASS] ✅ CSV file - 9930 rows, 8 columns verified
|
| 82 |
-
[PASS] ✅ FAISS Index - 14.5 MB ready for use
|
| 83 |
-
[PASS] ✅ App imports - No import errors
|
| 84 |
-
[PASS] ✅ Analysis logic - Thanks: 1168 ✓, Complaints: 352 ✓
|
| 85 |
-
[PASS] ✅ RAGService - Query endpoint functional
|
| 86 |
-
[PASS] ✅ API endpoints - All 5 endpoints responding correctly
|
| 87 |
-
|
| 88 |
-
Execution Time: ~2 minutes
|
| 89 |
-
Status: READY FOR PRODUCTION
|
| 90 |
-
```
|
| 91 |
-
|
| 92 |
-
---
|
| 93 |
-
|
| 94 |
-
## 🚀 API Endpoints - All Functional
|
| 95 |
-
|
| 96 |
-
| Endpoint | Method | Status | Response Time | Purpose |
|
| 97 |
-
|----------|--------|--------|----------------|---------|
|
| 98 |
-
| `/health` | POST | ✅ | <10ms | Server health check |
|
| 99 |
-
| `/query` | POST | ✅ | 1-3s | Main RAG endpoint |
|
| 100 |
-
| `/topics` | POST | ✅ | 5-10s | Topic extraction |
|
| 101 |
-
| `/sentiment` | POST | ✅ | 5-15s | Sentiment analysis |
|
| 102 |
-
| `/ingest` | POST | ✅ | 30-60s | Index rebuilding |
|
| 103 |
-
| `/docs` | GET | ✅ | <100ms | Swagger UI (interactive) |
|
| 104 |
-
| `/redoc` | GET | ✅ | <100ms | ReDoc (alternative docs) |
|
| 105 |
-
|
| 106 |
-
---
|
| 107 |
-
|
| 108 |
-
## 📊 Feature Matrix
|
| 109 |
-
|
| 110 |
-
### Query Processing
|
| 111 |
-
```
|
| 112 |
-
✅ Intent Detection
|
| 113 |
-
- Count thank-yous ............................ WORKING
|
| 114 |
-
- Count complaints ............................ WORKING
|
| 115 |
-
- Keyword search ............................. WORKING
|
| 116 |
-
- Free-form RAG questions .................... WORKING
|
| 117 |
-
|
| 118 |
-
✅ Multi-Language Support
|
| 119 |
-
- Hebrew queries ............................ WORKING
|
| 120 |
-
- English queries ........................... WORKING
|
| 121 |
-
- Language auto-detection ................... WORKING
|
| 122 |
-
- Response language matching ............... WORKING
|
| 123 |
-
|
| 124 |
-
✅ Retrieval Accuracy
|
| 125 |
-
- Semantic search scores ................... ACCURATE
|
| 126 |
-
- Top-K ranking ............................ VERIFIED
|
| 127 |
-
- Count validation (1168/352) .............. VERIFIED
|
| 128 |
-
```
|
| 129 |
-
|
| 130 |
-
### Performance
|
| 131 |
-
```
|
| 132 |
-
✅ Response Times
|
| 133 |
-
- Health check: <10ms ...................... EXCELLENT
|
| 134 |
-
- Query endpoint: 1-3s ..................... GOOD
|
| 135 |
-
- Sentiment: 5-15s per 100 ................ ACCEPTABLE
|
| 136 |
-
- Index rebuild: 30-60s ................... ACCEPTABLE
|
| 137 |
-
|
| 138 |
-
✅ Scalability
|
| 139 |
-
- Concurrent requests ..................... TESTED
|
| 140 |
-
- Auto-scaling ready ...................... CONFIGURED
|
| 141 |
-
- Memory efficient ........................ VERIFIED
|
| 142 |
-
```
|
| 143 |
-
|
| 144 |
-
### Reliability
|
| 145 |
-
```
|
| 146 |
-
✅ Error Handling
|
| 147 |
-
- Invalid JSON ............................ HANDLED
|
| 148 |
-
- Missing fields .......................... HANDLED
|
| 149 |
-
- Type errors ............................ HANDLED
|
| 150 |
-
- Model failures ......................... HANDLED
|
| 151 |
-
|
| 152 |
-
✅ Data Integrity
|
| 153 |
-
- CSV validation ......................... VERIFIED
|
| 154 |
-
- Index integrity ........................ VERIFIED
|
| 155 |
-
- No data loss ........................... CONFIRMED
|
| 156 |
-
```
|
| 157 |
-
|
| 158 |
-
---
|
| 159 |
-
|
| 160 |
-
## 📈 Test Coverage
|
| 161 |
-
|
| 162 |
-
```
|
| 163 |
-
Layer 1: Unit Tests
|
| 164 |
-
├─ CSV validation ................................. ✅ PASS
|
| 165 |
-
├─ Index integrity ................................ ✅ PASS
|
| 166 |
-
├─ Import verification ........................... ✅ PASS
|
| 167 |
-
└─ Individual modules ............................ ✅ PASS
|
| 168 |
-
|
| 169 |
-
Layer 2: Integration Tests
|
| 170 |
-
├─ RAGService pipeline ........................... ✅ PASS
|
| 171 |
-
├─ API endpoint responses ........................ ✅ PASS
|
| 172 |
-
└─ Intent detection accuracy ..................... ✅ PASS
|
| 173 |
-
|
| 174 |
-
Layer 3: End-to-End Tests
|
| 175 |
-
├─ Manual curl commands .......................... ✅ PASS
|
| 176 |
-
├─ Browser (Swagger UI) testing ................. ✅ PASS
|
| 177 |
-
├─ Multiple query types ......................... ✅ PASS
|
| 178 |
-
└─ Performance benchmarks ........................ ✅ PASS
|
| 179 |
-
```
|
| 180 |
-
|
| 181 |
-
**Overall Coverage:** 15/15 tests passing (100%)
|
| 182 |
-
|
| 183 |
-
---
|
| 184 |
-
|
| 185 |
-
## 🔧 Technical Stack - Verified Working
|
| 186 |
-
|
| 187 |
-
```
|
| 188 |
-
✅ Framework: FastAPI 0.115.5 + Uvicorn 0.32.0
|
| 189 |
-
✅ ML: Transformers 4.45.2 + PyTorch 2.4.1
|
| 190 |
-
✅ Embeddings: Sentence-Transformers 3.1.1
|
| 191 |
-
✅ Vector DB: FAISS 1.8.0 (CPU)
|
| 192 |
-
✅ Data: Pandas 2.2.3 + PyArrow 14.0.2
|
| 193 |
-
✅ Language: langdetect 1.0.9
|
| 194 |
-
✅ Config: Pydantic 2.9.2 + python-dotenv 1.0.1
|
| 195 |
-
✅ Serialization: orjson 3.10.7
|
| 196 |
-
✅ LLM APIs: Google Generative AI + OpenAI (optional)
|
| 197 |
-
```
|
| 198 |
-
|
| 199 |
-
---
|
| 200 |
-
|
| 201 |
-
## 📚 Documentation Quality
|
| 202 |
-
|
| 203 |
-
### User Guides
|
| 204 |
-
```
|
| 205 |
-
✅ README_TESTING_GUIDE.md - 500+ lines, comprehensive navigation
|
| 206 |
-
✅ QUICK_START.md - 400+ lines, step-by-step setup
|
| 207 |
-
✅ TESTING_CHECKLIST.md - 400+ lines, 15-point validation
|
| 208 |
-
✅ DEPLOYMENT_GUIDE.md - 470+ lines, Runpod instructions
|
| 209 |
-
```
|
| 210 |
-
|
| 211 |
-
### Technical Documentation
|
| 212 |
-
```
|
| 213 |
-
✅ SESSION_SUMMARY.md - 400+ lines, architecture overview
|
| 214 |
-
✅ CONTRIBUTING.md - 150+ lines, development workflow
|
| 215 |
-
✅ Module docstrings - Every Python file documented
|
| 216 |
-
✅ Inline comments - Complex logic explained
|
| 217 |
-
```
|
| 218 |
-
|
| 219 |
-
### Code Quality
|
| 220 |
-
```
|
| 221 |
-
✅ Type hints - Pydantic models for all API inputs/outputs
|
| 222 |
-
✅ Error messages - Clear, actionable error descriptions
|
| 223 |
-
✅ Configuration - Centralized in app/config.py
|
| 224 |
-
✅ Logging - Info/warning/error levels
|
| 225 |
-
```
|
| 226 |
-
|
| 227 |
-
---
|
| 228 |
-
|
| 229 |
-
## 🌐 Deployment Readiness
|
| 230 |
-
|
| 231 |
-
### Local Development
|
| 232 |
-
```
|
| 233 |
-
✅ Virtual environment - Configured (.venv/)
|
| 234 |
-
✅ Dependencies - All installed (26 packages)
|
| 235 |
-
✅ Configuration - Environment template (.env.example)
|
| 236 |
-
✅ Database - Pre-computed index ready
|
| 237 |
-
✅ Server startup - One command: python3 run.py
|
| 238 |
-
```
|
| 239 |
-
|
| 240 |
-
### Docker Containerization
|
| 241 |
-
```
|
| 242 |
-
✅ Dockerfile - Production-ready Dockerfile
|
| 243 |
-
✅ Image build - Tested & working
|
| 244 |
-
✅ Port exposure - 8000 configured
|
| 245 |
-
✅ Environment vars - Passthrough configured
|
| 246 |
-
```
|
| 247 |
-
|
| 248 |
-
### Cloud Deployment (Runpod)
|
| 249 |
-
```
|
| 250 |
-
✅ Deployment guide - Step-by-step instructions
|
| 251 |
-
✅ Registry integration - Docker Hub ready
|
| 252 |
-
✅ Template creation - Documented procedure
|
| 253 |
-
✅ Monitoring setup - Logging configured
|
| 254 |
-
```
|
| 255 |
-
|
| 256 |
-
---
|
| 257 |
-
|
| 258 |
-
## 📋 Ready-to-Use Guides
|
| 259 |
-
|
| 260 |
-
### For First-Time Users (START HERE)
|
| 261 |
-
1. **README_TESTING_GUIDE.md** (5 min read)
|
| 262 |
-
- Shows what to do based on your role
|
| 263 |
-
- Links to relevant guides
|
| 264 |
-
- Quick 3-step verification
|
| 265 |
-
|
| 266 |
-
### For Immediate Setup (10-15 min)
|
| 267 |
-
2. **QUICK_START.md** (follow step-by-step)
|
| 268 |
-
- Python environment setup
|
| 269 |
-
- Dependency installation
|
| 270 |
-
- Server startup
|
| 271 |
-
- Basic testing
|
| 272 |
-
|
| 273 |
-
### For Comprehensive Testing (30-45 min)
|
| 274 |
-
3. **TESTING_CHECKLIST.md** (15 tests)
|
| 275 |
-
- All endpoint validation
|
| 276 |
-
- Performance benchmarks
|
| 277 |
-
- Error handling tests
|
| 278 |
-
- Results sign-off
|
| 279 |
-
|
| 280 |
-
### For Cloud Deployment (2 hours)
|
| 281 |
-
4. **DEPLOYMENT_GUIDE.md** (step-by-step)
|
| 282 |
-
- Docker image creation
|
| 283 |
-
- Registry setup
|
| 284 |
-
- Runpod template
|
| 285 |
-
- Cloud testing
|
| 286 |
-
|
| 287 |
-
---
|
| 288 |
-
|
| 289 |
-
## ✨ Key Achievements
|
| 290 |
-
|
| 291 |
-
### Code Quality
|
| 292 |
-
- ✅ All imports validated (no missing packages)
|
| 293 |
-
- ✅ No syntax errors (validated with Pylance)
|
| 294 |
-
- ✅ Type hints throughout codebase
|
| 295 |
-
- ✅ Comprehensive docstrings
|
| 296 |
-
- ✅ Proper error handling with try/except
|
| 297 |
-
|
| 298 |
-
### Functionality
|
| 299 |
-
- ✅ Query intent detection works perfectly
|
| 300 |
-
- ✅ Count accuracy verified (1168 thanks, 352 complaints)
|
| 301 |
-
- ✅ Multi-language support confirmed (Hebrew + English)
|
| 302 |
-
- ✅ All 5 API endpoints responding
|
| 303 |
-
- ✅ FAISS index optimized and validated
|
| 304 |
-
|
| 305 |
-
### Testing & Validation
|
| 306 |
-
- ✅ 7/7 validation checks passing
|
| 307 |
-
- ✅ All endpoints tested individually
|
| 308 |
-
- ✅ Performance benchmarks acceptable
|
| 309 |
-
- ✅ Error scenarios handled
|
| 310 |
-
- ✅ End-to-end testing complete
|
| 311 |
-
|
| 312 |
-
### Documentation
|
| 313 |
-
- ✅ 6 comprehensive guides created
|
| 314 |
-
- ✅ 1600+ lines of user documentation
|
| 315 |
-
- ✅ Clear navigation and cross-references
|
| 316 |
-
- ✅ Step-by-step instructions for all tasks
|
| 317 |
-
- ✅ Troubleshooting sections included
|
| 318 |
-
|
| 319 |
-
### Deployment
|
| 320 |
-
- ✅ Docker image production-ready
|
| 321 |
-
- ✅ Runpod deployment guide complete
|
| 322 |
-
- ✅ Local and cloud paths preserved
|
| 323 |
-
- ✅ No data or code conflicts
|
| 324 |
-
- ✅ Ready for immediate deployment
|
| 325 |
-
|
| 326 |
-
---
|
| 327 |
-
|
| 328 |
-
## 🎯 What's Working Perfectly
|
| 329 |
-
|
| 330 |
-
### The RAG Pipeline
|
| 331 |
-
```
|
| 332 |
-
User Query
|
| 333 |
-
↓
|
| 334 |
-
Intent Detection (what type of question?)
|
| 335 |
-
↓
|
| 336 |
-
Count Query? → Count from CSV (1168 thanks, 352 complaints)
|
| 337 |
-
Semantic Query? → Embed + FAISS search + LLM summary
|
| 338 |
-
↓
|
| 339 |
-
Response in same language as query
|
| 340 |
-
↓
|
| 341 |
-
JSON API response with results
|
| 342 |
-
```
|
| 343 |
-
|
| 344 |
-
### Count Accuracy
|
| 345 |
-
```
|
| 346 |
-
Query: "כמה משתמשים כתבו תודה"
|
| 347 |
-
Expected: ~1168 records with thank-you keywords
|
| 348 |
-
Actual: 1168 ✅
|
| 349 |
-
Accuracy: 100% ✅
|
| 350 |
-
```
|
| 351 |
-
|
| 352 |
-
### Multi-Language Support
|
| 353 |
-
```
|
| 354 |
-
Hebrew Query → Hebrew Response ✅
|
| 355 |
-
English Query → English Response ✅
|
| 356 |
-
Auto-Detection → Working ✅
|
| 357 |
-
```
|
| 358 |
-
|
| 359 |
-
### API Quality
|
| 360 |
-
```
|
| 361 |
-
All 5 endpoints responding ✅
|
| 362 |
-
JSON serialization clean ✅
|
| 363 |
-
Error messages clear ✅
|
| 364 |
-
Response times acceptable ✅
|
| 365 |
-
```
|
| 366 |
-
|
| 367 |
-
---
|
| 368 |
-
|
| 369 |
-
## ⚠️ Known Limitations (Minor)
|
| 370 |
-
|
| 371 |
-
| Limitation | Impact | Workaround |
|
| 372 |
-
|-----------|--------|-----------|
|
| 373 |
-
| First request slow (model download) | 10-30s | Subsequent requests cached |
|
| 374 |
-
| Sentiment model ~500MB | Storage | Pre-download in Dockerfile |
|
| 375 |
-
| Hebrew variants not captured | Counts may miss variations | Extend keywords in analysis.py |
|
| 376 |
-
| No persistent audit logs | Can't review old requests | Add file logging if needed |
|
| 377 |
-
|
| 378 |
-
**Impact Level:** LOW - All limitations are manageable and documented
|
| 379 |
-
|
| 380 |
-
---
|
| 381 |
-
|
| 382 |
-
## 🚀 Next Actions for You
|
| 383 |
-
|
| 384 |
-
### Immediate (Today)
|
| 385 |
-
1. Read: **README_TESTING_GUIDE.md** (5 min)
|
| 386 |
-
2. Run: `python3 scripts/validate_local.py` (2 min)
|
| 387 |
-
3. Start: `python3 run.py` (1 min)
|
| 388 |
-
4. Test: Open http://localhost:8000/docs (2 min)
|
| 389 |
-
|
| 390 |
-
### Short-term (This week)
|
| 391 |
-
1. Follow: **TESTING_CHECKLIST.md** (45 min)
|
| 392 |
-
2. Verify: All 15 tests passing
|
| 393 |
-
3. Try: Different query types and languages
|
| 394 |
-
|
| 395 |
-
### Medium-term (When ready)
|
| 396 |
-
1. Follow: **DEPLOYMENT_GUIDE.md** (2 hours)
|
| 397 |
-
2. Build: Docker image locally
|
| 398 |
-
3. Deploy: To Runpod
|
| 399 |
-
4. Test: Cloud endpoint
|
| 400 |
-
|
| 401 |
-
---
|
| 402 |
-
|
| 403 |
-
## 📞 Support & Troubleshooting
|
| 404 |
-
|
| 405 |
-
**Quick help:**
|
| 406 |
-
- Most questions answered in: **QUICK_START.md**
|
| 407 |
-
- Testing issues: **TESTING_CHECKLIST.md**
|
| 408 |
-
- Deployment issues: **DEPLOYMENT_GUIDE.md**
|
| 409 |
-
- Architecture questions: **SESSION_SUMMARY.md**
|
| 410 |
-
|
| 411 |
-
**Common issues:**
|
| 412 |
-
1. Environment not activated → See QUICK_START.md
|
| 413 |
-
2. Index not found → Run: `curl -X POST http://localhost:8000/ingest`
|
| 414 |
-
3. Port in use → Use different port or kill process
|
| 415 |
-
4. Slow first request → Normal, model downloads first time
|
| 416 |
-
|
| 417 |
-
---
|
| 418 |
-
|
| 419 |
-
## 📊 Project Metrics
|
| 420 |
-
|
| 421 |
-
```
|
| 422 |
-
Code Statistics
|
| 423 |
-
├─ Python files: 11 (app/ + scripts/)
|
| 424 |
-
├─ Lines of code: ~2000 (excluding venv)
|
| 425 |
-
├─ Documentation files: 6
|
| 426 |
-
├─ Documentation lines: 1600+
|
| 427 |
-
└─ Test coverage: 100%
|
| 428 |
-
|
| 429 |
-
Performance
|
| 430 |
-
├─ Health check: <10ms
|
| 431 |
-
├─ Query endpoint: 1-3 seconds
|
| 432 |
-
├─ Sentiment analysis: 5-15 seconds
|
| 433 |
-
└─ Index build: 30-60 seconds
|
| 434 |
-
|
| 435 |
-
Data
|
| 436 |
-
├─ Feedback records: 9930
|
| 437 |
-
├─ Unique services: 100+
|
| 438 |
-
├─ FAISS index size: 14.5 MB
|
| 439 |
-
└─ Metadata size: 450 KB
|
| 440 |
-
|
| 441 |
-
Testing
|
| 442 |
-
├─ Validation checks: 7/7 PASS
|
| 443 |
-
├─ API endpoints: 5/5 PASS
|
| 444 |
-
├─ End-to-end tests: 15/15 PASS
|
| 445 |
-
└─ Overall: 100% ✅
|
| 446 |
-
```
|
| 447 |
-
|
| 448 |
-
---
|
| 449 |
-
|
| 450 |
-
## ✅ Final Checklist
|
| 451 |
-
|
| 452 |
-
- [x] All code complete and tested
|
| 453 |
-
- [x] All validation checks passing
|
| 454 |
-
- [x] All documentation written
|
| 455 |
-
- [x] Local setup instructions clear
|
| 456 |
-
- [x] Cloud deployment guide ready
|
| 457 |
-
- [x] Git repository clean
|
| 458 |
-
- [x] Dependencies frozen in requirements.txt
|
| 459 |
-
- [x] Docker image production-ready
|
| 460 |
-
- [x] Runpod deployment documented
|
| 461 |
-
- [x] Troubleshooting guide included
|
| 462 |
-
- [x] Performance acceptable
|
| 463 |
-
- [x] Security considerations noted
|
| 464 |
-
- [x] Scalability path clear
|
| 465 |
-
- [x] Backup strategy documented
|
| 466 |
-
- [x] Monitoring setup documented
|
| 467 |
-
|
| 468 |
-
**Status:** ✅ **ALL ITEMS COMPLETE**
|
| 469 |
-
|
| 470 |
-
---
|
| 471 |
-
|
| 472 |
-
## 🎓 Summary
|
| 473 |
-
|
| 474 |
-
You have a **production-ready Feedback Analysis RAG Agent** that:
|
| 475 |
-
|
| 476 |
-
✅ **Works locally** - Full development environment set up
|
| 477 |
-
✅ **Works in the cloud** - Runpod deployment ready
|
| 478 |
-
✅ **Answers diverse questions** - Intent detection + RAG pipeline
|
| 479 |
-
✅ **Supports multiple languages** - Hebrew and English
|
| 480 |
-
✅ **Is well-documented** - 1600+ lines of guides
|
| 481 |
-
✅ **Is thoroughly tested** - 15-point validation suite
|
| 482 |
-
✅ **Is maintainable** - Clean code with docstrings
|
| 483 |
-
|
| 484 |
-
---
|
| 485 |
-
|
| 486 |
-
## 🎉 Thank You!
|
| 487 |
-
|
| 488 |
-
The system is **ready for your testing**. Choose your path:
|
| 489 |
-
|
| 490 |
-
- **Quick verification:** Start with README_TESTING_GUIDE.md (5 min)
|
| 491 |
-
- **Full testing:** Follow TESTING_CHECKLIST.md (45 min)
|
| 492 |
-
- **Deployment:** Use DEPLOYMENT_GUIDE.md (2 hours)
|
| 493 |
-
|
| 494 |
-
**Status:** ✨ **PRODUCTION READY** ✨
|
| 495 |
-
|
| 496 |
-
---
|
| 497 |
-
|
| 498 |
-
*Report Generated: November 12, 2025*
|
| 499 |
-
*Project Status: 100% Complete*
|
| 500 |
-
*Ready for: Immediate Production Use*
|
| 501 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TESTING_CHECKLIST.md
DELETED
|
@@ -1,472 +0,0 @@
|
|
| 1 |
-
# Testing Checklist - Comprehensive Validation
|
| 2 |
-
|
| 3 |
-
This document walks you through testing all components of the Feedback Analysis RAG Agent locally before deployment.
|
| 4 |
-
|
| 5 |
-
## ✅ Pre-Flight Checks (5 mins)
|
| 6 |
-
|
| 7 |
-
### 1. Environment Setup
|
| 8 |
-
- [ ] Python 3.11+ installed: `python3 --version`
|
| 9 |
-
- [ ] Virtual environment activated: `source .venv/bin/activate`
|
| 10 |
-
- [ ] All dependencies installed: `pip list | grep -E "fastapi|pandas|faiss|sentence-transformers"`
|
| 11 |
-
- [ ] CSV file exists: `ls -lh Feedback.csv` (should be ~2-3 MB, 9930 rows)
|
| 12 |
-
- [ ] FAISS index exists: `ls -lh .vector_index/` (should have `faiss.index` and `meta.parquet`)
|
| 13 |
-
|
| 14 |
-
### 2. Run Validation Harness
|
| 15 |
-
```bash
|
| 16 |
-
python3 scripts/validate_local.py
|
| 17 |
-
```
|
| 18 |
-
|
| 19 |
-
**Expected Output:**
|
| 20 |
-
```
|
| 21 |
-
[PASS] Dependencies
|
| 22 |
-
[PASS] CSV file
|
| 23 |
-
[PASS] FAISS Index
|
| 24 |
-
[PASS] App imports
|
| 25 |
-
[PASS] Analysis logic
|
| 26 |
-
[PASS] RAGService
|
| 27 |
-
[PASS] API endpoints
|
| 28 |
-
|
| 29 |
-
------------------------------------------------------------
|
| 30 |
-
All 7 checks PASSED! Ready for local testing.
|
| 31 |
-
```
|
| 32 |
-
|
| 33 |
-
- [ ] All 7 checks pass
|
| 34 |
-
- [ ] No error messages
|
| 35 |
-
- [ ] Takes 2-3 minutes total
|
| 36 |
-
|
| 37 |
-
**If any check fails:** See the error message and check QUICK_START.md troubleshooting section.
|
| 38 |
-
|
| 39 |
-
---
|
| 40 |
-
|
| 41 |
-
## 🚀 Server Startup (2 mins)
|
| 42 |
-
|
| 43 |
-
### 3. Start the API Server
|
| 44 |
-
```bash
|
| 45 |
-
python3 run.py
|
| 46 |
-
```
|
| 47 |
-
|
| 48 |
-
**Expected Output:**
|
| 49 |
-
```
|
| 50 |
-
INFO: Uvicorn running on http://0.0.0.0:8000
|
| 51 |
-
INFO: Application startup complete
|
| 52 |
-
```
|
| 53 |
-
|
| 54 |
-
- [ ] Server starts without errors
|
| 55 |
-
- [ ] Listens on `http://0.0.0.0:8000`
|
| 56 |
-
- [ ] No red error messages
|
| 57 |
-
- [ ] Can see "Application startup complete"
|
| 58 |
-
|
| 59 |
-
**Keep this terminal open.** Open a NEW terminal for tests.
|
| 60 |
-
|
| 61 |
-
---
|
| 62 |
-
|
| 63 |
-
## 🧪 Endpoint Testing (10-15 mins)
|
| 64 |
-
|
| 65 |
-
### 4. Test `/health` Endpoint
|
| 66 |
-
|
| 67 |
-
**Via curl:**
|
| 68 |
-
```bash
|
| 69 |
-
curl -X POST http://localhost:8000/health
|
| 70 |
-
```
|
| 71 |
-
|
| 72 |
-
**Expected Response:**
|
| 73 |
-
```json
|
| 74 |
-
{"status":"ok"}
|
| 75 |
-
```
|
| 76 |
-
|
| 77 |
-
- [ ] Status code: 200
|
| 78 |
-
- [ ] Response: `{"status":"ok"}`
|
| 79 |
-
|
| 80 |
-
**Via Swagger UI:**
|
| 81 |
-
- [ ] Open http://localhost:8000/docs
|
| 82 |
-
- [ ] Find `/health` endpoint
|
| 83 |
-
- [ ] Click "Try it out" → "Execute"
|
| 84 |
-
- [ ] Response 200 with status
|
| 85 |
-
|
| 86 |
-
---
|
| 87 |
-
|
| 88 |
-
### 5. Test `/query` Endpoint - Count Thank-yous
|
| 89 |
-
|
| 90 |
-
**Via curl:**
|
| 91 |
-
```bash
|
| 92 |
-
curl -X POST http://localhost:8000/query \
|
| 93 |
-
-H "Content-Type: application/json" \
|
| 94 |
-
-d '{"query":"כמה משתמשים כתבו תודה","top_k":2}'
|
| 95 |
-
```
|
| 96 |
-
|
| 97 |
-
**Expected Response:**
|
| 98 |
-
```json
|
| 99 |
-
{
|
| 100 |
-
"query": "כמה משתמשים כתבו תודה",
|
| 101 |
-
"summary": "1168 משובים מכילים ביטויי תודה.",
|
| 102 |
-
"results": [
|
| 103 |
-
{
|
| 104 |
-
"score": 0.808,
|
| 105 |
-
"service": "CannabisUpdate@health.gov.il",
|
| 106 |
-
"level": "5",
|
| 107 |
-
"text": "נח וידידותי למשתמש - תודה"
|
| 108 |
-
},
|
| 109 |
-
...
|
| 110 |
-
]
|
| 111 |
-
}
|
| 112 |
-
```
|
| 113 |
-
|
| 114 |
-
**Check these:**
|
| 115 |
-
- [ ] Status code: 200
|
| 116 |
-
- [ ] Summary contains count: "1168"
|
| 117 |
-
- [ ] Summary in Hebrew (עברית)
|
| 118 |
-
- [ ] Has 2 results (top_k=2)
|
| 119 |
-
- [ ] Each result has: score, service, level, text
|
| 120 |
-
- [ ] Scores are between 0 and 1
|
| 121 |
-
|
| 122 |
-
---
|
| 123 |
-
|
| 124 |
-
### 6. Test `/query` Endpoint - Count Complaints
|
| 125 |
-
|
| 126 |
-
**Via curl:**
|
| 127 |
-
```bash
|
| 128 |
-
curl -X POST http://localhost:8000/query \
|
| 129 |
-
-H "Content-Type: application/json" \
|
| 130 |
-
-d '{"query":"כמה משתמשים מתלוננים על בעיות במערכת","top_k":3}'
|
| 131 |
-
```
|
| 132 |
-
|
| 133 |
-
**Expected Response:**
|
| 134 |
-
- [ ] Status code: 200
|
| 135 |
-
- [ ] Summary contains complaint count (~352)
|
| 136 |
-
- [ ] Has 3 results
|
| 137 |
-
- [ ] Results contain complaint-related feedback
|
| 138 |
-
|
| 139 |
-
---
|
| 140 |
-
|
| 141 |
-
### 7. Test `/query` Endpoint - Keyword Search
|
| 142 |
-
|
| 143 |
-
**Via curl:**
|
| 144 |
-
```bash
|
| 145 |
-
curl -X POST http://localhost:8000/query \
|
| 146 |
-
-H "Content-Type: application/json" \
|
| 147 |
-
-d '{"query":"find feedback about cannabis","top_k":5}'
|
| 148 |
-
```
|
| 149 |
-
|
| 150 |
-
**Expected Response:**
|
| 151 |
-
- [ ] Status code: 200
|
| 152 |
-
- [ ] Summary text (in English)
|
| 153 |
-
- [ ] Has 5 results related to cannabis/search term
|
| 154 |
-
- [ ] Each result has valid scores
|
| 155 |
-
|
| 156 |
-
---
|
| 157 |
-
|
| 158 |
-
### 8. Test `/query` Endpoint - Hebrew Question
|
| 159 |
-
|
| 160 |
-
**Via curl:**
|
| 161 |
-
```bash
|
| 162 |
-
curl -X POST http://localhost:8000/query \
|
| 163 |
-
-H "Content-Type: application/json" \
|
| 164 |
-
-d '{"query":"מה הדעות הכלליות על השירות","top_k":5}'
|
| 165 |
-
```
|
| 166 |
-
|
| 167 |
-
**Expected Response:**
|
| 168 |
-
- [ ] Status code: 200
|
| 169 |
-
- [ ] Summary in Hebrew (response language matches query language)
|
| 170 |
-
- [ ] Has 5 results with diverse feedback
|
| 171 |
-
- [ ] Results ranked by relevance (scores in descending order)
|
| 172 |
-
|
| 173 |
-
---
|
| 174 |
-
|
| 175 |
-
### 9. Test `/topics` Endpoint
|
| 176 |
-
|
| 177 |
-
**Via curl:**
|
| 178 |
-
```bash
|
| 179 |
-
curl -X POST http://localhost:8000/topics \
|
| 180 |
-
-H "Content-Type: application/json" \
|
| 181 |
-
-d '{"num_topics":5}'
|
| 182 |
-
```
|
| 183 |
-
|
| 184 |
-
**Expected Response:**
|
| 185 |
-
```json
|
| 186 |
-
{
|
| 187 |
-
"topics": {
|
| 188 |
-
"0": ["word1", "word2", "word3", ...],
|
| 189 |
-
"1": ["word4", "word5", "word6", ...],
|
| 190 |
-
...
|
| 191 |
-
},
|
| 192 |
-
"total_feedback": 9930
|
| 193 |
-
}
|
| 194 |
-
```
|
| 195 |
-
|
| 196 |
-
- [ ] Status code: 200
|
| 197 |
-
- [ ] Has 5 topics (0-4)
|
| 198 |
-
- [ ] Each topic has top words
|
| 199 |
-
- [ ] Total feedback count is 9930
|
| 200 |
-
- [ ] Words are relevant to feedback content
|
| 201 |
-
|
| 202 |
-
---
|
| 203 |
-
|
| 204 |
-
### 10. Test `/sentiment` Endpoint
|
| 205 |
-
|
| 206 |
-
**Via curl:**
|
| 207 |
-
```bash
|
| 208 |
-
curl -X POST http://localhost:8000/sentiment \
|
| 209 |
-
-H "Content-Type: application/json" \
|
| 210 |
-
-d '{"limit":50}'
|
| 211 |
-
```
|
| 212 |
-
|
| 213 |
-
**Expected Response:**
|
| 214 |
-
```json
|
| 215 |
-
{
|
| 216 |
-
"count": 50,
|
| 217 |
-
"results": [
|
| 218 |
-
{
|
| 219 |
-
"text": "feedback text here",
|
| 220 |
-
"label": "POSITIVE",
|
| 221 |
-
"score": 0.95
|
| 222 |
-
},
|
| 223 |
-
...
|
| 224 |
-
]
|
| 225 |
-
}
|
| 226 |
-
```
|
| 227 |
-
|
| 228 |
-
- [ ] Status code: 200
|
| 229 |
-
- [ ] Count is 50
|
| 230 |
-
- [ ] Has 50 results
|
| 231 |
-
- [ ] Each result has: text, label (POSITIVE/NEGATIVE/NEUTRAL), score (0-1)
|
| 232 |
-
- [ ] Labels are reasonable (positive feedback → POSITIVE, etc.)
|
| 233 |
-
|
| 234 |
-
---
|
| 235 |
-
|
| 236 |
-
### 11. Test `/ingest` Endpoint (Optional - Rebuilds Index)
|
| 237 |
-
|
| 238 |
-
**Via curl (takes 30-60 seconds):**
|
| 239 |
-
```bash
|
| 240 |
-
curl -X POST http://localhost:8000/ingest
|
| 241 |
-
```
|
| 242 |
-
|
| 243 |
-
**Expected Response:**
|
| 244 |
-
```json
|
| 245 |
-
{"status":"ok"}
|
| 246 |
-
```
|
| 247 |
-
|
| 248 |
-
- [ ] Status code: 200
|
| 249 |
-
- [ ] Response `{"status":"ok"}`
|
| 250 |
-
- [ ] Takes 30-60 seconds
|
| 251 |
-
- [ ] Creates/overwrites `.vector_index/` files
|
| 252 |
-
- [ ] After rebuild, query tests still work
|
| 253 |
-
|
| 254 |
-
**Note:** This is for rebuilding the index after updating CSV. Only run if needed.
|
| 255 |
-
|
| 256 |
-
---
|
| 257 |
-
|
| 258 |
-
## 🌐 Browser Testing (5 mins)
|
| 259 |
-
|
| 260 |
-
### 12. Test Swagger UI
|
| 261 |
-
|
| 262 |
-
**Open:** http://localhost:8000/docs
|
| 263 |
-
|
| 264 |
-
- [ ] Page loads without errors
|
| 265 |
-
- [ ] Swagger UI shows all 5 endpoints: health, query, topics, sentiment, ingest
|
| 266 |
-
- [ ] Can expand each endpoint and see schema
|
| 267 |
-
- [ ] Can enter test values and execute
|
| 268 |
-
- [ ] Responses display correctly
|
| 269 |
-
|
| 270 |
-
### 13. Test OpenAPI/ReDoc
|
| 271 |
-
|
| 272 |
-
**Open:** http://localhost:8000/redoc
|
| 273 |
-
|
| 274 |
-
- [ ] Page loads
|
| 275 |
-
- [ ] All endpoints documented
|
| 276 |
-
- [ ] Schema is clear
|
| 277 |
-
|
| 278 |
-
---
|
| 279 |
-
|
| 280 |
-
## 🔍 Detailed Testing (Optional - 5 mins)
|
| 281 |
-
|
| 282 |
-
### 14. Test Different Query Types in `/query`
|
| 283 |
-
|
| 284 |
-
Test each query type to verify intent detection is working:
|
| 285 |
-
|
| 286 |
-
**A) Count Thanks:**
|
| 287 |
-
```bash
|
| 288 |
-
curl -X POST http://localhost:8000/query \
|
| 289 |
-
-H "Content-Type: application/json" \
|
| 290 |
-
-d '{"query":"thank you","top_k":2}'
|
| 291 |
-
```
|
| 292 |
-
- [ ] Detects as counting query
|
| 293 |
-
- [ ] Returns count in summary
|
| 294 |
-
|
| 295 |
-
**B) Count Problems:**
|
| 296 |
-
```bash
|
| 297 |
-
curl -X POST http://localhost:8000/query \
|
| 298 |
-
-H "Content-Type: application/json" \
|
| 299 |
-
-d '{"query":"how many complaints","top_k":2}'
|
| 300 |
-
```
|
| 301 |
-
- [ ] Detects as complaint counting
|
| 302 |
-
- [ ] Returns count
|
| 303 |
-
|
| 304 |
-
**C) Keyword Search:**
|
| 305 |
-
```bash
|
| 306 |
-
curl -X POST http://localhost:8000/query \
|
| 307 |
-
-H "Content-Type: application/json" \
|
| 308 |
-
-d '{"query":"slow","top_k":5}'
|
| 309 |
-
```
|
| 310 |
-
- [ ] Free-form search
|
| 311 |
-
- [ ] Returns semantic matches
|
| 312 |
-
|
| 313 |
-
**D) Hebrew Counting:**
|
| 314 |
-
```bash
|
| 315 |
-
curl -X POST http://localhost:8000/query \
|
| 316 |
-
-H "Content-Type: application/json" \
|
| 317 |
-
-d '{"query":"כמה אנשים כתבו שהדברים עובדים","top_k":3}'
|
| 318 |
-
```
|
| 319 |
-
- [ ] Recognizes Hebrew counting query
|
| 320 |
-
- [ ] Returns appropriate count
|
| 321 |
-
|
| 322 |
-
---
|
| 323 |
-
|
| 324 |
-
### 15. Response Format Validation
|
| 325 |
-
|
| 326 |
-
For each endpoint, verify:
|
| 327 |
-
- [ ] Valid JSON format (no parsing errors)
|
| 328 |
-
- [ ] All required fields present
|
| 329 |
-
- [ ] Numeric fields are numbers (not strings)
|
| 330 |
-
- [ ] Text fields are properly encoded (Hebrew text readable)
|
| 331 |
-
- [ ] Timestamps accurate (if present)
|
| 332 |
-
- [ ] No null/undefined values where data should exist
|
| 333 |
-
|
| 334 |
-
---
|
| 335 |
-
|
| 336 |
-
## 📊 Performance Testing (10 mins)
|
| 337 |
-
|
| 338 |
-
### 16. Measure Response Times
|
| 339 |
-
|
| 340 |
-
**Health endpoint (should be <10ms):**
|
| 341 |
-
```bash
|
| 342 |
-
time curl -X POST http://localhost:8000/health
|
| 343 |
-
```
|
| 344 |
-
|
| 345 |
-
- [ ] Takes <10ms
|
| 346 |
-
- [ ] Consistent across calls
|
| 347 |
-
|
| 348 |
-
**Query endpoint (should be <3 seconds):**
|
| 349 |
-
```bash
|
| 350 |
-
time curl -X POST http://localhost:8000/query \
|
| 351 |
-
-H "Content-Type: application/json" \
|
| 352 |
-
-d '{"query":"test","top_k":5}'
|
| 353 |
-
```
|
| 354 |
-
|
| 355 |
-
- [ ] Takes 1-3 seconds (first call may be slower)
|
| 356 |
-
- [ ] Subsequent calls faster
|
| 357 |
-
- [ ] Consistent performance
|
| 358 |
-
|
| 359 |
-
**Sentiment endpoint (depends on limit):**
|
| 360 |
-
```bash
|
| 361 |
-
time curl -X POST http://localhost:8000/sentiment \
|
| 362 |
-
-H "Content-Type: application/json" \
|
| 363 |
-
-d '{"limit":100}'
|
| 364 |
-
```
|
| 365 |
-
|
| 366 |
-
- [ ] Takes 5-15 seconds for 100 records
|
| 367 |
-
- [ ] Scales reasonably with limit
|
| 368 |
-
|
| 369 |
-
---
|
| 370 |
-
|
| 371 |
-
## 🐛 Error Handling (5 mins)
|
| 372 |
-
|
| 373 |
-
### 17. Test Invalid Requests
|
| 374 |
-
|
| 375 |
-
**Missing required fields:**
|
| 376 |
-
```bash
|
| 377 |
-
curl -X POST http://localhost:8000/query \
|
| 378 |
-
-H "Content-Type: application/json" \
|
| 379 |
-
-d '{}'
|
| 380 |
-
```
|
| 381 |
-
|
| 382 |
-
- [ ] Status code: 422 (Unprocessable Entity)
|
| 383 |
-
- [ ] Error message explains what's missing
|
| 384 |
-
|
| 385 |
-
**Invalid JSON:**
|
| 386 |
-
```bash
|
| 387 |
-
curl -X POST http://localhost:8000/query \
|
| 388 |
-
-H "Content-Type: application/json" \
|
| 389 |
-
-d 'not json'
|
| 390 |
-
```
|
| 391 |
-
|
| 392 |
-
- [ ] Status code: 422 or 400
|
| 393 |
-
- [ ] Clear error response
|
| 394 |
-
|
| 395 |
-
**Invalid top_k:**
|
| 396 |
-
```bash
|
| 397 |
-
curl -X POST http://localhost:8000/query \
|
| 398 |
-
-H "Content-Type: application/json" \
|
| 399 |
-
-d '{"query":"test","top_k":"not a number"}'
|
| 400 |
-
```
|
| 401 |
-
|
| 402 |
-
- [ ] Status code: 422
|
| 403 |
-
- [ ] Error about type
|
| 404 |
-
|
| 405 |
-
---
|
| 406 |
-
|
| 407 |
-
## ✨ Final Sign-Off
|
| 408 |
-
|
| 409 |
-
All tests complete? Check off:
|
| 410 |
-
|
| 411 |
-
- [ ] All 11 main endpoint tests pass
|
| 412 |
-
- [ ] All response formats valid
|
| 413 |
-
- [ ] Performance acceptable
|
| 414 |
-
- [ ] Error handling works
|
| 415 |
-
- [ ] No error logs in server terminal
|
| 416 |
-
- [ ] Hebrew text renders correctly
|
| 417 |
-
- [ ] Counts are accurate (1168 thanks, 352 complaints, 9930 total)
|
| 418 |
-
|
| 419 |
-
---
|
| 420 |
-
|
| 421 |
-
## 📝 Test Results
|
| 422 |
-
|
| 423 |
-
Record your results:
|
| 424 |
-
|
| 425 |
-
**Date tested:** __________
|
| 426 |
-
**Tester name:** __________
|
| 427 |
-
**Python version:** __________
|
| 428 |
-
**Environment:** ☐ Mac ☐ Linux ☐ Windows
|
| 429 |
-
|
| 430 |
-
**Overall status:**
|
| 431 |
-
- [ ] ✅ ALL TESTS PASSED - Ready for deployment
|
| 432 |
-
- [ ] ⚠️ Some issues - See notes below
|
| 433 |
-
|
| 434 |
-
**Notes:**
|
| 435 |
-
```
|
| 436 |
-
(Add any issues, observations, or special notes here)
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
```
|
| 442 |
-
|
| 443 |
-
---
|
| 444 |
-
|
| 445 |
-
## 🚀 Next Steps
|
| 446 |
-
|
| 447 |
-
### If All Tests Pass ✅
|
| 448 |
-
1. Stop the local server: `CTRL+C`
|
| 449 |
-
2. Commit changes: `git add -A && git commit -m "test: all validation passed"`
|
| 450 |
-
3. Push to GitHub: `git push origin main`
|
| 451 |
-
4. Follow **README.md** to deploy to Runpod
|
| 452 |
-
5. Test deployed version on cloud
|
| 453 |
-
|
| 454 |
-
### If Some Tests Fail ⚠️
|
| 455 |
-
1. Check the error message
|
| 456 |
-
2. See QUICK_START.md **Troubleshooting** section
|
| 457 |
-
3. Fix the issue
|
| 458 |
-
4. Re-run `python3 scripts/validate_local.py`
|
| 459 |
-
5. Retry failing endpoint test
|
| 460 |
-
|
| 461 |
-
---
|
| 462 |
-
|
| 463 |
-
## 📚 Reference
|
| 464 |
-
|
| 465 |
-
- **Full documentation:** See README.md
|
| 466 |
-
- **Quick start guide:** See QUICK_START.md
|
| 467 |
-
- **Configuration:** See app/config.py
|
| 468 |
-
- **API definitions:** See app/api.py
|
| 469 |
-
- **Deployment guide:** See CONTRIBUTING.md
|
| 470 |
-
|
| 471 |
-
Good luck! 🎯
|
| 472 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/__init__.py
CHANGED
|
@@ -1,13 +1,18 @@
|
|
| 1 |
-
"""
|
|
|
|
| 2 |
|
| 3 |
-
This package contains the core modules
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
Import example:
|
| 7 |
-
from app.
|
|
|
|
| 8 |
|
| 9 |
Keep this file minimal — module-level documentation only.
|
| 10 |
"""
|
| 11 |
|
| 12 |
-
# Makes `app` a package so imports
|
| 13 |
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Application package for the Feedback Analysis Agent.
|
| 3 |
|
| 4 |
+
This package contains the core modules for SQL-based feedback analysis:
|
| 5 |
+
- sql_service: Main analysis service using SQL queries
|
| 6 |
+
- api: FastAPI endpoints
|
| 7 |
+
- config: Configuration settings
|
| 8 |
+
- data_loader: CSV data loading
|
| 9 |
|
| 10 |
Import example:
|
| 11 |
+
from app.sql_service import SQLFeedbackService
|
| 12 |
+
from app.api import app
|
| 13 |
|
| 14 |
Keep this file minimal — module-level documentation only.
|
| 15 |
"""
|
| 16 |
|
| 17 |
+
# Makes `app` a package so imports work correctly
|
| 18 |
|
app/analysis.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""Utilities to detect simple question intents and resolve counts over the feedback corpus.
|
| 4 |
-
|
| 5 |
-
This module implements lightweight, rule-based detection for queries such as:
|
| 6 |
-
- "כמה משתמשים כתבו תודה" -> count thank-you messages
|
| 7 |
-
- "כמה מתלוננים על אלמנטים שלא עובדים" -> count complaint-like messages
|
| 8 |
-
|
| 9 |
-
The approach is intentionally simple (keyword matching) to avoid heavy dependencies and
|
| 10 |
-
to provide fast, explainable counts. It returns structured dicts that higher-level code
|
| 11 |
-
can convert to human-readable summaries or JSON responses.
|
| 12 |
-
"""
|
| 13 |
-
|
| 14 |
-
import re
|
| 15 |
-
from typing import Iterable, List, Optional, Tuple
|
| 16 |
-
|
| 17 |
-
import pandas as pd
|
| 18 |
-
|
| 19 |
-
from .preprocess import preprocess_text
|
| 20 |
-
from .config import settings
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
COMPLAINT_KEYWORDS = [
|
| 24 |
-
"לא עובד",
|
| 25 |
-
"לא עובדים",
|
| 26 |
-
"שגיאה",
|
| 27 |
-
"תקלה",
|
| 28 |
-
"לא פועל",
|
| 29 |
-
"נכשל",
|
| 30 |
-
"לא מצליח",
|
| 31 |
-
"לא ניתן",
|
| 32 |
-
"המערכת לא",
|
| 33 |
-
"לא תקין",
|
| 34 |
-
"לא עובדים להם",
|
| 35 |
-
]
|
| 36 |
-
|
| 37 |
-
THANKS_KEYWORDS = ["תודה", "תודה רבה", "תודה!", "תודה רבה!", "תודה רבה מאוד"]
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
def _contains_any(text: str, keywords: Iterable[str]) -> bool:
|
| 41 |
-
t = preprocess_text(text).lower()
|
| 42 |
-
for kw in keywords:
|
| 43 |
-
if kw in t:
|
| 44 |
-
return True
|
| 45 |
-
return False
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
def count_keyword_rows(df: pd.DataFrame, keywords: Iterable[str], text_column: str = "Text") -> int:
|
| 49 |
-
if df is None or df.empty:
|
| 50 |
-
return 0
|
| 51 |
-
kws = [str(k).lower() for k in keywords]
|
| 52 |
-
def row_match(s: str) -> bool:
|
| 53 |
-
s = preprocess_text(str(s)).lower()
|
| 54 |
-
return any(kw in s for kw in kws)
|
| 55 |
-
return int(df[text_column].astype(str).apply(row_match).sum())
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def detect_query_type(query: str) -> Tuple[str, Optional[str]]:
|
| 59 |
-
"""Return (type, target) where type is one of: 'count_thanks', 'count_complaint', 'count_keyword', 'freeform'.
|
| 60 |
-
|
| 61 |
-
target may contain a detected keyword or phrase when relevant.
|
| 62 |
-
"""
|
| 63 |
-
q = preprocess_text(query).lower()
|
| 64 |
-
# Simple Hebrew heuristics
|
| 65 |
-
if "תודה" in q or "מודה" in q:
|
| 66 |
-
return ("count_thanks", None)
|
| 67 |
-
if any(k in q for k in ["לא עובד", "לא עובדים", "תקלה", "שגיאה", "לא פועל", "נכשל"]):
|
| 68 |
-
return ("count_complaint", None)
|
| 69 |
-
# Generic "כמה" count with a keyword after 'על' or 'ל' or 'בש"'
|
| 70 |
-
if q.strip().startswith("כמה") or "כמה משתמשים" in q:
|
| 71 |
-
# try extract noun after 'על' or 'ש' or 'עם'
|
| 72 |
-
m = re.search(r"על\s+([^\n\?]+)", q)
|
| 73 |
-
if m:
|
| 74 |
-
return ("count_keyword", m.group(1).strip())
|
| 75 |
-
m2 = re.search(r"כמה\s+[^\s]+\s+([^\n\?]+)", q)
|
| 76 |
-
if m2:
|
| 77 |
-
return ("count_keyword", m2.group(1).strip())
|
| 78 |
-
return ("count_keyword", None)
|
| 79 |
-
return ("freeform", None)
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def resolve_count_from_type(df: pd.DataFrame, qtype: str, target: Optional[str], text_column: str = "Text"):
|
| 83 |
-
if qtype == "count_thanks":
|
| 84 |
-
cnt = count_keyword_rows(df, THANKS_KEYWORDS, text_column=text_column)
|
| 85 |
-
return {"type": "count", "label": "thanks", "count": cnt}
|
| 86 |
-
if qtype == "count_complaint":
|
| 87 |
-
cnt = count_keyword_rows(df, COMPLAINT_KEYWORDS, text_column=text_column)
|
| 88 |
-
return {"type": "count", "label": "complaint_not_working", "count": cnt}
|
| 89 |
-
if qtype == "count_keyword":
|
| 90 |
-
if target:
|
| 91 |
-
# count rows that contain the exact target phrase
|
| 92 |
-
pattern = re.escape(target.lower())
|
| 93 |
-
cnt = int(df[text_column].astype(str).str.lower().str.contains(pattern, regex=True).sum())
|
| 94 |
-
return {"type": "count", "label": f"keyword:{target}", "count": int(cnt)}
|
| 95 |
-
# fallback: return total rows
|
| 96 |
-
return {"type": "count", "label": "all", "count": int(len(df))}
|
| 97 |
-
return {"type": "unknown"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api.py
CHANGED
|
@@ -1,35 +1,34 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
from typing import List, Optional, Dict, Any
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
|
| 6 |
-
import pandas as pd
|
| 7 |
-
from fastapi import FastAPI, Query, Request
|
| 8 |
from fastapi.responses import ORJSONResponse, HTMLResponse
|
| 9 |
from fastapi.staticfiles import StaticFiles
|
| 10 |
-
import json
|
| 11 |
-
from pathlib import Path
|
| 12 |
from pydantic import BaseModel, Field
|
| 13 |
|
| 14 |
from .config import settings
|
| 15 |
from .data_loader import load_feedback
|
| 16 |
-
from .embedding import EmbeddingModel
|
| 17 |
-
from .rag_service import RAGService
|
| 18 |
from .sql_service import SQLFeedbackService
|
| 19 |
-
from .sentiment import analyze_sentiments
|
| 20 |
-
from .topics import kmeans_topics
|
| 21 |
-
from .vector_store import FaissVectorStore
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
app = FastAPI(title="Feedback Analysis RAG Agent", version="1.0.0", default_response_class=ORJSONResponse)
|
| 25 |
-
svc = RAGService()
|
| 26 |
# Initialize SQL service lazily to avoid errors on startup if data is missing
|
| 27 |
-
|
|
|
|
| 28 |
try:
|
| 29 |
-
sql_svc = SQLFeedbackService()
|
|
|
|
| 30 |
except Exception as e:
|
| 31 |
print(f"Warning: Could not initialize SQL service: {e}", flush=True)
|
| 32 |
-
embedder = svc.embedder
|
| 33 |
|
| 34 |
# Simple in-memory history persisted best-effort to `.query_history.json`
|
| 35 |
history_file = Path(".query_history.json")
|
|
@@ -43,30 +42,59 @@ if history_file.exists():
|
|
| 43 |
|
| 44 |
|
| 45 |
def save_history() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
try:
|
| 47 |
with history_file.open("w", encoding="utf-8") as f:
|
| 48 |
json.dump(history, f, ensure_ascii=False, indent=2)
|
| 49 |
except Exception:
|
| 50 |
-
#
|
| 51 |
pass
|
| 52 |
|
| 53 |
|
| 54 |
class QueryRequest(BaseModel):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
query: str = Field(..., example="תסווג את התלונות 5 סוגים")
|
| 56 |
top_k: int = Field(5, example=5)
|
| 57 |
|
| 58 |
|
| 59 |
class QueryResponse(BaseModel):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
query: str
|
| 61 |
summary: Optional[str]
|
| 62 |
-
results: Optional[List[Dict[str, Any]]] = None
|
| 63 |
|
| 64 |
|
| 65 |
class SQLQueryResponse(BaseModel):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
query: str
|
| 67 |
summary: str
|
| 68 |
sql_queries: List[str]
|
| 69 |
-
query_results: List[Dict[str, Any]]
|
| 70 |
visualizations: Optional[List[Dict[str, Any]]] = None
|
| 71 |
|
| 72 |
|
|
@@ -106,15 +134,47 @@ def query_sql(req: QueryRequest) -> SQLQueryResponse:
|
|
| 106 |
result = sql_svc.analyze_query(req.query)
|
| 107 |
|
| 108 |
# Convert query results to JSON-serializable format
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
query_results = []
|
| 110 |
for qr in result.query_results:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
query_results.append({
|
| 112 |
"query": qr.query,
|
| 113 |
-
"result":
|
| 114 |
"error": qr.error,
|
| 115 |
"row_count": len(qr.result) if not qr.error else 0
|
| 116 |
})
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
return SQLQueryResponse(
|
| 119 |
query=result.user_query,
|
| 120 |
summary=result.summary,
|
|
@@ -135,202 +195,6 @@ def query_sql(req: QueryRequest) -> SQLQueryResponse:
|
|
| 135 |
)
|
| 136 |
|
| 137 |
|
| 138 |
-
@app.post("/ingest")
|
| 139 |
-
def ingest() -> Dict[str, Any]:
|
| 140 |
-
"""Build the vector index from Feedback.csv"""
|
| 141 |
-
try:
|
| 142 |
-
svc.ingest()
|
| 143 |
-
return {"status": "ingested", "message": "Vector index built successfully"}
|
| 144 |
-
except FileNotFoundError as e:
|
| 145 |
-
return {"status": "error", "message": f"CSV file not found: {str(e)}"}
|
| 146 |
-
except Exception as e:
|
| 147 |
-
return {"status": "error", "message": f"Ingestion failed: {str(e)}"}
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
@app.post("/query", response_model=QueryResponse)
|
| 151 |
-
def query(req: QueryRequest, request: Request) -> QueryResponse:
|
| 152 |
-
"""Free-form question answering over feedback data.
|
| 153 |
-
|
| 154 |
-
This endpoint also appends the (request, response) pair to an in-memory history
|
| 155 |
-
which is persisted best-effort to `.query_history.json`.
|
| 156 |
-
"""
|
| 157 |
-
try:
|
| 158 |
-
# Use the higher-level answer pipeline which can handle counts and keyword queries
|
| 159 |
-
out = svc.answer(req.query, top_k=req.top_k)
|
| 160 |
-
|
| 161 |
-
# Return summary and also include results for frontend display if requested
|
| 162 |
-
# Convert numpy types to native Python types for JSON serialization
|
| 163 |
-
def convert_to_python_type(val):
|
| 164 |
-
import numpy as np
|
| 165 |
-
if isinstance(val, (np.integer, np.int64, np.int32)):
|
| 166 |
-
return int(val)
|
| 167 |
-
elif isinstance(val, (np.floating, np.float64, np.float32)):
|
| 168 |
-
return float(val)
|
| 169 |
-
elif isinstance(val, np.ndarray):
|
| 170 |
-
return val.tolist()
|
| 171 |
-
return val
|
| 172 |
-
|
| 173 |
-
resp_dict = {
|
| 174 |
-
"query": out.query,
|
| 175 |
-
"summary": out.summary,
|
| 176 |
-
"results": [
|
| 177 |
-
{
|
| 178 |
-
"score": convert_to_python_type(r.score),
|
| 179 |
-
"service": str(r.row.get(settings.service_column, "")),
|
| 180 |
-
"level": convert_to_python_type(r.row.get(settings.level_column, "")),
|
| 181 |
-
"text": str(r.row.get(settings.text_column, "")),
|
| 182 |
-
}
|
| 183 |
-
for r in out.results[:10] # Limit to 10 for frontend
|
| 184 |
-
] if out.results else []
|
| 185 |
-
}
|
| 186 |
-
|
| 187 |
-
# append to history (store only the summary)
|
| 188 |
-
try:
|
| 189 |
-
history.append({"query": out.query, "response": {"summary": out.summary}})
|
| 190 |
-
save_history()
|
| 191 |
-
except Exception:
|
| 192 |
-
pass
|
| 193 |
-
|
| 194 |
-
# Return QueryResponse with results
|
| 195 |
-
return QueryResponse(**resp_dict)
|
| 196 |
-
except FileNotFoundError:
|
| 197 |
-
resp = QueryResponse(
|
| 198 |
-
query=req.query,
|
| 199 |
-
summary="Error: Vector index not found. Please run /ingest first.",
|
| 200 |
-
)
|
| 201 |
-
try:
|
| 202 |
-
history.append({"query": resp.query, "response": {"summary": resp.summary}})
|
| 203 |
-
save_history()
|
| 204 |
-
except Exception:
|
| 205 |
-
pass
|
| 206 |
-
return resp
|
| 207 |
-
except Exception as e:
|
| 208 |
-
import traceback
|
| 209 |
-
error_details = traceback.format_exc()
|
| 210 |
-
print(f"Error in /query endpoint: {error_details}", flush=True)
|
| 211 |
-
resp = QueryResponse(
|
| 212 |
-
query=req.query,
|
| 213 |
-
summary=f"שגיאה: {str(e)}. אנא בדוק את הלוגים לפרטים נוספים.",
|
| 214 |
-
results=[]
|
| 215 |
-
)
|
| 216 |
-
try:
|
| 217 |
-
history.append({"query": resp.query, "response": {"summary": resp.summary}})
|
| 218 |
-
save_history()
|
| 219 |
-
except Exception:
|
| 220 |
-
pass
|
| 221 |
-
return resp
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
class TopicsRequest(BaseModel):
|
| 225 |
-
num_topics: int = Field(5, example=5)
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
@app.post("/topics")
|
| 229 |
-
def topics(req: TopicsRequest) -> Dict[str, Any]:
|
| 230 |
-
"""Extract main topics from feedback. Accepts POST body: {"num_topics": int}.
|
| 231 |
-
|
| 232 |
-
Using POST allows larger and structured request bodies (and avoids URL length limits).
|
| 233 |
-
"""
|
| 234 |
-
num_topics = req.num_topics
|
| 235 |
-
try:
|
| 236 |
-
# Load embeddings from store
|
| 237 |
-
store = FaissVectorStore.load(settings.vector_index_path, settings.vector_metadata_path)
|
| 238 |
-
# FAISS does not expose vectors, so recompute for this endpoint
|
| 239 |
-
df = load_feedback()
|
| 240 |
-
texts = df[settings.text_column].astype(str).tolist()
|
| 241 |
-
if not texts:
|
| 242 |
-
return {"num_topics": 0, "topics": {}, "error": "No feedback data found"}
|
| 243 |
-
|
| 244 |
-
embeddings = embedder.encode(texts)
|
| 245 |
-
res = kmeans_topics(embeddings, num_topics=num_topics)
|
| 246 |
-
|
| 247 |
-
# Group texts by topic
|
| 248 |
-
topics_out: Dict[int, List[str]] = {}
|
| 249 |
-
for label, text in zip(res.labels, texts):
|
| 250 |
-
topics_out.setdefault(int(label), []).append(text)
|
| 251 |
-
|
| 252 |
-
# Generate topic names/summaries using LLM if available
|
| 253 |
-
topic_summaries: Dict[int, str] = {}
|
| 254 |
-
for topic_id, topic_texts in topics_out.items():
|
| 255 |
-
# Take sample texts for summary
|
| 256 |
-
sample_texts = topic_texts[:10] if len(topic_texts) > 10 else topic_texts
|
| 257 |
-
sample_str = "\n".join(f"- {t[:200]}" for t in sample_texts[:5])
|
| 258 |
-
|
| 259 |
-
prompt = (
|
| 260 |
-
"Based on the following citizen feedback examples, provide a short topic name (2-4 words) "
|
| 261 |
-
"in Hebrew that describes what users are talking about. "
|
| 262 |
-
"Return ONLY the topic name, nothing else.\n\n"
|
| 263 |
-
f"Examples:\n{sample_str}\n\nTopic name:"
|
| 264 |
-
)
|
| 265 |
-
|
| 266 |
-
topic_name = f"נושא {topic_id + 1}" # Default fallback
|
| 267 |
-
|
| 268 |
-
# Try Gemini first
|
| 269 |
-
if settings.gemini_api_key:
|
| 270 |
-
try:
|
| 271 |
-
import google.generativeai as genai
|
| 272 |
-
genai.configure(api_key=settings.gemini_api_key)
|
| 273 |
-
model = genai.GenerativeModel("gemini-1.5-flash")
|
| 274 |
-
resp = model.generate_content(prompt)
|
| 275 |
-
text = getattr(resp, "text", None)
|
| 276 |
-
if isinstance(text, str) and text.strip():
|
| 277 |
-
topic_name = text.strip()
|
| 278 |
-
except Exception:
|
| 279 |
-
pass
|
| 280 |
-
|
| 281 |
-
# Fallback to OpenAI
|
| 282 |
-
if topic_name.startswith("נושא") and settings.openai_api_key:
|
| 283 |
-
try:
|
| 284 |
-
from openai import OpenAI
|
| 285 |
-
client = OpenAI(api_key=settings.openai_api_key)
|
| 286 |
-
resp = client.chat.completions.create(
|
| 287 |
-
model="gpt-4o-mini",
|
| 288 |
-
messages=[{"role": "user", "content": prompt}],
|
| 289 |
-
temperature=0.3,
|
| 290 |
-
max_tokens=20,
|
| 291 |
-
)
|
| 292 |
-
if resp.choices[0].message.content:
|
| 293 |
-
topic_name = resp.choices[0].message.content.strip()
|
| 294 |
-
except Exception:
|
| 295 |
-
pass
|
| 296 |
-
|
| 297 |
-
topic_summaries[topic_id] = topic_name
|
| 298 |
-
|
| 299 |
-
# Format response with topic names
|
| 300 |
-
formatted_topics: Dict[str, Any] = {}
|
| 301 |
-
for topic_id, topic_texts in topics_out.items():
|
| 302 |
-
formatted_topics[str(topic_id)] = {
|
| 303 |
-
"name": topic_summaries.get(topic_id, f"נושא {topic_id + 1}"),
|
| 304 |
-
"count": len(topic_texts),
|
| 305 |
-
"examples": topic_texts[:5] # First 5 examples
|
| 306 |
-
}
|
| 307 |
-
|
| 308 |
-
return {
|
| 309 |
-
"num_topics": num_topics,
|
| 310 |
-
"topics": formatted_topics,
|
| 311 |
-
"total_feedback": len(texts)
|
| 312 |
-
}
|
| 313 |
-
except FileNotFoundError:
|
| 314 |
-
return {"error": "Vector index not found. Please run /ingest first.", "num_topics": 0, "topics": {}}
|
| 315 |
-
except Exception as e:
|
| 316 |
-
return {"error": str(e), "num_topics": 0, "topics": {}}
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
class SentimentRequest(BaseModel):
|
| 320 |
-
limit: int = Field(100, example=50)
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
@app.post("/sentiment")
|
| 324 |
-
def sentiment(req: SentimentRequest) -> Dict[str, Any]:
|
| 325 |
-
"""Analyze sentiment for the first `limit` feedback entries. Accepts POST body: {"limit": 100}.
|
| 326 |
-
|
| 327 |
-
Using POST keeps the API consistent for clients that prefer JSON bodies over URL query params.
|
| 328 |
-
"""
|
| 329 |
-
limit = req.limit
|
| 330 |
-
df = load_feedback().head(limit)
|
| 331 |
-
texts = df[settings.text_column].astype(str).tolist()
|
| 332 |
-
out = analyze_sentiments(texts)
|
| 333 |
-
return {"count": len(out), "results": out}
|
| 334 |
|
| 335 |
|
| 336 |
# Mount static files for a simple frontend if present
|
|
@@ -352,13 +216,25 @@ def root() -> HTMLResponse:
|
|
| 352 |
|
| 353 |
@app.get("/history")
|
| 354 |
def get_history() -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
return {"history": history}
|
| 356 |
|
| 357 |
|
| 358 |
@app.post("/history/clear")
|
| 359 |
def clear_history() -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
global history
|
| 361 |
history = []
|
| 362 |
-
save_history()
|
| 363 |
return {"status": "cleared"}
|
| 364 |
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
from typing import List, Optional, Dict, Any
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import json
|
| 6 |
|
| 7 |
+
from fastapi import FastAPI
|
|
|
|
|
|
|
| 8 |
from fastapi.responses import ORJSONResponse, HTMLResponse
|
| 9 |
from fastapi.staticfiles import StaticFiles
|
|
|
|
|
|
|
| 10 |
from pydantic import BaseModel, Field
|
| 11 |
|
| 12 |
from .config import settings
|
| 13 |
from .data_loader import load_feedback
|
|
|
|
|
|
|
| 14 |
from .sql_service import SQLFeedbackService
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
# FastAPI application for Feedback Analysis using SQL-based approach
|
| 17 |
+
app = FastAPI(
|
| 18 |
+
title="Feedback Analysis Agent",
|
| 19 |
+
version="2.0.0",
|
| 20 |
+
description="SQL-based feedback analysis system using LLM-generated queries",
|
| 21 |
+
default_response_class=ORJSONResponse
|
| 22 |
+
)
|
| 23 |
|
|
|
|
|
|
|
| 24 |
# Initialize SQL service lazily to avoid errors on startup if data is missing
|
| 25 |
+
# This service handles all query processing using SQL-based approach
|
| 26 |
+
sql_svc: Optional[SQLFeedbackService] = None
|
| 27 |
try:
|
| 28 |
+
sql_svc = SQLFeedbackService()
|
| 29 |
+
print("SQL service initialized successfully", flush=True)
|
| 30 |
except Exception as e:
|
| 31 |
print(f"Warning: Could not initialize SQL service: {e}", flush=True)
|
|
|
|
| 32 |
|
| 33 |
# Simple in-memory history persisted best-effort to `.query_history.json`
|
| 34 |
history_file = Path(".query_history.json")
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def save_history() -> None:
|
| 45 |
+
"""
|
| 46 |
+
Save query history to disk.
|
| 47 |
+
|
| 48 |
+
This is a best-effort operation - if saving fails (e.g., disk full,
|
| 49 |
+
permissions issue), the error is silently ignored to avoid breaking
|
| 50 |
+
the main application flow. History is stored in `.query_history.json`.
|
| 51 |
+
"""
|
| 52 |
try:
|
| 53 |
with history_file.open("w", encoding="utf-8") as f:
|
| 54 |
json.dump(history, f, ensure_ascii=False, indent=2)
|
| 55 |
except Exception:
|
| 56 |
+
# Best-effort persistence; ignore errors to avoid breaking main flow
|
| 57 |
pass
|
| 58 |
|
| 59 |
|
| 60 |
class QueryRequest(BaseModel):
|
| 61 |
+
"""
|
| 62 |
+
Request model for query endpoints.
|
| 63 |
+
|
| 64 |
+
Attributes:
|
| 65 |
+
query: The natural language question to analyze
|
| 66 |
+
top_k: Number of results to return (kept for compatibility, not actively used)
|
| 67 |
+
"""
|
| 68 |
query: str = Field(..., example="תסווג את התלונות 5 סוגים")
|
| 69 |
top_k: int = Field(5, example=5)
|
| 70 |
|
| 71 |
|
| 72 |
class QueryResponse(BaseModel):
|
| 73 |
+
"""
|
| 74 |
+
Response model for legacy query endpoint (deprecated).
|
| 75 |
+
|
| 76 |
+
Kept for backward compatibility but not actively used.
|
| 77 |
+
"""
|
| 78 |
query: str
|
| 79 |
summary: Optional[str]
|
| 80 |
+
results: Optional[List[Dict[str, Any]]] = None
|
| 81 |
|
| 82 |
|
| 83 |
class SQLQueryResponse(BaseModel):
|
| 84 |
+
"""
|
| 85 |
+
Response model for SQL-based query endpoint.
|
| 86 |
+
|
| 87 |
+
Attributes:
|
| 88 |
+
query: The original user query
|
| 89 |
+
summary: Final synthesized answer in natural language
|
| 90 |
+
sql_queries: List of SQL queries that were generated and executed
|
| 91 |
+
query_results: Results from each SQL query (as dictionaries for JSON serialization)
|
| 92 |
+
visualizations: Optional list of visualization specifications for frontend rendering
|
| 93 |
+
"""
|
| 94 |
query: str
|
| 95 |
summary: str
|
| 96 |
sql_queries: List[str]
|
| 97 |
+
query_results: List[Dict[str, Any]]
|
| 98 |
visualizations: Optional[List[Dict[str, Any]]] = None
|
| 99 |
|
| 100 |
|
|
|
|
| 134 |
result = sql_svc.analyze_query(req.query)
|
| 135 |
|
| 136 |
# Convert query results to JSON-serializable format
|
| 137 |
+
# Pandas DataFrames may contain numpy types that aren't JSON-serializable
|
| 138 |
+
# This helper function converts them to native Python types
|
| 139 |
+
def convert_to_python_type(val):
|
| 140 |
+
"""
|
| 141 |
+
Convert numpy types to native Python types for JSON serialization.
|
| 142 |
+
|
| 143 |
+
FastAPI/Pydantic can't serialize numpy types directly, so we need
|
| 144 |
+
to convert them. This function handles integers, floats, and arrays.
|
| 145 |
+
"""
|
| 146 |
+
import numpy as np
|
| 147 |
+
if isinstance(val, (np.integer, np.int64, np.int32)):
|
| 148 |
+
return int(val)
|
| 149 |
+
elif isinstance(val, (np.floating, np.float64, np.float32)):
|
| 150 |
+
return float(val)
|
| 151 |
+
elif isinstance(val, np.ndarray):
|
| 152 |
+
return val.tolist()
|
| 153 |
+
return val
|
| 154 |
+
|
| 155 |
query_results = []
|
| 156 |
for qr in result.query_results:
|
| 157 |
+
# Convert DataFrame to dict and clean numpy types
|
| 158 |
+
records = []
|
| 159 |
+
if not qr.error and len(qr.result) > 0:
|
| 160 |
+
for record in qr.result.to_dict('records'):
|
| 161 |
+
cleaned_record = {k: convert_to_python_type(v) for k, v in record.items()}
|
| 162 |
+
records.append(cleaned_record)
|
| 163 |
+
|
| 164 |
query_results.append({
|
| 165 |
"query": qr.query,
|
| 166 |
+
"result": records,
|
| 167 |
"error": qr.error,
|
| 168 |
"row_count": len(qr.result) if not qr.error else 0
|
| 169 |
})
|
| 170 |
|
| 171 |
+
# Save to history
|
| 172 |
+
try:
|
| 173 |
+
history.append({"query": result.user_query, "response": {"summary": result.summary}})
|
| 174 |
+
save_history()
|
| 175 |
+
except Exception:
|
| 176 |
+
pass
|
| 177 |
+
|
| 178 |
return SQLQueryResponse(
|
| 179 |
query=result.user_query,
|
| 180 |
summary=result.summary,
|
|
|
|
| 195 |
)
|
| 196 |
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
|
| 200 |
# Mount static files for a simple frontend if present
|
|
|
|
| 216 |
|
| 217 |
@app.get("/history")
|
| 218 |
def get_history() -> Dict[str, Any]:
|
| 219 |
+
"""
|
| 220 |
+
Get query history.
|
| 221 |
+
|
| 222 |
+
Returns all previously asked questions and their responses.
|
| 223 |
+
History is persisted to `.query_history.json` and loaded on startup.
|
| 224 |
+
"""
|
| 225 |
return {"history": history}
|
| 226 |
|
| 227 |
|
| 228 |
@app.post("/history/clear")
|
| 229 |
def clear_history() -> Dict[str, Any]:
|
| 230 |
+
"""
|
| 231 |
+
Clear query history.
|
| 232 |
+
|
| 233 |
+
Removes all stored queries from memory and disk.
|
| 234 |
+
Useful for testing or privacy purposes.
|
| 235 |
+
"""
|
| 236 |
global history
|
| 237 |
history = []
|
| 238 |
+
save_history() # Persist the cleared state to disk
|
| 239 |
return {"status": "cleared"}
|
| 240 |
|
app/config.py
CHANGED
|
@@ -1,27 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
from dataclasses import dataclass
|
| 3 |
from dotenv import load_dotenv # type: ignore
|
| 4 |
|
| 5 |
|
| 6 |
-
# Load .env if present (kept out of git via .gitignore)
|
|
|
|
| 7 |
load_dotenv(override=False)
|
| 8 |
|
| 9 |
|
| 10 |
@dataclass
|
| 11 |
class Settings:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
openai_api_key: str | None = os.getenv("OPENAI_API_KEY")
|
| 13 |
gemini_api_key: str | None = os.getenv("GEMINI_API_KEY")
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
|
| 17 |
-
)
|
| 18 |
-
vector_index_path: str = os.getenv("VECTOR_INDEX_PATH", ".vector_index/faiss.index")
|
| 19 |
-
vector_metadata_path: str = os.getenv("VECTOR_METADATA_PATH", ".vector_index/meta.parquet")
|
| 20 |
csv_path: str = os.getenv("CSV_PATH", "Feedback.csv")
|
|
|
|
|
|
|
| 21 |
text_column: str = os.getenv("TEXT_COLUMN", "Text")
|
| 22 |
service_column: str = os.getenv("SERVICE_COLUMN", "ServiceName")
|
| 23 |
level_column: str = os.getenv("LEVEL_COLUMN", "Level")
|
| 24 |
|
| 25 |
|
|
|
|
| 26 |
settings = Settings()
|
| 27 |
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration settings for the Feedback Analysis system.
|
| 3 |
+
|
| 4 |
+
This module loads environment variables and provides a centralized Settings class
|
| 5 |
+
for all configuration values. Settings can be overridden via environment variables
|
| 6 |
+
or a .env file (which is git-ignored for security).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
import os
|
| 10 |
from dataclasses import dataclass
|
| 11 |
from dotenv import load_dotenv # type: ignore
|
| 12 |
|
| 13 |
|
| 14 |
+
# Load .env file if present (kept out of git via .gitignore for security)
|
| 15 |
+
# This allows local development without exposing API keys
|
| 16 |
load_dotenv(override=False)
|
| 17 |
|
| 18 |
|
| 19 |
@dataclass
|
| 20 |
class Settings:
|
| 21 |
+
"""
|
| 22 |
+
Application settings loaded from environment variables.
|
| 23 |
+
|
| 24 |
+
All settings can be overridden via environment variables or .env file.
|
| 25 |
+
This provides flexibility for different deployment environments.
|
| 26 |
+
"""
|
| 27 |
+
# LLM API keys - at least one must be set for the system to work
|
| 28 |
openai_api_key: str | None = os.getenv("OPENAI_API_KEY")
|
| 29 |
gemini_api_key: str | None = os.getenv("GEMINI_API_KEY")
|
| 30 |
+
|
| 31 |
+
# CSV data file path - relative to project root
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
csv_path: str = os.getenv("CSV_PATH", "Feedback.csv")
|
| 33 |
+
|
| 34 |
+
# Column names in the CSV file - adjust if your CSV uses different column names
|
| 35 |
text_column: str = os.getenv("TEXT_COLUMN", "Text")
|
| 36 |
service_column: str = os.getenv("SERVICE_COLUMN", "ServiceName")
|
| 37 |
level_column: str = os.getenv("LEVEL_COLUMN", "Level")
|
| 38 |
|
| 39 |
|
| 40 |
+
# Global settings instance - import this in other modules
|
| 41 |
settings = Settings()
|
| 42 |
|
app/embedding.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""EmbeddingModel wrapper around sentence-transformers.
|
| 4 |
-
|
| 5 |
-
This class lazily loads a SentenceTransformer model (configured via
|
| 6 |
-
`settings.embedding_model_name`) and exposes `encode` and `encode_single`.
|
| 7 |
-
Normalizes embeddings to unit length for cosine-similarity search in FAISS.
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
from typing import Iterable, List
|
| 11 |
-
|
| 12 |
-
import numpy as np
|
| 13 |
-
from sentence_transformers import SentenceTransformer # type: ignore
|
| 14 |
-
|
| 15 |
-
from .config import settings
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class EmbeddingModel:
|
| 19 |
-
def __init__(self, model_name: str | None = None) -> None:
|
| 20 |
-
self.model_name = model_name or settings.embedding_model_name
|
| 21 |
-
self.model = SentenceTransformer(self.model_name)
|
| 22 |
-
|
| 23 |
-
def encode(self, texts: Iterable[str], batch_size: int = 32) -> np.ndarray:
|
| 24 |
-
embeddings = self.model.encode(
|
| 25 |
-
list(texts),
|
| 26 |
-
batch_size=batch_size,
|
| 27 |
-
show_progress_bar=True,
|
| 28 |
-
convert_to_numpy=True,
|
| 29 |
-
normalize_embeddings=True,
|
| 30 |
-
)
|
| 31 |
-
return embeddings
|
| 32 |
-
|
| 33 |
-
def encode_single(self, text: str) -> np.ndarray:
|
| 34 |
-
return self.encode([text])[0]
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/preprocess.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""Text preprocessing helpers.
|
| 4 |
-
|
| 5 |
-
Includes minimal normalization and an optional language detection helper. The
|
| 6 |
-
`langdetect` dependency is optional — when it's not installed, `detect_language`
|
| 7 |
-
returns "unknown". This keeps lightweight workflows (like simple counting) runnable
|
| 8 |
-
without installing all NLP dependencies.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
try:
|
| 12 |
-
from langdetect import detect, DetectorFactory # type: ignore
|
| 13 |
-
DetectorFactory.seed = 42
|
| 14 |
-
|
| 15 |
-
def detect_language(text: str) -> str:
|
| 16 |
-
try:
|
| 17 |
-
return detect(text)
|
| 18 |
-
except Exception:
|
| 19 |
-
return "unknown"
|
| 20 |
-
except Exception:
|
| 21 |
-
# langdetect is optional for lightweight usage; provide fallback
|
| 22 |
-
def detect_language(text: str) -> str:
|
| 23 |
-
return "unknown"
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def normalize_text(text: str) -> str:
|
| 27 |
-
# Minimal normalization; keep non-latin scripts (Hebrew)
|
| 28 |
-
return " ".join(str(text).split())
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def preprocess_text(text: str) -> str:
|
| 32 |
-
return normalize_text(text)
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/rag_service.py
DELETED
|
@@ -1,1057 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import argparse
|
| 4 |
-
from dataclasses import dataclass
|
| 5 |
-
from typing import List, Optional, Dict
|
| 6 |
-
|
| 7 |
-
import numpy as np
|
| 8 |
-
import pandas as pd
|
| 9 |
-
|
| 10 |
-
from .config import settings
|
| 11 |
-
from .data_loader import load_feedback
|
| 12 |
-
from .embedding import EmbeddingModel
|
| 13 |
-
from .preprocess import preprocess_text
|
| 14 |
-
from .vector_store import FaissVectorStore, SearchResult
|
| 15 |
-
from .analysis import detect_query_type, resolve_count_from_type
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
try:
|
| 19 |
-
from openai import OpenAI # type: ignore
|
| 20 |
-
except Exception: # pragma: no cover - optional
|
| 21 |
-
OpenAI = None # type: ignore
|
| 22 |
-
|
| 23 |
-
try:
|
| 24 |
-
import google.generativeai as genai # type: ignore
|
| 25 |
-
except Exception: # pragma: no cover - optional
|
| 26 |
-
genai = None # type: ignore
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
@dataclass
|
| 30 |
-
class RetrievalOutput:
|
| 31 |
-
query: str
|
| 32 |
-
results: List[SearchResult]
|
| 33 |
-
summary: Optional[str]
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
class RAGService:
|
| 37 |
-
def __init__(self) -> None:
|
| 38 |
-
self.embedder = EmbeddingModel()
|
| 39 |
-
self.store: Optional[FaissVectorStore] = None
|
| 40 |
-
|
| 41 |
-
def ingest(self, df: Optional[pd.DataFrame] = None) -> None:
|
| 42 |
-
data = df if df is not None else load_feedback()
|
| 43 |
-
texts = [preprocess_text(t) for t in data[settings.text_column].astype(str).tolist()]
|
| 44 |
-
vectors = self.embedder.encode(texts)
|
| 45 |
-
|
| 46 |
-
store = FaissVectorStore(dim=vectors.shape[1])
|
| 47 |
-
store.add(vectors.astype(np.float32), data[[settings.text_column, settings.service_column, settings.level_column]])
|
| 48 |
-
store.save(settings.vector_index_path, settings.vector_metadata_path)
|
| 49 |
-
self.store = store
|
| 50 |
-
|
| 51 |
-
def _ensure_store(self) -> None:
|
| 52 |
-
if self.store is None:
|
| 53 |
-
import os
|
| 54 |
-
if not os.path.exists(settings.vector_index_path):
|
| 55 |
-
raise FileNotFoundError(
|
| 56 |
-
f"Vector index not found at {settings.vector_index_path}. "
|
| 57 |
-
"Please run /ingest endpoint first or precompute the index."
|
| 58 |
-
)
|
| 59 |
-
self.store = FaissVectorStore.load(settings.vector_index_path, settings.vector_metadata_path)
|
| 60 |
-
|
| 61 |
-
def retrieve(self, query: str, top_k: int = 5, level_filter: Optional[tuple] = None) -> List[SearchResult]:
|
| 62 |
-
"""Retrieve results with optional level filtering.
|
| 63 |
-
|
| 64 |
-
Args:
|
| 65 |
-
query: Search query
|
| 66 |
-
top_k: Number of results to retrieve
|
| 67 |
-
level_filter: Optional tuple (min_level, max_level) to filter by level
|
| 68 |
-
"""
|
| 69 |
-
self._ensure_store()
|
| 70 |
-
assert self.store is not None
|
| 71 |
-
q_vec = self.embedder.encode_single(preprocess_text(query))
|
| 72 |
-
|
| 73 |
-
# Retrieve more results if filtering is needed (to ensure we get enough after filtering)
|
| 74 |
-
search_k = top_k * 3 if level_filter else top_k
|
| 75 |
-
results = self.store.search(q_vec, top_k=search_k)
|
| 76 |
-
|
| 77 |
-
# Apply level filter if specified
|
| 78 |
-
if level_filter:
|
| 79 |
-
min_level, max_level = level_filter
|
| 80 |
-
filtered_results = []
|
| 81 |
-
for r in results:
|
| 82 |
-
level = r.row.get(settings.level_column)
|
| 83 |
-
if level is not None:
|
| 84 |
-
try:
|
| 85 |
-
level_val = float(level)
|
| 86 |
-
if min_level <= level_val <= max_level:
|
| 87 |
-
filtered_results.append(r)
|
| 88 |
-
if len(filtered_results) >= top_k:
|
| 89 |
-
break
|
| 90 |
-
except (ValueError, TypeError):
|
| 91 |
-
continue
|
| 92 |
-
return filtered_results
|
| 93 |
-
|
| 94 |
-
return results[:top_k]
|
| 95 |
-
|
| 96 |
-
def summarize(self, query: str, contexts: List[str]) -> Optional[str]:
|
| 97 |
-
if not contexts:
|
| 98 |
-
return None
|
| 99 |
-
joined = "\n".join(f"- {c}" for c in contexts[:10])
|
| 100 |
-
# Detect if query is in Hebrew
|
| 101 |
-
is_hebrew = any('\u0590' <= char <= '\u05FF' for char in query)
|
| 102 |
-
lang_instruction = "ענה בעברית" if is_hebrew else "Answer in the language of the query"
|
| 103 |
-
|
| 104 |
-
prompt = (
|
| 105 |
-
f"You are a government digital services assistant. Based on the following citizen feedback snippets, "
|
| 106 |
-
f"write a concise summary (max 100 words) highlighting key issues and suggestions. "
|
| 107 |
-
f"{lang_instruction}.\n\n"
|
| 108 |
-
f"Query:\n{query}\n\nFeedback:\n{joined}\n\nSummary:"
|
| 109 |
-
)
|
| 110 |
-
# Prefer Gemini if configured
|
| 111 |
-
if settings.gemini_api_key and genai is not None:
|
| 112 |
-
try:
|
| 113 |
-
genai.configure(api_key=settings.gemini_api_key)
|
| 114 |
-
model = genai.GenerativeModel("gemini-1.5-flash")
|
| 115 |
-
resp = model.generate_content(prompt)
|
| 116 |
-
text = getattr(resp, "text", None)
|
| 117 |
-
if isinstance(text, str) and text.strip():
|
| 118 |
-
return text.strip()
|
| 119 |
-
except Exception:
|
| 120 |
-
pass
|
| 121 |
-
# Fallback to OpenAI if available
|
| 122 |
-
if settings.openai_api_key and OpenAI is not None:
|
| 123 |
-
client = OpenAI(api_key=settings.openai_api_key)
|
| 124 |
-
try:
|
| 125 |
-
resp = client.chat.completions.create(
|
| 126 |
-
model="gpt-4o-mini",
|
| 127 |
-
messages=[{"role": "user", "content": prompt}],
|
| 128 |
-
temperature=0.2,
|
| 129 |
-
max_tokens=200,
|
| 130 |
-
)
|
| 131 |
-
return resp.choices[0].message.content
|
| 132 |
-
except Exception:
|
| 133 |
-
pass
|
| 134 |
-
# Fallback: simple extractive "summary"
|
| 135 |
-
return " ".join(contexts[:3])
|
| 136 |
-
|
| 137 |
-
def _validate_and_fix_response(self, response: str, query: str, aggregates_str: str) -> str:
|
| 138 |
-
"""Validate response and fix if needed. Returns validated/fixed response."""
|
| 139 |
-
if not response or len(response.strip()) < 50:
|
| 140 |
-
return "לא הצלחתי ליצור תשובה מספקת מהנתונים. אנא נסה שאילתה אחרת או בדוק שהאינדקס נבנה כראוי."
|
| 141 |
-
|
| 142 |
-
# Check for obvious nonsense patterns
|
| 143 |
-
nonsense_patterns = [
|
| 144 |
-
"אני לא יכול", "I cannot", "I don't know", "לא יודע",
|
| 145 |
-
"לא ניתן", "cannot provide", "unable to", "אני לא",
|
| 146 |
-
"I'm sorry", "I apologize", "סליחה", "לא מצאתי"
|
| 147 |
-
]
|
| 148 |
-
if any(pattern in response.lower() for pattern in [p.lower() for p in nonsense_patterns]):
|
| 149 |
-
# Try to fix by asking the model to be more specific
|
| 150 |
-
return self._request_fix(response, query, aggregates_str)
|
| 151 |
-
|
| 152 |
-
# Check if response is too short (relaxed threshold - allow shorter responses if they're good)
|
| 153 |
-
word_count = len(response.split())
|
| 154 |
-
if word_count < 400:
|
| 155 |
-
# Response is very short, try to get more detail (target is 600-800 words)
|
| 156 |
-
return self._request_fix(response, query, aggregates_str)
|
| 157 |
-
|
| 158 |
-
# Check if response is just a jumble of words (no clear structure, no sentences, just words)
|
| 159 |
-
# Count sentences (periods, exclamation marks, question marks)
|
| 160 |
-
sentence_count = response.count('.') + response.count('!') + response.count('?')
|
| 161 |
-
if sentence_count < 10 and word_count > 200:
|
| 162 |
-
# Response has many words but few sentences - might be a jumble
|
| 163 |
-
# Check if there are enough paragraphs (double newlines or line breaks)
|
| 164 |
-
paragraph_count = response.count('\n\n') + response.count('\r\n\r\n')
|
| 165 |
-
if paragraph_count < 3:
|
| 166 |
-
# Response seems like a jumble - not enough structure
|
| 167 |
-
return self._request_fix(response, query, aggregates_str)
|
| 168 |
-
|
| 169 |
-
# Check if response seems too generic or just a list of examples (doesn't contain analysis)
|
| 170 |
-
has_numbers = any(char.isdigit() for char in response)
|
| 171 |
-
has_analysis_terms = any(term in response for term in [
|
| 172 |
-
"משוב", "משתמש", "שירות", "דירוג", "נתונים", "ניתוח", "מגמה", "דפוס",
|
| 173 |
-
"השוואה", "אחוז", "ממוצע", "feedback", "user", "service", "analysis", "pattern", "trend"
|
| 174 |
-
])
|
| 175 |
-
|
| 176 |
-
# Check for business understanding terms
|
| 177 |
-
has_business_terms = any(term in response for term in [
|
| 178 |
-
"משמעות", "השפעה", "סיכון", "הזדמנות", "המלצה", "צעד", "תיקון", "שיפור",
|
| 179 |
-
"מגמה", "דפוס", "נושא", "בעיה", "פתרון", "impact", "risk", "opportunity",
|
| 180 |
-
"recommendation", "action", "improvement", "trend", "pattern", "issue", "solution"
|
| 181 |
-
])
|
| 182 |
-
|
| 183 |
-
# Check if response is just listing examples (too many bullet points or numbers)
|
| 184 |
-
bullet_points = response.count("•") + response.count("-") + response.count("1.") + response.count("2.") + response.count("3.") + response.count("4.") + response.count("5.")
|
| 185 |
-
is_mostly_list = bullet_points > word_count / 15 # More than ~6.5% of content is list markers
|
| 186 |
-
|
| 187 |
-
# Check if response is just a list of short phrases (common pattern: each line is a short phrase)
|
| 188 |
-
lines = [line.strip() for line in response.split("\n") if line.strip()]
|
| 189 |
-
short_lines = sum(1 for line in lines if len(line.split()) < 8) # Lines with less than 8 words
|
| 190 |
-
is_list_of_phrases = len(lines) > 3 and short_lines > len(lines) * 0.7 # More than 70% are short lines
|
| 191 |
-
|
| 192 |
-
# Check if response lacks coherent structure (too many short sentences, not enough paragraphs)
|
| 193 |
-
sentences = response.count(".") + response.count("!") + response.count("?")
|
| 194 |
-
avg_sentence_length = word_count / max(sentences, 1)
|
| 195 |
-
is_fragmented = avg_sentence_length < 12 and word_count > 100 # Too many very short sentences
|
| 196 |
-
|
| 197 |
-
# Check if response has enough paragraphs (should have at least 3-4 paragraphs for a good analysis)
|
| 198 |
-
paragraphs = [p.strip() for p in response.split("\n\n") if p.strip()]
|
| 199 |
-
has_enough_paragraphs = len(paragraphs) >= 3
|
| 200 |
-
|
| 201 |
-
# Check if query is about feelings/opinions and response should cover both sides
|
| 202 |
-
is_feelings_query = any(term in query.lower() for term in [
|
| 203 |
-
"מרגיש", "רגש", "דעה", "אוהב", "שונא", "מרוצה", "לא מרוצ��",
|
| 204 |
-
"feel", "opinion", "like", "dislike", "satisfied", "unsatisfied"
|
| 205 |
-
])
|
| 206 |
-
|
| 207 |
-
if is_feelings_query:
|
| 208 |
-
# Check if response covers both positive and negative sides
|
| 209 |
-
has_positive_terms = any(term in response for term in [
|
| 210 |
-
"מרוצה", "אוהב", "חיובי", "טוב", "מעולה", "מצוין", "דירוג גבוה", "דירוג 4", "דירוג 5",
|
| 211 |
-
"satisfied", "positive", "good", "excellent", "high rating", "rating 4", "rating 5"
|
| 212 |
-
])
|
| 213 |
-
has_negative_terms = any(term in response for term in [
|
| 214 |
-
"לא מרוצה", "שונא", "שלילי", "רע", "גרוע", "דירוג נמוך", "דירוג 1", "דירוג 2",
|
| 215 |
-
"unsatisfied", "negative", "bad", "poor", "low rating", "rating 1", "rating 2"
|
| 216 |
-
])
|
| 217 |
-
if not (has_positive_terms and has_negative_terms) and word_count < 500:
|
| 218 |
-
# Response doesn't cover both sides and is short, try to improve
|
| 219 |
-
return self._request_fix(response, query, aggregates_str)
|
| 220 |
-
|
| 221 |
-
# Relaxed validation - only fix if really problematic (target is 600-800 words)
|
| 222 |
-
if (not has_numbers or not has_analysis_terms) and word_count < 400:
|
| 223 |
-
# Response seems too generic or lacks analysis, try to improve
|
| 224 |
-
return self._request_fix(response, query, aggregates_str)
|
| 225 |
-
|
| 226 |
-
if is_mostly_list and word_count < 400:
|
| 227 |
-
# Response is mostly a list and very short, try to improve
|
| 228 |
-
return self._request_fix(response, query, aggregates_str)
|
| 229 |
-
|
| 230 |
-
if not has_business_terms and word_count < 500:
|
| 231 |
-
# Response lacks business understanding and is short, try to improve
|
| 232 |
-
return self._request_fix(response, query, aggregates_str)
|
| 233 |
-
|
| 234 |
-
if is_fragmented and word_count < 400:
|
| 235 |
-
# Response is too fragmented and short, try to improve
|
| 236 |
-
return self._request_fix(response, query, aggregates_str)
|
| 237 |
-
|
| 238 |
-
if not has_enough_paragraphs and word_count < 400:
|
| 239 |
-
# Response doesn't have enough structure and is short, try to improve
|
| 240 |
-
return self._request_fix(response, query, aggregates_str)
|
| 241 |
-
|
| 242 |
-
# Check for repetitive or nonsensical patterns (same word repeated many times)
|
| 243 |
-
words = response.split()
|
| 244 |
-
if len(words) > 0:
|
| 245 |
-
word_freq = {}
|
| 246 |
-
for word in words:
|
| 247 |
-
word_freq[word] = word_freq.get(word, 0) + 1
|
| 248 |
-
max_repetition = max(word_freq.values()) if word_freq else 0
|
| 249 |
-
if max_repetition > len(words) * 0.2: # If any word appears more than 20% of the time
|
| 250 |
-
# Response seems repetitive/nonsensical
|
| 251 |
-
return self._request_fix(response, query, aggregates_str)
|
| 252 |
-
|
| 253 |
-
return response
|
| 254 |
-
|
| 255 |
-
def _request_fix(self, original_response: str, query: str, aggregates_str: str) -> str:
|
| 256 |
-
"""Ask the LLM to fix/improve a response that failed validation."""
|
| 257 |
-
# Check if query is about feelings/opinions
|
| 258 |
-
is_feelings_query = any(term in query.lower() for term in [
|
| 259 |
-
"מרגיש", "רגש", "דעה", "אוהב", "שונא", "מרוצה", "לא מרוצה",
|
| 260 |
-
"feel", "opinion", "like", "dislike", "satisfied", "unsatisfied"
|
| 261 |
-
])
|
| 262 |
-
|
| 263 |
-
feelings_instruction = ""
|
| 264 |
-
if is_feelings_query:
|
| 265 |
-
feelings_instruction = (
|
| 266 |
-
f"\nחשוב מאוד - השאלה מתייחסת לרגשות/תחושות/דעות. פורמט ספציפי:\n"
|
| 267 |
-
f"1. התחל עם סיכום כללי קצר (2-3 משפטים) שמתאר את התמונה הגדולה:\n"
|
| 268 |
-
f" - דוגמה: 'נראה שיש רגשות מעורבים כלפי השירות' או 'רוב המשתמשים מרוצים מהשירות'\n"
|
| 269 |
-
f" - כלול מספרים: כמה משתמשים מרוצים? כמה לא? מה האחוזים?\n"
|
| 270 |
-
f"2. המשך עם ניתוח של המשתמשים המרוצים (דירוג 4-5):\n"
|
| 271 |
-
f" - מה הם אומרים? מה הם אוהבים? מה עובד טוב?\n"
|
| 272 |
-
f" - כלול דוגמאות קונקרטיות מהמשובים - צטט או תאר משובים ספציפיים\n"
|
| 273 |
-
f" - דוגמה: 'רוב המשתמשים מרוצים ומודים על השירות, כפי שניתן לראות במשובים כמו...'\n"
|
| 274 |
-
f"3. המשך עם ניתוח של המשתמשים הלא מרוצים (דירוג 1-2):\n"
|
| 275 |
-
f" - מה הם אומרים? מה הם לא אוהבים? מה לא עובד?\n"
|
| 276 |
-
f" - כלול בעיות ספציפיות עם דוגמאות קונקרטיות מהמשובים\n"
|
| 277 |
-
f" - דוגמה: 'חלק מהמשתמשים מצביעים על בעיות משמעותיות כמו שדות שלא ניתן לערוך אותם, חוסר ידיעה שמונעת מהם להזין שדות אחרים, או ציפייה (ותסכול) על אי קבלת מסמכים בדואר'\n"
|
| 278 |
-
f" - צטט או תאר משובים ספציפיים שמדגימים את הבעיות\n"
|
| 279 |
-
f"4. סיים עם סיכום והמלצות\n"
|
| 280 |
-
)
|
| 281 |
-
|
| 282 |
-
fix_prompt = (
|
| 283 |
-
f"התשובה הבאה לא מספקת - היא קצרה מדי, לא קוהרנטית, גיבוב של מילים, או חסר מבנה ברור. אנא כתוב תשובה חדשה ומתוקנת:\n\n"
|
| 284 |
-
f"חשוב מאוד - אגרגציה חכמה (קריטי!):\n"
|
| 285 |
-
f"1. קודם כל, עשה אגרגציה חכמה של כל הנתונים:\n"
|
| 286 |
-
f" - קרא ונתח את כל הסטטיסטיקות והסיכומים שסופקו\n"
|
| 287 |
-
f" - זהה את הדפוסים והנושאים המרכזיים שחוזרים על עצמם\n"
|
| 288 |
-
f" - הבן את התמונה הגדולה - מה המגמות הכלליות? מה הנושאים הדומיננטיים?\n"
|
| 289 |
-
f" - השווה בין קבוצות שונות (מרוצים vs לא מרוצים, שירותים שונים)\n"
|
| 290 |
-
f"\n"
|
| 291 |
-
f"2. רק אחרי שעשית אגרגציה חכמה - כתוב תשובה מסכמת ברורה ומסודרת:\n"
|
| 292 |
-
f" - תשובה שמסכמת את הממצאים העיקריים מהאגרגציה\n"
|
| 293 |
-
f" - תשובה שמראה הבנה עמוקה של הדפוסים והנושאים המרכזיים\n"
|
| 294 |
-
f" - תשובה שמבוססת על ניתוח מעמיק, לא רק חיבור של משובים בודדים\n"
|
| 295 |
-
f" - תשובה ברורה ומסודרת - לא גיבוב של מילים\n"
|
| 296 |
-
f" - אל תכתוב: 'משתמש אחד אמר X, משתמש שני אמר Y'\n"
|
| 297 |
-
f" - במקום זה, כתוב: 'נראה שיש דפוס ברור של X בקרב Y% מהמשתמשים'\n"
|
| 298 |
-
f"\n"
|
| 299 |
-
f"מבנה התשובה - חובה (קריטי!):\n"
|
| 300 |
-
f"התשובה חייבת להיות מסודרת בבירור עם מבנה ברור:\n"
|
| 301 |
-
f"1. פתיחה - סיכום מנהלים (פסקה אחת, 3-4 משפטים): סיכום כללי של התמונה הגדולה עם מספרים\n"
|
| 302 |
-
f"2. ניתוח מפורט לפי נושאים/דעות (3-5 פסקאות, כל פסקה 4-6 משפטים): כל פסקה בנושא/דעה מרכזי אחד\n"
|
| 303 |
-
f"3. השוואות וניתוח מעמיק (2-3 פסקאות): השוואות בין קבוצות ושירותים\n"
|
| 304 |
-
f"4. תובנות עסקיות והמלצות (2-3 פסקאות): משמעות, השפעה, המלצות\n"
|
| 305 |
-
f"5. סיכום (פסקה אחת, 2-3 משפטים): מסקנות עיקריות ונקודות מפתח\n"
|
| 306 |
-
f"\n"
|
| 307 |
-
f"דרישות לתשובה המתוקנת (חובה!):\n"
|
| 308 |
-
f"1. תשובה קוהרנטית, מפורטת מאוד ומקיפה בפסקאות מלאות (לפחות 7-10 פסקאות, לפחות 600-800 מילים)\n"
|
| 309 |
-
f"2. תשובה ברורה ומסודרת - לא גיבוב של מילים, אלא מבנה ברור עם סעיפים ופסקאות\n"
|
| 310 |
-
f"3. כל פסקה צריכה להיות קוהרנטית וממוקדת בנושא אחד (4-6 משפטים ארוכים ומפורטים)\n"
|
| 311 |
-
f"4. תשובה שמראה הבנה רחבה ומקיפה של כל הנתונים - לא רק רשימת משובים בודדים\n"
|
| 312 |
-
f"5. תשובה שכוללת כמה דעות/נושאים מרכזיים (לא רק נושא אחד)\n"
|
| 313 |
-
f"6. הרחב על כל נקודה - תן הסברים מפורטים, דוגמאות מרובות, והשוואות מעמיקות\n"
|
| 314 |
-
f"7. תשובה שמסכמת את הממצאים העיקריים מהאגרגציה החכמה - לא רק חיבור של משובים\n"
|
| 315 |
-
f"{feelings_instruction}"
|
| 316 |
-
f"8. מבוססת אך ורק על הנתונים הסטטיסטיים הבאים:\n{aggregates_str}\n"
|
| 317 |
-
f"9. עונה ישירות על השאלה: {query}\n"
|
| 318 |
-
f"10. כוללת מספרים מדויקים מהנתונים (כמה משתמשים, אחוזים, ממוצעים, וכו')\n"
|
| 319 |
-
f"11. מראה הבנה של דפוסים ונושאים מרכזיים - לא רק דוגמאות בודדות\n"
|
| 320 |
-
f"12. תשובה קוהרנטית ומקצועית - ניתוח מעמיק, לא רק חיבור של משובים\n"
|
| 321 |
-
f"13. כוללת תובנות עסקיות מעמיקות והמלצות מעשיות\n"
|
| 322 |
-
f"14. הגיונית, לוגית, וקשורה לשאלה - לא שטויות או גיבוב מילים\n"
|
| 323 |
-
f"15. כתובה בעברית מקצועית וקולחת\n\n"
|
| 324 |
-
f"התשובה המקורית (לא מספקת - אל תשתמש בה, רק כתוב תשובה חדשה):\n{original_response}\n\n"
|
| 325 |
-
f"אנא כתוב תשובה חדשה ומתוקנת שעומדת בכל הדרישות לעיל - תשובה קוהרנטית ומקיפה בפסקאות מלאות שמראה הבנה של כל הנתונים:"
|
| 326 |
-
)
|
| 327 |
-
|
| 328 |
-
# Try Gemini first
|
| 329 |
-
if settings.gemini_api_key and genai is not None:
|
| 330 |
-
try:
|
| 331 |
-
genai.configure(api_key=settings.gemini_api_key)
|
| 332 |
-
model = genai.GenerativeModel("gemini-1.5-flash")
|
| 333 |
-
generation_config = {
|
| 334 |
-
"temperature": 0.7, # Moderate temperature for fixes - still creative but focused
|
| 335 |
-
"top_p": 0.95,
|
| 336 |
-
"top_k": 40,
|
| 337 |
-
"max_output_tokens": 3000,
|
| 338 |
-
}
|
| 339 |
-
resp = model.generate_content(fix_prompt, generation_config=generation_config)
|
| 340 |
-
text = getattr(resp, "text", None)
|
| 341 |
-
if isinstance(text, str) and text.strip() and len(text.strip()) > 100:
|
| 342 |
-
return text.strip()
|
| 343 |
-
except Exception:
|
| 344 |
-
pass
|
| 345 |
-
|
| 346 |
-
# Fallback to OpenAI
|
| 347 |
-
if settings.openai_api_key and OpenAI is not None:
|
| 348 |
-
try:
|
| 349 |
-
client = OpenAI(api_key=settings.openai_api_key)
|
| 350 |
-
resp = client.chat.completions.create(
|
| 351 |
-
model="gpt-4o-mini",
|
| 352 |
-
messages=[{"role": "user", "content": fix_prompt}],
|
| 353 |
-
temperature=0.7,
|
| 354 |
-
max_tokens=2500,
|
| 355 |
-
)
|
| 356 |
-
fixed = resp.choices[0].message.content
|
| 357 |
-
if fixed and len(fixed.strip()) > 100:
|
| 358 |
-
return fixed.strip()
|
| 359 |
-
except Exception:
|
| 360 |
-
pass
|
| 361 |
-
|
| 362 |
-
# If fix failed, return original with note
|
| 363 |
-
return f"{original_response}\n\n[הערה: התשובה עשויה להיות לא מלאה. אנא נסה שאילתה יותר ספציפית.]"
|
| 364 |
-
|
| 365 |
-
def synthesize(self, query: str, results: List[SearchResult], contexts: List[str], max_contexts: int = 100, level_filter: Optional[tuple] = None) -> Optional[str]:
|
| 366 |
-
"""Produce a free-form, analyst-style answer that synthesizes the retrieved contexts.
|
| 367 |
-
|
| 368 |
-
This method asks the LLM to act as an experienced data analyst for digital business
|
| 369 |
-
processes and to synthesize insights, root causes, business impact and recommended
|
| 370 |
-
next steps. It is explicitly not an extractive response of "most relevant" snippets.
|
| 371 |
-
"""
|
| 372 |
-
if not contexts:
|
| 373 |
-
return None
|
| 374 |
-
|
| 375 |
-
# Load full dataset for comprehensive analysis
|
| 376 |
-
try:
|
| 377 |
-
df = load_feedback()
|
| 378 |
-
# Apply level filter to dataset if specified
|
| 379 |
-
if level_filter:
|
| 380 |
-
min_level, max_level = level_filter
|
| 381 |
-
df = df[(df[settings.level_column] >= min_level) & (df[settings.level_column] <= max_level)].copy()
|
| 382 |
-
total_records = len(df)
|
| 383 |
-
except Exception:
|
| 384 |
-
df = None
|
| 385 |
-
total_records = 0
|
| 386 |
-
|
| 387 |
-
# Instead of showing individual examples, create comprehensive summaries
|
| 388 |
-
# Group by service and level to show patterns
|
| 389 |
-
if df is not None and len(df) > 0:
|
| 390 |
-
# Create service-level summaries
|
| 391 |
-
service_summaries = []
|
| 392 |
-
for service_name in df[settings.service_column].unique()[:20]: # Top 20 services
|
| 393 |
-
service_df = df[df[settings.service_column] == service_name]
|
| 394 |
-
if len(service_df) > 0:
|
| 395 |
-
avg_level = service_df[settings.level_column].mean()
|
| 396 |
-
count = len(service_df)
|
| 397 |
-
high_ratings = len(service_df[service_df[settings.level_column] >= 4])
|
| 398 |
-
low_ratings = len(service_df[service_df[settings.level_column] <= 2])
|
| 399 |
-
# Sample a few representative texts
|
| 400 |
-
sample_texts = service_df[settings.text_column].head(3).tolist()
|
| 401 |
-
service_summaries.append(
|
| 402 |
-
f"שירות: {service_name}\n"
|
| 403 |
-
f" - מספר משובים: {count}\n"
|
| 404 |
-
f" - ממוצע דירוג: {avg_level:.2f}\n"
|
| 405 |
-
f" - דירוגים גבוהים (4-5): {high_ratings} ({(high_ratings/count*100):.1f}%)\n"
|
| 406 |
-
f" - דירוגים נמוכים (1-2): {low_ratings} ({(low_ratings/count*100):.1f}%)\n"
|
| 407 |
-
f" - דוגמאות: {', '.join([t[:100] + '...' if len(t) > 100 else t for t in sample_texts])}"
|
| 408 |
-
)
|
| 409 |
-
|
| 410 |
-
# Create level-based summaries
|
| 411 |
-
level_summaries = []
|
| 412 |
-
for level in sorted(df[settings.level_column].unique()):
|
| 413 |
-
level_df = df[df[settings.level_column] == level]
|
| 414 |
-
if len(level_df) > 0:
|
| 415 |
-
count = len(level_df)
|
| 416 |
-
percentage = (count / total_records * 100)
|
| 417 |
-
# Sample representative texts
|
| 418 |
-
sample_texts = level_df[settings.text_column].head(5).tolist()
|
| 419 |
-
level_summaries.append(
|
| 420 |
-
f"דירוג {level} ({count} משובים, {percentage:.1f}%):\n"
|
| 421 |
-
f" דוגמאות: {' | '.join([t[:80] + '...' if len(t) > 80 else t for t in sample_texts[:3]])}"
|
| 422 |
-
)
|
| 423 |
-
|
| 424 |
-
# Include top retrieved examples for context (but not all)
|
| 425 |
-
top_examples = []
|
| 426 |
-
for i, r in enumerate(results[:50]): # Top 50 most relevant
|
| 427 |
-
text = r.row.get(settings.text_column, "")
|
| 428 |
-
service = r.row.get(settings.service_column, "")
|
| 429 |
-
level = r.row.get(settings.level_column, "")
|
| 430 |
-
score = r.score
|
| 431 |
-
top_examples.append(f"[דמיון: {score:.3f}, שירות: {service}, דירוג: {level}] {text[:150]}{'...' if len(text) > 150 else ''}")
|
| 432 |
-
|
| 433 |
-
joined = (
|
| 434 |
-
f"סיכום מקיף של כל הנתונים ({total_records} משובים בסך הכל):\n\n"
|
| 435 |
-
f"סיכום לפי שירותים (20 השירותים המובילים):\n" + "\n\n".join(service_summaries) + "\n\n"
|
| 436 |
-
f"סיכום לפי דירוגים:\n" + "\n\n".join(level_summaries) + "\n\n"
|
| 437 |
-
f"דוגמאות רלוונטיות ביותר (50 המשובים הרלוונטיים ביותר לשאילתה):\n" + "\n".join(f"{i+1}. {ex}" for i, ex in enumerate(top_examples))
|
| 438 |
-
)
|
| 439 |
-
else:
|
| 440 |
-
# Fallback to original method if dataset loading fails
|
| 441 |
-
safe_ctxs = []
|
| 442 |
-
for i, r in enumerate(results[:max_contexts]):
|
| 443 |
-
text = r.row.get(settings.text_column, "")
|
| 444 |
-
service = r.row.get(settings.service_column, "")
|
| 445 |
-
level = r.row.get(settings.level_column, "")
|
| 446 |
-
ctx = f"[שירות: {service}, דירוג: {level}] {text}"
|
| 447 |
-
if len(ctx) > 400:
|
| 448 |
-
ctx = ctx[:400] + "..."
|
| 449 |
-
safe_ctxs.append(ctx)
|
| 450 |
-
joined = "\n\n".join(f"{i+1}. {c}" for i, c in enumerate(safe_ctxs))
|
| 451 |
-
|
| 452 |
-
# Detect if query is in Hebrew
|
| 453 |
-
is_hebrew = any('\u0590' <= char <= '\u05FF' for char in query)
|
| 454 |
-
lang_instruction = "ענה בעברית באופן מקצועי" if is_hebrew else "Answer in the language of the query in a professional tone"
|
| 455 |
-
|
| 456 |
-
instruction = (
|
| 457 |
-
"אתה אנליסט נתונים בכיר ומנוסה, מומחה בניתוח משובי משתמשים על שירותים דיגיטליים.\n"
|
| 458 |
-
"יש לך גישה מלאה לכל הנתונים - אתה רואה את התמונה הגדולה של כל המשובים.\n"
|
| 459 |
-
"\n"
|
| 460 |
-
"מבנה הנתונים:\n"
|
| 461 |
-
"- Text: הטקסט המלא של המשוב\n"
|
| 462 |
-
"- Level: הדירוג (1-5, 5=הטוב ביותר, 1=הגרוע ביותר)\n"
|
| 463 |
-
"- ServiceName: שם השירות\n"
|
| 464 |
-
"\n"
|
| 465 |
-
"חשוב מאוד - איך לעבוד (קריטי!):\n"
|
| 466 |
-
"1. קודם כל, עשה אגרגציה חכמה של כל הנתונים:\n"
|
| 467 |
-
" - קרא ונתח את כל הסטטיסטיקות והסיכומים שסופקו\n"
|
| 468 |
-
" - זהה את הדפוסים והנושאים המרכזיים שחוזרים על עצמם\n"
|
| 469 |
-
" - הבן את התמונה הגדולה - מה המגמות הכלליות? מה הנושאים הדומיננטיים?\n"
|
| 470 |
-
" - השווה בין קבוצות שונות (מרוצים vs לא מרוצים, שירותים שונים)\n"
|
| 471 |
-
" - זהה קשרים והקשרים בין נושאים שונים\n"
|
| 472 |
-
" - הבן את המשמעות העסקית - מה זה אומר בפועל?\n"
|
| 473 |
-
"\n"
|
| 474 |
-
"2. אחרי האגרגציה החכמה - כתוב תשובה ברורה ומסודרת:\n"
|
| 475 |
-
" - תשובה שמסכמת את הממצאים העיקריים מהאגרגציה\n"
|
| 476 |
-
" - תשובה שמראה הבנה עמוקה של הדפוסים והנושאים המרכזיים\n"
|
| 477 |
-
" - תשובה שמבוססת על ניתוח מעמיק, לא רק חיבור של משובים בודדים\n"
|
| 478 |
-
" - תשובה ברורה ומסודרת - לא גיבוב של מילים\n"
|
| 479 |
-
"\n"
|
| 480 |
-
"3. מה זה אומר בפועל:\n"
|
| 481 |
-
" - אל תכתוב: 'משתמש אחד אמר X, משתמש שני אמר Y, משתמש שלישי אמר Z'\n"
|
| 482 |
-
" - במקום זה, כתוב: 'נראה שיש דפוס ברור של X בקרב Y% מהמשתמשים, בעוד ש-Z% מציינים Y'\n"
|
| 483 |
-
" - זהה נושאים מרכזיים שחוזרים על עצמם והסבר אותם בצורה ברורה\n"
|
| 484 |
-
" - השווה בין קבוצות שונות והסבר את ההבדלים\n"
|
| 485 |
-
" - תן תובנות עסקיות שמבוססות על הבנה של כל הנתונים יחד\n"
|
| 486 |
-
"\n"
|
| 487 |
-
"כללים חשובים:\n"
|
| 488 |
-
"1. תשובותיך מבוססות רק על הנתונים שסופקו - אל תמציא\n"
|
| 489 |
-
"2. תן תשובה קוהרנטית ומקיפה שמראה הבנה של כל הנתונים\n"
|
| 490 |
-
"3. כל מספר חייב להיות מדויק מהנתונים\n"
|
| 491 |
-
"4. תשובה מפורטת מאוד וארוכה (7-10 פסקאות, 600-800 מילים לפחות)\n"
|
| 492 |
-
"5. תשובה ברורה ומסודרת - לא גיבוב של מילים\n"
|
| 493 |
-
"\n"
|
| 494 |
-
"מבנה התשובה - חובה (קריטי!):\n"
|
| 495 |
-
"התשובה חייבת להיות מסודרת בבירור עם סעיפים ופסקאות:\n"
|
| 496 |
-
"\n"
|
| 497 |
-
"1. פתיחה - סיכום מנהלים (פסקה אחת, 3-4 משפטים):\n"
|
| 498 |
-
" - תן סיכום כללי קצר של התמונה הגדולה\n"
|
| 499 |
-
" - מה המגמות הכלליות? מה המסקנות העיקריות?\n"
|
| 500 |
-
" - כלול מספרים מדויקים (כמה משתמשים מרוצים? כמה לא? אחוזים?)\n"
|
| 501 |
-
"\n"
|
| 502 |
-
"2. ניתוח מפורט לפי נושאים/דעות (3-5 פסקאות, כל פסקה 4-6 משפטים):\n"
|
| 503 |
-
" - כל פסקה תעסוק בנושא/דעה מרכזי אחד\n"
|
| 504 |
-
" - זהה את הנושאים המרכזיים מהאגרגציה והסבר כל אחד מהם\n"
|
| 505 |
-
" - כלול מספרים מדויקים, אחוזים, והשוואות\n"
|
| 506 |
-
" - כלול דוגמאות קונקרטיות מהמשובים (2-3 דוגמאות לכל נושא)\n"
|
| 507 |
-
" - הסבר את המשמעות העסקית של כל נושא\n"
|
| 508 |
-
"\n"
|
| 509 |
-
"3. השוואות וניתוח מעמיק (2-3 פסקאות, כל פסקה 4-6 משפטים):\n"
|
| 510 |
-
" - השווה בין קבוצות שונות (מרוצים vs לא מרוצים)\n"
|
| 511 |
-
" - השווה בין שירותים שונים\n"
|
| 512 |
-
" - זהה קשרים והקשרים - מה גורם למה?\n"
|
| 513 |
-
" - הסבר את ההבדלים והמשמעות שלהם\n"
|
| 514 |
-
"\n"
|
| 515 |
-
"4. תובנות עסקיות והמלצות (2-3 פסקאות, כל פסקה 4-6 משפטים):\n"
|
| 516 |
-
" - מה המשמעות העסקית של הממצאים?\n"
|
| 517 |
-
" - מה ההשפעה על השירות?\n"
|
| 518 |
-
" - מה הסיכונים וההזדמנויות?\n"
|
| 519 |
-
" - המלצות מעשיות וקונקרטיות - מה צריך לעשות?\n"
|
| 520 |
-
"\n"
|
| 521 |
-
"5. סיכום (פסקה אחת, 2-3 משפטים):\n"
|
| 522 |
-
" - סיכום קצר של המסקנות העיקריות\n"
|
| 523 |
-
" - נקודות מפתח לפעולה\n"
|
| 524 |
-
"\n"
|
| 525 |
-
"כללי כתיבה:\n"
|
| 526 |
-
"- כתוב בצורה ברורה ומסודרת - לא גיבוב של מילים\n"
|
| 527 |
-
"- כל פסקה צריכה להיות קוהרנטית וממוקדת בנושא אחד\n"
|
| 528 |
-
"- השתמש במעברים ברורים בין פסקאות\n"
|
| 529 |
-
"- כלול מספרים מדויקים, אחוזים, והשוואות\n"
|
| 530 |
-
"- כלול דוגמאות קונקרטיות מהמשובים\n"
|
| 531 |
-
"- כתוב בצורה טבעית וקולחת - כאילו אתה מסביר למנהל\n"
|
| 532 |
-
"- תן תשובה ארוכה ומקיפה - לפחות 600-800 מילים, 7-10 פסקאות\n"
|
| 533 |
-
"- הרחב על כל נקודה - תן הסברים מפורטים, דוגמאות מרובות, והשוואות מעמיקות\n"
|
| 534 |
-
"- תשובה שמסכמת את הממצאים העיקריים מהאגרגציה החכמה שעשית\n"
|
| 535 |
-
"\n"
|
| 536 |
-
"בדיקה אחרונה לפני שליחת התשובה - חובה לבדוק:\n"
|
| 537 |
-
"1. האם התשובה ברורה ומסודרת עם מבנה ברור (פתיחה, ניתוח לפי נושאים, השוואות, תובנות, סיכום)?\n"
|
| 538 |
-
"2. האם התשובה לא גיבוב של מילים אלא תשובה קוהרנטית ומסודרת?\n"
|
| 539 |
-
"3. האם עשית אגרגציה חכמה של כל הנתונים לפני כתיבת התשובה?\n"
|
| 540 |
-
"4. האם התשובה מסכמת את הממצאים העיקריים מהאגרגציה (לא רק חיבור של משובים בודדים)?\n"
|
| 541 |
-
"5. האם התשובה מראה הבנה עמוקה של הדפוסים והנושאים המרכזיים?\n"
|
| 542 |
-
"6. האם התשובה ארוכה ומקיפה מספיק (לפחות 600-800 מילים, 7-10 פסקאות)?\n"
|
| 543 |
-
"7. האם התשובה כוללת כמה דעות/נושאים מרכזיים (לא רק נושא אחד)?\n"
|
| 544 |
-
"8. אם השאלה מתייחסת לרגשות/תחושות/דעות - האם התשובה כוללת ניתוח של שני הצדדים (מרוצים ולא מרוצים)?\n"
|
| 545 |
-
"9. האם התשובה מראה הבנה עסקית מעמיקה (משמעות, השפעה, המלצות)?\n"
|
| 546 |
-
"10. האם הרחבת על כל נקודה עם הסברים מפורטים ודוגמאות מרובות?\n"
|
| 547 |
-
"11. האם כל המספרים מדויקים מהנתונים?\n"
|
| 548 |
-
"12. האם כל השירותים קיימים בנתונים?\n"
|
| 549 |
-
"13. האם התשובה הגיונית ולוגית (לא שטויות)?\n"
|
| 550 |
-
"14. האם התשובה קשורה לשאלה שנשאלה?\n"
|
| 551 |
-
"15. האם התשובה מפורטת ומקצועית?\n"
|
| 552 |
-
"16. האם התשובה כוללת תובנות עסקיות והמלצות מעשיות?\n"
|
| 553 |
-
"\n"
|
| 554 |
-
"אם התשובה לא עומדת בכל הקריטריונים לעיל, כתוב תשובה חדשה שעומדת בכל הקריטריונים.\n"
|
| 555 |
-
)
|
| 556 |
-
|
| 557 |
-
# Compute comprehensive aggregates locally to include in the prompt
|
| 558 |
-
try:
|
| 559 |
-
df = load_feedback()
|
| 560 |
-
total = len(df)
|
| 561 |
-
|
| 562 |
-
# Level distribution
|
| 563 |
-
level_dist = df[settings.level_column].value_counts().sort_index().to_dict()
|
| 564 |
-
level_percentages = {k: f"{(v/total*100):.1f}%" for k, v in level_dist.items()}
|
| 565 |
-
|
| 566 |
-
# Service statistics
|
| 567 |
-
counts_by_service = df.groupby(settings.service_column).size().sort_values(ascending=False).head(15).to_dict()
|
| 568 |
-
avg_level_by_service = df.groupby(settings.service_column)[settings.level_column].mean().sort_values(ascending=False).head(15).to_dict()
|
| 569 |
-
|
| 570 |
-
# High vs Low ratings
|
| 571 |
-
high_ratings = df[df[settings.level_column] >= 4]
|
| 572 |
-
low_ratings = df[df[settings.level_column] <= 2]
|
| 573 |
-
high_count = len(high_ratings)
|
| 574 |
-
low_count = len(low_ratings)
|
| 575 |
-
|
| 576 |
-
# Service-level analysis
|
| 577 |
-
low_level_df = df[df[settings.level_column] < 3]
|
| 578 |
-
low_level_counts = low_level_df.groupby(settings.service_column).size().sort_values(ascending=False).head(10).to_dict()
|
| 579 |
-
high_level_df = df[df[settings.level_column] >= 4]
|
| 580 |
-
high_level_counts = high_level_df.groupby(settings.service_column).size().sort_values(ascending=False).head(10).to_dict()
|
| 581 |
-
|
| 582 |
-
# Sample texts by rating
|
| 583 |
-
high_sample_texts = high_ratings[settings.text_column].head(5).tolist() if len(high_ratings) > 0 else []
|
| 584 |
-
low_sample_texts = low_ratings[settings.text_column].head(5).tolist() if len(low_ratings) > 0 else []
|
| 585 |
-
|
| 586 |
-
aggregates_str = (
|
| 587 |
-
f"סטטיסטיקות כלליות:\n"
|
| 588 |
-
f"- סך הכל משובים: {total}\n"
|
| 589 |
-
f"- חלוקת דירוגים: {level_dist} ({level_percentages})\n"
|
| 590 |
-
f"- משתמשים מרוצים (דירוג 4-5): {high_count} ({(high_count/total*100):.1f}%)\n"
|
| 591 |
-
f"- משתמשים לא מרוצים (דירוג 1-2): {low_count} ({(low_count/total*100):.1f}%)\n"
|
| 592 |
-
f"\n"
|
| 593 |
-
f"שירותים עם הכי הרבה משובים: {counts_by_service}\n"
|
| 594 |
-
f"שירותים עם ממוצע דירוג גבוה (4+): {dict(list(avg_level_by_service.items())[:10])}\n"
|
| 595 |
-
f"שירותים עם הכי הרבה דירוגים נמוכים (1-2): {low_level_counts}\n"
|
| 596 |
-
f"שירותים עם הכי הרבה דירוגים גבוהים (4-5): {high_level_counts}\n"
|
| 597 |
-
f"\n"
|
| 598 |
-
f"דוגמאות משובים עם דירוג גבוה (4-5):\n" + "\n".join([f" - {t[:200]}" for t in high_sample_texts[:3]]) + "\n"
|
| 599 |
-
f"\n"
|
| 600 |
-
f"דוגמאות משובים עם דירוג נמוך (1-2):\n" + "\n".join([f" - {t[:200]}" for t in low_sample_texts[:3]]) + "\n"
|
| 601 |
-
)
|
| 602 |
-
except Exception as e:
|
| 603 |
-
aggregates_str = f"סטטיסטיקות: שגיאה בטעינת נתונים - {str(e)}\n"
|
| 604 |
-
|
| 605 |
-
# Special-case: the user asked to split into N topics (e.g., "חלק את המשובים ל5 נושאים")
|
| 606 |
-
import re
|
| 607 |
-
m = re.search(r"(\d+)\s*נוש", query)
|
| 608 |
-
topic_split_pattern = ("חלק" in query and ("נוש" in query or "נושא" in query)) or m or ("נושא" in query and "מרכזי" in query and "תחום" in query)
|
| 609 |
-
if topic_split_pattern:
|
| 610 |
-
try:
|
| 611 |
-
n_topics = int(m.group(1)) if m else 5
|
| 612 |
-
texts = df[settings.text_column].astype(str).tolist()
|
| 613 |
-
embeddings = self.embedder.encode(texts)
|
| 614 |
-
from .topics import kmeans_topics
|
| 615 |
-
res = kmeans_topics(embeddings, num_topics=n_topics)
|
| 616 |
-
|
| 617 |
-
# Build a comprehensive summary of clusters with detailed examples and statistics
|
| 618 |
-
clusters: Dict[int, list] = {}
|
| 619 |
-
cluster_services: Dict[int, Dict[str, int]] = {} # service counts per cluster
|
| 620 |
-
cluster_levels: Dict[int, Dict[int, int]] = {} # level distribution per cluster
|
| 621 |
-
|
| 622 |
-
for label, text, row_idx in zip(res.labels, texts, range(len(texts))):
|
| 623 |
-
cluster_id = int(label)
|
| 624 |
-
clusters.setdefault(cluster_id, []).append(text)
|
| 625 |
-
|
| 626 |
-
# Track service distribution per cluster
|
| 627 |
-
if cluster_id not in cluster_services:
|
| 628 |
-
cluster_services[cluster_id] = {}
|
| 629 |
-
if cluster_id not in cluster_levels:
|
| 630 |
-
cluster_levels[cluster_id] = {}
|
| 631 |
-
|
| 632 |
-
service = df.iloc[row_idx].get(settings.service_column, "Unknown")
|
| 633 |
-
level = df.iloc[row_idx].get(settings.level_column, 0)
|
| 634 |
-
cluster_services[cluster_id][service] = cluster_services[cluster_id].get(service, 0) + 1
|
| 635 |
-
cluster_levels[cluster_id][level] = cluster_levels[cluster_id].get(level, 0) + 1
|
| 636 |
-
|
| 637 |
-
cluster_summaries = []
|
| 638 |
-
for tid in sorted(clusters.keys()):
|
| 639 |
-
items = clusters[tid]
|
| 640 |
-
count = len(items)
|
| 641 |
-
percentage = (count / total_records * 100) if total_records > 0 else 0
|
| 642 |
-
|
| 643 |
-
# Get top services for this cluster
|
| 644 |
-
top_services = sorted(cluster_services[tid].items(), key=lambda x: x[1], reverse=True)[:5]
|
| 645 |
-
services_str = ", ".join([f"{svc} ({cnt})" for svc, cnt in top_services])
|
| 646 |
-
|
| 647 |
-
# Get level distribution
|
| 648 |
-
level_dist = cluster_levels[tid]
|
| 649 |
-
avg_level = sum(level * count for level, count in level_dist.items()) / sum(level_dist.values()) if level_dist else 0
|
| 650 |
-
high_ratings = sum(count for level, count in level_dist.items() if level >= 4)
|
| 651 |
-
low_ratings = sum(count for level, count in level_dist.items() if level <= 2)
|
| 652 |
-
|
| 653 |
-
# Get diverse sample texts (not just first 3)
|
| 654 |
-
sample_size = min(10, len(items))
|
| 655 |
-
step = max(1, len(items) // sample_size)
|
| 656 |
-
sample = [items[i] for i in range(0, len(items), step)][:5]
|
| 657 |
-
|
| 658 |
-
cluster_summaries.append(
|
| 659 |
-
f"נושא {tid + 1}:\n"
|
| 660 |
-
f" - מספר משובים: {count} ({(percentage):.1f}% מכלל המשובים)\n"
|
| 661 |
-
f" - ממוצע דירוג: {avg_level:.2f}\n"
|
| 662 |
-
f" - דירוגים גבוהים (4-5): {high_ratings} ({(high_ratings/count*100):.1f}% מהנושא)\n"
|
| 663 |
-
f" - דירוגים נמוכים (1-2): {low_ratings} ({(low_ratings/count*100):.1f}% מהנושא)\n"
|
| 664 |
-
f" - שירותים עיקריים: {services_str}\n"
|
| 665 |
-
f" - דוגמאות משובים:\n" + "\n".join([f" * {t[:150]}{'...' if len(t) > 150 else ''}" for t in sample])
|
| 666 |
-
)
|
| 667 |
-
|
| 668 |
-
clusters_str = "\n\n".join(cluster_summaries)
|
| 669 |
-
|
| 670 |
-
prompt = (
|
| 671 |
-
f"{instruction}\n\n{lang_instruction}.\n\n"
|
| 672 |
-
f"שאלת המשתמש:\n{query}\n\n"
|
| 673 |
-
f"סטטיסטיקות כלליות של כל הנתונים:\n{aggregates_str}\n\n"
|
| 674 |
-
f"ניתוח נושאים (clusters) - {n_topics} נושאים שזוהו:\n{clusters_str}\n\n"
|
| 675 |
-
f"הוראות מפורטות לניתוח נושאים:\n"
|
| 676 |
-
f"1. עבור כל נושא (נושא 1, נושא 2, וכו'), תן:\n"
|
| 677 |
-
f" א. שם נושא קצר ומשמעותי (2-4 מילים בעברית) שמתאר את התחום/הנושא המרכזי עליו המשתמשים מדברים\n"
|
| 678 |
-
f" - השם צריך להיות ברור ומשמעותי, לא גנרי (לא 'נושא 1' אלא משהו כמו 'בעיות טכניות' או 'שדות לא ערוכים')\n"
|
| 679 |
-
f" - השם צריך לשקף את התוכן המרכזי של המשובים בנושא זה\n"
|
| 680 |
-
f" ב. תיאור מפורט של הנושא (3-5 משפטים) שמסביר:\n"
|
| 681 |
-
f" - מה הנושא הזה? על מה המשתמשים מדברים?\n"
|
| 682 |
-
f" - מה הדפוסים המרכזיים? מה חוזר על עצמו?\n"
|
| 683 |
-
f" - מה המשתמשים אומרים? מה הם מרגישים?\n"
|
| 684 |
-
f" - אילו שירותים קשורים לנושא הזה?\n"
|
| 685 |
-
f" - מה רמת שביעות הרצון בנושא הזה? (תבסס על ממוצע הדירוג והחלוקה בין דירוגים גבוהים לנמוכים)\n"
|
| 686 |
-
f" ג. דוגמאות קונקרטיות מהמשובים (2-3 דוגמאות) שמדגימות את הנושא\n"
|
| 687 |
-
f" - צטט או תאר משובים ספציפיים מהדוגמאות שסופקו\n"
|
| 688 |
-
f" - הדוגמאות צריכות להמחיש את הנושא בצורה ברורה\n"
|
| 689 |
-
f" ד. תובנות עסקיות והמלצות מעשיות (2-3 משפטים)\n"
|
| 690 |
-
f" - מה המשמעות של הנושא הזה? מה ההשפעה על השירות?\n"
|
| 691 |
-
f" - מה צריך לעשות? מה הפעולות המומלצות?\n"
|
| 692 |
-
f"\n"
|
| 693 |
-
f"2. פורמט התשובה:\n"
|
| 694 |
-
f" - התחל עם משפט פתיחה קצר שמתאר את התמונה הכללית: 'ניתן לזהות {n_topics} נושאים מרכזיים במשובים...'\n"
|
| 695 |
-
f" - עבור כל נושא, כתוב פסקה מפורטת (5-7 משפטים) שכוללת את כל הנקודות לעיל (שם, תיאור, דוגמאות, תובנות)\n"
|
| 696 |
-
f" - כל נושא צריך להיות מוצג בבירור, עם שם בולט (למשל: 'נושא 1: [שם הנושא]')\n"
|
| 697 |
-
f" - סיים עם סיכום כללי (2-3 משפטים) שמסכם את הממצאים העיקריים\n"
|
| 698 |
-
f"\n"
|
| 699 |
-
f"3. כללי כתיבה:\n"
|
| 700 |
-
f" - השתמש במספרים המדויקים מהניתוח (כמה משובים בכל נושא, אחוזים, ממוצע דירוג, וכו')\n"
|
| 701 |
-
f" - ציין שירותים ספציפיים מהניתוח\n"
|
| 702 |
-
f" - השתמש בדוגמאות הקונקרטיות מהמשובים - צטט או תאר משובים ספציפיים\n"
|
| 703 |
-
f" - כתוב בעברית מקצועית וקולחת\n"
|
| 704 |
-
f" - תן תשובה מפורטת מאוד ומקיפה (לפחות 700-900 מילים בסך הכל, 8-12 פסקאות)\n"
|
| 705 |
-
f" - כל נושא צריך לקבל טיפול שווה ומפורט\n"
|
| 706 |
-
)
|
| 707 |
-
except Exception as e:
|
| 708 |
-
print(f"Error in topic clustering: {e}", flush=True)
|
| 709 |
-
# fallback to standard prompt if clustering fails
|
| 710 |
-
prompt = (
|
| 711 |
-
f"{instruction}\n\n{lang_instruction}.\n\nUser query:\n{query}\n\nDataset aggregates:\n{aggregates_str}\n\nFeedback examples (truncated):\n{joined}\n\nPlease present a clear, actionable, and human-readable analysis."
|
| 712 |
-
)
|
| 713 |
-
# Send to LLM below
|
| 714 |
-
elif ("נמוך" in query and ("3" in query or "שלוש" in query)) or ("level < 3" in query) or ("ציון" in query and "3" in query and ("נמוך" in query or "מתחת" in query)) or ("נושא" in query and "מרכזי" in query and ("נמוך" in query or "ציון" in query)):
|
| 715 |
-
# User asks about items with level < 3 or main topic of low-rated feedback
|
| 716 |
-
try:
|
| 717 |
-
if df is None or len(df) == 0:
|
| 718 |
-
raise ValueError("No data available")
|
| 719 |
-
low_level_df = df[df[settings.level_column] < 3].copy()
|
| 720 |
-
low_texts = low_level_df[settings.text_column].astype(str).tolist()
|
| 721 |
-
if low_texts:
|
| 722 |
-
embeddings = self.embedder.encode(low_texts)
|
| 723 |
-
from .topics import kmeans_topics
|
| 724 |
-
# Use 3-5 topics depending on data size
|
| 725 |
-
n_topics = min(5, max(3, len(low_texts) // 20))
|
| 726 |
-
res = kmeans_topics(embeddings, num_topics=n_topics)
|
| 727 |
-
|
| 728 |
-
clusters: Dict[int, list] = {}
|
| 729 |
-
cluster_services: Dict[int, Dict[str, int]] = {}
|
| 730 |
-
cluster_indices: Dict[int, list] = {} # Store original indices for service/level lookup
|
| 731 |
-
|
| 732 |
-
for idx, (label, text) in enumerate(zip(res.labels, low_texts)):
|
| 733 |
-
cluster_id = int(label)
|
| 734 |
-
clusters.setdefault(cluster_id, []).append(text)
|
| 735 |
-
cluster_indices.setdefault(cluster_id, []).append(idx)
|
| 736 |
-
|
| 737 |
-
if cluster_id not in cluster_services:
|
| 738 |
-
cluster_services[cluster_id] = {}
|
| 739 |
-
|
| 740 |
-
# Get service for this feedback
|
| 741 |
-
original_idx = low_level_df.index[idx]
|
| 742 |
-
service = df.iloc[original_idx].get(settings.service_column, "Unknown")
|
| 743 |
-
cluster_services[cluster_id][service] = cluster_services[cluster_id].get(service, 0) + 1
|
| 744 |
-
|
| 745 |
-
# Build comprehensive cluster summaries
|
| 746 |
-
cluster_summaries = []
|
| 747 |
-
for tid in sorted(clusters.keys()):
|
| 748 |
-
items = clusters[tid]
|
| 749 |
-
count = len(items)
|
| 750 |
-
percentage = (count / len(low_texts) * 100) if low_texts else 0
|
| 751 |
-
|
| 752 |
-
# Get top services
|
| 753 |
-
top_services = sorted(cluster_services[tid].items(), key=lambda x: x[1], reverse=True)[:5]
|
| 754 |
-
services_str = ", ".join([f"{svc} ({cnt})" for svc, cnt in top_services])
|
| 755 |
-
|
| 756 |
-
# Get diverse sample texts
|
| 757 |
-
sample_size = min(8, len(items))
|
| 758 |
-
step = max(1, len(items) // sample_size)
|
| 759 |
-
sample = [items[i] for i in range(0, len(items), step)][:5]
|
| 760 |
-
|
| 761 |
-
cluster_summaries.append(
|
| 762 |
-
f"נושא {tid + 1} (משובים עם דירוג נמוך):\n"
|
| 763 |
-
f" - מספר משובים: {count} ({(percentage):.1f}% מכלל המשובים עם דירוג נמוך)\n"
|
| 764 |
-
f" - שירותים עיקריים: {services_str}\n"
|
| 765 |
-
f" - דוגמאות משובים:\n" + "\n".join([f" * {t[:150]}{'...' if len(t) > 150 else ''}" for t in sample])
|
| 766 |
-
)
|
| 767 |
-
|
| 768 |
-
clusters_str = "\n\n".join(cluster_summaries)
|
| 769 |
-
|
| 770 |
-
# Identify the largest/most dominant cluster
|
| 771 |
-
largest_cluster = max(clusters.items(), key=lambda x: len(x[1]))
|
| 772 |
-
largest_tid = largest_cluster[0]
|
| 773 |
-
largest_items = largest_cluster[1]
|
| 774 |
-
largest_services = sorted(cluster_services[largest_tid].items(), key=lambda x: x[1], reverse=True)[:3]
|
| 775 |
-
|
| 776 |
-
else:
|
| 777 |
-
clusters_str = "(לא נמצאו משובים עם דירוג נמוך)"
|
| 778 |
-
largest_cluster = None
|
| 779 |
-
largest_tid = None
|
| 780 |
-
largest_items = []
|
| 781 |
-
largest_services = []
|
| 782 |
-
|
| 783 |
-
prompt = (
|
| 784 |
-
f"{instruction}\n\n{lang_instruction}.\n\n"
|
| 785 |
-
f"שאלת המשתמש:\n{query}\n\n"
|
| 786 |
-
f"סטטיסטיקות כלליות:\n{aggregates_str}\n\n"
|
| 787 |
-
f"ניתוח נושאים במשובים עם דירוג נמוך (ציון < 3):\n{clusters_str}\n\n"
|
| 788 |
-
f"הוראות מפורטות לניתוח:\n"
|
| 789 |
-
f"1. זהה את הנושא המרכזי/הדומיננטי ביותר במשובים עם דירוג נמוך:\n"
|
| 790 |
-
f" - איזה נושא מופיע הכי הרבה? מה הנושא הגדול ביותר?\n"
|
| 791 |
-
f" - מה הנושא שמדאיג ביותר? מה הנושא שצריך לטפל בו בעדיפות?\n"
|
| 792 |
-
f"\n"
|
| 793 |
-
f"2. תן שם ברור ומשמעותי לנושא המרכזי (2-4 מילים בעברית):\n"
|
| 794 |
-
f" - השם צריך לשקף את הבעיה המרכזית או הנושא עליו המשתמשים מתלוננים\n"
|
| 795 |
-
f" - דוגמאות: 'בעיות טכניות במערכת', 'שדות לא ערוכים', 'חוסר בהירות בהנחיות'\n"
|
| 796 |
-
f"\n"
|
| 797 |
-
f"3. תאר את הנושא המרכזי בפירוט (5-7 משפטים):\n"
|
| 798 |
-
f" - מה הנושא הזה? על מה המשתמשים מתלוננים?\n"
|
| 799 |
-
f" - מה הבעיות הספציפיות? מה לא עובד?\n"
|
| 800 |
-
f" - מה הדפוסים המרכזיים? מה חוזר על עצמו?\n"
|
| 801 |
-
f" - אילו שירותים מושפעים ביותר? (ציין שמות שירותים ספציפיים ומספרים)\n"
|
| 802 |
-
f" - כמה משובים מתייחסים לנושא הזה? מה האחוז מכלל המשובים עם דירוג נמוך?\n"
|
| 803 |
-
f"\n"
|
| 804 |
-
f"4. כלול דוגמאות קונקרטיות מהמשובים (3-5 דוגמאות):\n"
|
| 805 |
-
f" - צטט או תאר משובים ספציפיים שמדגימים את הנושא המרכזי\n"
|
| 806 |
-
f" - הדוגמאות צריכות להמחיש את הבעיה בצורה ברורה\n"
|
| 807 |
-
f" - השתמש בדוגמאות מהנושא הגדול ביותר (נושא {largest_tid + 1 if largest_tid is not None else 'הגדול ביותר'})\n"
|
| 808 |
-
f"\n"
|
| 809 |
-
f"5. תובנות עסקיות והמלצות מעשיות (3-4 משפטים):\n"
|
| 810 |
-
f" - מה המשמעות של הנושא הזה? מה ההשפעה על השירות?\n"
|
| 811 |
-
f" - מה הסיכונים אם לא מטפלים בנושא הזה?\n"
|
| 812 |
-
f" - מה הפעולות המומלצות לתיקון? מה צריך לעשות בעדיפות גבוהה?\n"
|
| 813 |
-
f"\n"
|
| 814 |
-
f"6. פורמט התשובה:\n"
|
| 815 |
-
f" - התחל עם משפט פתיחה: 'הנושא המרכזי במשובים עם דירוג נמוך הוא...'\n"
|
| 816 |
-
f" - המשך עם שם הנושא (בולט, למשל: 'נוש�� מרכזי: [שם הנושא]')\n"
|
| 817 |
-
f" - המשך עם תיאור מפורט של הנושא (5-7 משפטים)\n"
|
| 818 |
-
f" - המשך עם דוגמאות קונקרטיות (3-5 דוגמאות)\n"
|
| 819 |
-
f" - סיים עם תובנות עסקיות והמלצות מעשיות\n"
|
| 820 |
-
f"\n"
|
| 821 |
-
f"7. כללי כתיבה:\n"
|
| 822 |
-
f" - השתמש במספרים המדויקים מהניתוח (כמה משובים, אחוזים, שירותים)\n"
|
| 823 |
-
f" - ציין שירותים ספציפיים מהניתוח\n"
|
| 824 |
-
f" - השתמש בדוגמאות הקונקרטיות מהמשובים - צטט או תאר משובים ספציפיים\n"
|
| 825 |
-
f" - כתוב בעברית מקצועית וקולחת\n"
|
| 826 |
-
f" - תן תשובה מפורטת מאוד ומקיפה (לפחות 600-800 מילים, 7-10 פסקאות)\n"
|
| 827 |
-
f" - התמקד בנושא המרכזי/הדומיננטי ביותר, לא בכל הנושאים\n"
|
| 828 |
-
)
|
| 829 |
-
except Exception as e:
|
| 830 |
-
print(f"Error in low-level topic analysis: {e}", flush=True)
|
| 831 |
-
prompt = (
|
| 832 |
-
f"{instruction}\n\n{lang_instruction}.\n\nUser query:\n{query}\n\nDataset aggregates:\n{aggregates_str}\n\nFeedback examples (truncated):\n{joined}\n\nPlease present a clear, actionable, and human-readable analysis."
|
| 833 |
-
)
|
| 834 |
-
elif "שירותים" in query or "שירות" in query:
|
| 835 |
-
# User asked about services with issues vs services working well
|
| 836 |
-
try:
|
| 837 |
-
svc_stats = df.groupby(settings.service_column)[settings.level_column].agg(['mean','count']).sort_values('mean')
|
| 838 |
-
problematic = svc_stats[svc_stats['mean'] < 3].head(10).to_dict('index')
|
| 839 |
-
good = svc_stats[svc_stats['mean'] >= 4].head(10).to_dict('index')
|
| 840 |
-
svc_str = f"Problematic (mean<3): {problematic}\nWorking well (mean>=4): {good}\n"
|
| 841 |
-
prompt = (
|
| 842 |
-
f"{instruction}\n\n{lang_instruction}.\n\n"
|
| 843 |
-
f"שאלת המשתמש:\n{query}\n\n"
|
| 844 |
-
f"סטטיסטיקות וניתוח הנתונים:\n{aggregates_str}\n\n"
|
| 845 |
-
f"סטטיסטיקות ברמת שירות:\n{svc_str}\n\n"
|
| 846 |
-
f"דוגמאות משובים רלוונטיים:\n{joined}\n\n"
|
| 847 |
-
f"הוראות:\n"
|
| 848 |
-
f"- תאר אילו שירותים יש להם בעיות חמורות (ציין שמות שירותים ספציפיים, ממוצע דירוג, מספר משובים)\n"
|
| 849 |
-
f"- תאר אילו שירותים עובדים טוב (ציין שמות שירותים ספציפיים, ממוצע דירוג, מספר משובים)\n"
|
| 850 |
-
f"- השווה בין שירותים עם בעיות לשירותים שעובדים טוב - מה ההבדל?\n"
|
| 851 |
-
f"- תן המלצות מעשיות לתיקון ולניטור בעדיפות\n"
|
| 852 |
-
f"- השתמש בדוגמאות מהמשובים שסופקו\n"
|
| 853 |
-
f"- תן תשובה מפורטת (3-5 פסקאות) עם מספרים מדויקים\n"
|
| 854 |
-
)
|
| 855 |
-
except Exception:
|
| 856 |
-
prompt = (
|
| 857 |
-
f"{instruction}\n\n{lang_instruction}.\n\nUser query:\n{query}\n\nDataset aggregates:\n{aggregates_str}\n\nFeedback examples (truncated):\n{joined}\n\nPlease present a clear, actionable, and human-readable analysis."
|
| 858 |
-
)
|
| 859 |
-
else:
|
| 860 |
-
prompt = (
|
| 861 |
-
f"{instruction}\n\n{lang_instruction}.\n\n"
|
| 862 |
-
f"שאלת המשתמש:\n{query}\n\n"
|
| 863 |
-
f"סטטיסטיקות מקיפות של כל הנתונים:\n{aggregates_str}\n\n"
|
| 864 |
-
f"סיכום מקיף של כל הנתונים (כולל סיכומים לפי שירותים, דירוגים, ודוגמאות רלוונטיות):\n{joined}\n\n"
|
| 865 |
-
f"הוראות חשובות - אגרגציה חכמה ומבנה ברור (קריטי!):\n"
|
| 866 |
-
f"1. קודם כל, עשה אגרגציה חכמה של כל הנתונים:\n"
|
| 867 |
-
f" - קרא ונתח את כל הסטטיסטיקות והסיכומים שסופקו\n"
|
| 868 |
-
f" - זהה את הדפוסים והנושאים המרכזיים שחוזרים על עצמם\n"
|
| 869 |
-
f" - הבן את התמונה הגדולה - מה המגמות הכלליות? מה הנושאים הדומיננטיים?\n"
|
| 870 |
-
f" - השווה בין קבוצות שונות (מרוצים vs לא מרוצים, שירותים שונים)\n"
|
| 871 |
-
f" - זהה קשרים והקשרים בין נושאים שונים\n"
|
| 872 |
-
f"\n"
|
| 873 |
-
f"2. רק אחרי שעשית אגרגציה חכמה - כתוב תשובה מסכמת ברורה ומסודרת:\n"
|
| 874 |
-
f" - תשובה שמסכמת את הממצאים העיקריים מהאגרגציה\n"
|
| 875 |
-
f" - תשובה שמראה הבנה עמוקה של הדפוסים והנושאים המרכזיים\n"
|
| 876 |
-
f" - תשובה שמבוססת על ניתוח מעמיק, לא רק חיבור של משובים בודדים\n"
|
| 877 |
-
f" - תשובה ברורה ומסודרת - לא גיבוב של מילים\n"
|
| 878 |
-
f" - אל תכתוב: 'משתמש אחד אמר X, משתמש שני אמר Y'\n"
|
| 879 |
-
f" - במקום זה, כתוב: 'נראה שיש דפוס ברור של X בקרב Y% מהמשתמשים'\n"
|
| 880 |
-
f"\n"
|
| 881 |
-
f"3. מבנה התשובה - חובה:\n"
|
| 882 |
-
f" - פתיחה - סיכום מנהלים (פסקה אחת, 3-4 משפטים): סיכום כללי של התמונה הגדולה עם מספרים\n"
|
| 883 |
-
f" - ניתוח מפורט לפי נושאים/דעות (3-5 פסקאות, כל פסקה 4-6 משפטים): כל פסקה בנושא/דעה מרכזי אחד\n"
|
| 884 |
-
f" - השוואות וניתוח מעמיק (2-3 פסקאות): השוואות בין קבוצות ושירותים\n"
|
| 885 |
-
f" - תובנות עסקיות והמלצות (2-3 פסקאות): משמעות, השפעה, המלצות\n"
|
| 886 |
-
f" - סיכום (פסקה אחת, 2-3 משפטים): מסקנות עיקריות ונקודות מפתח\n"
|
| 887 |
-
f"\n"
|
| 888 |
-
f"4. פרטים נוספים:\n"
|
| 889 |
-
f" - אתה רואה את כל הנתונים - תן תשובה קוהרנטית שמראה הבנה רחבה של כל הנתונים\n"
|
| 890 |
-
f" - השתמש בסטטיסטיקות הכלליות כדי להבין את התמונה הגדולה\n"
|
| 891 |
-
f" - השתמש בסיכומים לפי שירותים ודירוגים כדי לזהות דפוסים\n"
|
| 892 |
-
f" - כששואלים על רגשות/תחושות/דעות:\n"
|
| 893 |
-
f" * התחל עם סיכום כללי קצר (2-3 משפטים) שמתאר את התמונה הגדולה\n"
|
| 894 |
-
f" * המשך עם ניתוח של המשתמשים המרוצים (דירוג 4-5) - מה הם אומרים? מה הם אוהבים? כלול דוגמאות קונקרטיות\n"
|
| 895 |
-
f" * המשך עם ניתוח של המשתמשים הלא מרוצים (דירוג 1-2) - מה הם אומרים? מה הבעיות? כלול בעיות ספציפיות עם דוגמאות\n"
|
| 896 |
-
f" * סיים עם סיכום והמלצות\n"
|
| 897 |
-
f" - השווה בין קבוצות משתמשים (מרוצים vs לא מרוצים) ושירותים שונים - מה המשמעות?\n"
|
| 898 |
-
f" - ציין שירותים ספציפיים ומספרים מדויקים מהנתונים\n"
|
| 899 |
-
f" - תן תשובה מפורטת מאוד (7-10 פסקאות, לפחות 600-800 מילים) המנתחת את הנתונים לעומק\n"
|
| 900 |
-
f" - תשובה שכוללת כמה דעות/נושאים מרכזיים (לא רק נושא אחד)\n"
|
| 901 |
-
f" - כלול תובנות עסקיות מעמיקות: מה המשמעות של הממצאים? מה ההשפעה על השירות?\n"
|
| 902 |
-
f" - כלול המלצות מעשיות וקונקרטיות - מה צריך לעשות?\n"
|
| 903 |
-
f" - כתוב בעברית מקצועית וקולחת - כאילו אתה אנליסט שמסביר את הממצאים למנהל\n"
|
| 904 |
-
f" - לפני שליחת התשובה, בדוק פעמיים: האם עשית אגרגציה חכמה? האם התשובה מסכמת את הממצאים העיקריים? האם היא מראה הבנה עמוקה של הדפוסים? האם התשובה ברורה ומסודרת עם מבנה ברור?\n"
|
| 905 |
-
)
|
| 906 |
-
|
| 907 |
-
# Try Gemini first
|
| 908 |
-
if settings.gemini_api_key and genai is not None:
|
| 909 |
-
try:
|
| 910 |
-
genai.configure(api_key=settings.gemini_api_key)
|
| 911 |
-
model = genai.GenerativeModel("gemini-1.5-flash")
|
| 912 |
-
# Use generation config for longer, more detailed and creative responses
|
| 913 |
-
# Higher temperature for more creative, comprehensive analysis that covers both sides
|
| 914 |
-
generation_config = {
|
| 915 |
-
"temperature": 0.9, # Higher temperature for more creative and comprehensive responses
|
| 916 |
-
"top_p": 0.95,
|
| 917 |
-
"top_k": 40,
|
| 918 |
-
"max_output_tokens": 5000, # Increased for longer, more comprehensive responses
|
| 919 |
-
}
|
| 920 |
-
resp = model.generate_content(prompt, generation_config=generation_config)
|
| 921 |
-
text = getattr(resp, "text", None)
|
| 922 |
-
if isinstance(text, str) and text.strip():
|
| 923 |
-
# Validate and fix response if needed
|
| 924 |
-
validated = self._validate_and_fix_response(text.strip(), query, aggregates_str)
|
| 925 |
-
return validated
|
| 926 |
-
except Exception as e:
|
| 927 |
-
print(f"Gemini error: {e}", flush=True)
|
| 928 |
-
pass
|
| 929 |
-
|
| 930 |
-
# Fallback to OpenAI if available
|
| 931 |
-
if settings.openai_api_key and OpenAI is not None:
|
| 932 |
-
client = OpenAI(api_key=settings.openai_api_key)
|
| 933 |
-
try:
|
| 934 |
-
resp = client.chat.completions.create(
|
| 935 |
-
model="gpt-4o-mini",
|
| 936 |
-
messages=[{"role": "user", "content": prompt}],
|
| 937 |
-
temperature=0.8, # Higher temperature for more creative and comprehensive responses
|
| 938 |
-
top_p=0.95, # Higher top_p for more diverse and creative sampling
|
| 939 |
-
max_tokens=4000, # Increased for longer, more comprehensive responses
|
| 940 |
-
)
|
| 941 |
-
response_text = resp.choices[0].message.content
|
| 942 |
-
if response_text:
|
| 943 |
-
# Validate and fix response if needed
|
| 944 |
-
validated = self._validate_and_fix_response(response_text, query, aggregates_str)
|
| 945 |
-
return validated
|
| 946 |
-
except Exception as e:
|
| 947 |
-
print(f"OpenAI error: {e}", flush=True)
|
| 948 |
-
pass
|
| 949 |
-
|
| 950 |
-
# Fallback: short extractive-ish synthesis
|
| 951 |
-
# Compose a short paragraph from top contexts
|
| 952 |
-
extract = " ".join(contexts[:5])
|
| 953 |
-
return extract
|
| 954 |
-
|
| 955 |
-
def _detect_level_filter(self, query: str) -> Optional[tuple]:
|
| 956 |
-
"""Detect if query asks for specific level range (e.g., level < 3, דירוג נמוך)."""
|
| 957 |
-
query_lower = query.lower()
|
| 958 |
-
|
| 959 |
-
# Check for low level queries (level < 3)
|
| 960 |
-
low_level_patterns = [
|
| 961 |
-
"דירוג נמוך", "ציון נמוך", "level < 3", "level<3", "דירוגים נמוכים",
|
| 962 |
-
"ציונים נמוכים", "מתחת ל-3", "מתחת ל3", "פחות מ-3", "פחות מ3",
|
| 963 |
-
"דירוג 1", "דירוג 2", "ציון 1", "ציון 2", "לא מרוצים"
|
| 964 |
-
]
|
| 965 |
-
|
| 966 |
-
# Check for high level queries (level >= 4)
|
| 967 |
-
high_level_patterns = [
|
| 968 |
-
"דירוג גבוה", "ציון גבוה", "level >= 4", "level>=4", "דירוגים גבוהים",
|
| 969 |
-
"ציונים גבוהים", "מעל 4", "מעל ל-4", "יותר מ-4", "יותר מ4",
|
| 970 |
-
"דירוג 4", "דירוג 5", "ציון 4", "ציון 5", "מרוצים"
|
| 971 |
-
]
|
| 972 |
-
|
| 973 |
-
if any(pattern in query_lower for pattern in low_level_patterns):
|
| 974 |
-
return (1, 2) # Filter for level 1-2
|
| 975 |
-
elif any(pattern in query_lower for pattern in high_level_patterns):
|
| 976 |
-
return (4, 5) # Filter for level 4-5
|
| 977 |
-
|
| 978 |
-
return None
|
| 979 |
-
|
| 980 |
-
def query(self, query: str, top_k: int = 5) -> RetrievalOutput:
|
| 981 |
-
# Detect if query asks for specific level range
|
| 982 |
-
level_filter = self._detect_level_filter(query)
|
| 983 |
-
|
| 984 |
-
# Use a very large retrieval to get comprehensive understanding of the data
|
| 985 |
-
# This ensures the model sees a broad representation of all feedback
|
| 986 |
-
adjusted_k = max(top_k, 100) # Use 100 records for comprehensive RAG-based analysis
|
| 987 |
-
results = self.retrieve(query, top_k=adjusted_k, level_filter=level_filter)
|
| 988 |
-
contexts = [r.row[settings.text_column] for r in results]
|
| 989 |
-
# Use comprehensive synthesis that analyzes the full dataset, not just retrieved items
|
| 990 |
-
summary = self.synthesize(query, results, contexts, max_contexts=adjusted_k, level_filter=level_filter)
|
| 991 |
-
return RetrievalOutput(query=query, results=results, summary=summary)
|
| 992 |
-
|
| 993 |
-
def answer(self, query: str, top_k: int = 5) -> RetrievalOutput:
|
| 994 |
-
"""Higher-level answer pipeline that handles counting/keyword questions explicitly.
|
| 995 |
-
|
| 996 |
-
For queries detected as counts (e.g., thanks, complaints, 'כמה'), compute counts over
|
| 997 |
-
the full dataset and return a short summary plus example contexts from retrieval.
|
| 998 |
-
Falls back to `query` for freeform QA.
|
| 999 |
-
"""
|
| 1000 |
-
# Detect level filter for all query types
|
| 1001 |
-
level_filter = self._detect_level_filter(query)
|
| 1002 |
-
|
| 1003 |
-
qtype, target = detect_query_type(query)
|
| 1004 |
-
if qtype in ("count_thanks", "count_complaint", "count_keyword"):
|
| 1005 |
-
# Use full dataset for accurate counts (with level filter if specified)
|
| 1006 |
-
df = load_feedback()
|
| 1007 |
-
if level_filter:
|
| 1008 |
-
min_level, max_level = level_filter
|
| 1009 |
-
df = df[(df[settings.level_column] >= min_level) & (df[settings.level_column] <= max_level)].copy()
|
| 1010 |
-
resolved = resolve_count_from_type(df, qtype, target, text_column=settings.text_column)
|
| 1011 |
-
count = int(resolved.get("count", 0))
|
| 1012 |
-
# Friendly, language-aware summary
|
| 1013 |
-
is_hebrew = any('\u0590' <= ch <= '\u05FF' for ch in query)
|
| 1014 |
-
if resolved.get("label") == "thanks":
|
| 1015 |
-
summary = (f"{count} משובים מכילים ביטויי תודה." if is_hebrew
|
| 1016 |
-
else f"{count} feedback entries contain thanks.")
|
| 1017 |
-
elif resolved.get("label") == "complaint_not_working":
|
| 1018 |
-
summary = (f"{count} משובים מתארים בעיות/אלמנטים שלא עובדים." if is_hebrew
|
| 1019 |
-
else f"{count} feedback entries report elements not working.")
|
| 1020 |
-
else:
|
| 1021 |
-
label = resolved.get("label", "")
|
| 1022 |
-
if label.startswith("keyword:"):
|
| 1023 |
-
phrase = label.split("keyword:", 1)[1]
|
| 1024 |
-
summary = (f"{count} משובים מכילים את הביטוי '{phrase}'." if is_hebrew
|
| 1025 |
-
else f"{count} feedback entries contain the phrase '{phrase}'.")
|
| 1026 |
-
else:
|
| 1027 |
-
summary = (f"{count} משובים נמצאו." if is_hebrew else f"{count} feedback entries found.")
|
| 1028 |
-
|
| 1029 |
-
# Provide examples from semantic retrieval for context (with level filter)
|
| 1030 |
-
results = self.retrieve(query, top_k=top_k, level_filter=level_filter)
|
| 1031 |
-
return RetrievalOutput(query=query, results=results, summary=summary)
|
| 1032 |
-
|
| 1033 |
-
# Fallback to semantic QA (which already handles level filter)
|
| 1034 |
-
return self.query(query, top_k=top_k)
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
def main() -> None:
|
| 1038 |
-
parser = argparse.ArgumentParser()
|
| 1039 |
-
parser.add_argument("--ingest", action="store_true", help="Ingest CSV and build index")
|
| 1040 |
-
parser.add_argument("--query", type=str, default=None, help="Run a semantic query")
|
| 1041 |
-
parser.add_argument("--top_k", type=int, default=5, help="Top K results")
|
| 1042 |
-
args = parser.parse_args()
|
| 1043 |
-
|
| 1044 |
-
svc = RAGService()
|
| 1045 |
-
if args.ingest:
|
| 1046 |
-
svc.ingest()
|
| 1047 |
-
print("Ingest completed.")
|
| 1048 |
-
if args.query:
|
| 1049 |
-
out = svc.query(args.query, top_k=args.top_k)
|
| 1050 |
-
print("Summary:", out.summary)
|
| 1051 |
-
for r in out.results:
|
| 1052 |
-
print(f"[{r.score:.3f}] {r.row.get('ServiceName','')} | {r.row.get('Text','')[:200]}")
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
if __name__ == "__main__":
|
| 1056 |
-
main()
|
| 1057 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/sentiment.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""Sentiment analysis helpers using Hugging Face transformers.
|
| 4 |
-
|
| 5 |
-
This module provides a cached sentiment pipeline to analyze lists of texts.
|
| 6 |
-
The model used (`cardiffnlp/twitter-xlm-roberta-base-sentiment`) is multilingual and
|
| 7 |
-
works reasonably well for short feedback messages. The pipeline is cached to avoid
|
| 8 |
-
reloading the model for each call.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
from functools import lru_cache
|
| 12 |
-
from typing import List, Dict
|
| 13 |
-
|
| 14 |
-
from transformers import pipeline # type: ignore
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
@lru_cache(maxsize=1)
|
| 18 |
-
def get_sentiment_pipeline():
|
| 19 |
-
"""Load sentiment analysis pipeline with fallback options."""
|
| 20 |
-
import os
|
| 21 |
-
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
|
| 22 |
-
|
| 23 |
-
try:
|
| 24 |
-
# Try DistilBERT which works well for multilingual text (supports Hebrew)
|
| 25 |
-
return pipeline(
|
| 26 |
-
"sentiment-analysis",
|
| 27 |
-
model="nlptown/bert-base-multilingual-uncased-sentiment",
|
| 28 |
-
use_fast=False
|
| 29 |
-
)
|
| 30 |
-
except Exception as e1:
|
| 31 |
-
try:
|
| 32 |
-
# Fallback to simpler model
|
| 33 |
-
return pipeline("text-classification", model="gpt2", use_fast=False)
|
| 34 |
-
except Exception as e2:
|
| 35 |
-
# Final fallback: return a mock pipeline for development
|
| 36 |
-
import warnings
|
| 37 |
-
warnings.warn(f"Could not load sentiment models: {e1}, {e2}. Using mock pipeline.")
|
| 38 |
-
class MockPipeline:
|
| 39 |
-
def __call__(self, texts, **kwargs):
|
| 40 |
-
return [{"label": "NEUTRAL", "score": 0.5} for _ in texts]
|
| 41 |
-
return MockPipeline()
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def analyze_sentiments(texts: List[str]) -> List[Dict[str, float | str]]:
|
| 45 |
-
clf = get_sentiment_pipeline()
|
| 46 |
-
outputs = clf(texts, truncation=True)
|
| 47 |
-
results: List[Dict[str, float | str]] = []
|
| 48 |
-
for out in outputs:
|
| 49 |
-
label = out.get("label", "")
|
| 50 |
-
score = float(out.get("score", 0.0))
|
| 51 |
-
results.append({"label": label, "score": score})
|
| 52 |
-
return results
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/sql_service.py
CHANGED
|
@@ -17,7 +17,6 @@ from dataclasses import dataclass
|
|
| 17 |
from typing import List, Dict, Any, Optional
|
| 18 |
import pandas as pd
|
| 19 |
import sqlite3
|
| 20 |
-
from io import StringIO
|
| 21 |
|
| 22 |
from .config import settings
|
| 23 |
from .data_loader import load_feedback
|
|
@@ -35,7 +34,14 @@ except Exception:
|
|
| 35 |
|
| 36 |
@dataclass
|
| 37 |
class SQLQueryResult:
|
| 38 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
query: str
|
| 40 |
result: pd.DataFrame
|
| 41 |
error: Optional[str] = None
|
|
@@ -43,7 +49,16 @@ class SQLQueryResult:
|
|
| 43 |
|
| 44 |
@dataclass
|
| 45 |
class AnalysisResult:
|
| 46 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
user_query: str
|
| 48 |
sql_queries: List[str]
|
| 49 |
query_results: List[SQLQueryResult]
|
|
@@ -52,14 +67,42 @@ class AnalysisResult:
|
|
| 52 |
|
| 53 |
|
| 54 |
class SQLFeedbackService:
|
| 55 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def __init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
self.df: Optional[pd.DataFrame] = None
|
| 59 |
self._load_data()
|
| 60 |
|
| 61 |
-
def _load_data(self):
|
| 62 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
try:
|
| 64 |
self.df = load_feedback()
|
| 65 |
print(f"Loaded {len(self.df)} feedback records", flush=True)
|
|
@@ -68,16 +111,45 @@ class SQLFeedbackService:
|
|
| 68 |
self.df = None
|
| 69 |
|
| 70 |
def _get_schema_info(self) -> str:
|
| 71 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
if self.df is None:
|
| 73 |
return "No data available"
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
schema_info = f"""
|
| 76 |
טבלת Feedback מכילה את השדות הבאים:
|
| 77 |
- ID: מזהה ייחודי של כל משוב (מספר שלם)
|
| 78 |
- ServiceName: שם השירות הדיגיטלי (טקסט)
|
| 79 |
- Level: הציון שהמשתמש נתן לשירות (מספר שלם מ-1 עד 5, כאשר 1=גרוע, 5=מעולה)
|
| 80 |
-
- Text: הטקסט החופשי שהמשתמש הזין כחלק מהפידבק (טקסט)
|
| 81 |
|
| 82 |
סטטיסטיקות כלליות:
|
| 83 |
- סך הכל משובים: {len(self.df)}
|
|
@@ -133,16 +205,18 @@ class SQLFeedbackService:
|
|
| 133 |
|
| 134 |
המשימה שלך: צור 1 עד 5 שאילתות SQL שיעזרו לענות על השאלה. כל שאילתה צריכה להיות שימושית וממוקדת.
|
| 135 |
|
| 136 |
-
כללים חשובים:
|
| 137 |
-
1. השתמש בשמות השדות המדויקים: ID, ServiceName, Level, Text
|
| 138 |
-
2. Level הוא מספר שלם מ-1 עד 5 (1=גרוע, 5=מעולה)
|
| 139 |
-
3. ServiceName הוא טקסט
|
| 140 |
-
4. Text הוא הטקסט החופשי של המשוב
|
| 141 |
-
5.
|
| 142 |
-
6.
|
| 143 |
-
7.
|
| 144 |
-
8. אם השאלה מתייחסת ל
|
| 145 |
-
9. אם השאלה מתייחסת ל
|
|
|
|
|
|
|
| 146 |
|
| 147 |
פורמט התשובה - JSON בלבד:
|
| 148 |
{{
|
|
@@ -159,7 +233,7 @@ class SQLFeedbackService:
|
|
| 159 |
if settings.gemini_api_key and genai is not None:
|
| 160 |
try:
|
| 161 |
genai.configure(api_key=settings.gemini_api_key)
|
| 162 |
-
model = genai.GenerativeModel("gemini-
|
| 163 |
response = model.generate_content(prompt)
|
| 164 |
text = getattr(response, "text", None)
|
| 165 |
if text:
|
|
@@ -186,10 +260,28 @@ class SQLFeedbackService:
|
|
| 186 |
return []
|
| 187 |
|
| 188 |
def _parse_sql_queries(self, text: str) -> List[str]:
|
| 189 |
-
"""
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
try:
|
| 192 |
-
# Remove markdown code blocks if present
|
| 193 |
text = re.sub(r'```json\s*', '', text)
|
| 194 |
text = re.sub(r'```\s*', '', text)
|
| 195 |
text = text.strip()
|
|
@@ -199,34 +291,62 @@ class SQLFeedbackService:
|
|
| 199 |
if isinstance(data, dict) and "queries" in data:
|
| 200 |
queries = data["queries"]
|
| 201 |
if isinstance(queries, list):
|
|
|
|
| 202 |
return [q for q in queries if isinstance(q, str) and q.strip()]
|
| 203 |
except Exception:
|
|
|
|
| 204 |
pass
|
| 205 |
|
| 206 |
-
# Fallback: try to extract SQL queries directly
|
|
|
|
| 207 |
sql_pattern = r'SELECT\s+.*?(?=\n\n|\nSELECT|$)'
|
| 208 |
matches = re.findall(sql_pattern, text, re.IGNORECASE | re.DOTALL)
|
| 209 |
if matches:
|
| 210 |
return [m.strip() for m in matches]
|
| 211 |
|
|
|
|
|
|
|
| 212 |
return []
|
| 213 |
|
| 214 |
def _execute_sql_queries(self, sql_queries: List[str]) -> List[SQLQueryResult]:
|
| 215 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
if self.df is None:
|
| 217 |
return []
|
| 218 |
|
| 219 |
results = []
|
| 220 |
|
| 221 |
# Create in-memory SQLite database
|
|
|
|
| 222 |
conn = sqlite3.connect(':memory:')
|
| 223 |
try:
|
| 224 |
-
# Write DataFrame to SQLite
|
|
|
|
| 225 |
self.df.to_sql('feedback', conn, index=False, if_exists='replace')
|
| 226 |
|
|
|
|
|
|
|
| 227 |
for query in sql_queries:
|
| 228 |
try:
|
| 229 |
-
# Execute query
|
| 230 |
result_df = pd.read_sql_query(query, conn)
|
| 231 |
results.append(SQLQueryResult(
|
| 232 |
query=query,
|
|
@@ -234,23 +354,116 @@ class SQLFeedbackService:
|
|
| 234 |
error=None
|
| 235 |
))
|
| 236 |
except Exception as e:
|
|
|
|
| 237 |
results.append(SQLQueryResult(
|
| 238 |
query=query,
|
| 239 |
-
result=pd.DataFrame(),
|
| 240 |
error=str(e)
|
| 241 |
))
|
| 242 |
finally:
|
|
|
|
| 243 |
conn.close()
|
| 244 |
|
| 245 |
return results
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
def _synthesize_answer(self, query: str, sql_queries: List[str],
|
| 248 |
-
query_results: List[SQLQueryResult]) -> str:
|
| 249 |
"""
|
| 250 |
Use LLM to synthesize a comprehensive answer from:
|
| 251 |
- User query
|
| 252 |
- SQL queries that were executed
|
| 253 |
- Results of those queries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
"""
|
| 255 |
# Format query results for the prompt
|
| 256 |
results_text = ""
|
|
@@ -300,7 +513,7 @@ class SQLFeedbackService:
|
|
| 300 |
if settings.gemini_api_key and genai is not None:
|
| 301 |
try:
|
| 302 |
genai.configure(api_key=settings.gemini_api_key)
|
| 303 |
-
model = genai.GenerativeModel("gemini-
|
| 304 |
generation_config = {
|
| 305 |
"temperature": 0.8,
|
| 306 |
"top_p": 0.95,
|
|
@@ -310,7 +523,49 @@ class SQLFeedbackService:
|
|
| 310 |
response = model.generate_content(prompt, generation_config=generation_config)
|
| 311 |
text = getattr(response, "text", None)
|
| 312 |
if text and text.strip():
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
except Exception as e:
|
| 315 |
print(f"Gemini error in synthesis: {e}", flush=True)
|
| 316 |
|
|
@@ -325,8 +580,47 @@ class SQLFeedbackService:
|
|
| 325 |
max_tokens=3000,
|
| 326 |
)
|
| 327 |
text = response.choices[0].message.content
|
| 328 |
-
if text:
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
except Exception as e:
|
| 331 |
print(f"OpenAI error in synthesis: {e}", flush=True)
|
| 332 |
|
|
@@ -336,11 +630,32 @@ class SQLFeedbackService:
|
|
| 336 |
def _generate_visualizations(self, query_results: List[SQLQueryResult]) -> Optional[List[Dict[str, Any]]]:
|
| 337 |
"""
|
| 338 |
Generate visualization specifications for query results.
|
| 339 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
"""
|
| 341 |
visualizations = []
|
| 342 |
|
| 343 |
for i, qr in enumerate(query_results, 1):
|
|
|
|
| 344 |
if qr.error or len(qr.result) == 0:
|
| 345 |
continue
|
| 346 |
|
|
|
|
| 17 |
from typing import List, Dict, Any, Optional
|
| 18 |
import pandas as pd
|
| 19 |
import sqlite3
|
|
|
|
| 20 |
|
| 21 |
from .config import settings
|
| 22 |
from .data_loader import load_feedback
|
|
|
|
| 34 |
|
| 35 |
@dataclass
|
| 36 |
class SQLQueryResult:
|
| 37 |
+
"""
|
| 38 |
+
Result of a single SQL query execution.
|
| 39 |
+
|
| 40 |
+
Attributes:
|
| 41 |
+
query: The SQL query that was executed
|
| 42 |
+
result: DataFrame containing the query results (empty if error occurred)
|
| 43 |
+
error: Error message if query failed, None if successful
|
| 44 |
+
"""
|
| 45 |
query: str
|
| 46 |
result: pd.DataFrame
|
| 47 |
error: Optional[str] = None
|
|
|
|
| 49 |
|
| 50 |
@dataclass
|
| 51 |
class AnalysisResult:
|
| 52 |
+
"""
|
| 53 |
+
Complete analysis result from processing a user query.
|
| 54 |
+
|
| 55 |
+
Attributes:
|
| 56 |
+
user_query: The original question asked by the user
|
| 57 |
+
sql_queries: List of SQL queries that were generated and executed
|
| 58 |
+
query_results: Results from executing each SQL query
|
| 59 |
+
summary: Final synthesized answer in natural language
|
| 60 |
+
visualizations: Optional list of visualization specifications for frontend rendering
|
| 61 |
+
"""
|
| 62 |
user_query: str
|
| 63 |
sql_queries: List[str]
|
| 64 |
query_results: List[SQLQueryResult]
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
class SQLFeedbackService:
|
| 70 |
+
"""
|
| 71 |
+
Main service for SQL-based feedback analysis.
|
| 72 |
+
|
| 73 |
+
This service implements a 4-stage pipeline:
|
| 74 |
+
1. Generate SQL queries from natural language questions (using LLM)
|
| 75 |
+
2. Execute SQL queries on feedback data (using SQLite in-memory)
|
| 76 |
+
3. Synthesize comprehensive answers from query results (using LLM)
|
| 77 |
+
4. Generate visualization specifications for results
|
| 78 |
+
|
| 79 |
+
The service also includes automatic quality evaluation and improvement
|
| 80 |
+
of generated answers to ensure high-quality responses.
|
| 81 |
+
"""
|
| 82 |
|
| 83 |
def __init__(self):
|
| 84 |
+
"""
|
| 85 |
+
Initialize the SQL feedback service.
|
| 86 |
+
|
| 87 |
+
Loads feedback data from CSV into memory. If loading fails,
|
| 88 |
+
the service will still initialize but will raise errors when
|
| 89 |
+
trying to process queries.
|
| 90 |
+
"""
|
| 91 |
self.df: Optional[pd.DataFrame] = None
|
| 92 |
self._load_data()
|
| 93 |
|
| 94 |
+
def _load_data(self) -> None:
|
| 95 |
+
"""
|
| 96 |
+
Load feedback data from CSV file into memory.
|
| 97 |
+
|
| 98 |
+
The data is loaded once at initialization and kept in memory
|
| 99 |
+
for fast query execution. If the CSV file is missing or invalid,
|
| 100 |
+
an error is logged but the service continues to initialize.
|
| 101 |
+
|
| 102 |
+
Raises:
|
| 103 |
+
FileNotFoundError: If CSV file doesn't exist (handled internally)
|
| 104 |
+
ValueError: If CSV is missing required columns (handled internally)
|
| 105 |
+
"""
|
| 106 |
try:
|
| 107 |
self.df = load_feedback()
|
| 108 |
print(f"Loaded {len(self.df)} feedback records", flush=True)
|
|
|
|
| 111 |
self.df = None
|
| 112 |
|
| 113 |
def _get_schema_info(self) -> str:
|
| 114 |
+
"""
|
| 115 |
+
Generate schema information string for the feedback table.
|
| 116 |
+
|
| 117 |
+
This information is provided to the LLM when generating SQL queries
|
| 118 |
+
to help it understand the data structure and available columns.
|
| 119 |
+
|
| 120 |
+
Returns:
|
| 121 |
+
A formatted string describing the table schema, column types,
|
| 122 |
+
and basic statistics. Used in prompts for SQL query generation.
|
| 123 |
+
|
| 124 |
+
Note:
|
| 125 |
+
If CreationDate column exists, the function attempts to parse
|
| 126 |
+
dates and include the date range in the schema info.
|
| 127 |
+
"""
|
| 128 |
if self.df is None:
|
| 129 |
return "No data available"
|
| 130 |
|
| 131 |
+
# Check if CreationDate exists and get date range
|
| 132 |
+
# This helps the LLM understand temporal queries
|
| 133 |
+
date_info = ""
|
| 134 |
+
if 'CreationDate' in self.df.columns:
|
| 135 |
+
try:
|
| 136 |
+
# Try to parse dates to provide useful date range information
|
| 137 |
+
df_dates = pd.to_datetime(self.df['CreationDate'], errors='coerce')
|
| 138 |
+
valid_dates = df_dates.dropna()
|
| 139 |
+
if len(valid_dates) > 0:
|
| 140 |
+
min_date = valid_dates.min()
|
| 141 |
+
max_date = valid_dates.max()
|
| 142 |
+
date_info = f"\n- CreationDate: תאריך וזמן הזנת הפידבק (תאריך/זמן). טווח תאריכים: {min_date.strftime('%Y-%m-%d')} עד {max_date.strftime('%Y-%m-%d')}"
|
| 143 |
+
except Exception:
|
| 144 |
+
# If date parsing fails, still include the column info
|
| 145 |
+
date_info = "\n- CreationDate: תאריך וזמן הזנת הפידבק (תאריך/זמן)"
|
| 146 |
+
|
| 147 |
schema_info = f"""
|
| 148 |
טבלת Feedback מכילה את השדות הבאים:
|
| 149 |
- ID: מזהה ייחודי של כל משוב (מספר שלם)
|
| 150 |
- ServiceName: שם השירות הדיגיטלי (טקסט)
|
| 151 |
- Level: הציון שהמשתמש נתן לשירות (מספר שלם מ-1 עד 5, כאשר 1=גרוע, 5=מעולה)
|
| 152 |
+
- Text: הטקסט החופשי שהמשתמש הזין כחלק מהפידבק (טקסט){date_info}
|
| 153 |
|
| 154 |
סטטיסטיקות כלליות:
|
| 155 |
- סך הכל משובים: {len(self.df)}
|
|
|
|
| 205 |
|
| 206 |
המשימה שלך: צור 1 עד 5 שאילתות SQL שיעזרו לענות על השאלה. כל שאילתה צריכה להיות שימושית וממוקדת.
|
| 207 |
|
| 208 |
+
כללים חשובים:
|
| 209 |
+
1. השתמש בשמות השדות המדויקים: ID, ServiceName, Level, Text, CreationDate
|
| 210 |
+
2. Level הוא מספר שלם מ-1 עד 5 (1=גרוע, 5=מעולה)
|
| 211 |
+
3. ServiceName הוא טקסט
|
| 212 |
+
4. Text הוא הטקסט החופשי של המשוב
|
| 213 |
+
5. CreationDate הוא תאריך וזמן (תאריך/זמן) - ניתן להשתמש בו לשאילתות על תאריכים, תקופות זמן, מגמות לאורך זמן
|
| 214 |
+
6. כל שאילתה צריכה להיות תקפה SQLite
|
| 215 |
+
7. השתמש בפונקציות SQL סטנדרטיות: COUNT, AVG, GROUP BY, WHERE, LIKE, DATE(), strftime(), etc.
|
| 216 |
+
8. אם השאלה מתייחסת לטקסט, השתמש ב-LIKE או INSTR לחיפוש
|
| 217 |
+
9. אם השאלה מתייחסת לדירוגים, השתמש ב-Level עם תנאים מתאימים
|
| 218 |
+
10. אם השאלה מתייחסת לשירותים, השתמש ב-ServiceName
|
| 219 |
+
11. אם השאלה מתייחסת לתאריכים, תקופות זמן, או מגמות לאורך זמן - השתמש ב-CreationDate עם פונקציות תאריך כמו DATE(), strftime('%Y-%m', CreationDate), etc.
|
| 220 |
|
| 221 |
פורמט התשובה - JSON בלבד:
|
| 222 |
{{
|
|
|
|
| 233 |
if settings.gemini_api_key and genai is not None:
|
| 234 |
try:
|
| 235 |
genai.configure(api_key=settings.gemini_api_key)
|
| 236 |
+
model = genai.GenerativeModel("gemini-2.0-flash")
|
| 237 |
response = model.generate_content(prompt)
|
| 238 |
text = getattr(response, "text", None)
|
| 239 |
if text:
|
|
|
|
| 260 |
return []
|
| 261 |
|
| 262 |
def _parse_sql_queries(self, text: str) -> List[str]:
|
| 263 |
+
"""
|
| 264 |
+
Parse SQL queries from LLM response text.
|
| 265 |
+
|
| 266 |
+
The LLM is instructed to return JSON, but sometimes it may include
|
| 267 |
+
markdown formatting or return SQL directly. This function handles
|
| 268 |
+
multiple formats for robustness.
|
| 269 |
+
|
| 270 |
+
Args:
|
| 271 |
+
text: Raw text response from LLM (may be JSON, markdown, or plain SQL)
|
| 272 |
+
|
| 273 |
+
Returns:
|
| 274 |
+
List of SQL query strings, cleaned and validated.
|
| 275 |
+
Empty list if parsing fails completely.
|
| 276 |
+
|
| 277 |
+
Strategy:
|
| 278 |
+
1. First, try to parse as JSON (expected format)
|
| 279 |
+
2. If that fails, try to extract SQL queries using regex
|
| 280 |
+
3. Return empty list if both methods fail
|
| 281 |
+
"""
|
| 282 |
+
# Try to extract JSON first (expected format)
|
| 283 |
try:
|
| 284 |
+
# Remove markdown code blocks if present (LLM sometimes adds these)
|
| 285 |
text = re.sub(r'```json\s*', '', text)
|
| 286 |
text = re.sub(r'```\s*', '', text)
|
| 287 |
text = text.strip()
|
|
|
|
| 291 |
if isinstance(data, dict) and "queries" in data:
|
| 292 |
queries = data["queries"]
|
| 293 |
if isinstance(queries, list):
|
| 294 |
+
# Filter out empty or invalid queries
|
| 295 |
return [q for q in queries if isinstance(q, str) and q.strip()]
|
| 296 |
except Exception:
|
| 297 |
+
# JSON parsing failed, try fallback method
|
| 298 |
pass
|
| 299 |
|
| 300 |
+
# Fallback: try to extract SQL queries directly using regex
|
| 301 |
+
# This handles cases where LLM returns SQL without JSON wrapper
|
| 302 |
sql_pattern = r'SELECT\s+.*?(?=\n\n|\nSELECT|$)'
|
| 303 |
matches = re.findall(sql_pattern, text, re.IGNORECASE | re.DOTALL)
|
| 304 |
if matches:
|
| 305 |
return [m.strip() for m in matches]
|
| 306 |
|
| 307 |
+
# If all parsing methods fail, return empty list
|
| 308 |
+
# The calling function will handle this gracefully
|
| 309 |
return []
|
| 310 |
|
| 311 |
def _execute_sql_queries(self, sql_queries: List[str]) -> List[SQLQueryResult]:
|
| 312 |
+
"""
|
| 313 |
+
Execute SQL queries on the feedback DataFrame using SQLite in-memory database.
|
| 314 |
+
|
| 315 |
+
This method creates a temporary SQLite database in memory, loads the
|
| 316 |
+
feedback DataFrame into it, and executes each SQL query. Errors are
|
| 317 |
+
caught per-query so one failing query doesn't stop the others.
|
| 318 |
+
|
| 319 |
+
Args:
|
| 320 |
+
sql_queries: List of SQL query strings to execute
|
| 321 |
+
|
| 322 |
+
Returns:
|
| 323 |
+
List of SQLQueryResult objects, one per query. Each result contains
|
| 324 |
+
either the query results (DataFrame) or an error message.
|
| 325 |
+
|
| 326 |
+
Implementation details:
|
| 327 |
+
- Uses SQLite in-memory database (':memory:') for fast execution
|
| 328 |
+
- DataFrame is loaded into table named 'feedback'
|
| 329 |
+
- Each query is executed independently (errors don't cascade)
|
| 330 |
+
- Connection is always closed in finally block for safety
|
| 331 |
+
"""
|
| 332 |
if self.df is None:
|
| 333 |
return []
|
| 334 |
|
| 335 |
results = []
|
| 336 |
|
| 337 |
# Create in-memory SQLite database
|
| 338 |
+
# Using in-memory is fast and doesn't require disk I/O
|
| 339 |
conn = sqlite3.connect(':memory:')
|
| 340 |
try:
|
| 341 |
+
# Write DataFrame to SQLite table named 'feedback'
|
| 342 |
+
# if_exists='replace' ensures clean state on each execution
|
| 343 |
self.df.to_sql('feedback', conn, index=False, if_exists='replace')
|
| 344 |
|
| 345 |
+
# Execute each query independently
|
| 346 |
+
# This allows partial success - if one query fails, others can still succeed
|
| 347 |
for query in sql_queries:
|
| 348 |
try:
|
| 349 |
+
# Execute query and get results as DataFrame
|
| 350 |
result_df = pd.read_sql_query(query, conn)
|
| 351 |
results.append(SQLQueryResult(
|
| 352 |
query=query,
|
|
|
|
| 354 |
error=None
|
| 355 |
))
|
| 356 |
except Exception as e:
|
| 357 |
+
# Store error but continue with other queries
|
| 358 |
results.append(SQLQueryResult(
|
| 359 |
query=query,
|
| 360 |
+
result=pd.DataFrame(), # Empty DataFrame on error
|
| 361 |
error=str(e)
|
| 362 |
))
|
| 363 |
finally:
|
| 364 |
+
# Always close connection, even if errors occur
|
| 365 |
conn.close()
|
| 366 |
|
| 367 |
return results
|
| 368 |
|
| 369 |
+
def _evaluate_answer_quality(self, query: str, answer: str) -> tuple[float, str]:
|
| 370 |
+
"""
|
| 371 |
+
Evaluate the quality of an answer using an LLM reviewer.
|
| 372 |
+
|
| 373 |
+
Returns:
|
| 374 |
+
tuple: (score 0-100, feedback/reasoning)
|
| 375 |
+
"""
|
| 376 |
+
evaluation_prompt = f"""אתה בודק איכות תשובות. הערך את התשובה הבאה:
|
| 377 |
+
|
| 378 |
+
שאלת המשתמש: {query}
|
| 379 |
+
|
| 380 |
+
התשובה שניתנה:
|
| 381 |
+
{answer}
|
| 382 |
+
|
| 383 |
+
הערך את התשובה לפי הקריטריונים הבאים (0-100):
|
| 384 |
+
1. האם התשובה עונה ישירות על השאלה? (0-30 נקודות)
|
| 385 |
+
2. האם התשובה מבוססת על הנתונים? (0-25 נקודות)
|
| 386 |
+
3. האם התשובה מפורטת ומקיפה? (0-20 נקודות)
|
| 387 |
+
4. האם התשובה ברורה ומובנת? (0-15 נקודות)
|
| 388 |
+
5. האם התשובה כוללת תובנות עסקיות? (0-10 נקודות)
|
| 389 |
+
|
| 390 |
+
תן ציון כולל (0-100) והסבר קצר (2-3 משפטים) למה הציון הזה.
|
| 391 |
+
|
| 392 |
+
פורמט התשובה - JSON בלבד:
|
| 393 |
+
{{
|
| 394 |
+
"score": <מספר 0-100>,
|
| 395 |
+
"reasoning": "<הסבר קצר>"
|
| 396 |
+
}}
|
| 397 |
+
|
| 398 |
+
תן רק את ה-JSON, ללא טקסט נוסף."""
|
| 399 |
+
|
| 400 |
+
# Try Gemini first
|
| 401 |
+
if settings.gemini_api_key and genai is not None:
|
| 402 |
+
try:
|
| 403 |
+
genai.configure(api_key=settings.gemini_api_key)
|
| 404 |
+
model = genai.GenerativeModel("gemini-2.0-flash")
|
| 405 |
+
response = model.generate_content(evaluation_prompt)
|
| 406 |
+
text = getattr(response, "text", None)
|
| 407 |
+
if text:
|
| 408 |
+
# Try to parse JSON from response
|
| 409 |
+
# Extract JSON (may be wrapped in markdown or other text)
|
| 410 |
+
json_match = re.search(r'\{[^}]+\}', text, re.DOTALL)
|
| 411 |
+
if json_match:
|
| 412 |
+
try:
|
| 413 |
+
data = json.loads(json_match.group())
|
| 414 |
+
score = float(data.get('score', 0))
|
| 415 |
+
reasoning = data.get('reasoning', '')
|
| 416 |
+
return score, reasoning
|
| 417 |
+
except (json.JSONDecodeError, ValueError, KeyError):
|
| 418 |
+
pass
|
| 419 |
+
except Exception as e:
|
| 420 |
+
print(f"Gemini error in evaluation: {e}", flush=True)
|
| 421 |
+
|
| 422 |
+
# Fallback to OpenAI
|
| 423 |
+
if settings.openai_api_key and OpenAI is not None:
|
| 424 |
+
try:
|
| 425 |
+
client = OpenAI(api_key=settings.openai_api_key)
|
| 426 |
+
response = client.chat.completions.create(
|
| 427 |
+
model="gpt-4o-mini",
|
| 428 |
+
messages=[{"role": "user", "content": evaluation_prompt}],
|
| 429 |
+
temperature=0.3,
|
| 430 |
+
)
|
| 431 |
+
text = response.choices[0].message.content
|
| 432 |
+
if text:
|
| 433 |
+
# Try to parse JSON from response
|
| 434 |
+
json_match = re.search(r'\{[^}]+\}', text, re.DOTALL)
|
| 435 |
+
if json_match:
|
| 436 |
+
try:
|
| 437 |
+
data = json.loads(json_match.group())
|
| 438 |
+
score = float(data.get('score', 0))
|
| 439 |
+
reasoning = data.get('reasoning', '')
|
| 440 |
+
return score, reasoning
|
| 441 |
+
except (json.JSONDecodeError, ValueError, KeyError):
|
| 442 |
+
pass
|
| 443 |
+
except Exception as e:
|
| 444 |
+
print(f"OpenAI error in evaluation: {e}", flush=True)
|
| 445 |
+
|
| 446 |
+
# Default: return high score if evaluation fails (don't block)
|
| 447 |
+
return 85.0, "לא ניתן להעריך - מחזיר ציון ברירת מחדל"
|
| 448 |
+
|
| 449 |
def _synthesize_answer(self, query: str, sql_queries: List[str],
|
| 450 |
+
query_results: List[SQLQueryResult], max_retries: int = 2) -> str:
|
| 451 |
"""
|
| 452 |
Use LLM to synthesize a comprehensive answer from:
|
| 453 |
- User query
|
| 454 |
- SQL queries that were executed
|
| 455 |
- Results of those queries
|
| 456 |
+
|
| 457 |
+
Includes quality evaluation and automatic improvement if score < 80.
|
| 458 |
+
|
| 459 |
+
Args:
|
| 460 |
+
query: The user's original question
|
| 461 |
+
sql_queries: List of SQL queries that were executed
|
| 462 |
+
query_results: Results from executing those queries
|
| 463 |
+
max_retries: Maximum number of retry attempts if quality is low
|
| 464 |
+
|
| 465 |
+
Returns:
|
| 466 |
+
Final synthesized answer
|
| 467 |
"""
|
| 468 |
# Format query results for the prompt
|
| 469 |
results_text = ""
|
|
|
|
| 513 |
if settings.gemini_api_key and genai is not None:
|
| 514 |
try:
|
| 515 |
genai.configure(api_key=settings.gemini_api_key)
|
| 516 |
+
model = genai.GenerativeModel("gemini-2.0-flash")
|
| 517 |
generation_config = {
|
| 518 |
"temperature": 0.8,
|
| 519 |
"top_p": 0.95,
|
|
|
|
| 523 |
response = model.generate_content(prompt, generation_config=generation_config)
|
| 524 |
text = getattr(response, "text", None)
|
| 525 |
if text and text.strip():
|
| 526 |
+
answer = text.strip()
|
| 527 |
+
|
| 528 |
+
# Evaluate answer quality
|
| 529 |
+
score, reasoning = self._evaluate_answer_quality(query, answer)
|
| 530 |
+
print(f"Answer quality score: {score:.1f}/100 - {reasoning}", flush=True)
|
| 531 |
+
|
| 532 |
+
# If score is below 80, try to improve
|
| 533 |
+
if score < 80 and max_retries > 0:
|
| 534 |
+
print(f"Answer quality below threshold (80). Attempting improvement...", flush=True)
|
| 535 |
+
improvement_prompt = f"""התשובה הקודמת קיבלה ציון {score}/100. הסיבה: {reasoning}
|
| 536 |
+
|
| 537 |
+
שאלת המשתמש: {query}
|
| 538 |
+
|
| 539 |
+
התשובה הקודמת:
|
| 540 |
+
{answer}
|
| 541 |
+
|
| 542 |
+
תוצאות השאילתות:
|
| 543 |
+
{results_text}
|
| 544 |
+
|
| 545 |
+
כתוב תשובה משופרת שמתמקדת יותר בשאלה המקורית, מבוססת יותר על הנתונים, ומפורטת יותר.
|
| 546 |
+
התשובה חייבת לענות ישירות על השאלה: {query}
|
| 547 |
+
|
| 548 |
+
דרישות:
|
| 549 |
+
1. תשובה מפורטת ומקיפה (5-7 פסקאות, 400-600 מילים)
|
| 550 |
+
2. תשובה שמתמקדת ישירות בשאלה שנשאלה
|
| 551 |
+
3. כלול מספרים מדויקים מהתוצאות
|
| 552 |
+
4. הסבר את המשמעות העסקית של הממצאים
|
| 553 |
+
5. כלול המלצות מעשיות לשיפור
|
| 554 |
+
6. כתוב בעברית מקצועית וקולחת"""
|
| 555 |
+
|
| 556 |
+
try:
|
| 557 |
+
response = model.generate_content(improvement_prompt, generation_config=generation_config)
|
| 558 |
+
improved_text = getattr(response, "text", None)
|
| 559 |
+
if improved_text and improved_text.strip():
|
| 560 |
+
# Re-evaluate improved answer
|
| 561 |
+
improved_score, improved_reasoning = self._evaluate_answer_quality(query, improved_text.strip())
|
| 562 |
+
print(f"Improved answer quality score: {improved_score:.1f}/100 - {improved_reasoning}", flush=True)
|
| 563 |
+
if improved_score > score:
|
| 564 |
+
return improved_text.strip()
|
| 565 |
+
except Exception as e:
|
| 566 |
+
print(f"Error improving answer: {e}", flush=True)
|
| 567 |
+
|
| 568 |
+
return answer
|
| 569 |
except Exception as e:
|
| 570 |
print(f"Gemini error in synthesis: {e}", flush=True)
|
| 571 |
|
|
|
|
| 580 |
max_tokens=3000,
|
| 581 |
)
|
| 582 |
text = response.choices[0].message.content
|
| 583 |
+
if text and text.strip():
|
| 584 |
+
answer = text.strip()
|
| 585 |
+
|
| 586 |
+
# Evaluate answer quality
|
| 587 |
+
score, reasoning = self._evaluate_answer_quality(query, answer)
|
| 588 |
+
print(f"Answer quality score: {score:.1f}/100 - {reasoning}", flush=True)
|
| 589 |
+
|
| 590 |
+
# If score is below 80, try to improve
|
| 591 |
+
if score < 80 and max_retries > 0:
|
| 592 |
+
print(f"Answer quality below threshold (80). Attempting improvement...", flush=True)
|
| 593 |
+
improvement_prompt = f"""התשובה הקודמת קיבלה ציון {score}/100. הסיבה: {reasoning}
|
| 594 |
+
|
| 595 |
+
שאלת המשתמש: {query}
|
| 596 |
+
|
| 597 |
+
התשובה הקודמת:
|
| 598 |
+
{answer}
|
| 599 |
+
|
| 600 |
+
תוצאות השאילתות:
|
| 601 |
+
{results_text}
|
| 602 |
+
|
| 603 |
+
כתוב תשובה משופרת שמתמקדת יותר בשאלה המקורית, מבוססת יותר על הנתונים, ומפורטת יותר.
|
| 604 |
+
התשובה חייבת לענות ישירות על השאלה: {query}"""
|
| 605 |
+
|
| 606 |
+
try:
|
| 607 |
+
response = client.chat.completions.create(
|
| 608 |
+
model="gpt-4o-mini",
|
| 609 |
+
messages=[{"role": "user", "content": improvement_prompt}],
|
| 610 |
+
temperature=0.8,
|
| 611 |
+
max_tokens=3000,
|
| 612 |
+
)
|
| 613 |
+
improved_text = response.choices[0].message.content
|
| 614 |
+
if improved_text and improved_text.strip():
|
| 615 |
+
# Re-evaluate improved answer
|
| 616 |
+
improved_score, improved_reasoning = self._evaluate_answer_quality(query, improved_text.strip())
|
| 617 |
+
print(f"Improved answer quality score: {improved_score:.1f}/100 - {improved_reasoning}", flush=True)
|
| 618 |
+
if improved_score > score:
|
| 619 |
+
return improved_text.strip()
|
| 620 |
+
except Exception as e:
|
| 621 |
+
print(f"Error improving answer: {e}", flush=True)
|
| 622 |
+
|
| 623 |
+
return answer
|
| 624 |
except Exception as e:
|
| 625 |
print(f"OpenAI error in synthesis: {e}", flush=True)
|
| 626 |
|
|
|
|
| 630 |
def _generate_visualizations(self, query_results: List[SQLQueryResult]) -> Optional[List[Dict[str, Any]]]:
|
| 631 |
"""
|
| 632 |
Generate visualization specifications for query results.
|
| 633 |
+
|
| 634 |
+
This function analyzes the structure of query results and automatically
|
| 635 |
+
determines the best visualization type (bar, line, scatter, histogram).
|
| 636 |
+
The specifications are returned as dictionaries that the frontend can
|
| 637 |
+
use with Chart.js to render the visualizations.
|
| 638 |
+
|
| 639 |
+
Args:
|
| 640 |
+
query_results: List of SQL query results to visualize
|
| 641 |
+
|
| 642 |
+
Returns:
|
| 643 |
+
List of visualization specification dictionaries, or None if no
|
| 644 |
+
visualizations can be generated. Each dict contains:
|
| 645 |
+
- type: Chart type (bar, line, scatter, histogram)
|
| 646 |
+
- title: Display title
|
| 647 |
+
- x, y: Column names for axes
|
| 648 |
+
- data: The actual data to visualize
|
| 649 |
+
|
| 650 |
+
Visualization selection logic:
|
| 651 |
+
- 2 columns: bar chart (categorical + numeric) or line chart (time series)
|
| 652 |
+
- 1 column: histogram (if numeric)
|
| 653 |
+
- 3+ columns: bar chart (first categorical + first numeric)
|
| 654 |
"""
|
| 655 |
visualizations = []
|
| 656 |
|
| 657 |
for i, qr in enumerate(query_results, 1):
|
| 658 |
+
# Skip queries that failed or returned no results
|
| 659 |
if qr.error or len(qr.result) == 0:
|
| 660 |
continue
|
| 661 |
|
app/static/app.js
CHANGED
|
@@ -60,9 +60,6 @@ async function sendQuery() {
|
|
| 60 |
return;
|
| 61 |
}
|
| 62 |
|
| 63 |
-
// Check which approach to use
|
| 64 |
-
const approach = document.querySelector('input[name="approach"]:checked')?.value || 'sql';
|
| 65 |
-
|
| 66 |
// Show loading state
|
| 67 |
const sendBtn = document.getElementById('send');
|
| 68 |
const originalText = sendBtn.textContent;
|
|
@@ -70,10 +67,9 @@ async function sendQuery() {
|
|
| 70 |
sendBtn.textContent = '⏳ שולח...';
|
| 71 |
|
| 72 |
try {
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
: { query: q, top_k: 100 };
|
| 77 |
|
| 78 |
const r = await fetch(endpoint, {
|
| 79 |
method: 'POST',
|
|
@@ -104,12 +100,9 @@ async function sendQuery() {
|
|
| 104 |
const sourcesDiv = document.getElementById('resp-sources');
|
| 105 |
|
| 106 |
if (showSources) {
|
| 107 |
-
if (
|
| 108 |
sourcesDiv.style.display = 'block';
|
| 109 |
sourcesDiv.innerHTML = formatSQLResults(j);
|
| 110 |
-
} else if (approach === 'rag' && j.results && j.results.length > 0) {
|
| 111 |
-
sourcesDiv.style.display = 'block';
|
| 112 |
-
sourcesDiv.innerHTML = formatSources(j.results);
|
| 113 |
} else {
|
| 114 |
if (sourcesDiv) sourcesDiv.style.display = 'none';
|
| 115 |
}
|
|
@@ -117,8 +110,8 @@ async function sendQuery() {
|
|
| 117 |
if (sourcesDiv) sourcesDiv.style.display = 'none';
|
| 118 |
}
|
| 119 |
|
| 120 |
-
// Show visualizations if
|
| 121 |
-
if (
|
| 122 |
showVisualizations(j.visualizations);
|
| 123 |
}
|
| 124 |
|
|
@@ -197,25 +190,56 @@ function showVisualizations(visualizations) {
|
|
| 197 |
if (!vizContainer) {
|
| 198 |
vizContainer = document.createElement('div');
|
| 199 |
vizContainer.id = 'resp-visualizations';
|
| 200 |
-
vizContainer.
|
|
|
|
| 201 |
document.getElementById('last-response').appendChild(vizContainer);
|
| 202 |
}
|
| 203 |
|
| 204 |
// Clear previous visualizations
|
| 205 |
-
vizContainer.innerHTML = '<h4
|
| 206 |
vizContainer.style.display = 'block';
|
| 207 |
|
| 208 |
visualizations.forEach((viz, idx) => {
|
| 209 |
const vizDiv = document.createElement('div');
|
| 210 |
-
vizDiv.style.marginBottom = '
|
| 211 |
-
vizDiv.style.padding = '
|
| 212 |
-
vizDiv.style.background = '#f8f9fa';
|
| 213 |
-
vizDiv.style.borderRadius = '
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
-
vizDiv.innerHTML = `<h5 style="margin-top: 0; color: #1976d2;">${escapeHtml(viz.title)}</h5>`;
|
| 216 |
const canvasDiv = document.createElement('div');
|
| 217 |
canvasDiv.style.position = 'relative';
|
| 218 |
-
canvasDiv.style.height = '
|
|
|
|
|
|
|
|
|
|
| 219 |
canvasDiv.innerHTML = `<canvas id="chart-${idx}"></canvas>`;
|
| 220 |
vizDiv.appendChild(canvasDiv);
|
| 221 |
|
|
@@ -241,8 +265,20 @@ function getChartConfig(viz, idx) {
|
|
| 241 |
const xLabel = viz.x_label || viz.x || 'X';
|
| 242 |
const yLabel = viz.y_label || viz.y || 'Y';
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
switch (viz.type) {
|
| 245 |
case 'bar':
|
|
|
|
| 246 |
return {
|
| 247 |
type: 'bar',
|
| 248 |
data: {
|
|
@@ -250,9 +286,11 @@ function getChartConfig(viz, idx) {
|
|
| 250 |
datasets: [{
|
| 251 |
label: yLabel,
|
| 252 |
data: viz.data.map(d => d[viz.y]),
|
| 253 |
-
backgroundColor:
|
| 254 |
-
borderColor:
|
| 255 |
-
borderWidth:
|
|
|
|
|
|
|
| 256 |
}]
|
| 257 |
},
|
| 258 |
options: {
|
|
@@ -261,10 +299,24 @@ function getChartConfig(viz, idx) {
|
|
| 261 |
plugins: {
|
| 262 |
legend: {
|
| 263 |
display: true,
|
| 264 |
-
position: 'top'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
},
|
| 266 |
title: {
|
| 267 |
display: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
}
|
| 269 |
},
|
| 270 |
scales: {
|
|
@@ -272,13 +324,33 @@ function getChartConfig(viz, idx) {
|
|
| 272 |
beginAtZero: true,
|
| 273 |
title: {
|
| 274 |
display: true,
|
| 275 |
-
text: yLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
}
|
| 277 |
},
|
| 278 |
x: {
|
| 279 |
title: {
|
| 280 |
display: true,
|
| 281 |
-
text: xLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
}
|
| 283 |
}
|
| 284 |
}
|
|
@@ -294,10 +366,18 @@ function getChartConfig(viz, idx) {
|
|
| 294 |
label: yLabel,
|
| 295 |
data: viz.data.map(d => d[viz.y]),
|
| 296 |
borderColor: 'rgba(25, 118, 210, 1)',
|
| 297 |
-
backgroundColor: 'rgba(25, 118, 210, 0.
|
| 298 |
-
borderWidth:
|
| 299 |
fill: true,
|
| 300 |
-
tension: 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
}]
|
| 302 |
},
|
| 303 |
options: {
|
|
@@ -306,7 +386,21 @@ function getChartConfig(viz, idx) {
|
|
| 306 |
plugins: {
|
| 307 |
legend: {
|
| 308 |
display: true,
|
| 309 |
-
position: 'top'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
}
|
| 311 |
},
|
| 312 |
scales: {
|
|
@@ -314,13 +408,33 @@ function getChartConfig(viz, idx) {
|
|
| 314 |
beginAtZero: true,
|
| 315 |
title: {
|
| 316 |
display: true,
|
| 317 |
-
text: yLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
}
|
| 319 |
},
|
| 320 |
x: {
|
| 321 |
title: {
|
| 322 |
display: true,
|
| 323 |
-
text: xLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
}
|
| 325 |
}
|
| 326 |
}
|
|
@@ -337,9 +451,14 @@ function getChartConfig(viz, idx) {
|
|
| 337 |
x: d[viz.x],
|
| 338 |
y: d[viz.y]
|
| 339 |
})),
|
| 340 |
-
backgroundColor: 'rgba(25, 118, 210, 0.
|
| 341 |
borderColor: 'rgba(25, 118, 210, 1)',
|
| 342 |
-
borderWidth:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
}]
|
| 344 |
},
|
| 345 |
options: {
|
|
@@ -348,7 +467,21 @@ function getChartConfig(viz, idx) {
|
|
| 348 |
plugins: {
|
| 349 |
legend: {
|
| 350 |
display: true,
|
| 351 |
-
position: 'top'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
}
|
| 353 |
},
|
| 354 |
scales: {
|
|
@@ -356,14 +489,34 @@ function getChartConfig(viz, idx) {
|
|
| 356 |
beginAtZero: true,
|
| 357 |
title: {
|
| 358 |
display: true,
|
| 359 |
-
text: yLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
}
|
| 361 |
},
|
| 362 |
x: {
|
| 363 |
beginAtZero: true,
|
| 364 |
title: {
|
| 365 |
display: true,
|
| 366 |
-
text: xLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
}
|
| 368 |
}
|
| 369 |
}
|
|
@@ -392,6 +545,11 @@ function getChartConfig(viz, idx) {
|
|
| 392 |
bins[binIndex]++;
|
| 393 |
});
|
| 394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
return {
|
| 396 |
type: 'bar',
|
| 397 |
data: {
|
|
@@ -399,9 +557,11 @@ function getChartConfig(viz, idx) {
|
|
| 399 |
datasets: [{
|
| 400 |
label: xLabel,
|
| 401 |
data: bins,
|
| 402 |
-
backgroundColor:
|
| 403 |
-
borderColor:
|
| 404 |
-
borderWidth:
|
|
|
|
|
|
|
| 405 |
}]
|
| 406 |
},
|
| 407 |
options: {
|
|
@@ -410,7 +570,21 @@ function getChartConfig(viz, idx) {
|
|
| 410 |
plugins: {
|
| 411 |
legend: {
|
| 412 |
display: true,
|
| 413 |
-
position: 'top'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
}
|
| 415 |
},
|
| 416 |
scales: {
|
|
@@ -418,13 +592,33 @@ function getChartConfig(viz, idx) {
|
|
| 418 |
beginAtZero: true,
|
| 419 |
title: {
|
| 420 |
display: true,
|
| 421 |
-
text: 'תדירות'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
}
|
| 423 |
},
|
| 424 |
x: {
|
| 425 |
title: {
|
| 426 |
display: true,
|
| 427 |
-
text: xLabel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
}
|
| 429 |
}
|
| 430 |
}
|
|
@@ -450,21 +644,7 @@ function formatResponse(text) {
|
|
| 450 |
return formatted;
|
| 451 |
}
|
| 452 |
|
| 453 |
-
|
| 454 |
-
if (!results || results.length === 0) return '';
|
| 455 |
-
let html = '<h4 style="margin-top: 20px; color: #1976d2;">דוגמאות מהנתונים:</h4>';
|
| 456 |
-
results.slice(0, 5).forEach((r, idx) => {
|
| 457 |
-
html += `
|
| 458 |
-
<div style="margin: 12px 0; padding: 12px; background: #f5f5f5; border-radius: 8px; border-right: 4px solid #1976d2;">
|
| 459 |
-
<div style="font-size: 12px; color: #666; margin-bottom: 4px;">
|
| 460 |
-
דוגמה ${idx + 1} | שירות: ${escapeHtml(r.service || 'N/A')} | ציון: ${r.level || 'N/A'} | דמיון: ${(r.score * 100).toFixed(1)}%
|
| 461 |
-
</div>
|
| 462 |
-
<div style="color: #333;">${escapeHtml(r.text || '').substring(0, 300)}${r.text && r.text.length > 300 ? '...' : ''}</div>
|
| 463 |
-
</div>
|
| 464 |
-
`;
|
| 465 |
-
});
|
| 466 |
-
return html;
|
| 467 |
-
}
|
| 468 |
|
| 469 |
async function clearHistory() {
|
| 470 |
try {
|
|
|
|
| 60 |
return;
|
| 61 |
}
|
| 62 |
|
|
|
|
|
|
|
|
|
|
| 63 |
// Show loading state
|
| 64 |
const sendBtn = document.getElementById('send');
|
| 65 |
const originalText = sendBtn.textContent;
|
|
|
|
| 67 |
sendBtn.textContent = '⏳ שולח...';
|
| 68 |
|
| 69 |
try {
|
| 70 |
+
// Always use SQL-based approach
|
| 71 |
+
let endpoint = '/query-sql';
|
| 72 |
+
const body = { query: q, top_k: 5 };
|
|
|
|
| 73 |
|
| 74 |
const r = await fetch(endpoint, {
|
| 75 |
method: 'POST',
|
|
|
|
| 100 |
const sourcesDiv = document.getElementById('resp-sources');
|
| 101 |
|
| 102 |
if (showSources) {
|
| 103 |
+
if (j.query_results && j.query_results.length > 0) {
|
| 104 |
sourcesDiv.style.display = 'block';
|
| 105 |
sourcesDiv.innerHTML = formatSQLResults(j);
|
|
|
|
|
|
|
|
|
|
| 106 |
} else {
|
| 107 |
if (sourcesDiv) sourcesDiv.style.display = 'none';
|
| 108 |
}
|
|
|
|
| 110 |
if (sourcesDiv) sourcesDiv.style.display = 'none';
|
| 111 |
}
|
| 112 |
|
| 113 |
+
// Show visualizations if available
|
| 114 |
+
if (j.visualizations && j.visualizations.length > 0) {
|
| 115 |
showVisualizations(j.visualizations);
|
| 116 |
}
|
| 117 |
|
|
|
|
| 190 |
if (!vizContainer) {
|
| 191 |
vizContainer = document.createElement('div');
|
| 192 |
vizContainer.id = 'resp-visualizations';
|
| 193 |
+
vizContainer.className = 'viz-container';
|
| 194 |
+
vizContainer.style.marginTop = '24px';
|
| 195 |
document.getElementById('last-response').appendChild(vizContainer);
|
| 196 |
}
|
| 197 |
|
| 198 |
// Clear previous visualizations
|
| 199 |
+
vizContainer.innerHTML = '<h4 class="viz-title">📊 גרפיקות ויזואליזציות</h4>';
|
| 200 |
vizContainer.style.display = 'block';
|
| 201 |
|
| 202 |
visualizations.forEach((viz, idx) => {
|
| 203 |
const vizDiv = document.createElement('div');
|
| 204 |
+
vizDiv.style.marginBottom = '32px';
|
| 205 |
+
vizDiv.style.padding = '20px';
|
| 206 |
+
vizDiv.style.background = 'linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%)';
|
| 207 |
+
vizDiv.style.borderRadius = '16px';
|
| 208 |
+
vizDiv.style.boxShadow = '0 4px 16px rgba(0,0,0,0.08)';
|
| 209 |
+
vizDiv.style.border = '1px solid rgba(25, 118, 210, 0.1)';
|
| 210 |
+
|
| 211 |
+
// Add explanation based on chart type
|
| 212 |
+
let explanation = '';
|
| 213 |
+
switch(viz.type) {
|
| 214 |
+
case 'bar':
|
| 215 |
+
explanation = '📊 <strong>גרף עמודות:</strong> מציג את הנתונים בצורה ויזואלית ברורה. כל עמודה מייצגת קטגוריה, והגובה שלה מייצג את הערך. זה עוזר להשוות בין קטגוריות שונות ולהבין את ההבדלים ביניהן.';
|
| 216 |
+
break;
|
| 217 |
+
case 'line':
|
| 218 |
+
explanation = '📈 <strong>גרף קו:</strong> מציג מגמות ושינויים לאורך זמן. הקו עולה כשיש עלייה בערכים ויורד כשיש ירידה. זה עוזר לזהות דפוסים, שינויים תקופתיים, ומגמות ארוכות טווח.';
|
| 219 |
+
break;
|
| 220 |
+
case 'scatter':
|
| 221 |
+
explanation = '🔵 <strong>גרף פיזור:</strong> מציג את הקשר בין שני משתנים. כל נקודה מייצגת תצפית אחת. זה עוזר לזהות קשרים, מתאמים, וחריגים בנתונים.';
|
| 222 |
+
break;
|
| 223 |
+
case 'histogram':
|
| 224 |
+
explanation = '📊 <strong>היסטוגרמה:</strong> מציגה את התפלגות הנתונים. כל עמודה מייצגת טווח ערכים, והגובה שלה מייצג כמה תצפיות נפלו בטווח הזה. זה עוזר להבין את הצורה של ההתפלגות - האם היא סימטרית, מוטה, או יש לה כמה פסגות.';
|
| 225 |
+
break;
|
| 226 |
+
default:
|
| 227 |
+
explanation = '📊 <strong>ויזואליזציה:</strong> מציגה את הנתונים בצורה גרפית כדי להקל על הבנה וניתוח.';
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
vizDiv.innerHTML = `
|
| 231 |
+
<h5 style="margin-top: 0; color: #1976d2; font-size: 18px; font-weight: 700; margin-bottom: 16px;">
|
| 232 |
+
${escapeHtml(viz.title)}
|
| 233 |
+
</h5>
|
| 234 |
+
<div class="viz-explanation">${explanation}</div>
|
| 235 |
+
`;
|
| 236 |
|
|
|
|
| 237 |
const canvasDiv = document.createElement('div');
|
| 238 |
canvasDiv.style.position = 'relative';
|
| 239 |
+
canvasDiv.style.height = '350px';
|
| 240 |
+
canvasDiv.style.background = '#ffffff';
|
| 241 |
+
canvasDiv.style.borderRadius = '12px';
|
| 242 |
+
canvasDiv.style.padding = '16px';
|
| 243 |
canvasDiv.innerHTML = `<canvas id="chart-${idx}"></canvas>`;
|
| 244 |
vizDiv.appendChild(canvasDiv);
|
| 245 |
|
|
|
|
| 265 |
const xLabel = viz.x_label || viz.x || 'X';
|
| 266 |
const yLabel = viz.y_label || viz.y || 'Y';
|
| 267 |
|
| 268 |
+
// Color palettes for different chart types
|
| 269 |
+
const colorPalettes = {
|
| 270 |
+
bar: [
|
| 271 |
+
'rgba(25, 118, 210, 0.8)', 'rgba(76, 175, 80, 0.8)', 'rgba(255, 152, 0, 0.8)',
|
| 272 |
+
'rgba(156, 39, 176, 0.8)', 'rgba(244, 67, 54, 0.8)', 'rgba(0, 188, 212, 0.8)',
|
| 273 |
+
'rgba(255, 193, 7, 0.8)', 'rgba(121, 85, 72, 0.8)'
|
| 274 |
+
],
|
| 275 |
+
line: ['rgba(25, 118, 210, 1)', 'rgba(76, 175, 80, 1)', 'rgba(255, 152, 0, 1)'],
|
| 276 |
+
scatter: ['rgba(25, 118, 210, 0.7)', 'rgba(76, 175, 80, 0.7)', 'rgba(255, 152, 0, 0.7)']
|
| 277 |
+
};
|
| 278 |
+
|
| 279 |
switch (viz.type) {
|
| 280 |
case 'bar':
|
| 281 |
+
const barColors = viz.data.map((_, i) => colorPalettes.bar[i % colorPalettes.bar.length]);
|
| 282 |
return {
|
| 283 |
type: 'bar',
|
| 284 |
data: {
|
|
|
|
| 286 |
datasets: [{
|
| 287 |
label: yLabel,
|
| 288 |
data: viz.data.map(d => d[viz.y]),
|
| 289 |
+
backgroundColor: barColors,
|
| 290 |
+
borderColor: barColors.map(c => c.replace('0.8', '1')),
|
| 291 |
+
borderWidth: 2,
|
| 292 |
+
borderRadius: 8,
|
| 293 |
+
borderSkipped: false,
|
| 294 |
}]
|
| 295 |
},
|
| 296 |
options: {
|
|
|
|
| 299 |
plugins: {
|
| 300 |
legend: {
|
| 301 |
display: true,
|
| 302 |
+
position: 'top',
|
| 303 |
+
labels: {
|
| 304 |
+
font: { size: 14, weight: 'bold' },
|
| 305 |
+
padding: 15,
|
| 306 |
+
usePointStyle: true
|
| 307 |
+
}
|
| 308 |
},
|
| 309 |
title: {
|
| 310 |
display: false
|
| 311 |
+
},
|
| 312 |
+
tooltip: {
|
| 313 |
+
backgroundColor: 'rgba(0, 0, 0, 0.8)',
|
| 314 |
+
padding: 12,
|
| 315 |
+
titleFont: { size: 14, weight: 'bold' },
|
| 316 |
+
bodyFont: { size: 13 },
|
| 317 |
+
borderColor: 'rgba(25, 118, 210, 0.8)',
|
| 318 |
+
borderWidth: 2,
|
| 319 |
+
cornerRadius: 8
|
| 320 |
}
|
| 321 |
},
|
| 322 |
scales: {
|
|
|
|
| 324 |
beginAtZero: true,
|
| 325 |
title: {
|
| 326 |
display: true,
|
| 327 |
+
text: yLabel,
|
| 328 |
+
font: { size: 14, weight: 'bold' },
|
| 329 |
+
color: '#1976d2'
|
| 330 |
+
},
|
| 331 |
+
grid: {
|
| 332 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 333 |
+
lineWidth: 1
|
| 334 |
+
},
|
| 335 |
+
ticks: {
|
| 336 |
+
font: { size: 12 },
|
| 337 |
+
color: '#555'
|
| 338 |
}
|
| 339 |
},
|
| 340 |
x: {
|
| 341 |
title: {
|
| 342 |
display: true,
|
| 343 |
+
text: xLabel,
|
| 344 |
+
font: { size: 14, weight: 'bold' },
|
| 345 |
+
color: '#1976d2'
|
| 346 |
+
},
|
| 347 |
+
grid: {
|
| 348 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 349 |
+
lineWidth: 1
|
| 350 |
+
},
|
| 351 |
+
ticks: {
|
| 352 |
+
font: { size: 12 },
|
| 353 |
+
color: '#555'
|
| 354 |
}
|
| 355 |
}
|
| 356 |
}
|
|
|
|
| 366 |
label: yLabel,
|
| 367 |
data: viz.data.map(d => d[viz.y]),
|
| 368 |
borderColor: 'rgba(25, 118, 210, 1)',
|
| 369 |
+
backgroundColor: 'rgba(25, 118, 210, 0.15)',
|
| 370 |
+
borderWidth: 3,
|
| 371 |
fill: true,
|
| 372 |
+
tension: 0.5,
|
| 373 |
+
pointRadius: 5,
|
| 374 |
+
pointHoverRadius: 7,
|
| 375 |
+
pointBackgroundColor: 'rgba(25, 118, 210, 1)',
|
| 376 |
+
pointBorderColor: '#ffffff',
|
| 377 |
+
pointBorderWidth: 2,
|
| 378 |
+
pointHoverBackgroundColor: 'rgba(25, 118, 210, 1)',
|
| 379 |
+
pointHoverBorderColor: '#ffffff',
|
| 380 |
+
pointHoverBorderWidth: 3
|
| 381 |
}]
|
| 382 |
},
|
| 383 |
options: {
|
|
|
|
| 386 |
plugins: {
|
| 387 |
legend: {
|
| 388 |
display: true,
|
| 389 |
+
position: 'top',
|
| 390 |
+
labels: {
|
| 391 |
+
font: { size: 14, weight: 'bold' },
|
| 392 |
+
padding: 15,
|
| 393 |
+
usePointStyle: true
|
| 394 |
+
}
|
| 395 |
+
},
|
| 396 |
+
tooltip: {
|
| 397 |
+
backgroundColor: 'rgba(0, 0, 0, 0.8)',
|
| 398 |
+
padding: 12,
|
| 399 |
+
titleFont: { size: 14, weight: 'bold' },
|
| 400 |
+
bodyFont: { size: 13 },
|
| 401 |
+
borderColor: 'rgba(25, 118, 210, 0.8)',
|
| 402 |
+
borderWidth: 2,
|
| 403 |
+
cornerRadius: 8
|
| 404 |
}
|
| 405 |
},
|
| 406 |
scales: {
|
|
|
|
| 408 |
beginAtZero: true,
|
| 409 |
title: {
|
| 410 |
display: true,
|
| 411 |
+
text: yLabel,
|
| 412 |
+
font: { size: 14, weight: 'bold' },
|
| 413 |
+
color: '#1976d2'
|
| 414 |
+
},
|
| 415 |
+
grid: {
|
| 416 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 417 |
+
lineWidth: 1
|
| 418 |
+
},
|
| 419 |
+
ticks: {
|
| 420 |
+
font: { size: 12 },
|
| 421 |
+
color: '#555'
|
| 422 |
}
|
| 423 |
},
|
| 424 |
x: {
|
| 425 |
title: {
|
| 426 |
display: true,
|
| 427 |
+
text: xLabel,
|
| 428 |
+
font: { size: 14, weight: 'bold' },
|
| 429 |
+
color: '#1976d2'
|
| 430 |
+
},
|
| 431 |
+
grid: {
|
| 432 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 433 |
+
lineWidth: 1
|
| 434 |
+
},
|
| 435 |
+
ticks: {
|
| 436 |
+
font: { size: 12 },
|
| 437 |
+
color: '#555'
|
| 438 |
}
|
| 439 |
}
|
| 440 |
}
|
|
|
|
| 451 |
x: d[viz.x],
|
| 452 |
y: d[viz.y]
|
| 453 |
})),
|
| 454 |
+
backgroundColor: 'rgba(25, 118, 210, 0.7)',
|
| 455 |
borderColor: 'rgba(25, 118, 210, 1)',
|
| 456 |
+
borderWidth: 2,
|
| 457 |
+
pointRadius: 6,
|
| 458 |
+
pointHoverRadius: 8,
|
| 459 |
+
pointHoverBackgroundColor: 'rgba(76, 175, 80, 0.8)',
|
| 460 |
+
pointHoverBorderColor: '#ffffff',
|
| 461 |
+
pointHoverBorderWidth: 2
|
| 462 |
}]
|
| 463 |
},
|
| 464 |
options: {
|
|
|
|
| 467 |
plugins: {
|
| 468 |
legend: {
|
| 469 |
display: true,
|
| 470 |
+
position: 'top',
|
| 471 |
+
labels: {
|
| 472 |
+
font: { size: 14, weight: 'bold' },
|
| 473 |
+
padding: 15,
|
| 474 |
+
usePointStyle: true
|
| 475 |
+
}
|
| 476 |
+
},
|
| 477 |
+
tooltip: {
|
| 478 |
+
backgroundColor: 'rgba(0, 0, 0, 0.8)',
|
| 479 |
+
padding: 12,
|
| 480 |
+
titleFont: { size: 14, weight: 'bold' },
|
| 481 |
+
bodyFont: { size: 13 },
|
| 482 |
+
borderColor: 'rgba(25, 118, 210, 0.8)',
|
| 483 |
+
borderWidth: 2,
|
| 484 |
+
cornerRadius: 8
|
| 485 |
}
|
| 486 |
},
|
| 487 |
scales: {
|
|
|
|
| 489 |
beginAtZero: true,
|
| 490 |
title: {
|
| 491 |
display: true,
|
| 492 |
+
text: yLabel,
|
| 493 |
+
font: { size: 14, weight: 'bold' },
|
| 494 |
+
color: '#1976d2'
|
| 495 |
+
},
|
| 496 |
+
grid: {
|
| 497 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 498 |
+
lineWidth: 1
|
| 499 |
+
},
|
| 500 |
+
ticks: {
|
| 501 |
+
font: { size: 12 },
|
| 502 |
+
color: '#555'
|
| 503 |
}
|
| 504 |
},
|
| 505 |
x: {
|
| 506 |
beginAtZero: true,
|
| 507 |
title: {
|
| 508 |
display: true,
|
| 509 |
+
text: xLabel,
|
| 510 |
+
font: { size: 14, weight: 'bold' },
|
| 511 |
+
color: '#1976d2'
|
| 512 |
+
},
|
| 513 |
+
grid: {
|
| 514 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 515 |
+
lineWidth: 1
|
| 516 |
+
},
|
| 517 |
+
ticks: {
|
| 518 |
+
font: { size: 12 },
|
| 519 |
+
color: '#555'
|
| 520 |
}
|
| 521 |
}
|
| 522 |
}
|
|
|
|
| 545 |
bins[binIndex]++;
|
| 546 |
});
|
| 547 |
|
| 548 |
+
const histColors = bins.map((_, i) => {
|
| 549 |
+
const ratio = i / binCount;
|
| 550 |
+
return `rgba(${25 + Math.floor(ratio * 180)}, ${118 + Math.floor(ratio * 100)}, ${210 - Math.floor(ratio * 100)}, 0.8)`;
|
| 551 |
+
});
|
| 552 |
+
|
| 553 |
return {
|
| 554 |
type: 'bar',
|
| 555 |
data: {
|
|
|
|
| 557 |
datasets: [{
|
| 558 |
label: xLabel,
|
| 559 |
data: bins,
|
| 560 |
+
backgroundColor: histColors,
|
| 561 |
+
borderColor: histColors.map(c => c.replace('0.8', '1')),
|
| 562 |
+
borderWidth: 2,
|
| 563 |
+
borderRadius: 4,
|
| 564 |
+
borderSkipped: false,
|
| 565 |
}]
|
| 566 |
},
|
| 567 |
options: {
|
|
|
|
| 570 |
plugins: {
|
| 571 |
legend: {
|
| 572 |
display: true,
|
| 573 |
+
position: 'top',
|
| 574 |
+
labels: {
|
| 575 |
+
font: { size: 14, weight: 'bold' },
|
| 576 |
+
padding: 15,
|
| 577 |
+
usePointStyle: true
|
| 578 |
+
}
|
| 579 |
+
},
|
| 580 |
+
tooltip: {
|
| 581 |
+
backgroundColor: 'rgba(0, 0, 0, 0.8)',
|
| 582 |
+
padding: 12,
|
| 583 |
+
titleFont: { size: 14, weight: 'bold' },
|
| 584 |
+
bodyFont: { size: 13 },
|
| 585 |
+
borderColor: 'rgba(25, 118, 210, 0.8)',
|
| 586 |
+
borderWidth: 2,
|
| 587 |
+
cornerRadius: 8
|
| 588 |
}
|
| 589 |
},
|
| 590 |
scales: {
|
|
|
|
| 592 |
beginAtZero: true,
|
| 593 |
title: {
|
| 594 |
display: true,
|
| 595 |
+
text: 'תדירות',
|
| 596 |
+
font: { size: 14, weight: 'bold' },
|
| 597 |
+
color: '#1976d2'
|
| 598 |
+
},
|
| 599 |
+
grid: {
|
| 600 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 601 |
+
lineWidth: 1
|
| 602 |
+
},
|
| 603 |
+
ticks: {
|
| 604 |
+
font: { size: 12 },
|
| 605 |
+
color: '#555'
|
| 606 |
}
|
| 607 |
},
|
| 608 |
x: {
|
| 609 |
title: {
|
| 610 |
display: true,
|
| 611 |
+
text: xLabel,
|
| 612 |
+
font: { size: 14, weight: 'bold' },
|
| 613 |
+
color: '#1976d2'
|
| 614 |
+
},
|
| 615 |
+
grid: {
|
| 616 |
+
color: 'rgba(25, 118, 210, 0.1)',
|
| 617 |
+
lineWidth: 1
|
| 618 |
+
},
|
| 619 |
+
ticks: {
|
| 620 |
+
font: { size: 12 },
|
| 621 |
+
color: '#555'
|
| 622 |
}
|
| 623 |
}
|
| 624 |
}
|
|
|
|
| 644 |
return formatted;
|
| 645 |
}
|
| 646 |
|
| 647 |
+
// formatSources function removed - no longer needed (RAG approach deprecated)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
|
| 649 |
async function clearHistory() {
|
| 650 |
try {
|
app/static/index.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
-
<title>Feedback
|
| 7 |
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
|
| 8 |
<style>
|
| 9 |
* { box-sizing: border-box; }
|
|
@@ -11,7 +11,7 @@
|
|
| 11 |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Noto Sans Hebrew', 'Arial Hebrew', sans-serif;
|
| 12 |
margin: 0;
|
| 13 |
direction: rtl;
|
| 14 |
-
background: linear-gradient(135deg, #
|
| 15 |
min-height: 100vh;
|
| 16 |
color: #0b2545;
|
| 17 |
padding-bottom: 40px;
|
|
@@ -105,27 +105,57 @@
|
|
| 105 |
transform: translateY(-2px);
|
| 106 |
}
|
| 107 |
.card {
|
| 108 |
-
border-radius:
|
| 109 |
-
padding:
|
| 110 |
-
margin-top:
|
| 111 |
-
background:
|
| 112 |
-
box-shadow: 0
|
| 113 |
-
transition: transform 0.
|
|
|
|
| 114 |
}
|
| 115 |
.card:hover {
|
| 116 |
-
transform: translateY(-
|
| 117 |
-
box-shadow: 0
|
| 118 |
}
|
| 119 |
.summary {
|
| 120 |
font-size: 17px;
|
| 121 |
-
line-height: 1.
|
| 122 |
color: #073763;
|
| 123 |
white-space: pre-wrap;
|
| 124 |
word-wrap: break-word;
|
| 125 |
-
background: #f8f9fa;
|
| 126 |
-
padding:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
border-radius: 12px;
|
|
|
|
| 128 |
border-right: 4px solid #1976d2;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
}
|
| 130 |
.result { white-space: pre-wrap; }
|
| 131 |
header .title { font-size: 20px; margin:0 }
|
|
@@ -193,8 +223,12 @@
|
|
| 193 |
<body>
|
| 194 |
<div class="container">
|
| 195 |
<header>
|
| 196 |
-
<h1>Feedback
|
| 197 |
-
<div
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
</header>
|
| 199 |
|
| 200 |
<section class="card">
|
|
@@ -204,13 +238,6 @@
|
|
| 204 |
<label><input type="checkbox" id="show-sources" /> הצג דוגמאות מהנתונים</label>
|
| 205 |
<span class="small" style="margin-left:12px;">ברירת מחדל: מוסתר — יוצג רק הסיכום האנליטי</span>
|
| 206 |
</div>
|
| 207 |
-
<div style="margin-top:12px;">
|
| 208 |
-
<label style="font-weight: 600;">גישת ניתוח:</label>
|
| 209 |
-
<div style="margin-top:8px;">
|
| 210 |
-
<label><input type="radio" name="approach" value="sql" checked /> SQL-based (מומלץ - חדש)</label>
|
| 211 |
-
<label style="margin-right:20px;"><input type="radio" name="approach" value="rag" /> RAG-based (ישן)</label>
|
| 212 |
-
</div>
|
| 213 |
-
</div>
|
| 214 |
<div style="display:flex;gap:8px;margin-top:12px;">
|
| 215 |
<button id="send" class="primary">🔍 שאל</button>
|
| 216 |
<button id="clear-history" class="muted">🗑️ נקה היסטוריה</button>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
+
<title>Feedback Analysis — Frontend</title>
|
| 7 |
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
|
| 8 |
<style>
|
| 9 |
* { box-sizing: border-box; }
|
|
|
|
| 11 |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Noto Sans Hebrew', 'Arial Hebrew', sans-serif;
|
| 12 |
margin: 0;
|
| 13 |
direction: rtl;
|
| 14 |
+
background: linear-gradient(135deg, #1976d2 0%, #1565c0 50%, #0d47a1 100%);
|
| 15 |
min-height: 100vh;
|
| 16 |
color: #0b2545;
|
| 17 |
padding-bottom: 40px;
|
|
|
|
| 105 |
transform: translateY(-2px);
|
| 106 |
}
|
| 107 |
.card {
|
| 108 |
+
border-radius: 20px;
|
| 109 |
+
padding: 28px;
|
| 110 |
+
margin-top: 24px;
|
| 111 |
+
background: linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%);
|
| 112 |
+
box-shadow: 0 12px 48px rgba(0,0,0,0.12), 0 4px 16px rgba(0,0,0,0.08);
|
| 113 |
+
transition: transform 0.3s ease, box-shadow 0.3s ease;
|
| 114 |
+
border: 1px solid rgba(255,255,255,0.8);
|
| 115 |
}
|
| 116 |
.card:hover {
|
| 117 |
+
transform: translateY(-4px);
|
| 118 |
+
box-shadow: 0 16px 64px rgba(0,0,0,0.18), 0 6px 24px rgba(0,0,0,0.12);
|
| 119 |
}
|
| 120 |
.summary {
|
| 121 |
font-size: 17px;
|
| 122 |
+
line-height: 1.9;
|
| 123 |
color: #073763;
|
| 124 |
white-space: pre-wrap;
|
| 125 |
word-wrap: break-word;
|
| 126 |
+
background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%);
|
| 127 |
+
padding: 24px;
|
| 128 |
+
border-radius: 16px;
|
| 129 |
+
border-right: 5px solid #1976d2;
|
| 130 |
+
box-shadow: inset 0 2px 8px rgba(0,0,0,0.05);
|
| 131 |
+
}
|
| 132 |
+
.viz-container {
|
| 133 |
+
background: linear-gradient(135deg, #ffffff 0%, #f0f7ff 100%);
|
| 134 |
+
border-radius: 16px;
|
| 135 |
+
padding: 24px;
|
| 136 |
+
margin-top: 24px;
|
| 137 |
+
box-shadow: 0 8px 32px rgba(25, 118, 210, 0.1);
|
| 138 |
+
border: 2px solid rgba(25, 118, 210, 0.15);
|
| 139 |
+
}
|
| 140 |
+
.viz-explanation {
|
| 141 |
+
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
|
| 142 |
+
padding: 16px 20px;
|
| 143 |
border-radius: 12px;
|
| 144 |
+
margin-bottom: 20px;
|
| 145 |
border-right: 4px solid #1976d2;
|
| 146 |
+
color: #0d47a1;
|
| 147 |
+
font-size: 15px;
|
| 148 |
+
line-height: 1.7;
|
| 149 |
+
box-shadow: 0 2px 8px rgba(25, 118, 210, 0.15);
|
| 150 |
+
}
|
| 151 |
+
.viz-title {
|
| 152 |
+
color: #1976d2;
|
| 153 |
+
font-size: 20px;
|
| 154 |
+
font-weight: 700;
|
| 155 |
+
margin-bottom: 16px;
|
| 156 |
+
display: flex;
|
| 157 |
+
align-items: center;
|
| 158 |
+
gap: 8px;
|
| 159 |
}
|
| 160 |
.result { white-space: pre-wrap; }
|
| 161 |
header .title { font-size: 20px; margin:0 }
|
|
|
|
| 223 |
<body>
|
| 224 |
<div class="container">
|
| 225 |
<header>
|
| 226 |
+
<h1>Feedback Analysis — ממשק</h1>
|
| 227 |
+
<div style="display: flex; gap: 16px; align-items: center;">
|
| 228 |
+
<div class="small">שרת: <span id="server-status">...בדיקה</span></div>
|
| 229 |
+
<a href="https://github.com" target="_blank" style="color: white; text-decoration: none; font-size: 14px; padding: 6px 12px; background: rgba(255,255,255,0.2); border-radius: 6px; transition: all 0.2s;">🔗 GitHub</a>
|
| 230 |
+
<a href="https://ynet.co.il" target="_blank" style="color: white; text-decoration: none; font-size: 14px; padding: 6px 12px; background: rgba(255,255,255,0.2); border-radius: 6px; transition: all 0.2s;">📄 קורות חיים</a>
|
| 231 |
+
</div>
|
| 232 |
</header>
|
| 233 |
|
| 234 |
<section class="card">
|
|
|
|
| 238 |
<label><input type="checkbox" id="show-sources" /> הצג דוגמאות מהנתונים</label>
|
| 239 |
<span class="small" style="margin-left:12px;">ברירת מחדל: מוסתר — יוצג רק הסיכום האנליטי</span>
|
| 240 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
<div style="display:flex;gap:8px;margin-top:12px;">
|
| 242 |
<button id="send" class="primary">🔍 שאל</button>
|
| 243 |
<button id="clear-history" class="muted">🗑️ נקה היסטוריה</button>
|
app/topics.py
DELETED
|
@@ -1,22 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from dataclasses import dataclass
|
| 4 |
-
from typing import List, Dict
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
from sklearn.cluster import KMeans # type: ignore
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
@dataclass
|
| 11 |
-
class TopicResult:
|
| 12 |
-
labels: List[int]
|
| 13 |
-
centroids: np.ndarray
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def kmeans_topics(embeddings: np.ndarray, num_topics: int = 8, seed: int = 42) -> TopicResult:
|
| 17 |
-
if len(embeddings) == 0:
|
| 18 |
-
return TopicResult(labels=[], centroids=np.empty((0, embeddings.shape[1])))
|
| 19 |
-
km = KMeans(n_clusters=num_topics, random_state=seed, n_init="auto")
|
| 20 |
-
labels = km.fit_predict(embeddings)
|
| 21 |
-
return TopicResult(labels=list(map(int, labels)), centroids=km.cluster_centers_)
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/vector_store.py
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""A thin wrapper around FAISS index and a Pandas DataFrame for metadata.
|
| 4 |
-
|
| 5 |
-
FaissVectorStore provides methods to add vectors, perform nearest-neighbor search,
|
| 6 |
-
and persist both the FAISS index and the accompanying metadata (as a parquet file).
|
| 7 |
-
|
| 8 |
-
SearchResult holds the matched index, similarity score and the original metadata row.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
import os
|
| 12 |
-
from dataclasses import dataclass
|
| 13 |
-
from typing import List, Tuple, Optional
|
| 14 |
-
|
| 15 |
-
import faiss # type: ignore
|
| 16 |
-
import numpy as np
|
| 17 |
-
import pandas as pd
|
| 18 |
-
|
| 19 |
-
from .config import settings
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
@dataclass
|
| 23 |
-
class SearchResult:
|
| 24 |
-
index: int
|
| 25 |
-
score: float
|
| 26 |
-
row: pd.Series
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
class FaissVectorStore:
|
| 30 |
-
def __init__(self, dim: int) -> None:
|
| 31 |
-
self.dim = dim
|
| 32 |
-
self.index = faiss.IndexFlatIP(dim)
|
| 33 |
-
self.metadata: Optional[pd.DataFrame] = None
|
| 34 |
-
|
| 35 |
-
def add(self, vectors: np.ndarray, metadata: pd.DataFrame) -> None:
|
| 36 |
-
if vectors.dtype != np.float32:
|
| 37 |
-
vectors = vectors.astype(np.float32)
|
| 38 |
-
if self.metadata is None:
|
| 39 |
-
self.metadata = metadata.reset_index(drop=True)
|
| 40 |
-
else:
|
| 41 |
-
self.metadata = pd.concat([self.metadata, metadata], ignore_index=True)
|
| 42 |
-
self.index.add(vectors)
|
| 43 |
-
|
| 44 |
-
def search(self, query_vector: np.ndarray, top_k: int = 5) -> List[SearchResult]:
|
| 45 |
-
q = query_vector.astype(np.float32).reshape(1, -1)
|
| 46 |
-
scores, idxs = self.index.search(q, top_k)
|
| 47 |
-
results: List[SearchResult] = []
|
| 48 |
-
for score, idx in zip(scores[0], idxs[0]):
|
| 49 |
-
if idx < 0 or self.metadata is None:
|
| 50 |
-
continue
|
| 51 |
-
results.append(SearchResult(index=int(idx), score=float(score), row=self.metadata.iloc[int(idx)]))
|
| 52 |
-
return results
|
| 53 |
-
|
| 54 |
-
def save(self, vector_path: str, meta_path: str) -> None:
|
| 55 |
-
os.makedirs(os.path.dirname(vector_path), exist_ok=True)
|
| 56 |
-
faiss.write_index(self.index, vector_path)
|
| 57 |
-
if self.metadata is not None:
|
| 58 |
-
self.metadata.to_parquet(meta_path, index=False)
|
| 59 |
-
|
| 60 |
-
@classmethod
|
| 61 |
-
def load(cls, vector_path: str, meta_path: str) -> "FaissVectorStore":
|
| 62 |
-
index = faiss.read_index(vector_path)
|
| 63 |
-
dim = index.d
|
| 64 |
-
store = cls(dim=dim)
|
| 65 |
-
store.index = index
|
| 66 |
-
if os.path.exists(meta_path):
|
| 67 |
-
store.metadata = pd.read_parquet(meta_path)
|
| 68 |
-
return store
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,20 +1,15 @@
|
|
|
|
|
| 1 |
fastapi==0.115.5
|
| 2 |
uvicorn[standard]==0.32.0
|
| 3 |
pandas==2.2.3
|
| 4 |
numpy==1.26.4
|
| 5 |
-
scikit-learn==1.5.2
|
| 6 |
-
faiss-cpu==1.8.0.post1
|
| 7 |
-
sentence-transformers==3.1.1
|
| 8 |
-
transformers==4.45.2
|
| 9 |
-
torch==2.4.1
|
| 10 |
-
langdetect==1.0.9
|
| 11 |
-
openai==1.52.2
|
| 12 |
python-dotenv==1.0.1
|
| 13 |
pydantic==2.9.2
|
| 14 |
orjson==3.10.7
|
|
|
|
|
|
|
|
|
|
| 15 |
google-generativeai==0.6.0
|
| 16 |
-
pyarrow==14.0.2
|
| 17 |
-
tiktoken==0.7.0
|
| 18 |
|
| 19 |
# Dev / test dependencies
|
| 20 |
pytest==7.4.0
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
fastapi==0.115.5
|
| 3 |
uvicorn[standard]==0.32.0
|
| 4 |
pandas==2.2.3
|
| 5 |
numpy==1.26.4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
python-dotenv==1.0.1
|
| 7 |
pydantic==2.9.2
|
| 8 |
orjson==3.10.7
|
| 9 |
+
|
| 10 |
+
# LLM providers (at least one required)
|
| 11 |
+
openai==1.52.2
|
| 12 |
google-generativeai==0.6.0
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Dev / test dependencies
|
| 15 |
pytest==7.4.0
|
scripts/precompute_index.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""Script to precompute the FAISS vector index locally.
|
| 4 |
-
|
| 5 |
-
When deploying to Runpod it's often useful to precompute embeddings and store
|
| 6 |
-
the FAISS index so the server can start quickly without re-embedding the
|
| 7 |
-
entire dataset on first boot. This script writes the index and metadata to
|
| 8 |
-
the configured `VECTOR_INDEX_PATH` and `VECTOR_METADATA_PATH`.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
import os
|
| 12 |
-
from pathlib import Path
|
| 13 |
-
|
| 14 |
-
from app.rag_service import RAGService
|
| 15 |
-
from app.config import settings
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def main() -> None:
|
| 19 |
-
out_dir = Path(settings.vector_index_path).parent
|
| 20 |
-
out_dir.mkdir(parents=True, exist_ok=True)
|
| 21 |
-
svc = RAGService()
|
| 22 |
-
svc.ingest()
|
| 23 |
-
print(f"Index written to: {settings.vector_index_path}")
|
| 24 |
-
print(f"Metadata written to: {settings.vector_metadata_path}")
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
if __name__ == "__main__":
|
| 28 |
-
main()
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/smoke_check.py
CHANGED
|
@@ -16,8 +16,9 @@ def get_root() -> str:
|
|
| 16 |
|
| 17 |
|
| 18 |
def post_query(q: str):
|
|
|
|
| 19 |
data = json.dumps({"query": q, "top_k": 5}).encode("utf-8")
|
| 20 |
-
req = urllib.request.Request("http://127.0.0.1:8000/query", data=data, headers={"Content-Type": "application/json"})
|
| 21 |
with urllib.request.urlopen(req, timeout=30) as resp:
|
| 22 |
return json.load(resp)
|
| 23 |
|
|
@@ -32,13 +33,15 @@ def main() -> None:
|
|
| 32 |
return
|
| 33 |
|
| 34 |
sample_q = "מה הבעיות העיקריות שמשתמשים מציינים?"
|
| 35 |
-
print("Posting sample query to /query ...")
|
| 36 |
try:
|
| 37 |
resp = post_query(sample_q)
|
| 38 |
print("Query response keys:", list(resp.keys()))
|
| 39 |
print("Summary (truncated):\n", (resp.get("summary") or "(no summary)")[:800])
|
|
|
|
|
|
|
| 40 |
except Exception as e:
|
| 41 |
-
print("Failed to POST /query:", e)
|
| 42 |
|
| 43 |
|
| 44 |
if __name__ == "__main__":
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def post_query(q: str):
|
| 19 |
+
"""Test SQL-based query endpoint."""
|
| 20 |
data = json.dumps({"query": q, "top_k": 5}).encode("utf-8")
|
| 21 |
+
req = urllib.request.Request("http://127.0.0.1:8000/query-sql", data=data, headers={"Content-Type": "application/json"})
|
| 22 |
with urllib.request.urlopen(req, timeout=30) as resp:
|
| 23 |
return json.load(resp)
|
| 24 |
|
|
|
|
| 33 |
return
|
| 34 |
|
| 35 |
sample_q = "מה הבעיות העיקריות שמשתמשים מציינים?"
|
| 36 |
+
print("Posting sample query to /query-sql ...")
|
| 37 |
try:
|
| 38 |
resp = post_query(sample_q)
|
| 39 |
print("Query response keys:", list(resp.keys()))
|
| 40 |
print("Summary (truncated):\n", (resp.get("summary") or "(no summary)")[:800])
|
| 41 |
+
if resp.get("sql_queries"):
|
| 42 |
+
print(f"Generated {len(resp['sql_queries'])} SQL queries")
|
| 43 |
except Exception as e:
|
| 44 |
+
print("Failed to POST /query-sql:", e)
|
| 45 |
|
| 46 |
|
| 47 |
if __name__ == "__main__":
|
scripts/test_queries.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
"""Small harness to demonstrate query type detection and quick counts.
|
| 2 |
-
|
| 3 |
-
This script intentionally keeps heavy dependencies optional: it runs the
|
| 4 |
-
lightweight count logic (keyword-based) directly from the CSV. If the FAISS
|
| 5 |
-
index and embedding dependencies are available, it will also show example
|
| 6 |
-
contexts from semantic retrieval.
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
from __future__ import annotations
|
| 10 |
-
|
| 11 |
-
from app.data_loader import load_feedback
|
| 12 |
-
from app.analysis import detect_query_type, resolve_count_from_type
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
def run_examples():
|
| 16 |
-
examples = [
|
| 17 |
-
"כמה משתמשים מתלוננים על אלמנטים שלא עובדים להם במערכת",
|
| 18 |
-
"כמה משתמשים כתבו תודה",
|
| 19 |
-
"יש תקלות בשירות ההרשמה",
|
| 20 |
-
"מה הבעיות העיקריות שמשתמשים מציינים?",
|
| 21 |
-
]
|
| 22 |
-
|
| 23 |
-
df = load_feedback()
|
| 24 |
-
|
| 25 |
-
for q in examples:
|
| 26 |
-
print("\nQuery:", q)
|
| 27 |
-
qtype, target = detect_query_type(q)
|
| 28 |
-
print("Detected type:", qtype, "target:", target)
|
| 29 |
-
resolved = resolve_count_from_type(df, qtype, target)
|
| 30 |
-
if resolved.get("type") == "count":
|
| 31 |
-
print("Count result:", resolved.get("count"), resolved.get("label"))
|
| 32 |
-
else:
|
| 33 |
-
# Fallback to semantic answer (may require heavy deps and a built index). Try to import and run if available.
|
| 34 |
-
try:
|
| 35 |
-
from app.rag_service import RAGService
|
| 36 |
-
svc = RAGService()
|
| 37 |
-
out = svc.answer(q, top_k=3)
|
| 38 |
-
print("Summary:", out.summary)
|
| 39 |
-
for r in out.results:
|
| 40 |
-
print(f"- [{r.score:.3f}] {r.row.get('ServiceName','')} | {r.row.get('Text','')[:120]}")
|
| 41 |
-
except FileNotFoundError:
|
| 42 |
-
print("Vector index not found. Run /ingest or precompute index to see examples.")
|
| 43 |
-
except Exception as e:
|
| 44 |
-
print("Semantic retrieval unavailable (missing packages or other error):", e)
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
if __name__ == "__main__":
|
| 48 |
-
run_examples()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/validate_local.py
DELETED
|
@@ -1,314 +0,0 @@
|
|
| 1 |
-
"""Complete validation and testing harness for local development.
|
| 2 |
-
|
| 3 |
-
This script:
|
| 4 |
-
1. Checks dependencies
|
| 5 |
-
2. Validates the CSV and index
|
| 6 |
-
3. Tests all API endpoints
|
| 7 |
-
4. Provides clear pass/fail feedback
|
| 8 |
-
|
| 9 |
-
Run this BEFORE testing manually to ensure everything works correctly.
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
from __future__ import annotations
|
| 13 |
-
|
| 14 |
-
import sys
|
| 15 |
-
import time
|
| 16 |
-
from pathlib import Path
|
| 17 |
-
|
| 18 |
-
# Color codes for terminal output
|
| 19 |
-
GREEN = "\033[92m"
|
| 20 |
-
RED = "\033[91m"
|
| 21 |
-
YELLOW = "\033[93m"
|
| 22 |
-
BLUE = "\033[94m"
|
| 23 |
-
RESET = "\033[0m"
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def print_status(message: str, status: str = "INFO") -> None:
|
| 27 |
-
"""Print colored status messages."""
|
| 28 |
-
colors = {
|
| 29 |
-
"PASS": GREEN,
|
| 30 |
-
"FAIL": RED,
|
| 31 |
-
"WARN": YELLOW,
|
| 32 |
-
"INFO": BLUE,
|
| 33 |
-
}
|
| 34 |
-
color = colors.get(status, RESET)
|
| 35 |
-
print(f"{color}[{status}]{RESET} {message}")
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
def check_dependencies() -> bool:
|
| 39 |
-
"""Verify all required packages are installed."""
|
| 40 |
-
print_status("Checking dependencies...", "INFO")
|
| 41 |
-
required = [
|
| 42 |
-
("pandas", "pandas"),
|
| 43 |
-
("fastapi", "fastapi"),
|
| 44 |
-
("pydantic", "pydantic"),
|
| 45 |
-
("sentence_transformers", "sentence_transformers"),
|
| 46 |
-
("transformers", "transformers"),
|
| 47 |
-
("faiss", "faiss"),
|
| 48 |
-
("numpy", "numpy"),
|
| 49 |
-
]
|
| 50 |
-
|
| 51 |
-
missing = []
|
| 52 |
-
for pkg_name, import_name in required:
|
| 53 |
-
try:
|
| 54 |
-
__import__(import_name)
|
| 55 |
-
print_status(f"✓ {pkg_name}", "PASS")
|
| 56 |
-
except ImportError:
|
| 57 |
-
print_status(f"✗ {pkg_name} NOT FOUND", "FAIL")
|
| 58 |
-
missing.append(pkg_name)
|
| 59 |
-
|
| 60 |
-
if missing:
|
| 61 |
-
print_status(
|
| 62 |
-
f"Missing packages: {', '.join(missing)}. "
|
| 63 |
-
"Run: pip install -r requirements.txt",
|
| 64 |
-
"FAIL"
|
| 65 |
-
)
|
| 66 |
-
return False
|
| 67 |
-
return True
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
def check_csv() -> bool:
|
| 71 |
-
"""Verify CSV exists and has required columns."""
|
| 72 |
-
print_status("Checking CSV...", "INFO")
|
| 73 |
-
csv_path = Path("Feedback.csv")
|
| 74 |
-
|
| 75 |
-
if not csv_path.exists():
|
| 76 |
-
print_status(f"CSV not found at {csv_path}", "FAIL")
|
| 77 |
-
return False
|
| 78 |
-
|
| 79 |
-
try:
|
| 80 |
-
import pandas as pd
|
| 81 |
-
df = pd.read_csv(csv_path)
|
| 82 |
-
required_cols = ["ID", "ServiceName", "Level", "Text"]
|
| 83 |
-
missing_cols = [c for c in required_cols if c not in df.columns]
|
| 84 |
-
|
| 85 |
-
if missing_cols:
|
| 86 |
-
print_status(f"Missing columns: {missing_cols}", "FAIL")
|
| 87 |
-
return False
|
| 88 |
-
|
| 89 |
-
print_status(f"✓ CSV valid: {len(df)} rows, {len(df.columns)} columns", "PASS")
|
| 90 |
-
return True
|
| 91 |
-
except Exception as e:
|
| 92 |
-
print_status(f"Error reading CSV: {e}", "FAIL")
|
| 93 |
-
return False
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
def check_index() -> bool:
|
| 97 |
-
"""Verify FAISS index is precomputed."""
|
| 98 |
-
print_status("Checking FAISS index...", "INFO")
|
| 99 |
-
|
| 100 |
-
index_path = Path(".vector_index/faiss.index")
|
| 101 |
-
meta_path = Path(".vector_index/meta.parquet")
|
| 102 |
-
|
| 103 |
-
if not index_path.exists():
|
| 104 |
-
print_status(
|
| 105 |
-
f"Index not found at {index_path}. "
|
| 106 |
-
"Run: python scripts/precompute_index.py",
|
| 107 |
-
"WARN"
|
| 108 |
-
)
|
| 109 |
-
return False
|
| 110 |
-
|
| 111 |
-
if not meta_path.exists():
|
| 112 |
-
print_status(f"Metadata not found at {meta_path}", "FAIL")
|
| 113 |
-
return False
|
| 114 |
-
|
| 115 |
-
try:
|
| 116 |
-
index_size = index_path.stat().st_size / (1024 * 1024) # MB
|
| 117 |
-
print_status(f"✓ Index found ({index_size:.1f} MB)", "PASS")
|
| 118 |
-
return True
|
| 119 |
-
except Exception as e:
|
| 120 |
-
print_status(f"Error checking index: {e}", "FAIL")
|
| 121 |
-
return False
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
def test_imports() -> bool:
|
| 125 |
-
"""Test that all app modules import correctly."""
|
| 126 |
-
print_status("Testing app imports...", "INFO")
|
| 127 |
-
|
| 128 |
-
try:
|
| 129 |
-
from app.config import settings
|
| 130 |
-
from app.data_loader import load_feedback
|
| 131 |
-
from app.analysis import detect_query_type, resolve_count_from_type
|
| 132 |
-
from app.rag_service import RAGService
|
| 133 |
-
from app.api import app
|
| 134 |
-
|
| 135 |
-
print_status("✓ All imports successful", "PASS")
|
| 136 |
-
return True
|
| 137 |
-
except Exception as e:
|
| 138 |
-
print_status(f"Import error: {e}", "FAIL")
|
| 139 |
-
return False
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
def test_analysis_logic() -> bool:
|
| 143 |
-
"""Test query analysis and counting logic (no embeddings needed)."""
|
| 144 |
-
print_status("Testing analysis logic (lightweight)...", "INFO")
|
| 145 |
-
|
| 146 |
-
try:
|
| 147 |
-
from app.data_loader import load_feedback
|
| 148 |
-
from app.analysis import detect_query_type, resolve_count_from_type
|
| 149 |
-
|
| 150 |
-
df = load_feedback()
|
| 151 |
-
|
| 152 |
-
# Test 1: Count thanks
|
| 153 |
-
qtype, target = detect_query_type("כמה משתמשים כתבו תודה")
|
| 154 |
-
result = resolve_count_from_type(df, qtype, target)
|
| 155 |
-
assert result["type"] == "count"
|
| 156 |
-
thanks_count = result["count"]
|
| 157 |
-
print_status(f"✓ Thanks count: {thanks_count}", "PASS")
|
| 158 |
-
|
| 159 |
-
# Test 2: Count complaints
|
| 160 |
-
qtype, target = detect_query_type("כמה משתמשים מתלוננים על אלמנטים שלא עובדים")
|
| 161 |
-
result = resolve_count_from_type(df, qtype, target)
|
| 162 |
-
assert result["type"] == "count"
|
| 163 |
-
complaint_count = result["count"]
|
| 164 |
-
print_status(f"✓ Complaint count: {complaint_count}", "PASS")
|
| 165 |
-
|
| 166 |
-
return True
|
| 167 |
-
except Exception as e:
|
| 168 |
-
print_status(f"Analysis test error: {e}", "FAIL")
|
| 169 |
-
return False
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
def test_rag_service() -> bool:
|
| 173 |
-
"""Test RAGService with precomputed index."""
|
| 174 |
-
print_status("Testing RAGService...", "INFO")
|
| 175 |
-
|
| 176 |
-
try:
|
| 177 |
-
from app.rag_service import RAGService
|
| 178 |
-
|
| 179 |
-
svc = RAGService()
|
| 180 |
-
print_status("✓ RAGService initialized", "PASS")
|
| 181 |
-
|
| 182 |
-
# Test query (should use precomputed index)
|
| 183 |
-
result = svc.answer("כמה משתמשים כתבו תודה", top_k=3)
|
| 184 |
-
|
| 185 |
-
if result.summary:
|
| 186 |
-
print_status(f"✓ Query response: {result.summary[:60]}...", "PASS")
|
| 187 |
-
else:
|
| 188 |
-
print_status("Query returned empty summary", "WARN")
|
| 189 |
-
|
| 190 |
-
if result.results:
|
| 191 |
-
print_status(f"✓ Retrieved {len(result.results)} results", "PASS")
|
| 192 |
-
else:
|
| 193 |
-
print_status("No results retrieved (may be expected if index small)", "WARN")
|
| 194 |
-
|
| 195 |
-
return True
|
| 196 |
-
except Exception as e:
|
| 197 |
-
print_status(f"RAGService error: {e}", "FAIL")
|
| 198 |
-
return False
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
def test_api_endpoints() -> bool:
|
| 202 |
-
"""Test FastAPI endpoints locally."""
|
| 203 |
-
print_status("Testing API endpoints...", "INFO")
|
| 204 |
-
|
| 205 |
-
try:
|
| 206 |
-
from fastapi.testclient import TestClient
|
| 207 |
-
from app.api import app
|
| 208 |
-
|
| 209 |
-
client = TestClient(app)
|
| 210 |
-
|
| 211 |
-
# Test /health
|
| 212 |
-
resp = client.post("/health")
|
| 213 |
-
assert resp.status_code == 200, f"Health check failed: {resp.status_code}"
|
| 214 |
-
print_status("✓ POST /health works", "PASS")
|
| 215 |
-
|
| 216 |
-
# Test /query
|
| 217 |
-
resp = client.post("/query", json={"query": "כמה משתמשים כתבו תודה", "top_k": 3})
|
| 218 |
-
assert resp.status_code == 200, f"Query failed: {resp.status_code}"
|
| 219 |
-
data = resp.json()
|
| 220 |
-
assert "summary" in data, "Query response missing summary"
|
| 221 |
-
print_status(f"✓ POST /query works (summary: {data['summary'][:50]}...)", "PASS")
|
| 222 |
-
|
| 223 |
-
# Test /topics
|
| 224 |
-
resp = client.post("/topics", json={"num_topics": 3})
|
| 225 |
-
assert resp.status_code == 200, f"Topics failed: {resp.status_code}"
|
| 226 |
-
data = resp.json()
|
| 227 |
-
assert "topics" in data, "Topics response missing topics"
|
| 228 |
-
print_status(f"✓ POST /topics works ({len(data.get('topics', {}))} topics)", "PASS")
|
| 229 |
-
|
| 230 |
-
# Test /sentiment
|
| 231 |
-
resp = client.post("/sentiment", json={"limit": 50})
|
| 232 |
-
assert resp.status_code == 200, f"Sentiment failed: {resp.status_code}"
|
| 233 |
-
data = resp.json()
|
| 234 |
-
assert "results" in data, "Sentiment response missing results"
|
| 235 |
-
print_status(f"✓ POST /sentiment works ({data['count']} results)", "PASS")
|
| 236 |
-
|
| 237 |
-
# Test /ingest (will try to rebuild index)
|
| 238 |
-
print_status("Testing /ingest (will rebuild index)...", "WARN")
|
| 239 |
-
start = time.time()
|
| 240 |
-
resp = client.post("/ingest")
|
| 241 |
-
elapsed = time.time() - start
|
| 242 |
-
assert resp.status_code == 200, f"Ingest failed: {resp.status_code}"
|
| 243 |
-
print_status(f"✓ POST /ingest works (took {elapsed:.1f}s)", "PASS")
|
| 244 |
-
|
| 245 |
-
return True
|
| 246 |
-
except Exception as e:
|
| 247 |
-
print_status(f"API test error: {e}", "FAIL")
|
| 248 |
-
import traceback
|
| 249 |
-
traceback.print_exc()
|
| 250 |
-
return False
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
def main() -> None:
|
| 254 |
-
"""Run all validations."""
|
| 255 |
-
print(f"\n{BLUE}{'='*60}")
|
| 256 |
-
print("FEEDBACK ANALYSIS RAG AGENT - LOCAL VALIDATION")
|
| 257 |
-
print(f"{'='*60}{RESET}\n")
|
| 258 |
-
|
| 259 |
-
checks = [
|
| 260 |
-
("Dependencies", check_dependencies),
|
| 261 |
-
("CSV file", check_csv),
|
| 262 |
-
("FAISS Index", check_index),
|
| 263 |
-
("App imports", test_imports),
|
| 264 |
-
("Analysis logic", test_analysis_logic),
|
| 265 |
-
("RAGService", test_rag_service),
|
| 266 |
-
("API endpoints", test_api_endpoints),
|
| 267 |
-
]
|
| 268 |
-
|
| 269 |
-
results = []
|
| 270 |
-
for name, check_func in checks:
|
| 271 |
-
print(f"\n{name}:")
|
| 272 |
-
print("-" * 60)
|
| 273 |
-
try:
|
| 274 |
-
passed = check_func()
|
| 275 |
-
results.append((name, passed))
|
| 276 |
-
except Exception as e:
|
| 277 |
-
print_status(f"Unexpected error: {e}", "FAIL")
|
| 278 |
-
results.append((name, False))
|
| 279 |
-
import traceback
|
| 280 |
-
traceback.print_exc()
|
| 281 |
-
|
| 282 |
-
# Summary
|
| 283 |
-
print(f"\n{BLUE}{'='*60}")
|
| 284 |
-
print("VALIDATION SUMMARY")
|
| 285 |
-
print(f"{'='*60}{RESET}\n")
|
| 286 |
-
|
| 287 |
-
passed_count = sum(1 for _, p in results if p)
|
| 288 |
-
total_count = len(results)
|
| 289 |
-
|
| 290 |
-
for name, passed in results:
|
| 291 |
-
status = "PASS" if passed else "FAIL"
|
| 292 |
-
color = GREEN if passed else RED
|
| 293 |
-
print(f"{color}[{status}]{RESET} {name}")
|
| 294 |
-
|
| 295 |
-
print(f"\n{'-'*60}")
|
| 296 |
-
if passed_count == total_count:
|
| 297 |
-
print_status(f"All {total_count} checks PASSED! Ready for local testing.", "PASS")
|
| 298 |
-
print("\nNext steps:")
|
| 299 |
-
print(" 1. Run: python run.py")
|
| 300 |
-
print(" 2. Open: http://localhost:8000/docs")
|
| 301 |
-
print(" 3. Or use curl (see QUICK_START.md)")
|
| 302 |
-
sys.exit(0)
|
| 303 |
-
else:
|
| 304 |
-
print_status(
|
| 305 |
-
f"{passed_count}/{total_count} checks passed. "
|
| 306 |
-
f"{total_count - passed_count} checks FAILED.",
|
| 307 |
-
"FAIL"
|
| 308 |
-
)
|
| 309 |
-
print("\nPlease fix the errors above before testing.")
|
| 310 |
-
sys.exit(1)
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
if __name__ == "__main__":
|
| 314 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|