GuestUser33 commited on
Commit
78f88a1
·
verified ·
1 Parent(s): db62194

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +590 -172
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, [[[]]]
2
  os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
3
  import glob
4
  import json
@@ -69,44 +69,46 @@ class PersonalizedLearningTracker:
69
  grammar_learned INTEGER DEFAULT 0,
70
  questions_asked INTEGER DEFAULT 0
71
  )
72
- ''')
73
 
74
  cursor.execute('''
75
- CREATE TABLE IF NOT EXISTS word_progress (
76
- id INTEGER PRIMARY KEY AUTOINCREMENT,
77
- user_id TEXT NOT NULL,
78
- word TEXT NOT NULL,
79
- definition TEXT NOT NULL,
80
- category TEXT NOT NULL,
81
- first_encountered TEXT NOT NULL,
82
- last_reviewed TEXT NOT NULL,
83
- encounter_count INTEGER DEFAULT 1,
84
- mastery_level INTEGER DEFAULT 0,
85
- correct_answers INTEGER DEFAULT 0,
86
- total_questions INTEGER DEFAULT 0,
87
- UNIQUE(user_id, word, category)
88
- )
 
 
89
  ''')
90
 
91
  cursor.execute('''
92
- CREATE TABLE IF NOT EXISTS learning_analytics (
93
- id INTEGER PRIMARY KEY AUTOINCREMENT,
94
- user_id TEXT NOT NULL,
95
- date TEXT NOT NULL,
96
- metric_name TEXT NOT NULL,
97
- metric_value REAL NOT NULL
98
- )
99
  ''')
100
 
101
  cursor.execute('''
102
- CREATE TABLE IF NOT EXISTS user_sessions (
103
- user_id TEXT NOT NULL,
104
- session_token TEXT NOT NULL,
105
- created_at TEXT NOT NULL,
106
- last_activity TEXT NOT NULL,
107
- is_active BOOLEAN DEFAULT 1,
108
- PRIMARY KEY (user_id, session_token)
109
- )
110
  ''')
111
 
112
  conn.commit()
@@ -198,66 +200,71 @@ class PersonalizedLearningTracker:
198
  conn.close()
199
 
200
  def track_word_encounter(self, user_id: str, word: str, definition: str, category: str):
201
- """Track when a user encounters a word or idiom"""
202
  conn = sqlite3.connect(self.db_path)
203
  cursor = conn.cursor()
204
 
 
 
205
  cursor.execute('''
206
- SELECT * FROM word_progress
207
  WHERE user_id = ? AND word = ? AND category = ?
208
- ''', (user_id, word, category))
209
 
210
  existing = cursor.fetchone()
211
  now = datetime.now().isoformat()
212
 
213
  if existing:
 
214
  cursor.execute('''
215
  UPDATE word_progress
216
- SET last_reviewed = ?, encounter_count = encounter_count + 1
 
 
 
 
217
  WHERE user_id = ? AND word = ? AND category = ?
218
- ''', (now, user_id, word, category))
 
219
  else:
220
- cursor.execute ('''
221
- INSERT INTO word_progress
222
- (user_id, word, definition, category, first_encountered, last_reviewed)
223
- VALUES (?, ?, ?, ?, ?, ?)
224
- ''', (user_id, word, definition, category, now, now))
 
225
 
 
226
  cursor.execute('''
227
- SELECT encounter_count FROM word_progress
 
228
  WHERE user_id = ? AND word = ? AND category = ?
229
- ''', (user_id, word, category))
230
- encounter_count = cursor.fetchone()[0]
231
-
232
- if encounter_count >= 3:
233
- cursor.execute('''
234
- UPDATE word_progress
235
- SET mastery_level = ?
236
- WHERE user_id = ? AND word = ? AND category = ?
237
- ''', (3, user_id, word, category))
238
 
239
  conn.commit()
240
  conn.close()
241
 
242
  def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
243
- """Update mastery level based on user performance"""
244
  conn = sqlite3.connect(self.db_path)
245
  cursor = conn.cursor()
246
 
247
  cursor.execute('''
248
- SELECT mastery_level, correct_answers, total_questions
249
  FROM word_progress
250
  WHERE user_id = ? AND word = ? AND category = ?
251
  ''', (user_id, word, category))
252
 
253
  result = cursor.fetchone()
254
  if result:
255
- current_mastery, correct_answers, total_questions = result
256
  new_correct = correct_answers + (1 if correct else 0)
257
  new_total = total_questions + 1
258
 
259
- accuracy = new_correct / new_total if new_total > 0 else 0
260
- new_mastery = min(5, int(accuracy * 5) + (1 if new_total >= 3 else 0))
 
 
 
261
 
262
  cursor.execute('''
263
  UPDATE word_progress
@@ -320,41 +327,40 @@ class PersonalizedLearningTracker:
320
  cursor = conn.cursor()
321
 
322
  cursor.execute('''
323
- SELECT word, definition, category, mastery_level, last_reviewed
324
  FROM word_progress
325
- WHERE user_id = ? AND (
326
- mastery_level < 3 OR
327
- last_reviewed < datetime('now', '-2 days')
328
- )
329
  ORDER BY mastery_level ASC, last_reviewed ASC
330
  LIMIT ?
331
  ''', (user_id, limit))
332
 
333
  words = []
334
- for word, definition, category, mastery, last_reviewed in cursor.fetchall():
335
  words.append({
336
  'word': word,
337
  'definition': definition,
338
  'category': category,
339
  'mastery_level': mastery,
340
- 'last_reviewed': last_reviewed
 
341
  })
342
 
343
  conn.close()
344
  return words
345
 
346
- def get_mastered_words(self, user_id: str, limit: int = 10) -> List[Dict]:
347
- """Get words with mastery level greater than 0"""
348
  conn = sqlite3.connect(self.db_path)
349
  cursor = conn.cursor()
350
 
 
351
  cursor.execute('''
352
  SELECT word, definition, category, mastery_level, encounter_count
353
  FROM word_progress
354
- WHERE user_id = ? AND mastery_level > 0
355
  ORDER BY mastery_level DESC, encounter_count DESC
356
- LIMIT ?
357
- ''', (user_id, limit))
358
 
359
  words = []
360
  for word, definition, category, mastery, encounter_count in cursor.fetchall():
@@ -388,6 +394,33 @@ class PersonalizedLearningTracker:
388
  recommendations.append("You haven't practiced recently - consistency is key to language learning!")
389
 
390
  return recommendations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
  class PersonalizedKazakhAssistant:
393
  def __init__(self):
@@ -401,7 +434,9 @@ class PersonalizedKazakhAssistant:
401
 
402
  def setup_environment(self):
403
  """Setup environment and configuration"""
404
- self.google_api_key = os.getenv("GOOGLE_API_KEY")
 
 
405
  self.MODEL = "gemini-1.5-flash"
406
  self.db_name = "vector_db"
407
 
@@ -425,7 +460,6 @@ class PersonalizedKazakhAssistant:
425
  documents.append(doc)
426
 
427
  self.known_terms.clear()
428
- common_words = {'бас', 'сөз', 'адам', 'жол', 'күн', 'су', 'жер', 'қол', 'тұр', 'бер'}
429
  for doc in documents:
430
  doc_type = doc.metadata.get('doc_type', '').lower()
431
  lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
@@ -434,11 +468,7 @@ class PersonalizedKazakhAssistant:
434
  if line and " - " in line:
435
  term = line.split(" - ")[0].strip().lower()
436
 
437
- if term and (
438
- doc_type in ['idioms', 'grammar'] or
439
- (doc_type == 'words' and len(term.split()) > 1) or
440
- term not in common_words
441
- ):
442
  self.known_terms.add(term)
443
 
444
  print(f"Loaded {len(self.known_terms)} known terms: {list(self.known_terms)[:10]}")
@@ -491,9 +521,9 @@ class PersonalizedKazakhAssistant:
491
  return ' '.join(term.lower().strip().split())
492
 
493
  def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
494
- """Extract meaningful Kazakh terms using document metadata to determine category"""
495
  terms = []
496
- seen_terms = set()
497
 
498
  try:
499
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
@@ -502,25 +532,218 @@ class PersonalizedKazakhAssistant:
502
  message_normalized = self.normalize_term(message)
503
 
504
  is_multi_term_query = any(keyword in message_normalized for keyword in ['мысал', 'тіркестер', 'пример'])
505
-
506
- common_words = {'бас', 'сөз', 'адам', 'жол', 'күн', 'су', 'жер', 'қол', 'тұр', 'бер'}
507
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  for known_term in self.known_terms:
509
  normalized_known_term = self.normalize_term(known_term)
510
  if normalized_known_term in response_normalized and normalized_known_term not in seen_terms:
511
-
512
- if normalized_known_term in common_words and not (
513
- normalized_known_term in message_normalized or is_multi_term_query
514
- ):
515
- print(f"Skipped common term: {known_term}")
 
 
 
516
  continue
517
-
518
- if normalized_known_term in message_normalized or any(
519
  normalized_known_term in self.normalize_term(doc.page_content) for doc in retrieved_docs
520
  ):
521
- category = "idiom"
522
  definition = ""
523
-
524
  for doc in retrieved_docs:
525
  if normalized_known_term in self.normalize_term(doc.page_content):
526
  doc_type = doc.metadata.get('doc_type', '').lower()
@@ -532,90 +755,34 @@ class PersonalizedKazakhAssistant:
532
  category = "word"
533
  definition = self.extract_clean_definition(normalized_known_term, doc.page_content, response)
534
  break
535
-
 
536
  if definition and len(normalized_known_term.split()) <= 10:
537
  terms.append((known_term, category, definition))
538
  seen_terms.add(normalized_known_term)
539
- print(f"Added term: {known_term}, category: {category}, definition: {definition}")
540
-
541
- if not is_multi_term_query and normalized_known_term not in message_normalized:
542
  return terms
543
-
544
- if not terms and not is_multi_term_query:
545
- kazakh_phrases = re.findall(
546
- r'[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:[\s\-]+[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*',
547
- response
548
- )
549
-
550
- for phrase in kazakh_phrases:
551
- normalized_phrase = self.normalize_term(phrase)
552
-
553
- if normalized_phrase in seen_terms:
554
- continue
555
-
556
- if len(normalized_phrase) <= 2 or len(normalized_phrase) > 100:
557
- print(f"Skipped phrase {normalized_phrase}: Invalid length")
558
- continue
559
-
560
- skip_words = ['деген', 'деп', 'берілген', 'мәтінде', 'мағынасы', 'дегеннің',
561
- 'түсіндірілген', 'келтірілген', 'болып', 'табылады', 'ауруы',
562
- 'мынадай', 'тақырыбына', 'тіркестер', 'арналған', 'байланысты']
563
-
564
- if any(skip in normalized_phrase for skip in skip_words):
565
- print(f"Skipped phrase {normalized_phrase}: Contains skip word")
566
- continue
567
-
568
- if normalized_phrase in common_words and normalized_phrase not in message_normalized:
569
- print(f"Skipped common phrase: {normalized_phrase}")
570
- continue
571
-
572
- if normalized_phrase not in self.known_terms:
573
- print(f"Warning: {normalized_phrase} not in known_terms, but processing anyway")
574
-
575
- category = "word"
576
- definition = ""
577
-
578
- for doc in retrieved_docs:
579
- if normalized_phrase in self.normalize_term(doc.page_content):
580
- doc_type = doc.metadata.get('doc_type', '').lower()
581
- if 'idiom' in doc_type or 'тіркес' in doc_type:
582
- category = "idiom"
583
- elif 'grammar' in doc_type:
584
- category = "grammar"
585
- else:
586
- category = "word"
587
-
588
- definition = self.extract_clean_definition(normalized_phrase, doc.page_content, response)
589
- break
590
-
591
- if definition and len(normalized_phrase.split()) <= 6:
592
- if not any(normalized_phrase.startswith(q) for q in ['қалай', 'қандай', 'қайда', 'неше', 'қашан']):
593
- terms.append((phrase, category, definition))
594
- seen_terms.add(normalized_phrase)
595
- print(f"Added term: {phrase}, category: {category}, definition: {definition}")
596
- break
597
-
598
  except Exception as e:
599
  print(f"Error extracting terms: {e}")
600
-
601
  return terms
602
 
603
  def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
604
- """Extract clean definition for a term, avoiding storing definitions as terms"""
605
  normalized_term = self.normalize_term(term)
606
 
607
- sentences = response.split('.')
608
- for sentence in sentences:
609
- sentence = sentence.strip()
610
- if normalized_term in self.normalize_term(sentence) and len(sentence) > 10 and len(sentence) < 150:
611
- if not any(word in sentence.lower() for word in ['деген не', 'қалай аталады', 'нені білдіреді']):
612
- return sentence
613
-
614
- doc_sentences = doc_content.split('.')
615
- for sentence in doc_sentences:
616
- sentence = sentence.strip()
617
- if normalized_term in self.normalize_term(sentence) and len(sentence) > 10 and len(sentence) < 150:
618
- return sentence
619
 
620
  return f"Definition for {term}"
621
 
@@ -659,6 +826,20 @@ class PersonalizedKazakhAssistant:
659
  return self.get_review_words(user_id)
660
  elif message.lower().startswith('/mastered'):
661
  return self.get_mastered_words(user_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
662
  elif message.lower().startswith('/help'):
663
  return self.get_help_message()
664
 
@@ -735,26 +916,44 @@ class PersonalizedKazakhAssistant:
735
  response = "📚 **Қайталауға арналған сөздер / Words to Review**:\n\n"
736
  for word_info in words_to_review:
737
  emoji = "📝" if word_info['category'] == "word" else "🎭"
738
- mastery_stars = "⭐" * word_info['mastery_level'] + "☆" * (5 - word_info['mastery_level'])
739
- response += f"{emoji} **{word_info['word']}** - {mastery_stars}\n"
740
 
741
  definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
742
  response += f" {definition_preview}\n\n"
743
 
744
  return response
745
 
746
- def get_mastered_words(self, user_id: str) -> str:
747
- """Get words that have been mastered (mastery level > 0) for specific user"""
748
- mastered_words = self.tracker.get_mastered_words(user_id, 10)
749
 
750
  if not mastered_words:
751
  return "Сізде әзірге меңгерілген сөздер жоқ. Терминдерді қайталауды жалғастырыңыз, сонда олар осында пайда болады! 🌟\n\nYou haven't mastered any words yet. Keep reviewing terms, and they'll appear here! 🌟"
752
 
753
- response = "🏆 **Меңгерілген сөздер / Mastered Words**:\n\n"
754
  for word_info in mastered_words:
755
  emoji = "📝" if word_info['category'] == "word" else "🎭"
 
 
756
 
757
- mastery_stars = "🟊" * word_info['mastery_level'] + "" * (5 - word_info['mastery_level'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  response += f"{emoji} **{word_info['word']}** - {mastery_stars} (Кездесу саны / Encounters: {word_info['encounter_count']})\n"
759
 
760
  definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
@@ -762,6 +961,68 @@ class PersonalizedKazakhAssistant:
762
 
763
  return response
764
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
  def get_help_message(self) -> str:
766
  """Get help message with available commands"""
767
  return """
@@ -838,7 +1099,7 @@ assistant = PersonalizedKazakhAssistant()
838
  def chat_interface(message, history, use_direct_gemini, target_language):
839
  """Chat interface for Gradio with toggle for direct Gemini mode"""
840
  try:
841
- web_user_id = "web_user_default" # Consistent ID
842
  response = assistant.process_message(message, web_user_id, use_direct_gemini=use_direct_gemini, target_language=target_language)
843
  return response
844
  except Exception as e:
@@ -960,6 +1221,100 @@ def api_mastered_words(user_id: str, session_token: str = None) -> dict:
960
  "error": str(e)
961
  }
962
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
963
  with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
964
  gr.Markdown("# 🇰🇿 Personalized Kazakh Learning Assistant")
965
  gr.Markdown("### Multi-User Chat Interface + API Endpoints for Mobile Integration")
@@ -1382,6 +1737,9 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1382
  - **Recommendations:** `/api/predict` with `fn_index=3`
1383
  - **Review Words:** `/api/predict` with `fn_index=4`
1384
  - **Mastered Words:** `/api/predict` with `fn_index=5`
 
 
 
1385
  """)
1386
 
1387
  with gr.Row():
@@ -1391,6 +1749,8 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1391
  message_input = gr.Textbox(label="Message", placeholder="Enter your message in Kazakh or English")
1392
  use_direct_gemini_api = gr.Checkbox(label="Direct Gemini Mode (No RAG/Tracking)", value=False)
1393
  target_language_api = gr.Dropdown(label="Explanation Language", choices=["English", "Kazakh", "Russian"], value="English")
 
 
1394
 
1395
  with gr.Row():
1396
  login_btn = gr.Button("🔑 Test Login API")
@@ -1399,10 +1759,12 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1399
  recommendations_btn = gr.Button("💡 Test Recommendations API")
1400
  review_btn = gr.Button("📚 Test Review Words API")
1401
  mastered_btn = gr.Button("🏆 Test Mastered Words API")
 
 
 
1402
 
1403
  api_output = gr.JSON(label="API Response")
1404
 
1405
- # Configure API functions as Gradio interfaces (these create the actual API endpoints)
1406
  login_interface = gr.Interface(
1407
  fn=api_login,
1408
  inputs=gr.Textbox(label="User ID"),
@@ -1475,6 +1837,44 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1475
  allow_flagging="never"
1476
  )
1477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1478
  # Connect buttons to test the APIs
1479
  login_btn.click(
1480
  fn=api_login,
@@ -1511,9 +1911,27 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1511
  inputs=[user_id_input, session_token_input],
1512
  outputs=api_output
1513
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1514
 
1515
  if __name__ == "__main__":
1516
  demo.launch(
1517
- show_api=True, # This enables the /api/predict endpoints
1518
  share=False
1519
  )
 
1
+ import os
2
  os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
3
  import glob
4
  import json
 
69
  grammar_learned INTEGER DEFAULT 0,
70
  questions_asked INTEGER DEFAULT 0
71
  )
72
+ ''')
73
 
74
  cursor.execute('''
75
+ CREATE TABLE IF NOT EXISTS word_progress (
76
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
77
+ user_id TEXT NOT NULL,
78
+ word TEXT NOT NULL,
79
+ definition TEXT NOT NULL,
80
+ category TEXT NOT NULL,
81
+ first_encountered TEXT NOT NULL,
82
+ last_reviewed TEXT NOT NULL,
83
+ encounter_count INTEGER DEFAULT 1,
84
+ mastery_level INTEGER DEFAULT 0,
85
+ correct_answers INTEGER DEFAULT 0,
86
+ total_questions INTEGER DEFAULT 0,
87
+ is_shown BOOLEAN DEFAULT 0,
88
+ is_mastered BOOLEAN DEFAULT 0,
89
+ UNIQUE(user_id, word, category)
90
+ )
91
  ''')
92
 
93
  cursor.execute('''
94
+ CREATE TABLE IF NOT EXISTS learning_analytics (
95
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
96
+ user_id TEXT NOT NULL,
97
+ date TEXT NOT NULL,
98
+ metric_name TEXT NOT NULL,
99
+ metric_value REAL NOT NULL
100
+ )
101
  ''')
102
 
103
  cursor.execute('''
104
+ CREATE TABLE IF NOT EXISTS user_sessions (
105
+ user_id TEXT NOT NULL,
106
+ session_token TEXT NOT NULL,
107
+ created_at TEXT NOT NULL,
108
+ last_activity TEXT NOT NULL,
109
+ is_active BOOLEAN DEFAULT 1,
110
+ PRIMARY KEY (user_id, session_token)
111
+ )
112
  ''')
113
 
114
  conn.commit()
 
200
  conn.close()
201
 
202
  def track_word_encounter(self, user_id: str, word: str, definition: str, category: str):
 
203
  conn = sqlite3.connect(self.db_path)
204
  cursor = conn.cursor()
205
 
206
+ normalized_word = word.lower()
207
+
208
  cursor.execute('''
209
+ SELECT word, encounter_count FROM word_progress
210
  WHERE user_id = ? AND word = ? AND category = ?
211
+ ''', (user_id, normalized_word, category))
212
 
213
  existing = cursor.fetchone()
214
  now = datetime.now().isoformat()
215
 
216
  if existing:
217
+ original_word, encounter_count = existing
218
  cursor.execute('''
219
  UPDATE word_progress
220
+ SET last_reviewed = ?,
221
+ encounter_count = encounter_count + 1,
222
+ definition = ?,
223
+ is_shown = 1,
224
+ is_mastered = CASE WHEN encounter_count + 1 >= 5 THEN 1 ELSE 0 END
225
  WHERE user_id = ? AND word = ? AND category = ?
226
+ ''', (now, definition, user_id, original_word, category))
227
+ encounter_count += 1
228
  else:
229
+ cursor.execute('''
230
+ INSERT OR IGNORE INTO word_progress
231
+ (user_id, word, definition, category, first_encountered, last_reviewed, is_shown)
232
+ VALUES (?, ?, ?, ?, ?, ?, ?)
233
+ ''', (user_id, word, definition, category, now, now, 1))
234
+ encounter_count = 1
235
 
236
+ mastery_level = min(5, encounter_count)
237
  cursor.execute('''
238
+ UPDATE word_progress
239
+ SET mastery_level = ?
240
  WHERE user_id = ? AND word = ? AND category = ?
241
+ ''', (mastery_level, user_id, normalized_word, category))
 
 
 
 
 
 
 
 
242
 
243
  conn.commit()
244
  conn.close()
245
 
246
  def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
247
+ """Update mastery level based on user performance for mastered terms"""
248
  conn = sqlite3.connect(self.db_path)
249
  cursor = conn.cursor()
250
 
251
  cursor.execute('''
252
+ SELECT mastery_level, correct_answers, total_questions, encounter_count
253
  FROM word_progress
254
  WHERE user_id = ? AND word = ? AND category = ?
255
  ''', (user_id, word, category))
256
 
257
  result = cursor.fetchone()
258
  if result:
259
+ current_mastery, correct_answers, total_questions, encounter_count = result
260
  new_correct = correct_answers + (1 if correct else 0)
261
  new_total = total_questions + 1
262
 
263
+ if encounter_count >= 5:
264
+
265
+ new_mastery = min(5, (encounter_count - 5) * 0.5)
266
+ else:
267
+ new_mastery = min(5, encounter_count)
268
 
269
  cursor.execute('''
270
  UPDATE word_progress
 
327
  cursor = conn.cursor()
328
 
329
  cursor.execute('''
330
+ SELECT word, definition, category, mastery_level, last_reviewed, encounter_count
331
  FROM word_progress
332
+ WHERE user_id = ? AND is_shown = 1 AND is_mastered = 0
 
 
 
333
  ORDER BY mastery_level ASC, last_reviewed ASC
334
  LIMIT ?
335
  ''', (user_id, limit))
336
 
337
  words = []
338
+ for word, definition, category, mastery, last_reviewed, encounter_count in cursor.fetchall():
339
  words.append({
340
  'word': word,
341
  'definition': definition,
342
  'category': category,
343
  'mastery_level': mastery,
344
+ 'last_reviewed': last_reviewed,
345
+ 'encounter_count': encounter_count
346
  })
347
 
348
  conn.close()
349
  return words
350
 
351
+ def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
352
+ """Get words with is_mastered = 1, with pagination"""
353
  conn = sqlite3.connect(self.db_path)
354
  cursor = conn.cursor()
355
 
356
+ offset = (page - 1) * page_size
357
  cursor.execute('''
358
  SELECT word, definition, category, mastery_level, encounter_count
359
  FROM word_progress
360
+ WHERE user_id = ? AND is_mastered = 1
361
  ORDER BY mastery_level DESC, encounter_count DESC
362
+ LIMIT ? OFFSET ?
363
+ ''', (user_id, page_size, offset))
364
 
365
  words = []
366
  for word, definition, category, mastery, encounter_count in cursor.fetchall():
 
394
  recommendations.append("You haven't practiced recently - consistency is key to language learning!")
395
 
396
  return recommendations
397
+
398
+ def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
399
+ """Get all words and idioms in learning phase, with pagination"""
400
+ conn = sqlite3.connect(self.db_path)
401
+ cursor = conn.cursor()
402
+
403
+ offset = (page - 1) * page_size
404
+ cursor.execute('''
405
+ SELECT word, definition, category, mastery_level, encounter_count
406
+ FROM word_progress
407
+ WHERE user_id = ? AND is_shown = 1 AND is_mastered = 0
408
+ ORDER BY last_reviewed DESC
409
+ LIMIT ? OFFSET ?
410
+ ''', (user_id, page_size, offset))
411
+
412
+ words = []
413
+ for word, definition, category, mastery, encounter_count in cursor.fetchall():
414
+ words.append({
415
+ 'word': word,
416
+ 'definition': definition,
417
+ 'category': category,
418
+ 'mastery_level': mastery,
419
+ 'encounter_count': encounter_count
420
+ })
421
+
422
+ conn.close()
423
+ return words
424
 
425
  class PersonalizedKazakhAssistant:
426
  def __init__(self):
 
434
 
435
  def setup_environment(self):
436
  """Setup environment and configuration"""
437
+ # self.google_api_key = os.getenv("GOOGLE_API_KEY")
438
+ load_dotenv()
439
+ os.environ['GOOGLE_API_KEY'] = os.getenv("GOOGLE_API_KEY")
440
  self.MODEL = "gemini-1.5-flash"
441
  self.db_name = "vector_db"
442
 
 
460
  documents.append(doc)
461
 
462
  self.known_terms.clear()
 
463
  for doc in documents:
464
  doc_type = doc.metadata.get('doc_type', '').lower()
465
  lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
 
468
  if line and " - " in line:
469
  term = line.split(" - ")[0].strip().lower()
470
 
471
+ if term:
 
 
 
 
472
  self.known_terms.add(term)
473
 
474
  print(f"Loaded {len(self.known_terms)} known terms: {list(self.known_terms)[:10]}")
 
521
  return ' '.join(term.lower().strip().split())
522
 
523
  def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
524
+ """Extract meaningful Kazakh terms, prioritizing response terms and full idioms."""
525
  terms = []
526
+ seen_terms = set()
527
 
528
  try:
529
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
 
532
  message_normalized = self.normalize_term(message)
533
 
534
  is_multi_term_query = any(keyword in message_normalized for keyword in ['мысал', 'тіркестер', 'пример'])
535
+ is_definition_query = any(keyword in message_normalized for keyword in ['деген не', 'мағынасы', 'қалай аталады'])
536
+
537
+ # Step 1: For definition queries, prioritize response's primary term
538
+ if is_definition_query and not is_multi_term_query:
539
+ # Check if response is a single word
540
+ response_words = response_normalized.split()
541
+ if len(response_words) == 1:
542
+ term = response.strip()
543
+ normalized_term = self.normalize_term(term)
544
+ if normalized_term in self.known_terms and normalized_term not in seen_terms and len(normalized_term) > 2 and len(normalized_term) <= 100:
545
+ category = "word"
546
+ definition = ""
547
+ for doc in retrieved_docs:
548
+ if normalized_term in self.normalize_term(doc.page_content):
549
+ doc_type = doc.metadata.get('doc_type', '').lower()
550
+ if 'idiom' in doc_type or 'тіркес' in doc_type:
551
+ category = "idiom"
552
+ elif 'grammar' in doc_type:
553
+ category = "grammar"
554
+ else:
555
+ category = "word"
556
+ definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
557
+ break
558
+ if not definition:
559
+ definition = self.extract_clean_definition(normalized_term, "", response)
560
+ if definition:
561
+ terms.append((term, category, definition))
562
+ seen_terms.add(normalized_term)
563
+ print(f"Added single response term: {term}, category: {category}, definition: {definition}")
564
+ return terms
565
+
566
+ # Look for quoted term in response (e.g., "басыр" in "Берілген мәтін бойынша, 'басыр' - көз ауруы")
567
+ quoted_pattern = r'[\'\"]([А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:[\s-][А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*)[\'\"]'
568
+ quoted_matches = re.findall(quoted_pattern, response)
569
+ if quoted_matches:
570
+ term = quoted_matches[0]
571
+ normalized_term = self.normalize_term(term)
572
+ if normalized_term in self.known_terms and normalized_term not in seen_terms and len(normalized_term) > 2 and len(normalized_term) <= 100:
573
+ category = "word"
574
+ definition = ""
575
+ for doc in retrieved_docs:
576
+ if normalized_term in self.normalize_term(doc.page_content):
577
+ doc_type = doc.metadata.get('doc_type', '').lower()
578
+ if 'idiom' in doc_type or 'тіркес' in doc_type:
579
+ category = "idiom"
580
+ elif 'grammar' in doc_type:
581
+ category = "grammar"
582
+ else:
583
+ category = "word"
584
+ definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
585
+ break
586
+ if not definition:
587
+ definition = self.extract_clean_definition(normalized_term, "", response)
588
+ if definition:
589
+ terms.append((term, category, definition))
590
+ seen_terms.add(normalized_term)
591
+ print(f"Added quoted term: {term}, category: {category}, definition: {definition}")
592
+ return terms
593
+
594
+ # Look for term before hyphen (e.g., "басыр — көз ауруы")
595
+ hyphen_pattern = r'^([А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:[\s-][А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*)\s*[-–—]\s*(.+)$'
596
+ hyphen_matches = re.match(hyphen_pattern, response.strip(), re.MULTILINE)
597
+ if hyphen_matches:
598
+ term = hyphen_matches.group(1).strip()
599
+ definition_part = hyphen_matches.group(2).strip()
600
+ normalized_term = self.normalize_term(term)
601
+ if normalized_term in self.known_terms and normalized_term not in seen_terms and len(normalized_term) > 2 and len(normalized_term) <= 100:
602
+ category = "word"
603
+ definition = definition_part
604
+ for doc in retrieved_docs:
605
+ if normalized_term in self.normalize_term(doc.page_content):
606
+ doc_type = doc.metadata.get('doc_type', '').lower()
607
+ if 'idiom' in doc_type or 'тіркес' in doc_type:
608
+ category = "idiom"
609
+ elif 'grammar' in doc_type:
610
+ category = "grammar"
611
+ else:
612
+ category = "word"
613
+ definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
614
+ break
615
+ if not definition:
616
+ definition = definition_part
617
+ if definition:
618
+ terms.append((term, category, definition))
619
+ seen_terms.add(normalized_term)
620
+ print(f"Added hyphen term: {term}, category: {category}, definition: {definition}")
621
+ return terms
622
+
623
+ # Check query term, but only if it’s the primary term in the response
624
+ query_words = message_normalized.split()
625
+ for word in query_words:
626
+ normalized_word = self.normalize_term(word)
627
+ if normalized_word in self.known_terms and normalized_word not in seen_terms:
628
+ # Ensure the query term is the primary term in the response
629
+ sentences = response.split('.')
630
+ for sentence in sentences:
631
+ sentence = sentence.strip()
632
+ if not sentence:
633
+ continue
634
+ if normalized_word in self.normalize_term(sentence):
635
+ category = "word"
636
+ definition = ""
637
+ for doc in retrieved_docs:
638
+ if normalized_word in self.normalize_term(doc.page_content):
639
+ doc_type = doc.metadata.get('doc_type', '').lower()
640
+ if 'idiom' in doc_type or 'тіркес' in doc_type:
641
+ category = "idiom"
642
+ elif 'grammar' in doc_type:
643
+ category = "grammar"
644
+ else:
645
+ category = "word"
646
+ definition = self.extract_clean_definition(normalized_word, doc.page_content, response)
647
+ break
648
+ if not definition:
649
+ definition = self.extract_clean_definition(normalized_word, "", response)
650
+ if definition:
651
+ terms.append((word, category, definition))
652
+ seen_terms.add(normalized_word)
653
+ print(f"Added query term: {word}, category: {category}, definition: {definition}")
654
+ return terms
655
+
656
+ # Fallback to primary term in response (e.g., "абыз" in "Ел атасы данагөйді абыз деп атайды")
657
+ sentences = response.split('.')
658
+ for sentence in sentences:
659
+ sentence = sentence.strip()
660
+ if not sentence:
661
+ continue
662
+ kazakh_phrases = re.findall(
663
+ r'[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:[\s-][А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+){0,2}',
664
+ sentence
665
+ )
666
+ for phrase in kazakh_phrases:
667
+ normalized_phrase = self.normalize_term(phrase)
668
+ if normalized_phrase in seen_terms or len(normalized_phrase) <= 2 or len(normalized_phrase) > 100:
669
+ print(f"Skipped phrase {normalized_phrase}: Invalid length or already seen")
670
+ continue
671
+ if normalized_phrase in self.known_terms and any(
672
+ normalized_phrase in self.normalize_term(doc.page_content) for doc in retrieved_docs
673
+ ):
674
+ category = "word"
675
+ definition = ""
676
+ for doc in retrieved_docs:
677
+ if normalized_phrase in self.normalize_term(doc.page_content):
678
+ doc_type = doc.metadata.get('doc_type', '').lower()
679
+ if 'idiom' in doc_type or 'тіркес' in doc_type:
680
+ category = "idiom"
681
+ elif 'grammar' in doc_type:
682
+ category = "grammar"
683
+ else:
684
+ category = "word"
685
+ definition = self.extract_clean_definition(normalized_phrase, doc.page_content, response)
686
+ break
687
+ if not definition:
688
+ definition = self.extract_clean_definition(normalized_phrase, "", response)
689
+ if definition:
690
+ terms.append((phrase, category, definition))
691
+ seen_terms.add(normalized_phrase)
692
+ print(f"Added phrase: {phrase}, category: {category}, definition: {definition}")
693
+ return terms
694
+
695
+ # Step 2: For multi-term queries, prioritize full idioms from response
696
+ if is_multi_term_query:
697
+ kazakh_phrases = re.findall(
698
+ r'[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:[\s,-]+[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*',
699
+ response
700
+ )
701
+ for phrase in kazakh_phrases:
702
+ normalized_phrase = self.normalize_term(phrase)
703
+ if normalized_phrase in seen_terms or len(normalized_phrase) <= 2 or len(normalized_phrase) > 100:
704
+ print(f"Skipped phrase {normalized_phrase}: Invalid length or already seen")
705
+ continue
706
+ if normalized_phrase in self.known_terms or any(
707
+ normalized_phrase in self.normalize_term(doc.page_content) for doc in retrieved_docs
708
+ ):
709
+ category = "word"
710
+ definition = ""
711
+ for doc in retrieved_docs:
712
+ if normalized_phrase in self.normalize_term(doc.page_content):
713
+ doc_type = doc.metadata.get('doc_type', '').lower()
714
+ if 'idiom' in doc_type or 'тіркес' in doc_type:
715
+ category = "idiom"
716
+ elif 'grammar' in doc_type:
717
+ category = "grammar"
718
+ else:
719
+ category = "word"
720
+ definition = self.extract_clean_definition(normalized_phrase, doc.page_content, response)
721
+ break
722
+ if not definition:
723
+ definition = self.extract_clean_definition(normalized_phrase, "", response)
724
+ if definition and len(normalized_phrase.split()) <= 6:
725
+ terms.append((phrase, category, definition))
726
+ seen_terms.add(normalized_phrase)
727
+ print(f"Added phrase: {phrase}, category: {category}, definition: {definition}")
728
+ return terms
729
+
730
  for known_term in self.known_terms:
731
  normalized_known_term = self.normalize_term(known_term)
732
  if normalized_known_term in response_normalized and normalized_known_term not in seen_terms:
733
+
734
+ is_part_of_idiom = any(
735
+ normalized_known_term in self.normalize_term(idiom) and len(idiom.split()) > 1
736
+ for idiom in self.known_terms
737
+ if idiom != normalized_known_term
738
+ )
739
+ if is_part_of_idiom:
740
+ print(f"Skipped term {known_term}: Part of a larger idiom")
741
  continue
742
+ if normalized_known_term in self.known_terms and any(
 
743
  normalized_known_term in self.normalize_term(doc.page_content) for doc in retrieved_docs
744
  ):
745
+ category = "word"
746
  definition = ""
 
747
  for doc in retrieved_docs:
748
  if normalized_known_term in self.normalize_term(doc.page_content):
749
  doc_type = doc.metadata.get('doc_type', '').lower()
 
755
  category = "word"
756
  definition = self.extract_clean_definition(normalized_known_term, doc.page_content, response)
757
  break
758
+ if not definition:
759
+ definition = self.extract_clean_definition(normalized_known_term, "", response)
760
  if definition and len(normalized_known_term.split()) <= 10:
761
  terms.append((known_term, category, definition))
762
  seen_terms.add(normalized_known_term)
763
+ print(f"Added known term: {known_term}, category: {category}, definition: {definition}")
764
+ if not is_multi_term_query:
 
765
  return terms
766
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  except Exception as e:
768
  print(f"Error extracting terms: {e}")
769
+
770
  return terms
771
 
772
def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
    """Look up a clean definition for *term* in the knowledge base.

    Searches the vector store for documents mentioning the term and scans
    each line for the corpus' ``term - definition`` format, returning the
    first definition whose left side normalizes to the requested term.
    Falls back to a generic placeholder when nothing matches.

    NOTE(review): ``doc_content`` and ``response`` are currently unused;
    the lookup always goes through the vector store — confirm intended.
    """
    wanted = self.normalize_term(term)

    for doc in self.vectorstore.similarity_search(term, k=5):
        # Normalize line endings before splitting into candidate entries.
        text = doc.page_content.replace('\r\n', '\n').replace('\r', '\n')
        for raw_line in text.split('\n'):
            entry = raw_line.strip()
            if not entry or " - " not in entry:
                continue
            left, right = entry.split(" - ", 1)
            if self.normalize_term(left.strip()) == wanted:
                return right.strip()

    return f"Definition for {term}"
788
 
 
826
  return self.get_review_words(user_id)
827
  elif message.lower().startswith('/mastered'):
828
  return self.get_mastered_words(user_id)
829
+ elif message.lower().startswith('/learning'):
830
+ return self.get_learning_words(user_id)
831
+ elif message.lower().startswith('/newword'):
832
+ new_word = self.get_new_word(user_id)
833
+ if not new_word:
834
+ return "Қазір жаңа сөздер жоқ. Басқа сөздерді қайталаңыз! 🌟\n\nNo new words available right now. Review other words! 🌟"
835
+ self.tracker.track_word_encounter(user_id, new_word['word'], new_word['definition'], new_word['category'])
836
+ return f"📝 **Жаңа сөз / New Word**: {new_word['word']}\n\nМағынасы / Meaning: {new_word['definition']}"
837
+ elif message.lower().startswith('/newidiom'):
838
+ new_idiom = self.get_new_idiom(user_id)
839
+ if not new_idiom:
840
+ return "Қазір жаңа тіркестер жоқ. Басқа тіркестерді қайталаңыз! 🌟\n\nNo new idioms available right now. Review other idioms! 🌟"
841
+ self.tracker.track_word_encounter(user_id, new_idiom['word'], new_idiom['definition'], new_idiom['category'])
842
+ return f"🎭 **Жаңа тіркес / New Idiom**: {new_idiom['word']}\n\nМағынасы / Meaning: {new_idiom['definition']}"
843
  elif message.lower().startswith('/help'):
844
  return self.get_help_message()
845
 
 
916
  response = "📚 **Қайталауға арналған сөздер / Words to Review**:\n\n"
917
  for word_info in words_to_review:
918
  emoji = "📝" if word_info['category'] == "word" else "🎭"
919
+ mastery_stars = "⭐" * min(word_info['encounter_count'], 5) + "☆" * (5 - min(word_info['encounter_count'], 5))
920
+ response += f"{emoji} **{word_info['word']}** - {mastery_stars} (Кездесу саны / Encounters: {word_info['encounter_count']})\n"
921
 
922
  definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
923
  response += f" {definition_preview}\n\n"
924
 
925
  return response
926
 
927
def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
    """Build a bilingual (Kazakh/English) report of the user's mastered words.

    Args:
        user_id: Owner of the progress records.
        page: 1-based page number, forwarded to the tracker.
        page_size: Items per page, forwarded to the tracker.

    Returns:
        Formatted text listing each mastered word with a 10-slot progress
        bar, encounter count and a definition preview, or a friendly
        "nothing mastered yet" message when the list is empty.
    """
    mastered_words = self.tracker.get_mastered_words(user_id, page, page_size)

    if not mastered_words:
        return "Сізде әзірге меңгерілген сөздер жоқ. Терминдерді қайталауды жалғастырыңыз, сонда олар осында пайда болады! 🌟\n\nYou haven't mastered any words yet. Keep reviewing terms, and they'll appear here! 🌟"

    response = f"🏆 **Меңгерілген сөздер / Mastered Words** (Бет / Page: {page}):\n\n"
    for word_info in mastered_words:
        emoji = "📝" if word_info['category'] == "word" else "🎭"
        # Clamp to the 10-slot bar: mastery_level > 5 previously produced
        # more than 10 filled stars and a negative empty-slot count.
        filled = min(max(int(word_info['mastery_level'] * 2), 0), 10)
        mastery_stars = "🟊" * filled + "⬜" * (10 - filled)
        response += f"{emoji} **{word_info['word']}** - {mastery_stars} (Кездесу саны / Encounters: {word_info['encounter_count']})\n"

        definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
        response += f" {definition_preview}\n\n"

    return response
944
+
945
+ def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
946
+ """Get all words and idioms in learning phase for specific user"""
947
+ learning_words = self.tracker.get_learning_words(user_id, page, page_size)
948
+
949
+ if not learning_words:
950
+ return "Сізде қазір үйрену кезеңінде сөздер жоқ. Жаңа сөздерді немесе тіркестерді сұраңыз! 🌟\n\nYou don't have any words in the learning phase right now. Ask about new words or idioms! 🌟"
951
+
952
+ response = f"📖 **Үйрену кезеңіндегі сөздер / Words in Learning** (Бет / Page: {page}):\n\n"
953
+ for word_info in learning_words:
954
+ emoji = "📝" if word_info['category'] == "word" else "🎭"
955
+
956
+ mastery_stars = "⭐" * min(word_info['encounter_count'], 5) + "☆" * (5 - min(word_info['encounter_count'], 5))
957
  response += f"{emoji} **{word_info['word']}** - {mastery_stars} (Кездесу саны / Encounters: {word_info['encounter_count']})\n"
958
 
959
  definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
 
961
 
962
  return response
963
 
964
def get_new_word(self, user_id: str) -> Optional[Dict]:
    """Return a single-token word the user has not been shown yet.

    Fix: previously-shown words were fetched with SQL ``LOWER(word)``, but
    SQLite's built-in LOWER() only folds ASCII characters, so Kazakh
    (Cyrillic) words never matched the ``normalize_term`` output and could
    be re-served repeatedly. Normalization is now done in Python.

    Returns:
        A ``{'word', 'definition', 'category'}`` dict, or None when every
        known single-word term has already been shown (or has no
        definition line in the knowledge base).
    """
    conn = sqlite3.connect(self.tracker.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT word FROM word_progress
            WHERE user_id = ? AND category = 'word' AND is_shown = 1
        ''', (user_id,))
        shown_words = {self.normalize_term(row[0]) for row in cursor.fetchall()}
    finally:
        conn.close()

    for term in sorted(self.known_terms):
        normalized_term = self.normalize_term(term)
        # Only single-word vocabulary here; idioms are served separately.
        if normalized_term in shown_words or len(term.split()) != 1:
            continue
        for doc in self.vectorstore.similarity_search(term, k=5):
            lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
            for line in lines:
                line = line.strip()
                if line and " - " in line:
                    doc_term, doc_definition = [part.strip() for part in line.split(" - ", 1)]
                    if self.normalize_term(doc_term) == normalized_term:
                        return {
                            'word': doc_term,
                            'definition': doc_definition,
                            'category': 'word'
                        }

    return None
994
+
995
def get_new_idiom(self, user_id: str) -> Optional[Dict]:
    """Return a multi-word idiom the user has not been shown yet.

    Fix: previously-shown idioms were fetched with SQL ``LOWER(word)``, but
    SQLite's built-in LOWER() only folds ASCII characters, so Kazakh
    (Cyrillic) idioms never matched the ``normalize_term`` output and
    could be re-served repeatedly. Normalization is now done in Python.

    Returns:
        A ``{'word', 'definition', 'category'}`` dict, or None when every
        known multi-word term has already been shown (or has no
        definition line in the knowledge base).
    """
    conn = sqlite3.connect(self.tracker.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT word FROM word_progress
            WHERE user_id = ? AND category = 'idiom' AND is_shown = 1
        ''', (user_id,))
        shown_idioms = {self.normalize_term(row[0]) for row in cursor.fetchall()}
    finally:
        conn.close()

    for term in sorted(self.known_terms):
        normalized_term = self.normalize_term(term)
        # Idioms are multi-word; single words are served by get_new_word.
        if normalized_term in shown_idioms or len(term.split()) <= 1:
            continue
        for doc in self.vectorstore.similarity_search(term, k=5):
            lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
            for line in lines:
                line = line.strip()
                if line and " - " in line:
                    doc_term, doc_definition = [part.strip() for part in line.split(" - ", 1)]
                    if self.normalize_term(doc_term) == normalized_term:
                        return {
                            'word': doc_term,
                            'definition': doc_definition,
                            'category': 'idiom'
                        }

    return None
1025
+
1026
  def get_help_message(self) -> str:
1027
  """Get help message with available commands"""
1028
  return """
 
1099
  def chat_interface(message, history, use_direct_gemini, target_language):
1100
  """Chat interface for Gradio with toggle for direct Gemini mode"""
1101
  try:
1102
+ web_user_id = "web_user_default"
1103
  response = assistant.process_message(message, web_user_id, use_direct_gemini=use_direct_gemini, target_language=target_language)
1104
  return response
1105
  except Exception as e:
 
1221
  "error": str(e)
1222
  }
1223
 
1224
def api_new_word(user_id: str, session_token: Optional[str] = None) -> dict:
    """API endpoint: fetch a not-yet-shown word and record the encounter.

    Args:
        user_id: Target user.
        session_token: Optional session token; when provided it is
            validated and an invalid one rejects the request.
            NOTE(review): omitting the token skips auth entirely —
            confirm this is intended.

    Returns:
        dict with ``success`` plus either the word payload or an ``error``.
    """
    try:
        if session_token and not assistant.tracker.validate_session(user_id, session_token):
            return {"success": False, "error": "Invalid session"}

        new_word = assistant.get_new_word(user_id)
        if not new_word:
            return {
                "success": False,
                "error": "No new words available",
                "user_id": user_id
            }

        # Mark the word as encountered so it is not served again.
        assistant.tracker.track_word_encounter(
            user_id,
            new_word['word'],
            new_word['definition'],
            new_word['category']
        )

        return {
            "success": True,
            "word": new_word['word'],
            "definition": new_word['definition'],
            "category": new_word['category'],
            "user_id": user_id
        }
    except Exception as e:
        # Surface the failure to the API client rather than raising.
        return {
            "success": False,
            "error": str(e),
            "user_id": user_id
        }
1258
+
1259
def api_new_idiom(user_id: str, session_token: Optional[str] = None) -> dict:
    """API endpoint: fetch a not-yet-shown idiom and record the encounter.

    Args:
        user_id: Target user.
        session_token: Optional session token; when provided it is
            validated and an invalid one rejects the request.
            NOTE(review): omitting the token skips auth entirely —
            confirm this is intended.

    Returns:
        dict with ``success`` plus either the idiom payload or an ``error``.
    """
    try:
        if session_token and not assistant.tracker.validate_session(user_id, session_token):
            return {"success": False, "error": "Invalid session"}

        new_idiom = assistant.get_new_idiom(user_id)
        if not new_idiom:
            return {
                "success": False,
                "error": "No new idioms available",
                "user_id": user_id
            }

        # Mark the idiom as encountered so it is not served again.
        assistant.tracker.track_word_encounter(
            user_id,
            new_idiom['word'],
            new_idiom['definition'],
            new_idiom['category']
        )

        return {
            "success": True,
            "word": new_idiom['word'],
            "definition": new_idiom['definition'],
            "category": new_idiom['category'],
            "user_id": user_id
        }
    except Exception as e:
        # Surface the failure to the API client rather than raising.
        return {
            "success": False,
            "error": str(e),
            "user_id": user_id
        }
1293
+
1294
def api_learning_words(user_id: str, session_token: Optional[str] = None, page: int = 1, page_size: int = 10) -> dict:
    """API endpoint: paginated list of words currently in the learning phase.

    Args:
        user_id: Target user.
        session_token: Optional session token; when provided it is
            validated and an invalid one rejects the request.
        page: 1-based page number.
        page_size: Items per page.

    Returns:
        dict with ``success``, the formatted ``learning_text``, the raw
        ``learning_data`` records, and the pagination echo, or an ``error``.
    """
    try:
        if session_token and not assistant.tracker.validate_session(user_id, session_token):
            return {"success": False, "error": "Invalid session"}

        # Gradio Number inputs may arrive as floats; normalize defensively.
        page = max(1, int(page))
        page_size = max(1, int(page_size))

        learning_text = assistant.get_learning_words(user_id, page, page_size)
        learning_data = assistant.tracker.get_learning_words(user_id, page, page_size)

        return {
            "success": True,
            "learning_text": learning_text,
            "learning_data": learning_data,
            "user_id": user_id,
            "page": page,
            "page_size": page_size
        }
    except Exception as e:
        # Surface the failure to the API client rather than raising.
        return {
            "success": False,
            "error": str(e),
            "user_id": user_id
        }
1317
+
1318
  with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1319
  gr.Markdown("# 🇰🇿 Personalized Kazakh Learning Assistant")
1320
  gr.Markdown("### Multi-User Chat Interface + API Endpoints for Mobile Integration")
 
1737
  - **Recommendations:** `/api/predict` with `fn_index=3`
1738
  - **Review Words:** `/api/predict` with `fn_index=4`
1739
  - **Mastered Words:** `/api/predict` with `fn_index=5`
1740
+ - **New Word:** `/api/predict` with `fn_index=6`
1741
+ - **New Idiom:** `/api/predict` with `fn_index=7`
1742
+ - **Learning Words:** `/api/predict` with `fn_index=8`
1743
  """)
1744
 
1745
  with gr.Row():
 
1749
  message_input = gr.Textbox(label="Message", placeholder="Enter your message in Kazakh or English")
1750
  use_direct_gemini_api = gr.Checkbox(label="Direct Gemini Mode (No RAG/Tracking)", value=False)
1751
  target_language_api = gr.Dropdown(label="Explanation Language", choices=["English", "Kazakh", "Russian"], value="English")
1752
+ page_input = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
1753
+ page_size_input = gr.Number(label="Page Size", value=10, minimum=1, precision=0)
1754
 
1755
  with gr.Row():
1756
  login_btn = gr.Button("🔑 Test Login API")
 
1759
  recommendations_btn = gr.Button("💡 Test Recommendations API")
1760
  review_btn = gr.Button("📚 Test Review Words API")
1761
  mastered_btn = gr.Button("🏆 Test Mastered Words API")
1762
+ new_word_btn = gr.Button("📝 Test New Word API")
1763
+ new_idiom_btn = gr.Button("🎭 Test New Idiom API")
1764
+ learning_btn = gr.Button("📖 Test Learning Words API")
1765
 
1766
  api_output = gr.JSON(label="API Response")
1767
 
 
1768
  login_interface = gr.Interface(
1769
  fn=api_login,
1770
  inputs=gr.Textbox(label="User ID"),
 
1837
  allow_flagging="never"
1838
  )
1839
 
1840
+ new_word_interface = gr.Interface(
1841
+ fn=api_new_word,
1842
+ inputs=[
1843
+ gr.Textbox(label="User ID"),
1844
+ gr.Textbox(label="Session Token")
1845
+ ],
1846
+ outputs=gr.JSON(label="Response"),
1847
+ title="New Word API",
1848
+ description="New word endpoint",
1849
+ allow_flagging="never"
1850
+ )
1851
+
1852
+ new_idiom_interface = gr.Interface(
1853
+ fn=api_new_idiom,
1854
+ inputs=[
1855
+ gr.Textbox(label="User ID"),
1856
+ gr.Textbox(label="Session Token")
1857
+ ],
1858
+ outputs=gr.JSON(label="Response"),
1859
+ title="New Idiom API",
1860
+ description="New idiom endpoint",
1861
+ allow_flagging="never"
1862
+ )
1863
+
1864
+ learning_interface = gr.Interface(
1865
+ fn=api_learning_words,
1866
+ inputs=[
1867
+ gr.Textbox(label="User ID"),
1868
+ gr.Textbox(label="Session Token"),
1869
+ gr.Number(label="Page Number"),
1870
+ gr.Number(label="Page Size")
1871
+ ],
1872
+ outputs=gr.JSON(label="Response"),
1873
+ title="Learning Words API",
1874
+ description="Learning words endpoint",
1875
+ allow_flagging="never"
1876
+ )
1877
+
1878
  # Connect buttons to test the APIs
1879
  login_btn.click(
1880
  fn=api_login,
 
1911
  inputs=[user_id_input, session_token_input],
1912
  outputs=api_output
1913
  )
1914
+
1915
+ new_word_btn.click(
1916
+ fn=api_new_word,
1917
+ inputs=[user_id_input, session_token_input],
1918
+ outputs=api_output
1919
+ )
1920
+
1921
+ new_idiom_btn.click(
1922
+ fn=api_new_idiom,
1923
+ inputs=[user_id_input, session_token_input],
1924
+ outputs=api_output
1925
+ )
1926
+
1927
+ learning_btn.click(
1928
+ fn=api_learning_words,
1929
+ inputs=[user_id_input, session_token_input, page_input, page_size_input],
1930
+ outputs=api_output
1931
+ )
1932
 
1933
if __name__ == "__main__":
    # show_api=True exposes the auto-generated API docs so mobile clients
    # can introspect the endpoints; keep sharing disabled for deployment.
    demo.launch(show_api=True, share=False)