GuestUser33 committed on
Commit
0653775
·
verified ·
1 Parent(s): 24ea7f3

Slight update

Browse files
Files changed (1) hide show
  1. app.py +30 -856
app.py CHANGED
@@ -9,7 +9,6 @@ from dataclasses import dataclass, asdict
9
  from collections import defaultdict
10
  import re
11
  import uuid
12
- import hashlib
13
  import google.generativeai as genai
14
 
15
  from dotenv import load_dotenv
@@ -53,7 +52,6 @@ class PersonalizedLearningTracker:
53
  self.init_database()
54
 
55
  def init_database(self):
56
- """Initialize SQLite database for tracking learning progress"""
57
  conn = sqlite3.connect(self.db_path)
58
  cursor = conn.cursor()
59
 
@@ -114,7 +112,6 @@ class PersonalizedLearningTracker:
114
  conn.close()
115
 
116
  def create_user_session(self, user_id: str) -> str:
117
- """Create a new session token for a user"""
118
  session_token = str(uuid.uuid4())
119
  now = datetime.now().isoformat()
120
 
@@ -138,7 +135,6 @@ class PersonalizedLearningTracker:
138
  return session_token
139
 
140
  def validate_session(self, user_id: str, session_token: str) -> bool:
141
- """Validate if a session is active and belongs to the user"""
142
  conn = sqlite3.connect(self.db_path)
143
  cursor = conn.cursor()
144
 
@@ -153,7 +149,6 @@ class PersonalizedLearningTracker:
153
  return result is not None and result[0] == 1
154
 
155
  def update_session_activity(self, user_id: str, session_token: str):
156
- """Update last activity time for a session"""
157
  conn = sqlite3.connect(self.db_path)
158
  cursor = conn.cursor()
159
 
@@ -167,7 +162,6 @@ class PersonalizedLearningTracker:
167
  conn.close()
168
 
169
  def start_session(self, user_id: str) -> str:
170
- """Start a new learning session"""
171
  session_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
172
  session = LearningSession(
173
  session_id=session_id,
@@ -187,7 +181,6 @@ class PersonalizedLearningTracker:
187
  return session_id
188
 
189
  def end_session(self, session_id: str):
190
- """End a learning session"""
191
  conn = sqlite3.connect(self.db_path)
192
  cursor = conn.cursor()
193
  cursor.execute('''
@@ -243,7 +236,6 @@ class PersonalizedLearningTracker:
243
  conn.close()
244
 
245
  def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
246
- """Update mastery level based on user performance for mastered terms"""
247
  conn = sqlite3.connect(self.db_path)
248
  cursor = conn.cursor()
249
 
@@ -275,7 +267,6 @@ class PersonalizedLearningTracker:
275
  conn.close()
276
 
277
  def get_user_progress(self, user_id: str) -> Dict:
278
- """Get comprehensive user progress statistics"""
279
  conn = sqlite3.connect(self.db_path)
280
  cursor = conn.cursor()
281
 
@@ -321,7 +312,6 @@ class PersonalizedLearningTracker:
321
  }
322
 
323
  def get_words_to_review(self, user_id: str, limit: int = 10) -> List[Dict]:
324
- """Get words that need review based on spaced repetition"""
325
  conn = sqlite3.connect(self.db_path)
326
  cursor = conn.cursor()
327
 
@@ -348,7 +338,6 @@ class PersonalizedLearningTracker:
348
  return words
349
 
350
  def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
351
- """Get words with is_mastered = 1, with pagination"""
352
  conn = sqlite3.connect(self.db_path)
353
  cursor = conn.cursor()
354
 
@@ -375,7 +364,6 @@ class PersonalizedLearningTracker:
375
  return words
376
 
377
  def get_learning_recommendations(self, user_id: str) -> List[str]:
378
- """Get personalized learning recommendations"""
379
  progress = self.get_user_progress(user_id)
380
  recommendations = []
381
 
@@ -395,7 +383,6 @@ class PersonalizedLearningTracker:
395
  return recommendations
396
 
397
  def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
398
- """Get all words and idioms in learning phase, with pagination"""
399
  conn = sqlite3.connect(self.db_path)
400
  cursor = conn.cursor()
401
 
@@ -432,14 +419,12 @@ class PersonalizedKazakhAssistant:
432
  self.user_memories = {}
433
 
434
  def setup_environment(self):
435
- """Setup environment and configuration"""
436
  load_dotenv()
437
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
438
  self.MODEL = "gemini-1.5-flash"
439
  self.db_name = "vector_db"
440
 
441
  def setup_vectorstore(self):
442
- """Setup document loading and vector store"""
443
  folders = glob.glob("knowledge-base/*")
444
  text_loader_kwargs = {'encoding': 'utf-8'}
445
  documents = []
@@ -485,7 +470,6 @@ class PersonalizedKazakhAssistant:
485
  print(f"Vectorstore created with {self.vectorstore._collection.count()} documents")
486
 
487
  def setup_llm(self, target_language: str = "English"):
488
- """Setup Gemini model with system prompt formatted with target language"""
489
  self.system_prompt = f"""
490
  You are a personalized Kazakh language learning assistant with access to a comprehensive knowledge base and user learning history. Your role is to help users learn Kazakh words and idioms while tracking their progress and providing personalized recommendations. Respond in {target_language}.
491
 
@@ -503,7 +487,7 @@ class PersonalizedKazakhAssistant:
503
  - Always identify the main Kazakh word/idiom for progress tracking.
504
  - **RAG Usage**:
505
  - Use Retrieval-Augmented Generation (RAG) only when the query explicitly asks for explanations of specific Kazakh terms or idioms (e.g., "What does сәлем mean?") or when the context strongly suggests a need for knowledge base information (e.g., queries about specific words or idioms).
506
- - When using RAG to explain terms (e.g., nouns, idioms), limit examples to 3-4 relevant ones. Do not list all or many examples or all matches from the knowledge base if not explicitly asked (only 3,4).
507
  - For general queries (e.g., greetings, procedural questions, or commands like /progress) or grammar-related queries (e.g., "explain me nouns"), rely on your general knowledge and do not use RAG unless the knowledge base contains relevant information.
508
  - Since the knowledge base contains only words and idioms, grammar explanations (e.g., about nouns, verbs) should be provided using your own knowledge, without relying on RAG, unless the query specifically involves terms in the knowledge base.
509
  - Be encouraging and supportive.
@@ -523,7 +507,6 @@ class PersonalizedKazakhAssistant:
523
  )
524
 
525
  def normalize_term(self, term: str) -> str:
526
- """Normalize term by converting to lowercase and removing extra spaces"""
527
  return ' '.join(term.lower().strip().split())
528
 
529
  def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
@@ -536,23 +519,20 @@ class PersonalizedKazakhAssistant:
536
  bold_matches = re.findall(bold_pattern, response)
537
 
538
  for term in bold_matches:
539
- normalized_term = self.normalize_term(term) # Normalize to lowercase
540
  if normalized_term in seen_terms or len(normalized_term) <= 2 or len(normalized_term) > 100:
541
  print(f"Skipped term {normalized_term}: Invalid length or already seen")
542
  continue
543
 
544
- # Initialize category and definition
545
- category = "word" # Default to word
546
  definition = ""
547
  term_matched = False
548
- original_term = term # Preserve original case for tracking
549
 
550
- # Check for exact match in known terms (case-insensitive)
551
  for known_term in self.known_terms:
552
  if normalized_term == self.normalize_term(known_term):
553
  term_matched = True
554
- original_term = known_term # Use the known term's original case
555
- # Determine category based on known term's source
556
  for doc in retrieved_docs:
557
  doc_type = doc.metadata.get('doc_type', '').lower()
558
  if normalized_term in self.normalize_term(doc.page_content):
@@ -564,23 +544,19 @@ class PersonalizedKazakhAssistant:
564
  category = "grammar"
565
  definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
566
  break
567
- # If no document match, check term length for idiom likelihood
568
  if not definition and len(known_term.split()) > 1:
569
  category = "idiom"
570
  definition = self.extract_clean_definition(normalized_term, "", response)
571
  break
572
 
573
- # If no exact match, try fuzzy matching for idioms with suffixes
574
  if not term_matched:
575
  for known_term in self.known_terms:
576
  normalized_known = self.normalize_term(known_term)
577
- # Check if the bolded term is a close match to a known term
578
- # Allow up to 4 extra characters (e.g., grammatical endings)
579
  if (normalized_term.startswith(normalized_known) and
580
  len(normalized_term) <= len(normalized_known) + 4):
581
  term_matched = True
582
- normalized_term = normalized_known # Use the base known term
583
- original_term = known_term # Use the original known term for tracking
584
  for doc in retrieved_docs:
585
  if normalized_known in self.normalize_term(doc.page_content):
586
  doc_type = doc.metadata.get('doc_type', '').lower()
@@ -592,13 +568,11 @@ class PersonalizedKazakhAssistant:
592
  category = "grammar"
593
  definition = self.extract_clean_definition(normalized_known, doc.page_content, response)
594
  break
595
- # If no document match, assume idiom for multi-word terms
596
  if not definition and len(known_term.split()) > 1:
597
  category = "idiom"
598
  definition = self.extract_clean_definition(normalized_known, "", response)
599
  break
600
-
601
- # Additional check: single-word terms from words folder should not be idioms
602
  if term_matched and len(original_term.split()) == 1 and any('words' in doc.metadata.get('doc_type', '').lower() for doc in retrieved_docs):
603
  category = "word"
604
 
@@ -617,10 +591,8 @@ class PersonalizedKazakhAssistant:
617
  return terms
618
 
619
  def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
620
- """Extract a clean definition for a term from the knowledge base."""
621
  normalized_term = self.normalize_term(term)
622
 
623
- # Search through retrieved documents for the term's definition
624
  retrieved_docs = self.vectorstore.similarity_search(term, k=5)
625
  for doc in retrieved_docs:
626
  lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
@@ -634,7 +606,6 @@ class PersonalizedKazakhAssistant:
634
  return f"Definition for {term}"
635
 
636
  def get_user_memory(self, user_id: str):
637
- """Get or create conversation memory for a specific user"""
638
  if user_id not in self.user_memories:
639
  self.user_memories[user_id] = ConversationBufferMemory(
640
  memory_key='chat_history',
@@ -644,7 +615,6 @@ class PersonalizedKazakhAssistant:
644
  return self.user_memories[user_id]
645
 
646
  def get_user_chain(self, user_id: str):
647
- """Get or create conversation chain for a specific user"""
648
  memory = self.get_user_memory(user_id)
649
  retriever = self.vectorstore.as_retriever()
650
  return ConversationalRetrievalChain.from_llm(
@@ -654,8 +624,6 @@ class PersonalizedKazakhAssistant:
654
  )
655
 
656
  def process_message(self, message: str, user_id: str = "default_user", session_token: str = None, target_language: str = "English") -> str:
657
- """Process user message with proper user session management"""
658
-
659
  if session_token and not self.tracker.validate_session(user_id, session_token):
660
  return f"Session expired. Please login again in {target_language}."
661
 
@@ -665,10 +633,8 @@ class PersonalizedKazakhAssistant:
665
  if user_id not in self.user_sessions:
666
  self.user_sessions[user_id] = self.tracker.start_session(user_id)
667
 
668
- # Set up LLM with the specified target language
669
  self.setup_llm(target_language)
670
 
671
- # Handle special commands
672
  if message.lower().startswith('/progress'):
673
  return self.get_progress_report(user_id)
674
  elif message.lower().startswith('/recommendations'):
@@ -694,11 +660,9 @@ class PersonalizedKazakhAssistant:
694
  elif message.lower().startswith('/help'):
695
  return self.get_help_message()
696
 
697
- # Retrieve relevant documents from vectorstore
698
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
699
  context = "\n".join([doc.page_content for doc in retrieved_docs])
700
 
701
- # Get conversation history
702
  memory = self.get_user_memory(user_id)
703
  chat_history = ""
704
  for msg in memory.chat_memory.messages[-10:]:
@@ -707,7 +671,6 @@ class PersonalizedKazakhAssistant:
707
  elif isinstance(msg, AIMessage):
708
  chat_history += f"Assistant: {msg.content}\n"
709
 
710
- # Retrieve user progress from SQLite database
711
  progress = self.tracker.get_user_progress(user_id)
712
  words_to_review = self.tracker.get_words_to_review(user_id, 5)
713
  mastered_words = self.tracker.get_mastered_words(user_id, page=1, page_size=5)
@@ -734,7 +697,6 @@ class PersonalizedKazakhAssistant:
734
  for word in mastered_words])
735
  )
736
 
737
- # Construct prompt with context, history, and progress
738
  full_prompt = f"""
739
  {self.system_prompt}
740
 
@@ -751,14 +713,11 @@ class PersonalizedKazakhAssistant:
751
  Respond in {target_language}. If explaining a Kazakh word or idiom retrieved from the context, **bold** the term (e.g., **күләпара**) in your response to highlight it. Only bold the main term being explained.
752
  """
753
 
754
- # Call Gemini API
755
  response = self.llm.generate_content(full_prompt).text
756
 
757
- # Add to conversation memory
758
  memory.chat_memory.add_user_message(message)
759
  memory.chat_memory.add_ai_message(response)
760
 
761
- # Extract and track terms, ensuring each term is stored only once per response
762
  extracted_terms = self.extract_kazakh_terms(message, response)
763
  unique_terms = {}
764
  for term, category, definition in extracted_terms:
@@ -772,7 +731,6 @@ class PersonalizedKazakhAssistant:
772
  return response
773
 
774
  def get_progress_report(self, user_id: str) -> str:
775
- """Generate a comprehensive progress report for specific user"""
776
  progress = self.tracker.get_user_progress(user_id)
777
 
778
  if progress['total_words'] == 0:
@@ -802,7 +760,6 @@ class PersonalizedKazakhAssistant:
802
  return report
803
 
804
  def get_recommendations(self, user_id: str) -> str:
805
- """Get personalized learning recommendations for specific user"""
806
  recommendations = self.tracker.get_learning_recommendations(user_id)
807
 
808
  if not recommendations:
@@ -815,7 +772,6 @@ class PersonalizedKazakhAssistant:
815
  return response
816
 
817
  def get_review_words(self, user_id: str) -> str:
818
- """Get words that need review for specific user"""
819
  words_to_review = self.tracker.get_words_to_review(user_id, 10)
820
 
821
  if not words_to_review:
@@ -833,7 +789,6 @@ class PersonalizedKazakhAssistant:
833
  return response
834
 
835
  def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
836
- """Get words that have been mastered (is_mastered = 1) for specific user"""
837
  mastered_words = self.tracker.get_mastered_words(user_id, page, page_size)
838
 
839
  if not mastered_words:
@@ -851,7 +806,6 @@ class PersonalizedKazakhAssistant:
851
  return response
852
 
853
  def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
854
- """Get all words and idioms in learning phase for specific user"""
855
  learning_words = self.tracker.get_learning_words(user_id, page, page_size)
856
 
857
  if not learning_words:
@@ -870,7 +824,6 @@ class PersonalizedKazakhAssistant:
870
  return response
871
 
872
  def get_new_word(self, user_id: str) -> Optional[Dict]:
873
- """Retrieve a new, unshown word from the knowledge base"""
874
  conn = sqlite3.connect(self.tracker.db_path)
875
  cursor = conn.cursor()
876
 
@@ -901,7 +854,6 @@ class PersonalizedKazakhAssistant:
901
  return None
902
 
903
  def get_new_idiom(self, user_id: str) -> Optional[Dict]:
904
- """Retrieve a new, unshown idiom from the knowledge base"""
905
  conn = sqlite3.connect(self.tracker.db_path)
906
  cursor = conn.cursor()
907
 
@@ -934,31 +886,30 @@ class PersonalizedKazakhAssistant:
934
  def get_help_message(self) -> str:
935
  """Get help message with available commands"""
936
  return """
937
- 🎓 **Kazakh Learning Assistant Help**
938
-
939
- **Available Commands**:
940
- - `/progress` - View your detailed learning progress
941
- - `/recommendations` - Get personalized learning suggestions
942
- - `/review` - See words that need review
943
- - `/mastered` - See words you've mastered (mastery level > 0)
944
- - `/help` - Show this help message
945
-
946
- **How to Use**:
947
- - Ask about any Kazakh word or idiom for definitions and examples
948
- - Your progress is automatically tracked as you learn
949
- - Regular practice improves your mastery levels
950
- - Use commands to monitor your learning journey
951
-
952
- **Examples**:
953
- - "What does 'сәлем' mean?"
954
- - "Tell me about Kazakh idioms"
955
- - "How do you say 'thank you' in Kazakh?"
956
-
957
- Start learning by asking about any Kazakh term! 🌟
958
- """
959
 
960
  def login_user(self, user_id: str) -> str:
961
- """Create a session token for user authentication"""
962
  session_token = self.tracker.create_user_session(user_id)
963
  return session_token
964
 
@@ -966,7 +917,6 @@ Start learning by asking about any Kazakh term! 🌟
966
  assistant = PersonalizedKazakhAssistant()
967
 
968
  def chat_interface(message, history, target_language):
969
- """Chat interface for Gradio"""
970
  try:
971
  web_user_id = "web_user_default"
972
  response = assistant.process_message(message, web_user_id, target_language=target_language)
@@ -975,7 +925,6 @@ def chat_interface(message, history, target_language):
975
  return f"Sorry, I encountered an error: {str(e)}. Please try again."
976
 
977
  def api_login(user_id: str) -> dict:
978
- """API endpoint for user login/session creation"""
979
  try:
980
  session_token = assistant.login_user(user_id)
981
  return {
@@ -991,7 +940,6 @@ def api_login(user_id: str) -> dict:
991
  }
992
 
993
  def api_chat(message: str, user_id: str, session_token: str = None, target_language: str = "English") -> dict:
994
- """API endpoint for chat functionality with proper user session"""
995
  try:
996
  response = assistant.process_message(message, user_id, session_token, target_language)
997
  return {
@@ -1007,7 +955,6 @@ def api_chat(message: str, user_id: str, session_token: str = None, target_langu
1007
  }
1008
 
1009
  def api_progress(user_id: str, session_token: str = None) -> dict:
1010
- """API endpoint for user progress with session validation"""
1011
  try:
1012
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1013
  return {"success": False, "error": "Invalid session"}
@@ -1028,7 +975,6 @@ def api_progress(user_id: str, session_token: str = None) -> dict:
1028
  }
1029
 
1030
  def api_recommendations(user_id: str, session_token: str = None) -> dict:
1031
- """API endpoint for learning recommendations with session validation"""
1032
  try:
1033
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1034
  return {"success": False, "error": "Invalid session"}
@@ -1049,7 +995,6 @@ def api_recommendations(user_id: str, session_token: str = None) -> dict:
1049
  }
1050
 
1051
  def api_review_words(user_id: str, session_token: str = None) -> dict:
1052
- """API endpoint for words to review with session validation"""
1053
  try:
1054
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1055
  return {"success": False, "error": "Invalid session"}
@@ -1070,7 +1015,6 @@ def api_review_words(user_id: str, session_token: str = None) -> dict:
1070
  }
1071
 
1072
  def api_mastered_words(user_id: str, session_token: str = None) -> dict:
1073
- """API endpoint for mastered words with session validation"""
1074
  try:
1075
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1076
  return {"success": False, "error": "Invalid session"}
@@ -1091,7 +1035,6 @@ def api_mastered_words(user_id: str, session_token: str = None) -> dict:
1091
  }
1092
 
1093
  def api_new_word(user_id: str, session_token: str = None) -> dict:
1094
- """API endpoint to retrieve a new, unshown word"""
1095
  try:
1096
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1097
  return {"success": False, "error": "Invalid session"}
@@ -1126,7 +1069,6 @@ def api_new_word(user_id: str, session_token: str = None) -> dict:
1126
  }
1127
 
1128
  def api_new_idiom(user_id: str, session_token: str = None) -> dict:
1129
- """API endpoint to retrieve a new, unshown idiom"""
1130
  try:
1131
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1132
  return {"success": False, "error": "Invalid session"}
@@ -1161,7 +1103,6 @@ def api_new_idiom(user_id: str, session_token: str = None) -> dict:
1161
  }
1162
 
1163
  def api_learning_words(user_id: str, session_token: str = None, page: int = 1, page_size: int = 10) -> dict:
1164
- """API endpoint for all words in learning phase with pagination"""
1165
  try:
1166
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1167
  return {"success": False, "error": "Invalid session"}
@@ -1212,773 +1153,6 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1212
  ]
1213
  )
1214
 
1215
- with gr.Tab("📖 API Documentation"):
1216
- gr.Markdown("""
1217
- ## API Endpoints for Flutter Integration
1218
- ### Base URL: `https://huggingface.co/spaces/GuestUser33/kazakh-learning-api`
1219
-
1220
- ### Authentication Flow:
1221
- 1. **Login** to get a session token
1222
- 2. **Use session token** for subsequent API calls
1223
- 3. **Session tokens expire** after inactivity
1224
-
1225
- ### Available Endpoints:
1226
-
1227
- #### 1. Login API
1228
- ```
1229
- POST /api/predict
1230
- Content-Type: application/json
1231
-
1232
- {
1233
- "data": ["user_id"],
1234
- "fn_index": 0
1235
- }
1236
- ```
1237
- **Response**:
1238
- ```json
1239
- {
1240
- "data": [
1241
- {
1242
- "success": true,
1243
- "session_token": "uuid-string",
1244
- "user_id": "user_id",
1245
- "message": "Login successful"
1246
- }
1247
- ]
1248
- }
1249
- ```
1250
-
1251
- #### 2. Chat API
1252
- ```
1253
- POST /api/predict
1254
- Content-Type: application/json
1255
-
1256
- {
1257
- "data": ["message", "user_id", "session_token", "English"],
1258
- "fn_index": 1
1259
- }
1260
- ```
1261
- **Parameters**:
1262
- - `message`: The user's query (e.g., "сәлем деген не?" or "/progress")
1263
- - `user_id`: Unique identifier for the user
1264
- - `session_token`: Session token from login (use empty string "" if no token)
1265
- - `target_language`: Language for responses ("English", "Kazakh", or "Russian")
1266
-
1267
- **Response**:
1268
- ```json
1269
- {
1270
- "data": [
1271
- {
1272
- "success": true,
1273
- "response": "response_text",
1274
- "user_id": "user_id"
1275
- }
1276
- ]
1277
- }
1278
- ```
1279
-
1280
- #### 3. Progress API
1281
- ```
1282
- POST /api/predict
1283
- Content-Type: application/json
1284
-
1285
- {
1286
- "data": ["user_id", "session_token"],
1287
- "fn_index": 2
1288
- }
1289
- ```
1290
- **Response**:
1291
- ```json
1292
- {
1293
- "data": [
1294
- {
1295
- "success": true,
1296
- "progress_text": "progress_report",
1297
- "progress_data": {
1298
- "category_stats": {
1299
- "word": {"count": number, "average_mastery": number},
1300
- "idiom": {"count": number, "average_mastery": number}
1301
- },
1302
- "recent_activity": number,
1303
- "daily_activity": [{"date": "YYYY-MM-DD", "daily_count": number}, ...],
1304
- "total_words": number
1305
- },
1306
- "user_id": "user_id"
1307
- }
1308
- ]
1309
- }
1310
- ```
1311
-
1312
- #### 4. Recommendations API
1313
- ```
1314
- POST /api/predict
1315
- Content-Type: application/json
1316
-
1317
- {
1318
- "data": ["user_id", "session_token"],
1319
- "fn_index": 3
1320
- }
1321
- ```
1322
- **Response**:
1323
- ```json
1324
- {
1325
- "data": [
1326
- {
1327
- "success": true,
1328
- "recommendations_text": "recommendations",
1329
- "recommendations_list": ["recommendation1", "recommendation2", ...],
1330
- "user_id": "user_id"
1331
- }
1332
- ]
1333
- }
1334
- ```
1335
-
1336
- #### 5. Review Words API
1337
- ```
1338
- POST /api/predict
1339
- Content-Type: application/json
1340
-
1341
- {
1342
- "data": ["user_id", "session_token"],
1343
- "fn_index": 4
1344
- }
1345
- ```
1346
- **Response**:
1347
- ```json
1348
- {
1349
- "data": [
1350
- {
1351
- "success": true,
1352
- "review_text": "review_words",
1353
- "review_data": [
1354
- {
1355
- "word": "word",
1356
- "definition": "definition",
1357
- "category": "word|idiom",
1358
- "mastery_level": number,
1359
- "last_reviewed": "YYYY-MM-DDTHH:MM:SS",
1360
- "encounter_count": number
1361
- },
1362
- ...
1363
- ],
1364
- "user_id": "user_id"
1365
- }
1366
- ]
1367
- }
1368
- ```
1369
-
1370
- #### 6. Mastered Words API
1371
- ```
1372
- POST /api/predict
1373
- Content-Type: application/json
1374
-
1375
- {
1376
- "data": ["user_id", "session_token"],
1377
- "fn_index": 5
1378
- }
1379
- ```
1380
- **Response**:
1381
- ```json
1382
- {
1383
- "data": [
1384
- {
1385
- "success": true,
1386
- "mastered_text": "mastered_words",
1387
- "mastered_data": [
1388
- {
1389
- "word": "word",
1390
- "definition": "definition",
1391
- "category": "word|idiom",
1392
- "mastery_level": number,
1393
- "encounter_count": number
1394
- },
1395
- ...
1396
- ],
1397
- "user_id": "user_id"
1398
- }
1399
- ]
1400
- }
1401
- ```
1402
-
1403
- #### 7. New Word API
1404
- ```
1405
- POST /api/predict
1406
- Content-Type: application/json
1407
-
1408
- {
1409
- "data": ["user_id", "session_token"],
1410
- "fn_index": 6
1411
- }
1412
- ```
1413
- **Response**:
1414
- ```json
1415
- {
1416
- "data": [
1417
- {
1418
- "success": true,
1419
- "word": "new_word",
1420
- "definition": "definition",
1421
- "category": "word",
1422
- "user_id": "user_id"
1423
- }
1424
- ]
1425
- }
1426
- ```
1427
-
1428
- #### 8. New Idiom API
1429
- ```
1430
- POST /api/predict
1431
- Content-Type: application/json
1432
-
1433
- {
1434
- "data": ["user_id", "session_token"],
1435
- "fn_index": 7
1436
- }
1437
- ```
1438
- **Response**:
1439
- ```json
1440
- {
1441
- "data": [
1442
- {
1443
- "success": true,
1444
- "word": "new_idiom",
1445
- "definition": "definition",
1446
- "category": "idiom",
1447
- "user_id": "user_id"
1448
- }
1449
- ]
1450
- }
1451
- ```
1452
-
1453
- #### 9. Learning Words API
1454
- ```
1455
- POST /api/predict
1456
- Content-Type: application/json
1457
-
1458
- {
1459
- "data": ["user_id", "session_token", page, page_size],
1460
- "fn_index": 8
1461
- }
1462
- ```
1463
- **Parameters**:
1464
- - `user_id`: Unique identifier for the user
1465
- - `session_token`: Session token from login (use empty string "" if no token)
1466
- - `page`: Page number for pagination (default: 1)
1467
- - `page_size`: Number of items per page (default: 10)
1468
-
1469
- **Response**:
1470
- ```json
1471
- {
1472
- "data": [
1473
- {
1474
- "success": true,
1475
- "learning_text": "learning_words",
1476
- "learning_data": [
1477
- {
1478
- "word": "word",
1479
- "definition": "definition",
1480
- "category": "word|idiom",
1481
- "mastery_level": number,
1482
- "encounter_count": number
1483
- },
1484
- ...
1485
- ],
1486
- "user_id": "user_id",
1487
- "page": number,
1488
- "page_size": number
1489
- }
1490
- ]
1491
- }
1492
- ```
1493
-
1494
- ### Flutter Integration Example:
1495
- ```dart
1496
- import 'dart:convert';
1497
- import 'package:http/http.dart' as http;
1498
-
1499
- class KazakhLearningAPI {
1500
- static const String baseUrl = 'https://huggingface.co/spaces/GuestUser33/kazakh-learning-api';
1501
- String? sessionToken;
1502
- String? userId;
1503
-
1504
- // Login and get session token
1505
- Future<bool> login(String userId) async {
1506
- try {
1507
- final response = await http.post(
1508
- Uri.parse('$baseUrl/api/predict'),
1509
- headers: {'Content-Type': 'application/json'},
1510
- body: jsonEncode({
1511
- 'data': [userId],
1512
- 'fn_index': 0
1513
- }),
1514
- );
1515
-
1516
- if (response.statusCode == 200) {
1517
- final result = jsonDecode(response.body);
1518
- if (result['data'] != null && result['data'][0]['success'] == true) {
1519
- this.userId = userId;
1520
- this.sessionToken = result['data'][0]['session_token'];
1521
- return true;
1522
- }
1523
- }
1524
- } catch (e) {
1525
- print('Login error: $e');
1526
- }
1527
- return false;
1528
- }
1529
-
1530
- // Send chat message
1531
- Future<String?> sendMessage(String message, {String targetLanguage = 'English'}) async {
1532
- if (userId == null) return null;
1533
-
1534
- try {
1535
- final response = await http.post(
1536
- Uri.parse('$baseUrl/api/predict'),
1537
- headers: {'Content-Type': 'application/json'},
1538
- body: jsonEncode({
1539
- 'data': [message, userId, sessionToken ?? "", targetLanguage],
1540
- 'fn_index': 1
1541
- }),
1542
- );
1543
-
1544
- if (response.statusCode == 200) {
1545
- final result = jsonDecode(response.body);
1546
- if (result['data'] != null && result['data'][0]['success'] == true) {
1547
- return result['data'][0]['response'];
1548
- }
1549
- }
1550
- } catch (e) {
1551
- print('Send message error: $e');
1552
- }
1553
- return null;
1554
- }
1555
-
1556
- // Get user progress
1557
- Future<Map?> getProgress() async {
1558
- if (userId == null) return null;
1559
-
1560
- try {
1561
- final response = await http.post(
1562
- Uri.parse('$baseUrl/api/predict'),
1563
- headers: {'Content-Type': 'application/json'},
1564
- body: jsonEncode({
1565
- 'data': [userId, sessionToken ?? ""],
1566
- 'fn_index': 2
1567
- }),
1568
- );
1569
-
1570
- if (response.statusCode == 200) {
1571
- final result = jsonDecode(response.body);
1572
- if (result['data'] != null && result['data'][0]['success'] == true) {
1573
- return result['data'][0]['progress_data'];
1574
- }
1575
- }
1576
- } catch (e) {
1577
- print('Get progress error: $e');
1578
- }
1579
- return null;
1580
- }
1581
-
1582
- // Get recommendations
1583
- Future<List?> getRecommendations() async {
1584
- if (userId == null) return null;
1585
-
1586
- try {
1587
- final response = await http.post(
1588
- Uri.parse('$baseUrl/api/predict'),
1589
- headers: {'Content-Type': 'application/json'},
1590
- body: jsonEncode({
1591
- 'data': [userId, sessionToken ?? ""],
1592
- 'fn_index': 3
1593
- }),
1594
- );
1595
-
1596
- if (response.statusCode == 200) {
1597
- final result = jsonDecode(response.body);
1598
- if (result['data'] != null && result['data'][0]['success'] == true) {
1599
- return List.from(result['data'][0]['recommendations_list'] ?? []);
1600
- }
1601
- }
1602
- } catch (e) {
1603
- print('Get recommendations error: $e');
1604
- }
1605
- return null;
1606
- }
1607
-
1608
- // Get words to review
1609
- Future<List?> getReviewWords() async {
1610
- if (userId == null) return null;
1611
-
1612
- try {
1613
- final response = await http.post(
1614
- Uri.parse('$baseUrl/api/predict'),
1615
- headers: {'Content-Type': 'application/json'},
1616
- body: jsonEncode({
1617
- 'data': [userId, sessionToken ?? ""],
1618
- 'fn_index': 4
1619
- }),
1620
- );
1621
-
1622
- if (response.statusCode == 200) {
1623
- final result = jsonDecode(response.body);
1624
- if (result['data'] != null && result['data'][0]['success'] == true) {
1625
- return result['data'][0]['review_data'];
1626
- }
1627
- }
1628
- } catch (e) {
1629
- print('Get review words error: $e');
1630
- }
1631
- return null;
1632
- }
1633
-
1634
- // Get mastered words
1635
- Future<List?> getMasteredWords() async {
1636
- if (userId == null) return null;
1637
-
1638
- try {
1639
- final response = await http.post(
1640
- Uri.parse('$baseUrl/api/predict'),
1641
- headers: {'Content-Type': 'application/json'},
1642
- body: jsonEncode({
1643
- 'data': [userId, sessionToken ?? ""],
1644
- 'fn_index': 5
1645
- }),
1646
- );
1647
-
1648
- if (response.statusCode == 200) {
1649
- final result = jsonDecode(response.body);
1650
- if (result['data'] != null && result['data'][0]['success'] == true) {
1651
- return result['data'][0]['mastered_data'];
1652
- }
1653
- }
1654
- } catch (e) {
1655
- print('Get mastered words error: $e');
1656
- }
1657
- return null;
1658
- }
1659
-
1660
- // Get new word
1661
- Future<Map?> getNewWord() async {
1662
- if (userId == null) return null;
1663
-
1664
- try {
1665
- final response = await http.post(
1666
- Uri.parse('$baseUrl/api/predict'),
1667
- headers: {'Content-Type': 'application/json'},
1668
- body: jsonEncode({
1669
- 'data': [userId, sessionToken ?? ""],
1670
- 'fn_index': 6
1671
- }),
1672
- );
1673
-
1674
- if (response.statusCode == 200) {
1675
- final result = jsonDecode(response.body);
1676
- if (result['data'] != null && result['data'][0]['success'] == true) {
1677
- return result['data'][0];
1678
- }
1679
- }
1680
- } catch (e) {
1681
- print('Get new word error: $e');
1682
- }
1683
- return null;
1684
- }
1685
-
1686
- // Get new idiom
1687
- Future<Map?> getNewIdiom() async {
1688
- if (userId == null) return null;
1689
-
1690
- try {
1691
- final response = await http.post(
1692
- Uri.parse('$baseUrl/api/predict'),
1693
- headers: {'Content-Type': 'application/json'},
1694
- body: jsonEncode({
1695
- 'data': [userId, sessionToken ?? ""],
1696
- 'fn_index': 7
1697
- }),
1698
- );
1699
-
1700
- if (response.statusCode == 200) {
1701
- final result = jsonDecode(response.body);
1702
- if (result['data'] != null && result['data'][0]['success'] == true) {
1703
- return result['data'][0];
1704
- }
1705
- }
1706
- } catch (e) {
1707
- print('Get new idiom error: $e');
1708
- }
1709
- return null;
1710
- }
1711
-
1712
- // Get learning words
1713
- Future<Map?> getLearningWords({int page = 1, int pageSize = 10}) async {
1714
- if (userId == null) return null;
1715
-
1716
- try {
1717
- final response = await http.post(
1718
- Uri.parse('$baseUrl/api/predict'),
1719
- headers: {'Content-Type': 'application/json'},
1720
- body: jsonEncode({
1721
- 'data': [userId, sessionToken ?? "", page, pageSize],
1722
- 'fn_index': 8
1723
- }),
1724
- );
1725
-
1726
- if (response.statusCode == 200) {
1727
- final result = jsonDecode(response.body);
1728
- if (result['data'] != null && result['data'][0]['success'] == true) {
1729
- return result['data'][0];
1730
- }
1731
- }
1732
- } catch (e) {
1733
- print('Get learning words error: $e');
1734
- }
1735
- return null;
1736
- }
1737
-
1738
- // Helper method to check if session is valid
1739
- bool get isLoggedIn => userId != null;
1740
-
1741
- // Logout method
1742
- void logout() {
1743
- userId = null;
1744
- sessionToken = null;
1745
- }
1746
- }
1747
- ```
1748
-
1749
- ### Key Features:
1750
- - ✅ **Multi-User Support**: Each user has separate learning progress
1751
- - ✅ **Session Management**: Secure session tokens for authentication
1752
- - ✅ **Personalized Tracking**: Individual progress tracking per user using RAG model
1753
- - ✅ **Multi-Language Support**: Responses in English, Kazakh, or Russian
1754
- - ✅ **API Ready**: All endpoints ready for mobile app integration
1755
- - ✅ **Session Validation**: Automatic session validation and expiry
1756
-
1757
- ### Usage Notes:
1758
- - Always call **login** first to get a session token
1759
- - Use **empty string ""** for session_token if no token is available
1760
- - Specify `target_language` ("English", "Kazakh", "Russian") for responses
1761
- - Handle **session expiry** by re-logging in
1762
- - Use **unique user_id** for each user (e.g., email, username)
1763
- - Commands like `/progress`, `/recommendations`, `/review`, `/mastered`, `/newword`, `/newidiom`, `/learning`, `/help` are supported
1764
- - **Error handling** is crucial - always check for success field and handle exceptions
1765
-
1766
- ### Error Handling:
1767
- All API responses include a `success` field. If `success: false`, check the `error` field for details:
1768
- ```json
1769
- {
1770
- "data": [
1771
- {
1772
- "success": false,
1773
- "error": "Error message here"
1774
- }
1775
- ]
1776
- }
1777
- """
1778
- )
1779
-
1780
- with gr.Tab("🔌 API Testing"):
1781
- gr.Markdown("## Test API Endpoints")
1782
- gr.Markdown("### Use these endpoints programmatically:")
1783
- gr.Markdown("""
1784
- **API Endpoints:**
1785
- - **Login:** `/api/predict` with `fn_index=0`
1786
- - **Chat:** `/api/predict` with `fn_index=1`
1787
- - **Progress:** `/api/predict` with `fn_index=2`
1788
- - **Recommendations:** `/api/predict` with `fn_index=3`
1789
- - **Review Words:** `/api/predict` with `fn_index=4`
1790
- - **Mastered Words:** `/api/predict` with `fn_index=5`
1791
- - **New Word:** `/api/predict` with `fn_index=6`
1792
- - **New Idiom:** `/api/predict` with `fn_index=7`
1793
- - **Learning Words:** `/api/predict` with `fn_index=8`
1794
- """)
1795
-
1796
- with gr.Row():
1797
- with gr.Column():
1798
- user_id_input = gr.Textbox(label="User ID", value="test_user", placeholder="Enter unique user ID")
1799
- session_token_input = gr.Textbox(label="Session Token", placeholder="Session token (get from login)")
1800
- message_input = gr.Textbox(label="Message", placeholder="Enter your message in Kazakh or English")
1801
- target_language_api = gr.Dropdown(label="Explanation Language", choices=["English", "Kazakh", "Russian"], value="English")
1802
- page_input = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
1803
- page_size_input = gr.Number(label="Page Size", value=10, minimum=1, precision=0)
1804
-
1805
- with gr.Row():
1806
- login_btn = gr.Button("🔑 Test Login API")
1807
- chat_btn = gr.Button("💬 Test Chat API")
1808
- progress_btn = gr.Button("📊 Test Progress API")
1809
- recommendations_btn = gr.Button("💡 Test Recommendations API")
1810
- review_btn = gr.Button("📚 Test Review Words API")
1811
- mastered_btn = gr.Button("🏆 Test Mastered Words API")
1812
- new_word_btn = gr.Button("📝 Test New Word API")
1813
- new_idiom_btn = gr.Button("🎭 Test New Idiom API")
1814
- learning_btn = gr.Button("📖 Test Learning Words API")
1815
-
1816
- api_output = gr.JSON(label="API Response")
1817
-
1818
- login_interface = gr.Interface(
1819
- fn=api_login,
1820
- inputs=gr.Textbox(label="User ID"),
1821
- outputs=gr.JSON(label="Response"),
1822
- title="Login API",
1823
- description="Login endpoint",
1824
- allow_flagging="never"
1825
- )
1826
-
1827
- chat_api_interface = gr.Interface(
1828
- fn=api_chat,
1829
- inputs=[
1830
- gr.Textbox(label="Message"),
1831
- gr.Textbox(label="User ID"),
1832
- gr.Textbox(label="Session Token"),
1833
- gr.Dropdown(label="Target Language", choices=["English", "Kazakh", "Russian"])
1834
- ],
1835
- outputs=gr.JSON(label="Response"),
1836
- title="Chat API",
1837
- description="Chat endpoint",
1838
- allow_flagging="never"
1839
- )
1840
-
1841
- progress_interface = gr.Interface(
1842
- fn=api_progress,
1843
- inputs=[
1844
- gr.Textbox(label="User ID"),
1845
- gr.Textbox(label="Session Token")
1846
- ],
1847
- outputs=gr.JSON(label="Response"),
1848
- title="Progress API",
1849
- description="Progress endpoint",
1850
- allow_flagging="never"
1851
- )
1852
-
1853
- recommendations_interface = gr.Interface(
1854
- fn=api_recommendations,
1855
- inputs=[
1856
- gr.Textbox(label="User ID"),
1857
- gr.Textbox(label="Session Token")
1858
- ],
1859
- outputs=gr.JSON(label="Response"),
1860
- title="Recommendations API",
1861
- description="Recommendations endpoint",
1862
- allow_flagging="never"
1863
- )
1864
-
1865
- review_interface = gr.Interface(
1866
- fn=api_review_words,
1867
- inputs=[
1868
- gr.Textbox(label="User ID"),
1869
- gr.Textbox(label="Session Token")
1870
- ],
1871
- outputs=gr.JSON(label="Response"),
1872
- title="Review Words API",
1873
- description="Review words endpoint",
1874
- allow_flagging="never"
1875
- )
1876
-
1877
- mastered_interface = gr.Interface(
1878
- fn=api_mastered_words,
1879
- inputs=[
1880
- gr.Textbox(label="User ID"),
1881
- gr.Textbox(label="Session Token")
1882
- ],
1883
- outputs=gr.JSON(label="Response"),
1884
- title="Mastered Words API",
1885
- description="Mastered words endpoint",
1886
- allow_flagging="never"
1887
- )
1888
-
1889
- new_word_interface = gr.Interface(
1890
- fn=api_new_word,
1891
- inputs=[
1892
- gr.Textbox(label="User ID"),
1893
- gr.Textbox(label="Session Token")
1894
- ],
1895
- outputs=gr.JSON(label="Response"),
1896
- title="New Word API",
1897
- description="New word endpoint",
1898
- allow_flagging="never"
1899
- )
1900
-
1901
- new_idiom_interface = gr.Interface(
1902
- fn=api_new_idiom,
1903
- inputs=[
1904
- gr.Textbox(label="User ID"),
1905
- gr.Textbox(label="Session Token")
1906
- ],
1907
- outputs=gr.JSON(label="Response"),
1908
- title="New Idiom API",
1909
- description="New idiom endpoint",
1910
- allow_flagging="never"
1911
- )
1912
-
1913
- learning_interface = gr.Interface(
1914
- fn=api_learning_words,
1915
- inputs=[
1916
- gr.Textbox(label="User ID"),
1917
- gr.Textbox(label="Session Token"),
1918
- gr.Number(label="Page Number"),
1919
- gr.Number(label="Page Size")
1920
- ],
1921
- outputs=gr.JSON(label="Response"),
1922
- title="Learning Words API",
1923
- description="Learning words endpoint",
1924
- allow_flagging="never"
1925
- )
1926
-
1927
- # Connect buttons to test the APIs
1928
- login_btn.click(
1929
- fn=api_login,
1930
- inputs=user_id_input,
1931
- outputs=api_output
1932
- )
1933
-
1934
- chat_btn.click(
1935
- fn=api_chat,
1936
- inputs=[message_input, user_id_input, session_token_input, target_language_api],
1937
- outputs=api_output
1938
- )
1939
-
1940
- progress_btn.click(
1941
- fn=api_progress,
1942
- inputs=[user_id_input, session_token_input],
1943
- outputs=api_output
1944
- )
1945
-
1946
- recommendations_btn.click(
1947
- fn=api_recommendations,
1948
- inputs=[user_id_input, session_token_input],
1949
- outputs=api_output
1950
- )
1951
-
1952
- review_btn.click(
1953
- fn=api_review_words,
1954
- inputs=[user_id_input, session_token_input],
1955
- outputs=api_output
1956
- )
1957
-
1958
- mastered_btn.click(
1959
- fn=api_mastered_words,
1960
- inputs=[user_id_input, session_token_input],
1961
- outputs=api_output
1962
- )
1963
-
1964
- new_word_btn.click(
1965
- fn=api_new_word,
1966
- inputs=[user_id_input, session_token_input],
1967
- outputs=api_output
1968
- )
1969
-
1970
- new_idiom_btn.click(
1971
- fn=api_new_idiom,
1972
- inputs=[user_id_input, session_token_input],
1973
- outputs=api_output
1974
- )
1975
-
1976
- learning_btn.click(
1977
- fn=api_learning_words,
1978
- inputs=[user_id_input, session_token_input, page_input, page_size_input],
1979
- outputs=api_output
1980
- )
1981
-
1982
  if __name__ == "__main__":
1983
  demo.launch(
1984
  show_api=True,
 
9
  from collections import defaultdict
10
  import re
11
  import uuid
 
12
  import google.generativeai as genai
13
 
14
  from dotenv import load_dotenv
 
52
  self.init_database()
53
 
54
  def init_database(self):
 
55
  conn = sqlite3.connect(self.db_path)
56
  cursor = conn.cursor()
57
 
 
112
  conn.close()
113
 
114
  def create_user_session(self, user_id: str) -> str:
 
115
  session_token = str(uuid.uuid4())
116
  now = datetime.now().isoformat()
117
 
 
135
  return session_token
136
 
137
  def validate_session(self, user_id: str, session_token: str) -> bool:
 
138
  conn = sqlite3.connect(self.db_path)
139
  cursor = conn.cursor()
140
 
 
149
  return result is not None and result[0] == 1
150
 
151
  def update_session_activity(self, user_id: str, session_token: str):
 
152
  conn = sqlite3.connect(self.db_path)
153
  cursor = conn.cursor()
154
 
 
162
  conn.close()
163
 
164
  def start_session(self, user_id: str) -> str:
 
165
  session_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
166
  session = LearningSession(
167
  session_id=session_id,
 
181
  return session_id
182
 
183
  def end_session(self, session_id: str):
 
184
  conn = sqlite3.connect(self.db_path)
185
  cursor = conn.cursor()
186
  cursor.execute('''
 
236
  conn.close()
237
 
238
  def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
 
239
  conn = sqlite3.connect(self.db_path)
240
  cursor = conn.cursor()
241
 
 
267
  conn.close()
268
 
269
  def get_user_progress(self, user_id: str) -> Dict:
 
270
  conn = sqlite3.connect(self.db_path)
271
  cursor = conn.cursor()
272
 
 
312
  }
313
 
314
  def get_words_to_review(self, user_id: str, limit: int = 10) -> List[Dict]:
 
315
  conn = sqlite3.connect(self.db_path)
316
  cursor = conn.cursor()
317
 
 
338
  return words
339
 
340
  def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
 
341
  conn = sqlite3.connect(self.db_path)
342
  cursor = conn.cursor()
343
 
 
364
  return words
365
 
366
  def get_learning_recommendations(self, user_id: str) -> List[str]:
 
367
  progress = self.get_user_progress(user_id)
368
  recommendations = []
369
 
 
383
  return recommendations
384
 
385
  def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
 
386
  conn = sqlite3.connect(self.db_path)
387
  cursor = conn.cursor()
388
 
 
419
  self.user_memories = {}
420
 
421
  def setup_environment(self):
 
422
  load_dotenv()
423
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
424
  self.MODEL = "gemini-1.5-flash"
425
  self.db_name = "vector_db"
426
 
427
  def setup_vectorstore(self):
 
428
  folders = glob.glob("knowledge-base/*")
429
  text_loader_kwargs = {'encoding': 'utf-8'}
430
  documents = []
 
470
  print(f"Vectorstore created with {self.vectorstore._collection.count()} documents")
471
 
472
  def setup_llm(self, target_language: str = "English"):
 
473
  self.system_prompt = f"""
474
  You are a personalized Kazakh language learning assistant with access to a comprehensive knowledge base and user learning history. Your role is to help users learn Kazakh words and idioms while tracking their progress and providing personalized recommendations. Respond in {target_language}.
475
 
 
487
  - Always identify the main Kazakh word/idiom for progress tracking.
488
  - **RAG Usage**:
489
  - Use Retrieval-Augmented Generation (RAG) only when the query explicitly asks for explanations of specific Kazakh terms or idioms (e.g., "What does сәлем mean?") or when the context strongly suggests a need for knowledge base information (e.g., queries about specific words or idioms).
490
+ - When using RAG, limit the response to explaining 1-2 distinct terms at most, unless the user explicitly asks for multiple terms (e.g., "List several idioms"). For each term, provide 3-4 relevant examples. Do not list all or many terms or matches from the knowledge base.
491
  - For general queries (e.g., greetings, procedural questions, or commands like /progress) or grammar-related queries (e.g., "explain me nouns"), rely on your general knowledge and do not use RAG unless the knowledge base contains relevant information.
492
  - Since the knowledge base contains only words and idioms, grammar explanations (e.g., about nouns, verbs) should be provided using your own knowledge, without relying on RAG, unless the query specifically involves terms in the knowledge base.
493
  - Be encouraging and supportive.
 
507
  )
508
 
509
  def normalize_term(self, term: str) -> str:
 
510
  return ' '.join(term.lower().strip().split())
511
 
512
  def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
 
519
  bold_matches = re.findall(bold_pattern, response)
520
 
521
  for term in bold_matches:
522
+ normalized_term = self.normalize_term(term)
523
  if normalized_term in seen_terms or len(normalized_term) <= 2 or len(normalized_term) > 100:
524
  print(f"Skipped term {normalized_term}: Invalid length or already seen")
525
  continue
526
 
527
+ category = "word"
 
528
  definition = ""
529
  term_matched = False
530
+ original_term = term
531
 
 
532
  for known_term in self.known_terms:
533
  if normalized_term == self.normalize_term(known_term):
534
  term_matched = True
535
+ original_term = known_term
 
536
  for doc in retrieved_docs:
537
  doc_type = doc.metadata.get('doc_type', '').lower()
538
  if normalized_term in self.normalize_term(doc.page_content):
 
544
  category = "grammar"
545
  definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
546
  break
 
547
  if not definition and len(known_term.split()) > 1:
548
  category = "idiom"
549
  definition = self.extract_clean_definition(normalized_term, "", response)
550
  break
551
 
 
552
  if not term_matched:
553
  for known_term in self.known_terms:
554
  normalized_known = self.normalize_term(known_term)
 
 
555
  if (normalized_term.startswith(normalized_known) and
556
  len(normalized_term) <= len(normalized_known) + 4):
557
  term_matched = True
558
+ normalized_term = normalized_known
559
+ original_term = known_term
560
  for doc in retrieved_docs:
561
  if normalized_known in self.normalize_term(doc.page_content):
562
  doc_type = doc.metadata.get('doc_type', '').lower()
 
568
  category = "grammar"
569
  definition = self.extract_clean_definition(normalized_known, doc.page_content, response)
570
  break
 
571
  if not definition and len(known_term.split()) > 1:
572
  category = "idiom"
573
  definition = self.extract_clean_definition(normalized_known, "", response)
574
  break
575
+
 
576
  if term_matched and len(original_term.split()) == 1 and any('words' in doc.metadata.get('doc_type', '').lower() for doc in retrieved_docs):
577
  category = "word"
578
 
 
591
  return terms
592
 
593
  def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
 
594
  normalized_term = self.normalize_term(term)
595
 
 
596
  retrieved_docs = self.vectorstore.similarity_search(term, k=5)
597
  for doc in retrieved_docs:
598
  lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
 
606
  return f"Definition for {term}"
607
 
608
  def get_user_memory(self, user_id: str):
 
609
  if user_id not in self.user_memories:
610
  self.user_memories[user_id] = ConversationBufferMemory(
611
  memory_key='chat_history',
 
615
  return self.user_memories[user_id]
616
 
617
  def get_user_chain(self, user_id: str):
 
618
  memory = self.get_user_memory(user_id)
619
  retriever = self.vectorstore.as_retriever()
620
  return ConversationalRetrievalChain.from_llm(
 
624
  )
625
 
626
  def process_message(self, message: str, user_id: str = "default_user", session_token: str = None, target_language: str = "English") -> str:
 
 
627
  if session_token and not self.tracker.validate_session(user_id, session_token):
628
  return f"Session expired. Please login again in {target_language}."
629
 
 
633
  if user_id not in self.user_sessions:
634
  self.user_sessions[user_id] = self.tracker.start_session(user_id)
635
 
 
636
  self.setup_llm(target_language)
637
 
 
638
  if message.lower().startswith('/progress'):
639
  return self.get_progress_report(user_id)
640
  elif message.lower().startswith('/recommendations'):
 
660
  elif message.lower().startswith('/help'):
661
  return self.get_help_message()
662
 
 
663
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
664
  context = "\n".join([doc.page_content for doc in retrieved_docs])
665
 
 
666
  memory = self.get_user_memory(user_id)
667
  chat_history = ""
668
  for msg in memory.chat_memory.messages[-10:]:
 
671
  elif isinstance(msg, AIMessage):
672
  chat_history += f"Assistant: {msg.content}\n"
673
 
 
674
  progress = self.tracker.get_user_progress(user_id)
675
  words_to_review = self.tracker.get_words_to_review(user_id, 5)
676
  mastered_words = self.tracker.get_mastered_words(user_id, page=1, page_size=5)
 
697
  for word in mastered_words])
698
  )
699
 
 
700
  full_prompt = f"""
701
  {self.system_prompt}
702
 
 
713
  Respond in {target_language}. If explaining a Kazakh word or idiom retrieved from the context, **bold** the term (e.g., **күләпара**) in your response to highlight it. Only bold the main term being explained.
714
  """
715
 
 
716
  response = self.llm.generate_content(full_prompt).text
717
 
 
718
  memory.chat_memory.add_user_message(message)
719
  memory.chat_memory.add_ai_message(response)
720
 
 
721
  extracted_terms = self.extract_kazakh_terms(message, response)
722
  unique_terms = {}
723
  for term, category, definition in extracted_terms:
 
731
  return response
732
 
733
  def get_progress_report(self, user_id: str) -> str:
 
734
  progress = self.tracker.get_user_progress(user_id)
735
 
736
  if progress['total_words'] == 0:
 
760
  return report
761
 
762
  def get_recommendations(self, user_id: str) -> str:
 
763
  recommendations = self.tracker.get_learning_recommendations(user_id)
764
 
765
  if not recommendations:
 
772
  return response
773
 
774
  def get_review_words(self, user_id: str) -> str:
 
775
  words_to_review = self.tracker.get_words_to_review(user_id, 10)
776
 
777
  if not words_to_review:
 
789
  return response
790
 
791
  def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
 
792
  mastered_words = self.tracker.get_mastered_words(user_id, page, page_size)
793
 
794
  if not mastered_words:
 
806
  return response
807
 
808
  def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
 
809
  learning_words = self.tracker.get_learning_words(user_id, page, page_size)
810
 
811
  if not learning_words:
 
824
  return response
825
 
826
  def get_new_word(self, user_id: str) -> Optional[Dict]:
 
827
  conn = sqlite3.connect(self.tracker.db_path)
828
  cursor = conn.cursor()
829
 
 
854
  return None
855
 
856
  def get_new_idiom(self, user_id: str) -> Optional[Dict]:
 
857
  conn = sqlite3.connect(self.tracker.db_path)
858
  cursor = conn.cursor()
859
 
 
886
  def get_help_message(self) -> str:
887
  """Get help message with available commands"""
888
  return """
889
+ 🎓 **Kazakh Learning Assistant Help**
890
+
891
+ **Available Commands**:
892
+ - `/progress` - View your detailed learning progress
893
+ - `/recommendations` - Get personalized learning suggestions
894
+ - `/review` - See words that need review
895
+ - `/mastered` - See words you've mastered (mastery level > 0)
896
+ - `/help` - Show this help message
897
+
898
+ **How to Use**:
899
+ - Ask about any Kazakh word or idiom for definitions and examples
900
+ - Your progress is automatically tracked as you learn
901
+ - Regular practice improves your mastery levels
902
+ - Use commands to monitor your learning journey
903
+
904
+ **Examples**:
905
+ - "What does 'сәлем' mean?"
906
+ - "Tell me about Kazakh idioms"
907
+ - "How do you say 'thank you' in Kazakh?"
908
+
909
+ Start learning by asking about any Kazakh term! 🌟
910
+ """
911
 
912
  def login_user(self, user_id: str) -> str:
 
913
  session_token = self.tracker.create_user_session(user_id)
914
  return session_token
915
 
 
917
  assistant = PersonalizedKazakhAssistant()
918
 
919
  def chat_interface(message, history, target_language):
 
920
  try:
921
  web_user_id = "web_user_default"
922
  response = assistant.process_message(message, web_user_id, target_language=target_language)
 
925
  return f"Sorry, I encountered an error: {str(e)}. Please try again."
926
 
927
  def api_login(user_id: str) -> dict:
 
928
  try:
929
  session_token = assistant.login_user(user_id)
930
  return {
 
940
  }
941
 
942
  def api_chat(message: str, user_id: str, session_token: str = None, target_language: str = "English") -> dict:
 
943
  try:
944
  response = assistant.process_message(message, user_id, session_token, target_language)
945
  return {
 
955
  }
956
 
957
  def api_progress(user_id: str, session_token: str = None) -> dict:
 
958
  try:
959
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
960
  return {"success": False, "error": "Invalid session"}
 
975
  }
976
 
977
  def api_recommendations(user_id: str, session_token: str = None) -> dict:
 
978
  try:
979
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
980
  return {"success": False, "error": "Invalid session"}
 
995
  }
996
 
997
  def api_review_words(user_id: str, session_token: str = None) -> dict:
 
998
  try:
999
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1000
  return {"success": False, "error": "Invalid session"}
 
1015
  }
1016
 
1017
  def api_mastered_words(user_id: str, session_token: str = None) -> dict:
 
1018
  try:
1019
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1020
  return {"success": False, "error": "Invalid session"}
 
1035
  }
1036
 
1037
  def api_new_word(user_id: str, session_token: str = None) -> dict:
 
1038
  try:
1039
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1040
  return {"success": False, "error": "Invalid session"}
 
1069
  }
1070
 
1071
  def api_new_idiom(user_id: str, session_token: str = None) -> dict:
 
1072
  try:
1073
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1074
  return {"success": False, "error": "Invalid session"}
 
1103
  }
1104
 
1105
  def api_learning_words(user_id: str, session_token: str = None, page: int = 1, page_size: int = 10) -> dict:
 
1106
  try:
1107
  if session_token and not assistant.tracker.validate_session(user_id, session_token):
1108
  return {"success": False, "error": "Invalid session"}
 
1153
  ]
1154
  )
1155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1156
  if __name__ == "__main__":
1157
  demo.launch(
1158
  show_api=True,