GuestUser33 committed on
Commit
2ed3340
·
verified ·
1 Parent(s): 6deaeb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +970 -802
app.py CHANGED
@@ -1,803 +1,971 @@
1
- import os
2
- os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
3
- import glob
4
- import json
5
- import sqlite3
6
- from datetime import datetime, timedelta
7
- from typing import Dict, List, Optional, Tuple
8
- from dataclasses import dataclass, asdict
9
- from collections import defaultdict
10
- import re
11
-
12
- from dotenv import load_dotenv
13
- import gradio as gr
14
-
15
- from langchain_community.document_loaders import DirectoryLoader, TextLoader
16
- from langchain.text_splitter import CharacterTextSplitter
17
- from langchain.schema import Document
18
- from langchain_chroma import Chroma
19
- from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
20
- from langchain_huggingface import HuggingFaceEmbeddings
21
- from langchain.memory import ConversationBufferMemory
22
- from langchain.chains import ConversationalRetrievalChain
23
- import numpy as np
24
-
25
- @dataclass
26
- class LearningSession:
27
- session_id: str
28
- user_id: str
29
- start_time: datetime
30
- end_time: Optional[datetime] = None
31
- words_learned: int = 0
32
- idioms_learned: int = 0
33
- questions_asked: int = 0
34
-
35
- @dataclass
36
- class WordProgress:
37
- word: str
38
- definition: str
39
- category: str
40
- first_encountered: datetime
41
- last_reviewed: datetime
42
- encounter_count: int
43
- mastery_level: int
44
- correct_answers: int
45
- total_questions: int
46
-
47
- class PersonalizedLearningTracker:
48
- def __init__(self, db_path: str = "learning_progress.db"):
49
- self.db_path = db_path
50
- self.init_database()
51
-
52
- def init_database(self):
53
- """Initialize SQLite database for tracking learning progress"""
54
- conn = sqlite3.connect(self.db_path)
55
- cursor = conn.cursor()
56
-
57
- cursor.execute('''
58
- CREATE TABLE IF NOT EXISTS learning_sessions (
59
- session_id TEXT PRIMARY KEY,
60
- user_id TEXT NOT NULL,
61
- start_time TEXT NOT NULL,
62
- end_time TEXT,
63
- words_learned INTEGER DEFAULT 0,
64
- idioms_learned INTEGER DEFAULT 0,
65
- questions_asked INTEGER DEFAULT 0
66
- )
67
- ''')
68
-
69
- cursor.execute('''
70
- CREATE TABLE IF NOT EXISTS word_progress (
71
- id INTEGER PRIMARY KEY AUTOINCREMENT,
72
- user_id TEXT NOT NULL,
73
- word TEXT NOT NULL,
74
- definition TEXT NOT NULL,
75
- category TEXT NOT NULL,
76
- first_encountered TEXT NOT NULL,
77
- last_reviewed TEXT NOT NULL,
78
- encounter_count INTEGER DEFAULT 1,
79
- mastery_level INTEGER DEFAULT 0,
80
- correct_answers INTEGER DEFAULT 0,
81
- total_questions INTEGER DEFAULT 0,
82
- UNIQUE(user_id, word, category)
83
- )
84
- ''')
85
-
86
- cursor.execute('''
87
- CREATE TABLE IF NOT EXISTS learning_analytics (
88
- id INTEGER PRIMARY KEY AUTOINCREMENT,
89
- user_id TEXT NOT NULL,
90
- date TEXT NOT NULL,
91
- metric_name TEXT NOT NULL,
92
- metric_value REAL NOT NULL
93
- )
94
- ''')
95
-
96
- conn.commit()
97
- conn.close()
98
-
99
- def start_session(self, user_id: str) -> str:
100
- """Start a new learning session"""
101
- session_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
102
- session = LearningSession(
103
- session_id=session_id,
104
- user_id=user_id,
105
- start_time=datetime.now()
106
- )
107
-
108
- conn = sqlite3.connect(self.db_path)
109
- cursor = conn.cursor()
110
- cursor.execute('''
111
- INSERT INTO learning_sessions (session_id, user_id, start_time)
112
- VALUES (?, ?, ?)
113
- ''', (session.session_id, session.user_id, session.start_time.isoformat()))
114
- conn.commit()
115
- conn.close()
116
-
117
- return session_id
118
-
119
- def end_session(self, session_id: str):
120
- """End a learning session"""
121
- conn = sqlite3.connect(self.db_path)
122
- cursor = conn.cursor()
123
- cursor.execute('''
124
- UPDATE learning_sessions
125
- SET end_time = ?
126
- WHERE session_id = ?
127
- ''', (datetime.now().isoformat(), session_id))
128
- conn.commit()
129
- conn.close()
130
-
131
- def track_word_encounter(self, user_id: str, word: str, definition: str, category: str):
132
- """Track when a user encounters a word or idiom"""
133
- conn = sqlite3.connect(self.db_path)
134
- cursor = conn.cursor()
135
-
136
- cursor.execute('''
137
- SELECT * FROM word_progress
138
- WHERE user_id = ? AND word = ? AND category = ?
139
- ''', (user_id, word, category))
140
-
141
- existing = cursor.fetchone()
142
- now = datetime.now().isoformat()
143
-
144
- if existing:
145
-
146
- cursor.execute('''
147
- UPDATE word_progress
148
- SET last_reviewed = ?, encounter_count = encounter_count + 1
149
- WHERE user_id = ? AND word = ? AND category = ?
150
- ''', (now, user_id, word, category))
151
- else:
152
-
153
- cursor.execute('''
154
- INSERT INTO word_progress
155
- (user_id, word, definition, category, first_encountered, last_reviewed)
156
- VALUES (?, ?, ?, ?, ?, ?)
157
- ''', (user_id, word, definition, category, now, now))
158
-
159
- conn.commit()
160
- conn.close()
161
-
162
- def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
163
- """Update mastery level based on user performance"""
164
- conn = sqlite3.connect(self.db_path)
165
- cursor = conn.cursor()
166
-
167
- cursor.execute('''
168
- SELECT mastery_level, correct_answers, total_questions
169
- FROM word_progress
170
- WHERE user_id = ? AND word = ? AND category = ?
171
- ''', (user_id, word, category))
172
-
173
- result = cursor.fetchone()
174
- if result:
175
- current_mastery, correct_answers, total_questions = result
176
- new_correct = correct_answers + (1 if correct else 0)
177
- new_total = total_questions + 1
178
-
179
- accuracy = new_correct / new_total if new_total > 0 else 0
180
- new_mastery = min(5, int(accuracy * 5) + (1 if new_total >= 3 else 0))
181
-
182
- cursor.execute('''
183
- UPDATE word_progress
184
- SET mastery_level = ?, correct_answers = ?, total_questions = ?
185
- WHERE user_id = ? AND word = ? AND category = ?
186
- ''', (new_mastery, new_correct, new_total, user_id, word, category))
187
-
188
- conn.commit()
189
- conn.close()
190
-
191
- def get_user_progress(self, user_id: str) -> Dict:
192
- """Get comprehensive user progress statistics"""
193
- conn = sqlite3.connect(self.db_path)
194
- cursor = conn.cursor()
195
-
196
- cursor.execute('''
197
- SELECT category, COUNT(*), AVG(mastery_level)
198
- FROM word_progress
199
- WHERE user_id = ?
200
- GROUP BY category
201
- ''', (user_id,))
202
-
203
- category_stats = {}
204
- for category, count, avg_mastery in cursor.fetchall():
205
- category_stats[category] = {
206
- 'count': count,
207
- 'average_mastery': round(avg_mastery or 0, 2)
208
- }
209
-
210
- week_ago = (datetime.now() - timedelta(days=7)).isoformat()
211
- cursor.execute('''
212
- SELECT COUNT(*) FROM word_progress
213
- WHERE user_id = ? AND last_reviewed >= ?
214
- ''', (user_id, week_ago))
215
- recent_activity = cursor.fetchone()[0]
216
-
217
- cursor.execute('''
218
- SELECT DATE(last_reviewed) as date, COUNT(*) as daily_count
219
- FROM word_progress
220
- WHERE user_id = ?
221
- GROUP BY DATE(last_reviewed)
222
- ORDER BY date DESC
223
- LIMIT 30
224
- ''', (user_id,))
225
-
226
- daily_activity = cursor.fetchall()
227
-
228
- conn.close()
229
-
230
- return {
231
- 'category_stats': category_stats,
232
- 'recent_activity': recent_activity,
233
- 'daily_activity': daily_activity,
234
- 'total_words': sum(stats['count'] for stats in category_stats.values())
235
- }
236
-
237
- def get_words_to_review(self, user_id: str, limit: int = 10) -> List[Dict]:
238
- """Get words that need review based on spaced repetition"""
239
- conn = sqlite3.connect(self.db_path)
240
- cursor = conn.cursor()
241
-
242
- cursor.execute('''
243
- SELECT word, definition, category, mastery_level, last_reviewed
244
- FROM word_progress
245
- WHERE user_id = ? AND (
246
- mastery_level < 3 OR
247
- last_reviewed < datetime('now', '-2 days')
248
- )
249
- ORDER BY mastery_level ASC, last_reviewed ASC
250
- LIMIT ?
251
- ''', (user_id, limit))
252
-
253
- words = []
254
- for word, definition, category, mastery, last_reviewed in cursor.fetchall():
255
- words.append({
256
- 'word': word,
257
- 'definition': definition,
258
- 'category': category,
259
- 'mastery_level': mastery,
260
- 'last_reviewed': last_reviewed
261
- })
262
-
263
- conn.close()
264
- return words
265
-
266
- def get_learning_recommendations(self, user_id: str) -> List[str]:
267
- """Get personalized learning recommendations"""
268
- progress = self.get_user_progress(user_id)
269
- recommendations = []
270
-
271
- if progress['total_words'] < 10:
272
- recommendations.append("Start with basic vocabulary - try asking about common Kazakh words!")
273
-
274
- if 'idiom' not in progress['category_stats'] or progress['category_stats'].get('idiom', {}).get('count', 0) < 5:
275
- recommendations.append("Explore Kazakh idioms to improve your cultural understanding!")
276
-
277
- words_to_review = self.get_words_to_review(user_id, 5)
278
- if words_to_review:
279
- recommendations.append(f"Review these words: {', '.join([w['word'] for w in words_to_review[:3]])}")
280
-
281
- if progress['recent_activity'] == 0:
282
- recommendations.append("You haven't practiced recently - consistency is key to language learning!")
283
-
284
- return recommendations
285
-
286
- class PersonalizedKazakhAssistant:
287
- def __init__(self):
288
-
289
- self.setup_environment()
290
- self.setup_vectorstore()
291
- self.setup_llm()
292
- self.tracker = PersonalizedLearningTracker()
293
- self.current_user = "default_user"
294
- self.current_session = None
295
-
296
- def setup_environment(self):
297
- """Setup environment and configuration"""
298
- # load_dotenv()
299
- # os.environ['GOOGLE_API_KEY'] = os.getenv("GOOGLE_API_KEY")
300
- self.google_api_key = os.getenv("GOOGLE_API_KEY")
301
- self.MODEL = "gemini-1.5-flash"
302
- self.db_name = "vector_db"
303
-
304
- def setup_vectorstore(self):
305
- """Setup document loading and vector store"""
306
- folders = glob.glob("knowledge-base/*")
307
- text_loader_kwargs = {'encoding': 'utf-8'}
308
- documents = []
309
-
310
- for folder in folders:
311
- doc_type = os.path.basename(folder)
312
- loader = DirectoryLoader(
313
- folder,
314
- glob="**/*.txt",
315
- loader_cls=TextLoader,
316
- loader_kwargs=text_loader_kwargs
317
- )
318
- folder_docs = loader.load()
319
- for doc in folder_docs:
320
- doc.metadata["doc_type"] = doc_type
321
- documents.append(doc)
322
-
323
- text_splitter = CharacterTextSplitter(separator=r'\n', chunk_size=2000, chunk_overlap=0)
324
- chunks = text_splitter.split_documents(documents)
325
-
326
- print(f"Total chunks: {len(chunks)}")
327
-
328
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased-v1")
329
-
330
- if os.path.exists(self.db_name):
331
- Chroma(persist_directory=self.db_name, embedding_function=embeddings).delete_collection()
332
-
333
- self.vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=self.db_name)
334
- print(f"Vectorstore created with {self.vectorstore._collection.count()} documents")
335
-
336
- def setup_llm(self):
337
- """Setup LLM with enhanced system prompt"""
338
- system_prompt = """
339
- You are a personalized Kazakh language learning assistant with access to a comprehensive knowledge base and user learning history. Your role is to help users learn Kazakh words and idioms while tracking their progress and providing personalized recommendations.
340
-
341
- Key capabilities:
342
- 1. **Answer Queries**: Provide accurate definitions and examples for Kazakh words and idioms from your knowledge base
343
- 2. **Track Learning Progress**: Identify and track when users learn new words or idioms
344
- 3. **Personalized Responses**: Adapt responses based on user's learning history and progress
345
- 4. **Progress Reporting**: Provide detailed progress reports when asked
346
- 5. **Learning Recommendations**: Suggest words/idioms to review or learn next
347
-
348
- Response Guidelines:
349
- - For word/idiom queries: Provide definition, usage examples, and related information
350
- - Always identify the main Kazakh word/idiom being discussed for progress tracking
351
- - Be encouraging and supportive of the user's learning journey
352
- - Use simple, clear explanations appropriate for language learners
353
- - When discussing progress, be specific and motivating
354
-
355
- Format responses naturally in conversational style, not JSON unless specifically requested.
356
- """
357
-
358
- self.llm = ChatGoogleGenerativeAI(
359
- model="models/gemini-1.5-flash",
360
- temperature=0.7,
361
- system_instruction=system_prompt
362
- )
363
-
364
- self.memory = ConversationBufferMemory(
365
- memory_key='chat_history',
366
- return_messages=True,
367
- max_token_limit=10000
368
- )
369
-
370
- retriever = self.vectorstore.as_retriever()
371
- self.conversation_chain = ConversationalRetrievalChain.from_llm(
372
- llm=self.llm,
373
- retriever=retriever,
374
- memory=self.memory
375
- )
376
-
377
- def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
378
- """Extract meaningful Kazakh terms using document metadata to determine category"""
379
- terms = []
380
-
381
- try:
382
- retrieved_docs = self.vectorstore.similarity_search(message, k=5)
383
-
384
- kazakh_words = re.findall(r'[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:\s+[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*', response)
385
-
386
- for word in kazakh_words:
387
- word = word.strip()
388
-
389
- if len(word) <= 2 or len(word) > 50:
390
- continue
391
-
392
- skip_words = ['деген', 'деп', 'берілген', 'мәтінде', 'мағынасы', 'дегеннің',
393
- 'түсіндірілген', 'келтірілген', 'болып', 'табылады', 'ауруы',
394
- 'мынадай', 'тақырыбына', 'тіркестер', 'арналған', 'байланысты']
395
-
396
- if any(skip in word.lower() for skip in skip_words):
397
- continue
398
-
399
- category = "word"
400
- definition = ""
401
-
402
- for doc in retrieved_docs:
403
- if word.lower() in doc.page_content.lower():
404
- doc_type = doc.metadata.get('doc_type', '').lower()
405
- if 'idiom' in doc_type or 'тіркес' in doc_type:
406
- category = "idiom"
407
- else:
408
- category = "word"
409
-
410
- definition = self.extract_clean_definition(word, doc.page_content, response)
411
- break
412
-
413
- if definition and len(word.split()) <= 4:
414
-
415
- if not any(phrase in word.lower() for phrase in ['қалай', 'қандай', 'қайда', 'неше', 'қашан']):
416
- terms.append((word, category, definition))
417
-
418
- except Exception as e:
419
- print(f"Error extracting terms: {e}")
420
-
421
- return terms
422
-
423
- def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
424
- """Extract clean definition for a term"""
425
- sentences = response.split('.')
426
- for sentence in sentences:
427
- if term.lower() in sentence.lower():
428
- clean_sentence = sentence.strip()
429
- if len(clean_sentence) > 10 and len(clean_sentence) < 150:
430
-
431
- if not any(word in clean_sentence.lower() for word in ['деген не', 'қалай аталады', 'нені білдіреді']):
432
- return clean_sentence
433
-
434
- doc_sentences = doc_content.split('.')
435
- for sentence in doc_sentences:
436
- if term.lower() in sentence.lower():
437
- clean_sentence = sentence.strip()
438
- if len(clean_sentence) > 10 and len(clean_sentence) < 150:
439
- return clean_sentence
440
-
441
- return f"Definition for {term}"
442
-
443
- def process_message(self, message: str) -> str:
444
- """Process user message with personalization tracking"""
445
-
446
- if not self.current_session:
447
- self.current_session = self.tracker.start_session(self.current_user)
448
-
449
- if message.lower().startswith('/progress'):
450
- return self.get_progress_report()
451
- elif message.lower().startswith('/recommendations'):
452
- return self.get_recommendations()
453
- elif message.lower().startswith('/review'):
454
- return self.get_review_words()
455
- elif message.lower().startswith('/help'):
456
- return self.get_help_message()
457
-
458
- result = self.conversation_chain.invoke({"question": message})
459
- response = result["answer"]
460
-
461
- extracted_terms = self.extract_kazakh_terms(message, response)
462
-
463
- for term, category, definition in extracted_terms:
464
- if definition and term:
465
- self.tracker.track_word_encounter(
466
- self.current_user,
467
- term,
468
- definition,
469
- category
470
- )
471
-
472
- return response
473
-
474
- def extract_definition_from_response(self, response: str, term: str) -> str:
475
- """Extract definition of a term from the response"""
476
-
477
- sentences = response.split('.')
478
- for sentence in sentences:
479
- if term in sentence:
480
- return sentence.strip()
481
- return "Definition extracted from conversation"
482
-
483
- def get_progress_report(self) -> str:
484
- """Generate a comprehensive progress report"""
485
- progress = self.tracker.get_user_progress(self.current_user)
486
-
487
- if progress['total_words'] == 0:
488
- return "Сіз әлі үйренуді бастамадыңыз! Маған кез келген қазақ сөзі немесе тіркес туралы сұраңыз. 🌟\n\nYou haven't started learning yet! Ask me about any Kazakh word or idiom to begin your journey. 🌟"
489
-
490
- report = "📊 **Сіздің үйрену прогресіңіз / Your Learning Progress Report**\n\n"
491
-
492
- report += f"🎯 **Үйренген терминдер саны / Total Terms Learned**: {progress['total_words']}\n"
493
-
494
- for category, stats in progress['category_stats'].items():
495
- emoji = "📝" if category == "word" else "🎭"
496
- category_name = "Сөздер / Words" if category == "word" else "Тіркестер / Idioms"
497
- report += f"{emoji} **{category_name}**: {stats['count']} (Орташа меңгеру / Average mastery: {stats['average_mastery']}/5)\n"
498
-
499
- report += f"\n⚡ **Соңғы белсенділік / Recent Activity**: {progress['recent_activity']} терминдер соңғы 7 күнде қаралды / terms reviewed in the last 7 days\n"
500
-
501
- if progress['daily_activity']:
502
- recent_days = len(progress['daily_activity'])
503
- report += f"🔥 **Үйрену ырғағы / Learning Streak**: {recent_days} күн белсенді болдыңыз / Active on {recent_days} days recently\n"
504
-
505
- recommendations = self.tracker.get_learning_recommendations(self.current_user)
506
- if recommendations:
507
- report += f"\n💡 **Ұсыныстар / Recommendations**:\n"
508
- for i, rec in enumerate(recommendations, 1):
509
- report += f"{i}. {rec}\n"
510
-
511
- return report
512
-
513
- def get_recommendations(self) -> str:
514
- """Get personalized learning recommendations"""
515
- recommendations = self.tracker.get_learning_recommendations(self.current_user)
516
-
517
- if not recommendations:
518
- return "Керемет! Сіз өте жақсы прогресс жасап жатырсыз. Үнемі жаттығуды жалғастырыңыз! 🎉\n\nGreat job! You're making excellent progress. Keep practicing regularly! 🎉"
519
-
520
- response = "💡 **Жеке ұсыныстар / Personalized Learning Recommendations**:\n\n"
521
- for i, rec in enumerate(recommendations, 1):
522
- response += f"{i}. {rec}\n"
523
-
524
- return response
525
-
526
- def get_review_words(self) -> str:
527
- """Get words that need review"""
528
- words_to_review = self.tracker.get_words_to_review(self.current_user, 10)
529
-
530
- if not words_to_review:
531
- return "Тамаша! Сізде қазір қайталау қажет сөздер жоқ. Жаңа терминдерді үйренуге тырысыңыз! ✨\n\nExcellent! You don't have any words that need review right now. Try learning some new terms! ✨"
532
-
533
- response = "📚 **Қайталауға арналған сөздер / Words to Review**:\n\n"
534
- for word_info in words_to_review:
535
- emoji = "📝" if word_info['category'] == "word" else "🎭"
536
- mastery_stars = "⭐" * word_info['mastery_level'] + "☆" * (5 - word_info['mastery_level'])
537
- response += f"{emoji} **{word_info['word']}** - {mastery_stars}\n"
538
-
539
- definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
540
- response += f" {definition_preview}\n\n"
541
-
542
- return response
543
-
544
- def get_help_message(self) -> str:
545
- """Get help message with available commands"""
546
- return """
547
- 🎓 **Kazakh Learning Assistant Help**
548
-
549
- **Available Commands**:
550
- - `/progress` - View your detailed learning progress
551
- - `/recommendations` - Get personalized learning suggestions
552
- - `/review` - See words that need review
553
- - `/help` - Show this help message
554
-
555
- **How to Use**:
556
- - Ask about any Kazakh word or idiom for definitions and examples
557
- - Your progress is automatically tracked as you learn
558
- - Regular practice improves your mastery levels
559
- - Use commands to monitor your learning journey
560
-
561
- **Examples**:
562
- - "What does 'сәлем' mean?"
563
- - "Tell me about Kazakh idioms"
564
- - "How do you say 'thank you' in Kazakh?"
565
-
566
- Start learning by asking about any Kazakh term! 🌟
567
- """
568
-
569
- assistant = PersonalizedKazakhAssistant()
570
-
571
- def chat_interface(message, history):
572
- """Chat interface for Gradio"""
573
- try:
574
- response = assistant.process_message(message)
575
- return response
576
- except Exception as e:
577
- return f"Sorry, I encountered an error: {str(e)}. Please try again."
578
-
579
- demo = gr.ChatInterface(
580
- chat_interface,
581
- type="messages",
582
- title="🇰🇿 Personalized Kazakh Learning Assistant",
583
- description="Learn Kazakh words and idioms with personalized progress tracking. Use commands like /progress, /recommendations, and /review to monitor your learning journey!",
584
- examples=[
585
- "What does 'сәлем' mean?",
586
- "Tell me about Kazakh idioms",
587
- "/progress",
588
- "/recommendations",
589
- "How do you say 'hello' in Kazakh?"
590
- ]
591
- )
592
-
593
- if __name__ == "__main__":
594
- demo.launch(inbrowser=True)
595
-
596
- def api_chat(message: str, user_id: str = "default_user") -> dict:
597
- """API endpoint for chat functionality"""
598
- try:
599
-
600
- assistant.current_user = user_id
601
- if not assistant.current_session:
602
- assistant.current_session = assistant.tracker.start_session(user_id)
603
-
604
- response = assistant.process_message(message)
605
- return {
606
- "success": True,
607
- "response": response,
608
- "user_id": user_id
609
- }
610
- except Exception as e:
611
- return {
612
- "success": False,
613
- "error": str(e),
614
- "response": "Кешіріңіз, қате орын алды. Қайталап көріңіз."
615
- }
616
-
617
- def api_progress(user_id: str = "default_user") -> dict:
618
- """API endpoint for user progress"""
619
- try:
620
- assistant.current_user = user_id
621
- progress_text = assistant.get_progress_report()
622
- progress_data = assistant.tracker.get_user_progress(user_id)
623
-
624
- return {
625
- "success": True,
626
- "progress_text": progress_text,
627
- "progress_data": progress_data,
628
- "user_id": user_id
629
- }
630
- except Exception as e:
631
- return {
632
- "success": False,
633
- "error": str(e)
634
- }
635
-
636
- def api_recommendations(user_id: str = "default_user") -> dict:
637
- """API endpoint for learning recommendations"""
638
- try:
639
- assistant.current_user = user_id
640
- recommendations_text = assistant.get_recommendations()
641
- recommendations_list = assistant.tracker.get_learning_recommendations(user_id)
642
-
643
- return {
644
- "success": True,
645
- "recommendations_text": recommendations_text,
646
- "recommendations_list": recommendations_list,
647
- "user_id": user_id
648
- }
649
- except Exception as e:
650
- return {
651
- "success": False,
652
- "error": str(e)
653
- }
654
-
655
- def api_review_words(user_id: str = "default_user") -> dict:
656
- """API endpoint for words to review"""
657
- try:
658
- assistant.current_user = user_id
659
- review_text = assistant.get_review_words()
660
- review_data = assistant.tracker.get_words_to_review(user_id, 10)
661
-
662
- return {
663
- "success": True,
664
- "review_text": review_text,
665
- "review_data": review_data,
666
- "user_id": user_id
667
- }
668
- except Exception as e:
669
- return {
670
- "success": False,
671
- "error": str(e)
672
- }
673
-
674
- with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
675
- gr.Markdown("# 🇰🇿 Personalized Kazakh Learning Assistant")
676
- gr.Markdown("### Chat Interface + API Endpoints for Mobile Integration")
677
-
678
- with gr.Tab("💬 Chat Interface"):
679
- chat_interface = gr.ChatInterface(
680
- chat_interface,
681
- type="messages",
682
- examples=[
683
- "сәлем деген не?",
684
- "күләпара не үшін керек?",
685
- "/progress",
686
- "/recommendations",
687
- "/review"
688
- ]
689
- )
690
-
691
- with gr.Tab("🔌 API Testing"):
692
- gr.Markdown("## Test API Endpoints")
693
-
694
- with gr.Row():
695
- with gr.Column():
696
- user_id_input = gr.Textbox(label="User ID", value="test_user", placeholder="Enter user ID")
697
- message_input = gr.Textbox(label="Message", placeholder="Enter your message in Kazakh or English")
698
-
699
- with gr.Row():
700
- chat_btn = gr.Button("💬 Test Chat API")
701
- progress_btn = gr.Button("📊 Test Progress API")
702
- recommendations_btn = gr.Button("💡 Test Recommendations API")
703
- review_btn = gr.Button("📚 Test Review API")
704
-
705
- api_output = gr.JSON(label="API Response")
706
-
707
- chat_btn.click(
708
- fn=lambda msg, uid: api_chat(msg, uid),
709
- inputs=[message_input, user_id_input],
710
- outputs=api_output
711
- )
712
-
713
- progress_btn.click(
714
- fn=lambda uid: api_progress(uid),
715
- inputs=user_id_input,
716
- outputs=api_output
717
- )
718
-
719
- recommendations_btn.click(
720
- fn=lambda uid: api_recommendations(uid),
721
- inputs=user_id_input,
722
- outputs=api_output
723
- )
724
-
725
- review_btn.click(
726
- fn=lambda uid: api_review_words(uid),
727
- inputs=user_id_input,
728
- outputs=api_output
729
- )
730
-
731
- with gr.Tab("📖 API Documentation"):
732
- gr.Markdown("""
733
- ## API Endpoints for Flutter Integration
734
-
735
- Your Gradio Space will automatically provide these API endpoints:
736
-
737
- ### Base URL: `https://your-username-kazakh-learning.hf.space`
738
-
739
- ### Available Endpoints:
740
-
741
- #### 1. Chat API
742
- ```
743
- POST /api/predict
744
- Content-Type: application/json
745
-
746
- {
747
- "data": ["message", "user_id"],
748
- "fn_index": 0
749
- }
750
- ```
751
-
752
- #### 2. Progress API
753
- ```
754
- POST /api/predict
755
- Content-Type: application/json
756
-
757
- {
758
- "data": ["user_id"],
759
- "fn_index": 1
760
- }
761
- ```
762
-
763
- #### 3. Recommendations API
764
- ```
765
- POST /api/predict
766
- Content-Type: application/json
767
-
768
- {
769
- "data": ["user_id"],
770
- "fn_index": 2
771
- }
772
- ```
773
-
774
- #### 4. Review Words API
775
- ```
776
- POST /api/predict
777
- Content-Type: application/json
778
-
779
- {
780
- "data": ["user_id"],
781
- "fn_index": 3
782
- }
783
- ```
784
-
785
- ### Flutter HTTP Example:
786
- ```dart
787
- // Chat API call
788
- final response = await http.post(
789
- Uri.parse('https://your-username-kazakh-learning.hf.space/api/predict'),
790
- headers: {'Content-Type': 'application/json'},
791
- body: jsonEncode({
792
- 'data': ['сәлем деген не?', 'user123'],
793
- 'fn_index': 0
794
- }),
795
- );
796
-
797
- final result = jsonDecode(response.body);
798
- final chatResponse = result['data'][0]['response'];
799
- ```
800
- """)
801
-
802
- if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  demo.launch()
 
1
+ import os
2
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
3
+ import glob
4
+ import json
5
+ import sqlite3
6
+ from datetime import datetime, timedelta
7
+ from typing import Dict, List, Optional, Tuple
8
+ from dataclasses import dataclass, asdict
9
+ from collections import defaultdict
10
+ import re
11
+ import uuid
12
+ import hashlib
13
+
14
+ from dotenv import load_dotenv
15
+ import gradio as gr
16
+
17
+ from langchain_community.document_loaders import DirectoryLoader, TextLoader
18
+ from langchain.text_splitter import CharacterTextSplitter
19
+ from langchain.schema import Document
20
+ from langchain_chroma import Chroma
21
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
22
+ from langchain_huggingface import HuggingFaceEmbeddings
23
+ from langchain.memory import ConversationBufferMemory
24
+ from langchain.chains import ConversationalRetrievalChain
25
+ import numpy as np
26
+
27
+ @dataclass
28
+ class LearningSession:
29
+ session_id: str
30
+ user_id: str
31
+ start_time: datetime
32
+ end_time: Optional[datetime] = None
33
+ words_learned: int = 0
34
+ idioms_learned: int = 0
35
+ questions_asked: int = 0
36
+
37
+ @dataclass
38
+ class WordProgress:
39
+ word: str
40
+ definition: str
41
+ category: str
42
+ first_encountered: datetime
43
+ last_reviewed: datetime
44
+ encounter_count: int
45
+ mastery_level: int
46
+ correct_answers: int
47
+ total_questions: int
48
+
49
+ class PersonalizedLearningTracker:
50
+ def __init__(self, db_path: str = "learning_progress.db"):
51
+ self.db_path = db_path
52
+ self.init_database()
53
+
54
+ def init_database(self):
55
+ """Initialize SQLite database for tracking learning progress"""
56
+ conn = sqlite3.connect(self.db_path)
57
+ cursor = conn.cursor()
58
+
59
+ cursor.execute('''
60
+ CREATE TABLE IF NOT EXISTS learning_sessions (
61
+ session_id TEXT PRIMARY KEY,
62
+ user_id TEXT NOT NULL,
63
+ start_time TEXT NOT NULL,
64
+ end_time TEXT,
65
+ words_learned INTEGER DEFAULT 0,
66
+ idioms_learned INTEGER DEFAULT 0,
67
+ questions_asked INTEGER DEFAULT 0
68
+ )
69
+ ''')
70
+
71
+ cursor.execute('''
72
+ CREATE TABLE IF NOT EXISTS word_progress (
73
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
74
+ user_id TEXT NOT NULL,
75
+ word TEXT NOT NULL,
76
+ definition TEXT NOT NULL,
77
+ category TEXT NOT NULL,
78
+ first_encountered TEXT NOT NULL,
79
+ last_reviewed TEXT NOT NULL,
80
+ encounter_count INTEGER DEFAULT 1,
81
+ mastery_level INTEGER DEFAULT 0,
82
+ correct_answers INTEGER DEFAULT 0,
83
+ total_questions INTEGER DEFAULT 0,
84
+ UNIQUE(user_id, word, category)
85
+ )
86
+ ''')
87
+
88
+ cursor.execute('''
89
+ CREATE TABLE IF NOT EXISTS learning_analytics (
90
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
91
+ user_id TEXT NOT NULL,
92
+ date TEXT NOT NULL,
93
+ metric_name TEXT NOT NULL,
94
+ metric_value REAL NOT NULL
95
+ )
96
+ ''')
97
+
98
+ cursor.execute('''
99
+ CREATE TABLE IF NOT EXISTS user_sessions (
100
+ user_id TEXT NOT NULL,
101
+ session_token TEXT NOT NULL,
102
+ created_at TEXT NOT NULL,
103
+ last_activity TEXT NOT NULL,
104
+ is_active BOOLEAN DEFAULT 1,
105
+ PRIMARY KEY (user_id, session_token)
106
+ )
107
+ ''')
108
+
109
+ conn.commit()
110
+ conn.close()
111
+
112
+ def create_user_session(self, user_id: str) -> str:
113
+ """Create a new session token for a user"""
114
+ session_token = str(uuid.uuid4())
115
+ now = datetime.now().isoformat()
116
+
117
+ conn = sqlite3.connect(self.db_path)
118
+ cursor = conn.cursor()
119
+
120
+ cursor.execute('''
121
+ UPDATE user_sessions
122
+ SET is_active = 0
123
+ WHERE user_id = ?
124
+ ''', (user_id,))
125
+
126
+ cursor.execute('''
127
+ INSERT INTO user_sessions (user_id, session_token, created_at, last_activity)
128
+ VALUES (?, ?, ?, ?)
129
+ ''', (user_id, session_token, now, now))
130
+
131
+ conn.commit()
132
+ conn.close()
133
+
134
+ return session_token
135
+
136
def validate_session(self, user_id: str, session_token: str) -> bool:
    """Validate that a session token belongs to the user and is still active.

    Args:
        user_id: Identifier the token is expected to belong to.
        session_token: Token previously returned by ``create_user_session``.

    Returns:
        True only when a matching row exists and its ``is_active`` flag is 1.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT is_active FROM user_sessions
            WHERE user_id = ? AND session_token = ?
        ''', (user_id, session_token))
        result = cursor.fetchone()
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()

    return result is not None and result[0] == 1
150
+
151
def update_session_activity(self, user_id: str, session_token: str):
    """Stamp the current local time as the session's ``last_activity``.

    Args:
        user_id: Owner of the session.
        session_token: Token identifying the session row to touch.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            UPDATE user_sessions
            SET last_activity = ?
            WHERE user_id = ? AND session_token = ?
        ''', (datetime.now().isoformat(), user_id, session_token))
        conn.commit()
    finally:
        # Close even when the update raises so the connection is not leaked.
        conn.close()
164
+
165
def start_session(self, user_id: str) -> str:
    """Start a new learning session and persist it.

    The session id is ``"<user_id>_<YYYYmmdd_HHMMSS>"``. A single clock
    reading is used for both the id and the stored ``start_time`` so the two
    can never disagree across a second boundary.

    Args:
        user_id: Identifier of the user starting the session.

    Returns:
        The id of the inserted ``learning_sessions`` row.
    """
    started_at = datetime.now()
    session_id = f"{user_id}_{started_at.strftime('%Y%m%d_%H%M%S')}"

    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            INSERT INTO learning_sessions (session_id, user_id, start_time)
            VALUES (?, ?, ?)
        ''', (session_id, user_id, started_at.isoformat()))
        conn.commit()
    finally:
        # Close even when the insert raises so the connection is not leaked.
        conn.close()

    return session_id
184
+
185
def end_session(self, session_id: str):
    """End a learning session by recording the current local time as ``end_time``.

    Args:
        session_id: Id previously returned by ``start_session``. An unknown id
            updates no rows.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            UPDATE learning_sessions
            SET end_time = ?
            WHERE session_id = ?
        ''', (datetime.now().isoformat(), session_id))
        conn.commit()
    finally:
        # Close even when the update raises so the connection is not leaked.
        conn.close()
196
+
197
def track_word_encounter(self, user_id: str, word: str, definition: str, category: str):
    """Track when a user encounters a word or idiom.

    The first encounter inserts a ``word_progress`` row; later encounters of
    the same ``(user_id, word, category)`` refresh ``last_reviewed`` and bump
    ``encounter_count`` while keeping the originally stored definition.

    Args:
        user_id: Identifier of the learner.
        word: The Kazakh term that was encountered.
        definition: Definition stored on first encounter only.
        category: Term category, e.g. ``"word"`` or ``"idiom"``.
    """
    now = datetime.now().isoformat()

    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        # A single upsert against the UNIQUE(user_id, word, category)
        # constraint replaces the former SELECT-then-INSERT/UPDATE pair, which
        # could raise IntegrityError when two requests recorded the same new
        # term at the same time.
        cursor.execute('''
            INSERT INTO word_progress
            (user_id, word, definition, category, first_encountered, last_reviewed)
            VALUES (?, ?, ?, ?, ?, ?)
            ON CONFLICT(user_id, word, category) DO UPDATE SET
                last_reviewed = excluded.last_reviewed,
                encounter_count = encounter_count + 1
        ''', (user_id, word, definition, category, now, now))
        conn.commit()
    finally:
        # Close even when the statement raises so the connection is not leaked.
        conn.close()
225
+
226
def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
    """Update a term's mastery level from one quiz outcome.

    The new level is ``min(5, floor(5 * correct / total) + bonus)`` where
    ``bonus`` is 1 once the term has been asked at least three times. Nothing
    is written when the term has no ``word_progress`` row for this user.

    Args:
        user_id: Identifier of the learner.
        word: The term that was quizzed.
        category: Category the term was stored under.
        correct: Whether the learner answered correctly.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT correct_answers, total_questions
            FROM word_progress
            WHERE user_id = ? AND word = ? AND category = ?
        ''', (user_id, word, category))

        row = cursor.fetchone()
        if row is not None:
            correct_answers, total_questions = row
            new_correct = correct_answers + (1 if correct else 0)
            new_total = total_questions + 1  # always >= 1, so no zero division

            # Integer arithmetic gives the same floor as int(accuracy * 5)
            # without relying on float rounding.
            accuracy_fifths = (new_correct * 5) // new_total
            new_mastery = min(5, accuracy_fifths + (1 if new_total >= 3 else 0))

            cursor.execute('''
                UPDATE word_progress
                SET mastery_level = ?, correct_answers = ?, total_questions = ?
                WHERE user_id = ? AND word = ? AND category = ?
            ''', (new_mastery, new_correct, new_total, user_id, word, category))

        conn.commit()
    finally:
        # Close even when a statement raises so the connection is not leaked.
        conn.close()
254
+
255
def get_user_progress(self, user_id: str) -> Dict:
    """Get comprehensive progress statistics for one user.

    Args:
        user_id: Identifier of the learner.

    Returns:
        A dict with:
        ``category_stats`` - per category ``{'count', 'average_mastery'}``
        (average rounded to two decimals);
        ``recent_activity`` - number of terms reviewed in the last 7 days;
        ``daily_activity`` - up to 30 ``(date, count)`` rows, newest first,
        grouped by the date of ``last_reviewed``;
        ``total_words`` - sum of all category counts.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()

        cursor.execute('''
            SELECT category, COUNT(*), AVG(mastery_level)
            FROM word_progress
            WHERE user_id = ?
            GROUP BY category
        ''', (user_id,))

        category_stats = {}
        for category, count, avg_mastery in cursor.fetchall():
            category_stats[category] = {
                'count': count,
                'average_mastery': round(avg_mastery or 0, 2)
            }

        # Timestamps are stored as local-time ISO strings, so the cutoff is
        # built the same way and compared lexically.
        week_ago = (datetime.now() - timedelta(days=7)).isoformat()
        cursor.execute('''
            SELECT COUNT(*) FROM word_progress
            WHERE user_id = ? AND last_reviewed >= ?
        ''', (user_id, week_ago))
        recent_activity = cursor.fetchone()[0]

        cursor.execute('''
            SELECT DATE(last_reviewed) as date, COUNT(*) as daily_count
            FROM word_progress
            WHERE user_id = ?
            GROUP BY DATE(last_reviewed)
            ORDER BY date DESC
            LIMIT 30
        ''', (user_id,))

        daily_activity = cursor.fetchall()
    finally:
        # Close even when a query raises so the connection is not leaked.
        conn.close()

    return {
        'category_stats': category_stats,
        'recent_activity': recent_activity,
        'daily_activity': daily_activity,
        'total_words': sum(stats['count'] for stats in category_stats.values())
    }
300
+
301
def get_words_to_review(self, user_id: str, limit: int = 10) -> List[Dict]:
    """Get the terms a user should review next.

    A term qualifies when its mastery level is below 3 or it was last
    reviewed more than two days ago. Results are ordered by ascending mastery
    and then by oldest review first.

    Args:
        user_id: Identifier of the learner.
        limit: Maximum number of terms to return.

    Returns:
        A list of dicts with keys ``word``, ``definition``, ``category``,
        ``mastery_level`` and ``last_reviewed``.
    """
    # ``last_reviewed`` holds local-time ``datetime.isoformat()`` strings
    # ("YYYY-MM-DDTHH:MM:SS.ffffff"). SQLite's datetime('now', '-2 days')
    # yields a UTC "YYYY-MM-DD HH:MM:SS" string, so comparing the two as text
    # mixed time zones and separators. Building the cutoff in Python keeps
    # both sides of the comparison in the same format.
    review_cutoff = (datetime.now() - timedelta(days=2)).isoformat()

    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT word, definition, category, mastery_level, last_reviewed
            FROM word_progress
            WHERE user_id = ? AND (
                mastery_level < 3 OR
                last_reviewed < ?
            )
            ORDER BY mastery_level ASC, last_reviewed ASC
            LIMIT ?
        ''', (user_id, review_cutoff, limit))
        rows = cursor.fetchall()
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()

    return [
        {
            'word': word,
            'definition': definition,
            'category': category,
            'mastery_level': mastery,
            'last_reviewed': last_reviewed
        }
        for word, definition, category, mastery, last_reviewed in rows
    ]
329
+
330
def get_learning_recommendations(self, user_id: str) -> List[str]:
    """Build the list of study tips that currently apply to a user.

    Tips are emitted in a fixed order: small vocabulary, few idioms, terms due
    for review (at most three are named), and no activity in the last week.

    Args:
        user_id: Identifier of the learner.

    Returns:
        The applicable tips; empty when none apply.
    """
    stats = self.get_user_progress(user_id)
    per_category = stats['category_stats']
    tips = []

    if stats['total_words'] < 10:
        tips.append("Start with basic vocabulary - try asking about common Kazakh words!")

    # A missing idiom category counts as zero idioms learned.
    idiom_count = per_category['idiom'].get('count', 0) if 'idiom' in per_category else 0
    if idiom_count < 5:
        tips.append("Explore Kazakh idioms to improve your cultural understanding!")

    due_terms = self.get_words_to_review(user_id, 5)
    if due_terms:
        named = ', '.join(entry['word'] for entry in due_terms[:3])
        tips.append(f"Review these words: {named}")

    if stats['recent_activity'] == 0:
        tips.append("You haven't practiced recently - consistency is key to language learning!")

    return tips
349
+
350
class PersonalizedKazakhAssistant:
    """Retrieval-backed Kazakh tutor that records each user's vocabulary progress.

    Construction loads the ``knowledge-base`` folders into a Chroma vector
    store, configures the Gemini chat model and creates the progress tracker.
    Conversation memory and learning sessions are kept per ``user_id``.
    """

    # One or more whitespace-separated runs of Kazakh Cyrillic letters.
    _KAZAKH_TERM_RE = re.compile(
        r'[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:\s+[А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*'
    )
    # Candidates containing any of these substrings are never tracked.
    _SKIP_WORDS = (
        'деген', 'деп', 'берілген', 'мәтінде', 'мағынасы', 'дегеннің',
        'түсіндірілген', 'келтірілген', 'болып', 'табылады', 'ауруы',
        'мынадай', 'тақырыбына', 'тіркестер', 'арналған', 'байланысты',
    )
    # Candidates containing any of these substrings are also discarded.
    _QUESTION_WORDS = ('қалай', 'қандай', 'қайда', 'неше', 'қашан')
    # Response sentences containing any of these are not used as definitions.
    _QUESTION_PHRASES = ('деген не', 'қалай аталады', 'нені білдіреді')

    def __init__(self):
        self.setup_environment()
        self.setup_vectorstore()
        self.setup_llm()
        self.tracker = PersonalizedLearningTracker()
        self.user_sessions = {}  # user_id -> learning session id
        self.user_memories = {}  # user_id -> conversation memory

    def setup_environment(self):
        """Read the API key from the environment and set fixed configuration."""
        self.google_api_key = os.getenv("GOOGLE_API_KEY")
        self.MODEL = "gemini-1.5-flash"
        self.db_name = "vector_db"

    def setup_vectorstore(self):
        """Load every ``knowledge-base/*`` folder and rebuild the vector store.

        Each document is tagged with ``doc_type`` equal to its folder name;
        ``extract_kazakh_terms`` later uses that tag to tell idioms from words.
        """
        folders = glob.glob("knowledge-base/*")
        text_loader_kwargs = {'encoding': 'utf-8'}
        documents = []

        for folder in folders:
            doc_type = os.path.basename(folder)
            loader = DirectoryLoader(
                folder,
                glob="**/*.txt",
                loader_cls=TextLoader,
                loader_kwargs=text_loader_kwargs
            )
            for doc in loader.load():
                doc.metadata["doc_type"] = doc_type
                documents.append(doc)

        # NOTE(review): r'\n' is a backslash followed by "n", not a newline.
        # If splitting on line breaks was intended this should be '\n' -
        # confirm before changing, since it alters the resulting chunks.
        text_splitter = CharacterTextSplitter(separator=r'\n', chunk_size=2000, chunk_overlap=0)
        chunks = text_splitter.split_documents(documents)

        print(f"Total chunks: {len(chunks)}")

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased-v1")

        # Drop any collection persisted by a previous run before re-indexing.
        if os.path.exists(self.db_name):
            Chroma(persist_directory=self.db_name, embedding_function=embeddings).delete_collection()

        self.vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=self.db_name)
        print(f"Vectorstore created with {self.vectorstore._collection.count()} documents")

    def setup_llm(self):
        """Create the Gemini chat model with the tutoring system prompt."""
        system_prompt = """
You are a personalized Kazakh language learning assistant with access to a comprehensive knowledge base and user learning history. Your role is to help users learn Kazakh words and idioms while tracking their progress and providing personalized recommendations.

Key capabilities:
1. **Answer Queries**: Provide accurate definitions and examples for Kazakh words and idioms from your knowledge base
2. **Track Learning Progress**: Identify and track when users learn new words or idioms
3. **Personalized Responses**: Adapt responses based on user's learning history and progress
4. **Progress Reporting**: Provide detailed progress reports when asked
5. **Learning Recommendations**: Suggest words/idioms to review or learn next

Response Guidelines:
- For word/idiom queries: Provide definition, usage examples, and related information
- Always identify the main Kazakh word/idiom being discussed for progress tracking
- Be encouraging and supportive of the user's learning journey
- Use simple, clear explanations appropriate for language learners
- When discussing progress, be specific and motivating

Format responses naturally in conversational style, not JSON unless specifically requested.
"""

        # NOTE(review): confirm that this ChatGoogleGenerativeAI version
        # accepts ``system_instruction`` as a constructor argument.
        self.llm = ChatGoogleGenerativeAI(
            model="models/gemini-1.5-flash",
            temperature=0.7,
            system_instruction=system_prompt
        )

    def get_user_memory(self, user_id: str):
        """Get or create the conversation memory for a specific user."""
        if user_id not in self.user_memories:
            # NOTE(review): confirm ConversationBufferMemory honours
            # ``max_token_limit``.
            self.user_memories[user_id] = ConversationBufferMemory(
                memory_key='chat_history',
                return_messages=True,
                max_token_limit=10000
            )
        return self.user_memories[user_id]

    def get_user_chain(self, user_id: str):
        """Build a retrieval chain bound to the user's conversation memory."""
        memory = self.get_user_memory(user_id)
        retriever = self.vectorstore.as_retriever()
        return ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=retriever,
            memory=memory
        )

    def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
        """Extract Kazakh terms worth tracking from an assistant response.

        Cyrillic phrases found in ``response`` are kept when they appear in
        one of the five documents most similar to ``message``. The matching
        document's ``doc_type`` decides the category. Each distinct term is
        reported once per response, so repeating a word in one answer does not
        count as several encounters.

        Args:
            message: The user's question, used for the similarity search.
            response: The assistant's answer, scanned for Kazakh terms.

        Returns:
            ``(term, category, definition)`` tuples in order of first
            appearance. Empty when extraction fails (best effort).
        """
        terms = []
        seen = set()

        try:
            retrieved_docs = self.vectorstore.similarity_search(message, k=5)

            for match in self._KAZAKH_TERM_RE.findall(response):
                word = match.strip()
                if word in seen:
                    continue
                seen.add(word)

                if len(word) <= 2 or len(word) > 50:
                    continue

                lowered = word.lower()
                if any(skip in lowered for skip in self._SKIP_WORDS):
                    continue

                category = "word"
                definition = ""

                # The first retrieved document mentioning the term wins.
                for doc in retrieved_docs:
                    if lowered in doc.page_content.lower():
                        doc_type = doc.metadata.get('doc_type', '').lower()
                        if 'idiom' in doc_type or 'тіркес' in doc_type:
                            category = "idiom"
                        else:
                            category = "word"

                        definition = self.extract_clean_definition(word, doc.page_content, response)
                        break

                if definition and len(word.split()) <= 4:
                    if not any(phrase in lowered for phrase in self._QUESTION_WORDS):
                        terms.append((word, category, definition))

        except Exception as e:
            # Tracking is best effort: a failure here must not break the chat.
            print(f"Error extracting terms: {e}")

        return terms

    def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
        """Pick a short sentence mentioning ``term`` to store as its definition.

        Sentences from ``response`` are tried first, skipping ones that look
        like questions about the term, then sentences from ``doc_content``.
        Only sentences of 11 to 149 characters qualify.

        Returns:
            The chosen sentence, or ``"Definition for <term>"`` if none fits.
        """
        needle = term.lower()

        for sentence in response.split('.'):
            if needle in sentence.lower():
                clean_sentence = sentence.strip()
                if 10 < len(clean_sentence) < 150:
                    if not any(phrase in clean_sentence.lower() for phrase in self._QUESTION_PHRASES):
                        return clean_sentence

        for sentence in doc_content.split('.'):
            if needle in sentence.lower():
                clean_sentence = sentence.strip()
                if 10 < len(clean_sentence) < 150:
                    return clean_sentence

        return f"Definition for {term}"

    def process_message(self, message: str, user_id: str = "default_user", session_token: str = None) -> str:
        """Answer a user message and record the Kazakh terms in the answer.

        Args:
            message: User text, or one of the ``/progress``,
                ``/recommendations``, ``/review`` or ``/help`` commands.
            user_id: Identifier that progress and memory are keyed on.
            session_token: Optional token; when given it must validate.

        Returns:
            The assistant's reply, a command report, or a session-expired
            notice.
        """
        # A token is optional, but a supplied one must be valid.
        if session_token and not self.tracker.validate_session(user_id, session_token):
            return "Session expired. Please login again."

        if session_token:
            self.tracker.update_session_activity(user_id, session_token)

        if user_id not in self.user_sessions:
            self.user_sessions[user_id] = self.tracker.start_session(user_id)

        command = message.lower()
        if command.startswith('/progress'):
            return self.get_progress_report(user_id)
        elif command.startswith('/recommendations'):
            return self.get_recommendations(user_id)
        elif command.startswith('/review'):
            return self.get_review_words(user_id)
        elif command.startswith('/help'):
            return self.get_help_message()

        conversation_chain = self.get_user_chain(user_id)
        result = conversation_chain.invoke({"question": message})
        response = result["answer"]

        for term, category, definition in self.extract_kazakh_terms(message, response):
            if definition and term:
                self.tracker.track_word_encounter(
                    user_id,
                    term,
                    definition,
                    category
                )

        return response

    def get_progress_report(self, user_id: str) -> str:
        """Generate a bilingual Markdown progress report for a specific user."""
        progress = self.tracker.get_user_progress(user_id)

        if progress['total_words'] == 0:
            return "Сіз әлі үйренуді бастамадыңыз! Маған кез келген қазақ сөзі немесе тіркес туралы сұраңыз. 🌟\n\nYou haven't started learning yet! Ask me about any Kazakh word or idiom to begin your journey. 🌟"

        report = "📊 **Сіздің үйрену прогресіңіз / Your Learning Progress Report**\n\n"

        report += f"🎯 **Үйренген терминдер саны / Total Terms Learned**: {progress['total_words']}\n"

        for category, stats in progress['category_stats'].items():
            # Every category other than "word" is presented as idioms.
            emoji = "📝" if category == "word" else "🎭"
            category_name = "Сөздер / Words" if category == "word" else "Тіркестер / Idioms"
            report += f"{emoji} **{category_name}**: {stats['count']} (Орташа меңгеру / Average mastery: {stats['average_mastery']}/5)\n"

        report += f"\n⚡ **Соңғы белсенділік / Recent Activity**: {progress['recent_activity']} терминдер соңғы 7 күнде қаралды / terms reviewed in the last 7 days\n"

        if progress['daily_activity']:
            # Number of distinct active days returned, not consecutive days.
            recent_days = len(progress['daily_activity'])
            report += f"🔥 **Үйрену ырғағы / Learning Streak**: {recent_days} күн белсенді болдыңыз / Active on {recent_days} days recently\n"

        recommendations = self.tracker.get_learning_recommendations(user_id)
        if recommendations:
            report += f"\n💡 **Ұсыныстар / Recommendations**:\n"
            for i, rec in enumerate(recommendations, 1):
                report += f"{i}. {rec}\n"

        return report

    def get_recommendations(self, user_id: str) -> str:
        """Format the user's learning recommendations as a numbered list."""
        recommendations = self.tracker.get_learning_recommendations(user_id)

        if not recommendations:
            return "Керемет! Сіз өте жақсы прогресс жасап жатырсыз. Үнемі жаттығуды жалғастырыңыз! 🎉\n\nGreat job! You're making excellent progress. Keep practicing regularly! 🎉"

        response = "💡 **Жеке ұсыныстар / Personalized Learning Recommendations**:\n\n"
        for i, rec in enumerate(recommendations, 1):
            response += f"{i}. {rec}\n"

        return response

    def get_review_words(self, user_id: str) -> str:
        """Format up to ten terms that need review, with a star mastery bar."""
        words_to_review = self.tracker.get_words_to_review(user_id, 10)

        if not words_to_review:
            return "Тамаша! Сізде қазір қайталау қажет сөздер жоқ. Жаңа терминдерді үйренуге тырысыңыз! ✨\n\nExcellent! You don't have any words that need review right now. Try learning some new terms! ✨"

        response = "📚 **Қайталауға арналған сөздер / Words to Review**:\n\n"
        for word_info in words_to_review:
            emoji = "📝" if word_info['category'] == "word" else "🎭"
            mastery_stars = "⭐" * word_info['mastery_level'] + "☆" * (5 - word_info['mastery_level'])
            response += f"{emoji} **{word_info['word']}** - {mastery_stars}\n"

            # Long definitions are cut to 80 characters plus an ellipsis.
            definition_preview = word_info['definition'][:80] + "..." if len(word_info['definition']) > 80 else word_info['definition']
            response += f" {definition_preview}\n\n"

        return response

    def get_help_message(self) -> str:
        """Return the Markdown help text listing the available commands."""
        return """
🎓 **Kazakh Learning Assistant Help**

**Available Commands**:
- `/progress` - View your detailed learning progress
- `/recommendations` - Get personalized learning suggestions
- `/review` - See words that need review
- `/help` - Show this help message

**How to Use**:
- Ask about any Kazakh word or idiom for definitions and examples
- Your progress is automatically tracked as you learn
- Regular practice improves your mastery levels
- Use commands to monitor your learning journey

**Examples**:
- "What does 'сәлем' mean?"
- "Tell me about Kazakh idioms"
- "How do you say 'thank you' in Kazakh?"

Start learning by asking about any Kazakh term! 🌟
"""

    def login_user(self, user_id: str) -> str:
        """Create and return a session token for ``user_id``."""
        session_token = self.tracker.create_user_session(user_id)
        return session_token
639
+
640
# Shared module-level assistant used by the chat and API callbacks in this file.
# Constructing it runs the setup_* methods, including the vector store build,
# at import time.
assistant = PersonalizedKazakhAssistant()
641
+
642
def chat_interface(message, history, request: "gr.Request" = None):
    """Chat callback for the Gradio web interface.

    Progress is keyed on the browser session so it accumulates across turns.
    The previous id was derived from ``hash(str(history))``, which changes
    after every message (and between processes), so each turn was recorded
    under a different user.

    Args:
        message: The text the user just sent.
        history: Prior chat messages supplied by Gradio (unused).
        request: Request object that Gradio supplies for parameters annotated
            with ``gr.Request``; ``None`` when called without one.

    Returns:
        The assistant's reply, or an apology containing the error text.
    """
    try:
        session_hash = getattr(request, "session_hash", None)
        # Without a request (e.g. a direct call) fall back to one shared id.
        web_user_id = f"web_user_{session_hash}" if session_hash else "web_user_anonymous"
        response = assistant.process_message(message, web_user_id)
        return response
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        return f"Sorry, I encountered an error: {str(e)}. Please try again."
650
+
651
def api_login(user_id: str) -> dict:
    """API endpoint that logs a user in and issues a session token.

    Returns:
        On success ``success``, ``session_token``, ``user_id`` and
        ``message``; on failure ``success`` False and the ``error`` text.
    """
    try:
        token = assistant.login_user(user_id)
    except Exception as exc:
        return dict(success=False, error=str(exc))

    return dict(
        success=True,
        session_token=token,
        user_id=user_id,
        message="Login successful",
    )
666
+
667
def api_chat(message: str, user_id: str, session_token: str = None) -> dict:
    """API endpoint that forwards one chat message to the assistant.

    Returns:
        On success ``success``, ``response`` and ``user_id``; on failure
        ``success`` False, the ``error`` text and a Kazakh apology as
        ``response``.
    """
    try:
        reply = assistant.process_message(message, user_id, session_token)
    except Exception as exc:
        return dict(
            success=False,
            error=str(exc),
            response="Кешіріңіз, қате орын алды. Қайталап көріңіз.",
        )

    return dict(success=True, response=reply, user_id=user_id)
682
+
683
def api_progress(user_id: str, session_token: str = None) -> dict:
    """API endpoint returning a user's progress as text and as raw data.

    A supplied ``session_token`` must validate for ``user_id``; an empty or
    missing token skips validation.

    Returns:
        On success ``success``, ``progress_text``, ``progress_data`` and
        ``user_id``; otherwise ``success`` False and an ``error`` text.
    """
    try:
        token_rejected = bool(session_token) and not assistant.tracker.validate_session(user_id, session_token)
        if token_rejected:
            return dict(success=False, error="Invalid session")

        report = assistant.get_progress_report(user_id)
        stats = assistant.tracker.get_user_progress(user_id)
        return dict(
            success=True,
            progress_text=report,
            progress_data=stats,
            user_id=user_id,
        )
    except Exception as exc:
        return dict(success=False, error=str(exc))
703
+
704
def api_recommendations(user_id: str, session_token: str = None) -> dict:
    """API endpoint returning learning recommendations as text and as a list.

    A supplied ``session_token`` must validate for ``user_id``; an empty or
    missing token skips validation.

    Returns:
        On success ``success``, ``recommendations_text``,
        ``recommendations_list`` and ``user_id``; otherwise ``success`` False
        and an ``error`` text.
    """
    try:
        token_rejected = bool(session_token) and not assistant.tracker.validate_session(user_id, session_token)
        if token_rejected:
            return dict(success=False, error="Invalid session")

        as_text = assistant.get_recommendations(user_id)
        as_list = assistant.tracker.get_learning_recommendations(user_id)
        return dict(
            success=True,
            recommendations_text=as_text,
            recommendations_list=as_list,
            user_id=user_id,
        )
    except Exception as exc:
        return dict(success=False, error=str(exc))
724
+
725
def api_review_words(user_id: str, session_token: str = None) -> dict:
    """API endpoint returning the terms due for review as text and as data.

    A supplied ``session_token`` must validate for ``user_id``; an empty or
    missing token skips validation.

    Returns:
        On success ``success``, ``review_text``, ``review_data`` (up to ten
        terms) and ``user_id``; otherwise ``success`` False and an ``error``
        text.
    """
    try:
        token_rejected = bool(session_token) and not assistant.tracker.validate_session(user_id, session_token)
        if token_rejected:
            return dict(success=False, error="Invalid session")

        as_text = assistant.get_review_words(user_id)
        as_data = assistant.tracker.get_words_to_review(user_id, 10)
        return dict(
            success=True,
            review_text=as_text,
            review_data=as_data,
            user_id=user_id,
        )
    except Exception as exc:
        return dict(success=False, error=str(exc))
745
+
746
# Gradio interface: a chat tab, an API testing tab and an API documentation tab.
with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
    gr.Markdown("# 🇰🇿 Personalized Kazakh Learning Assistant")
    gr.Markdown("### Multi-User Chat Interface + API Endpoints for Mobile Integration")

    with gr.Tab("💬 Chat Interface"):
        # Bound to its own name so the module-level ``chat_interface``
        # callback is not shadowed by the component built from it.
        chat_ui = gr.ChatInterface(
            chat_interface,
            type="messages",
            examples=[
                "сәлем деген не?",
                "күләпара не үшін керек?",
                "/progress",
                "/recommendations",
                "/review"
            ]
        )

    with gr.Tab("🔌 API Testing"):
        gr.Markdown("## Test API Endpoints")

        # NOTE(review): the nesting of the rows/columns below should be
        # confirmed against the intended page layout.
        with gr.Row():
            with gr.Column():
                user_id_input = gr.Textbox(label="User ID", value="test_user", placeholder="Enter unique user ID")
                session_token_input = gr.Textbox(label="Session Token", placeholder="Session token (get from login)")
                message_input = gr.Textbox(label="Message", placeholder="Enter your message in Kazakh or English")

                with gr.Row():
                    login_btn = gr.Button("🔑 Test Login API")
                    chat_btn = gr.Button("💬 Test Chat API")
                    progress_btn = gr.Button("📊 Test Progress API")
                    recommendations_btn = gr.Button("💡 Test Recommendations API")
                    review_btn = gr.Button("📚 Test Review API")

                api_output = gr.JSON(label="API Response")

        # The lambdas are kept as-is: replacing them with the named functions
        # could change the endpoint names Gradio derives for these events.
        login_btn.click(
            fn=lambda uid: api_login(uid),
            inputs=user_id_input,
            outputs=api_output
        )

        chat_btn.click(
            fn=lambda msg, uid, token: api_chat(msg, uid, token),
            inputs=[message_input, user_id_input, session_token_input],
            outputs=api_output
        )

        progress_btn.click(
            fn=lambda uid, token: api_progress(uid, token),
            inputs=[user_id_input, session_token_input],
            outputs=api_output
        )

        recommendations_btn.click(
            fn=lambda uid, token: api_recommendations(uid, token),
            inputs=[user_id_input, session_token_input],
            outputs=api_output
        )

        review_btn.click(
            fn=lambda uid, token: api_review_words(uid, token),
            inputs=[user_id_input, session_token_input],
            outputs=api_output
        )

    with gr.Tab("📖 API Documentation"):
        gr.Markdown("""
        ## API Endpoints for Flutter Integration

        ### Base URL: `https://huggingface.co/spaces/GuestUser33/kazakh-learning-api`

        ### Authentication Flow:
        1. **Login** to get session token
        2. **Use session token** for subsequent API calls
        3. **Session tokens are invalidated** when the same user logs in again

        ### Available Endpoints:

        #### 1. Login API
        ```
        POST /api/predict
        Content-Type: application/json

        {
          "data": ["user_id"],
          "fn_index": 0
        }
        ```
        **Response**: `{"success": true, "session_token": "uuid", "user_id": "user_id"}`

        #### 2. Chat API
        ```
        POST /api/predict
        Content-Type: application/json

        {
          "data": ["message", "user_id", "session_token"],
          "fn_index": 1
        }
        ```

        #### 3. Progress API
        ```
        POST /api/predict
        Content-Type: application/json

        {
          "data": ["user_id", "session_token"],
          "fn_index": 2
        }
        ```

        #### 4. Recommendations API
        ```
        POST /api/predict
        Content-Type: application/json

        {
          "data": ["user_id", "session_token"],
          "fn_index": 3
        }
        ```

        #### 5. Review Words API
        ```
        POST /api/predict
        Content-Type: application/json

        {
          "data": ["user_id", "session_token"],
          "fn_index": 4
        }
        ```

        ### Flutter Integration Example:
        ```dart
        class KazakhLearningAPI {
          static const String baseUrl = 'https://huggingface.co/spaces/GuestUser33/kazakh-learning-api';
          String? sessionToken;
          String? userId;

          // Login and get session token
          Future<bool> login(String userId) async {
            final response = await http.post(
              Uri.parse('$baseUrl/api/predict'),
              headers: {'Content-Type': 'application/json'},
              body: jsonEncode({
                'data': [userId],
                'fn_index': 0
              }),
            );

            if (response.statusCode == 200) {
              final result = jsonDecode(response.body);
              if (result['data'][0]['success']) {
                this.userId = userId;
                this.sessionToken = result['data'][0]['session_token'];
                return true;
              }
            }
            return false;
          }

          // Send chat message
          Future<String?> sendMessage(String message) async {
            if (sessionToken == null) return null;

            final response = await http.post(
              Uri.parse('$baseUrl/api/predict'),
              headers: {'Content-Type': 'application/json'},
              body: jsonEncode({
                'data': [message, userId, sessionToken],
                'fn_index': 1
              }),
            );

            if (response.statusCode == 200) {
              final result = jsonDecode(response.body);
              if (result['data'][0]['success']) {
                return result['data'][0]['response'];
              }
            }
            return null;
          }

          // Get user progress
          Future<Map<String, dynamic>?> getProgress() async {
            if (sessionToken == null) return null;

            final response = await http.post(
              Uri.parse('$baseUrl/api/predict'),
              headers: {'Content-Type': 'application/json'},
              body: jsonEncode({
                'data': [userId, sessionToken],
                'fn_index': 2
              }),
            );

            if (response.statusCode == 200) {
              final result = jsonDecode(response.body);
              if (result['data'][0]['success']) {
                return result['data'][0]['progress_data'];
              }
            }
            return null;
          }
        }
        ```

        ### Key Features:
        - ✅ **Multi-User Support**: Each user has separate learning progress
        - ✅ **Session Management**: Secure session tokens for authentication
        - ✅ **Personalized Tracking**: Individual progress tracking per user
        - ✅ **API Ready**: All endpoints ready for mobile app integration
        - ✅ **Session Validation**: A supplied token is validated on every call; a new login invalidates the previous token

        ### Usage Notes:
        - Always call **login** first to get a session token
        - Include **session_token** in all subsequent API calls
        - Handle an **invalidated session** by re-logging in
        - Use **unique user_id** for each user (could be email, username, etc.)
        """)
969
+
970
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()