Spaces:

GuestUser33
/

kazakh-learning-api

Sleeping

App Files Files Community

GuestUser33 committed on Jun 7, 2025

Commit

0653775

verified ·

1 Parent(s): 24ea7f3

Slight update

Browse files

Files changed (1) hide show

app.py +30 -856

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ from dataclasses import dataclass, asdict
 from collections import defaultdict
 import re
 import uuid
-import hashlib
 import google.generativeai as genai
 from dotenv import load_dotenv
@@ -53,7 +52,6 @@ class PersonalizedLearningTracker:
         self.init_database()
     def init_database(self):
-        """Initialize SQLite database for tracking learning progress"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -114,7 +112,6 @@ class PersonalizedLearningTracker:
         conn.close()
     def create_user_session(self, user_id: str) -> str:
-        """Create a new session token for a user"""
         session_token = str(uuid.uuid4())
         now = datetime.now().isoformat()
@@ -138,7 +135,6 @@ class PersonalizedLearningTracker:
         return session_token
     def validate_session(self, user_id: str, session_token: str) -> bool:
-        """Validate if a session is active and belongs to the user"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -153,7 +149,6 @@ class PersonalizedLearningTracker:
         return result is not None and result[0] == 1
     def update_session_activity(self, user_id: str, session_token: str):
-        """Update last activity time for a session"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -167,7 +162,6 @@ class PersonalizedLearningTracker:
         conn.close()
     def start_session(self, user_id: str) -> str:
-        """Start a new learning session"""
         session_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
         session = LearningSession(
             session_id=session_id,
@@ -187,7 +181,6 @@ class PersonalizedLearningTracker:
         return session_id
     def end_session(self, session_id: str):
-        """End a learning session"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         cursor.execute('''
@@ -243,7 +236,6 @@ class PersonalizedLearningTracker:
         conn.close()
     def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
-        """Update mastery level based on user performance for mastered terms"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -275,7 +267,6 @@ class PersonalizedLearningTracker:
         conn.close()
     def get_user_progress(self, user_id: str) -> Dict:
-        """Get comprehensive user progress statistics"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -321,7 +312,6 @@ class PersonalizedLearningTracker:
         }
     def get_words_to_review(self, user_id: str, limit: int = 10) -> List[Dict]:
-        """Get words that need review based on spaced repetition"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -348,7 +338,6 @@ class PersonalizedLearningTracker:
         return words
     def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
-        """Get words with is_mastered = 1, with pagination"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -375,7 +364,6 @@ class PersonalizedLearningTracker:
         return words
     def get_learning_recommendations(self, user_id: str) -> List[str]:
-        """Get personalized learning recommendations"""
         progress = self.get_user_progress(user_id)
         recommendations = []
@@ -395,7 +383,6 @@ class PersonalizedLearningTracker:
         return recommendations
     def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
-        """Get all words and idioms in learning phase, with pagination"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -432,14 +419,12 @@ class PersonalizedKazakhAssistant:
         self.user_memories = {}
     def setup_environment(self):
-        """Setup environment and configuration"""
         load_dotenv()
         genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
         self.MODEL = "gemini-1.5-flash"
         self.db_name = "vector_db"
     def setup_vectorstore(self):
-        """Setup document loading and vector store"""
         folders = glob.glob("knowledge-base/*")
         text_loader_kwargs = {'encoding': 'utf-8'}
         documents = []
@@ -485,7 +470,6 @@ class PersonalizedKazakhAssistant:
         print(f"Vectorstore created with {self.vectorstore._collection.count()} documents")
     def setup_llm(self, target_language: str = "English"):
-        """Setup Gemini model with system prompt formatted with target language"""
         self.system_prompt = f"""
         You are a personalized Kazakh language learning assistant with access to a comprehensive knowledge base and user learning history. Your role is to help users learn Kazakh words and idioms while tracking their progress and providing personalized recommendations. Respond in {target_language}.
@@ -503,7 +487,7 @@ class PersonalizedKazakhAssistant:
         - Always identify the main Kazakh word/idiom for progress tracking.
         - **RAG Usage**:
         - Use Retrieval-Augmented Generation (RAG) only when the query explicitly asks for explanations of specific Kazakh terms or idioms (e.g., "What does сәлем mean?") or when the context strongly suggests a need for knowledge base information (e.g., queries about specific words or idioms).
-        - When using RAG to explain terms (e.g., nouns, idioms), limit examples to 3-4 relevant ones. Do not list all or many examples or all matches from the knowledge base if not explicitly asked (only 3,4).
         - For general queries (e.g., greetings, procedural questions, or commands like /progress) or grammar-related queries (e.g., "explain me nouns"), rely on your general knowledge and do not use RAG unless the knowledge base contains relevant information.
         - Since the knowledge base contains only words and idioms, grammar explanations (e.g., about nouns, verbs) should be provided using your own knowledge, without relying on RAG, unless the query specifically involves terms in the knowledge base.
         - Be encouraging and supportive.
@@ -523,7 +507,6 @@ class PersonalizedKazakhAssistant:
         )
     def normalize_term(self, term: str) -> str:
-        """Normalize term by converting to lowercase and removing extra spaces"""
         return ' '.join(term.lower().strip().split())
     def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
@@ -536,23 +519,20 @@ class PersonalizedKazakhAssistant:
             bold_matches = re.findall(bold_pattern, response)
             for term in bold_matches:
-                normalized_term = self.normalize_term(term)  # Normalize to lowercase
                 if normalized_term in seen_terms or len(normalized_term) <= 2 or len(normalized_term) > 100:
                     print(f"Skipped term {normalized_term}: Invalid length or already seen")
                     continue
-                # Initialize category and definition
-                category = "word"  # Default to word
                 definition = ""
                 term_matched = False
-                original_term = term  # Preserve original case for tracking
-                # Check for exact match in known terms (case-insensitive)
                 for known_term in self.known_terms:
                     if normalized_term == self.normalize_term(known_term):
                         term_matched = True
-                        original_term = known_term  # Use the known term's original case
-                        # Determine category based on known term's source
                         for doc in retrieved_docs:
                             doc_type = doc.metadata.get('doc_type', '').lower()
                             if normalized_term in self.normalize_term(doc.page_content):
@@ -564,23 +544,19 @@ class PersonalizedKazakhAssistant:
                                     category = "grammar"
                                 definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
                                 break
-                        # If no document match, check term length for idiom likelihood
                         if not definition and len(known_term.split()) > 1:
                             category = "idiom"
                             definition = self.extract_clean_definition(normalized_term, "", response)
                         break
-                # If no exact match, try fuzzy matching for idioms with suffixes
                 if not term_matched:
                     for known_term in self.known_terms:
                         normalized_known = self.normalize_term(known_term)
-                        # Check if the bolded term is a close match to a known term
-                        # Allow up to 4 extra characters (e.g., grammatical endings)
                         if (normalized_term.startswith(normalized_known) and
                             len(normalized_term) <= len(normalized_known) + 4):
                             term_matched = True
-                            normalized_term = normalized_known  # Use the base known term
-                            original_term = known_term  # Use the original known term for tracking
                             for doc in retrieved_docs:
                                 if normalized_known in self.normalize_term(doc.page_content):
                                     doc_type = doc.metadata.get('doc_type', '').lower()
@@ -592,13 +568,11 @@ class PersonalizedKazakhAssistant:
                                         category = "grammar"
                                     definition = self.extract_clean_definition(normalized_known, doc.page_content, response)
                                     break
-                            # If no document match, assume idiom for multi-word terms
                             if not definition and len(known_term.split()) > 1:
                                 category = "idiom"
                                 definition = self.extract_clean_definition(normalized_known, "", response)
                             break
-                # Additional check: single-word terms from words folder should not be idioms
                 if term_matched and len(original_term.split()) == 1 and any('words' in doc.metadata.get('doc_type', '').lower() for doc in retrieved_docs):
                     category = "word"
@@ -617,10 +591,8 @@ class PersonalizedKazakhAssistant:
             return terms
     def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
-        """Extract a clean definition for a term from the knowledge base."""
         normalized_term = self.normalize_term(term)
-        # Search through retrieved documents for the term's definition
         retrieved_docs = self.vectorstore.similarity_search(term, k=5)
         for doc in retrieved_docs:
             lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
@@ -634,7 +606,6 @@ class PersonalizedKazakhAssistant:
         return f"Definition for {term}"
     def get_user_memory(self, user_id: str):
-        """Get or create conversation memory for a specific user"""
         if user_id not in self.user_memories:
             self.user_memories[user_id] = ConversationBufferMemory(
                 memory_key='chat_history',
@@ -644,7 +615,6 @@ class PersonalizedKazakhAssistant:
         return self.user_memories[user_id]
     def get_user_chain(self, user_id: str):
-        """Get or create conversation chain for a specific user"""
         memory = self.get_user_memory(user_id)
         retriever = self.vectorstore.as_retriever()
         return ConversationalRetrievalChain.from_llm(
@@ -654,8 +624,6 @@ class PersonalizedKazakhAssistant:
         )
     def process_message(self, message: str, user_id: str = "default_user", session_token: str = None, target_language: str = "English") -> str:
-        """Process user message with proper user session management"""
         if session_token and not self.tracker.validate_session(user_id, session_token):
             return f"Session expired. Please login again in {target_language}."
@@ -665,10 +633,8 @@ class PersonalizedKazakhAssistant:
         if user_id not in self.user_sessions:
             self.user_sessions[user_id] = self.tracker.start_session(user_id)
-        # Set up LLM with the specified target language
         self.setup_llm(target_language)
-        # Handle special commands
         if message.lower().startswith('/progress'):
             return self.get_progress_report(user_id)
         elif message.lower().startswith('/recommendations'):
@@ -694,11 +660,9 @@ class PersonalizedKazakhAssistant:
         elif message.lower().startswith('/help'):
             return self.get_help_message()
-        # Retrieve relevant documents from vectorstore
         retrieved_docs = self.vectorstore.similarity_search(message, k=5)
         context = "\n".join([doc.page_content for doc in retrieved_docs])
-        # Get conversation history
         memory = self.get_user_memory(user_id)
         chat_history = ""
         for msg in memory.chat_memory.messages[-10:]:
@@ -707,7 +671,6 @@ class PersonalizedKazakhAssistant:
             elif isinstance(msg, AIMessage):
                 chat_history += f"Assistant: {msg.content}\n"
-        # Retrieve user progress from SQLite database
         progress = self.tracker.get_user_progress(user_id)
         words_to_review = self.tracker.get_words_to_review(user_id, 5)
         mastered_words = self.tracker.get_mastered_words(user_id, page=1, page_size=5)
@@ -734,7 +697,6 @@ class PersonalizedKazakhAssistant:
                                         for word in mastered_words])
             )
-        # Construct prompt with context, history, and progress
         full_prompt = f"""
         {self.system_prompt}
@@ -751,14 +713,11 @@ class PersonalizedKazakhAssistant:
         Respond in {target_language}. If explaining a Kazakh word or idiom retrieved from the context, **bold** the term (e.g., **күләпара**) in your response to highlight it. Only bold the main term being explained.
         """
-        # Call Gemini API
         response = self.llm.generate_content(full_prompt).text
-        # Add to conversation memory
         memory.chat_memory.add_user_message(message)
         memory.chat_memory.add_ai_message(response)
-        # Extract and track terms, ensuring each term is stored only once per response
         extracted_terms = self.extract_kazakh_terms(message, response)
         unique_terms = {}
         for term, category, definition in extracted_terms:
@@ -772,7 +731,6 @@ class PersonalizedKazakhAssistant:
         return response
     def get_progress_report(self, user_id: str) -> str:
-        """Generate a comprehensive progress report for specific user"""
         progress = self.tracker.get_user_progress(user_id)
         if progress['total_words'] == 0:
@@ -802,7 +760,6 @@ class PersonalizedKazakhAssistant:
         return report
     def get_recommendations(self, user_id: str) -> str:
-        """Get personalized learning recommendations for specific user"""
         recommendations = self.tracker.get_learning_recommendations(user_id)
         if not recommendations:
@@ -815,7 +772,6 @@ class PersonalizedKazakhAssistant:
         return response
     def get_review_words(self, user_id: str) -> str:
-        """Get words that need review for specific user"""
         words_to_review = self.tracker.get_words_to_review(user_id, 10)
         if not words_to_review:
@@ -833,7 +789,6 @@ class PersonalizedKazakhAssistant:
         return response
     def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
-        """Get words that have been mastered (is_mastered = 1) for specific user"""
         mastered_words = self.tracker.get_mastered_words(user_id, page, page_size)
         if not mastered_words:
@@ -851,7 +806,6 @@ class PersonalizedKazakhAssistant:
         return response
     def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
-        """Get all words and idioms in learning phase for specific user"""
         learning_words = self.tracker.get_learning_words(user_id, page, page_size)
         if not learning_words:
@@ -870,7 +824,6 @@ class PersonalizedKazakhAssistant:
         return response
     def get_new_word(self, user_id: str) -> Optional[Dict]:
-        """Retrieve a new, unshown word from the knowledge base"""
         conn = sqlite3.connect(self.tracker.db_path)
         cursor = conn.cursor()
@@ -901,7 +854,6 @@ class PersonalizedKazakhAssistant:
         return None
     def get_new_idiom(self, user_id: str) -> Optional[Dict]:
-        """Retrieve a new, unshown idiom from the knowledge base"""
         conn = sqlite3.connect(self.tracker.db_path)
         cursor = conn.cursor()
@@ -934,31 +886,30 @@ class PersonalizedKazakhAssistant:
     def get_help_message(self) -> str:
         """Get help message with available commands"""
         return """
-🎓 **Kazakh Learning Assistant Help**
-**Available Commands**:
-- `/progress` - View your detailed learning progress
-- `/recommendations` - Get personalized learning suggestions
-- `/review` - See words that need review
-- `/mastered` - See words you've mastered (mastery level > 0)
-- `/help` - Show this help message
-**How to Use**:
-- Ask about any Kazakh word or idiom for definitions and examples
-- Your progress is automatically tracked as you learn
-- Regular practice improves your mastery levels
-- Use commands to monitor your learning journey
-**Examples**:
-- "What does 'сәлем' mean?"
-- "Tell me about Kazakh idioms"
-- "How do you say 'thank you' in Kazakh?"
-Start learning by asking about any Kazakh term! 🌟
-"""
     def login_user(self, user_id: str) -> str:
-        """Create a session token for user authentication"""
         session_token = self.tracker.create_user_session(user_id)
         return session_token
@@ -966,7 +917,6 @@ Start learning by asking about any Kazakh term! 🌟
 assistant = PersonalizedKazakhAssistant()
 def chat_interface(message, history, target_language):
-    """Chat interface for Gradio"""
     try:
         web_user_id = "web_user_default"
         response = assistant.process_message(message, web_user_id, target_language=target_language)
@@ -975,7 +925,6 @@ def chat_interface(message, history, target_language):
         return f"Sorry, I encountered an error: {str(e)}. Please try again."
 def api_login(user_id: str) -> dict:
-    """API endpoint for user login/session creation"""
     try:
         session_token = assistant.login_user(user_id)
         return {
@@ -991,7 +940,6 @@ def api_login(user_id: str) -> dict:
         }
 def api_chat(message: str, user_id: str, session_token: str = None, target_language: str = "English") -> dict:
-    """API endpoint for chat functionality with proper user session"""
     try:
         response = assistant.process_message(message, user_id, session_token, target_language)
         return {
@@ -1007,7 +955,6 @@ def api_chat(message: str, user_id: str, session_token: str = None, target_langu
         }
 def api_progress(user_id: str, session_token: str = None) -> dict:
-    """API endpoint for user progress with session validation"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1028,7 +975,6 @@ def api_progress(user_id: str, session_token: str = None) -> dict:
         }
 def api_recommendations(user_id: str, session_token: str = None) -> dict:
-    """API endpoint for learning recommendations with session validation"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1049,7 +995,6 @@ def api_recommendations(user_id: str, session_token: str = None) -> dict:
         }
 def api_review_words(user_id: str, session_token: str = None) -> dict:
-    """API endpoint for words to review with session validation"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1070,7 +1015,6 @@ def api_review_words(user_id: str, session_token: str = None) -> dict:
         }
 def api_mastered_words(user_id: str, session_token: str = None) -> dict:
-    """API endpoint for mastered words with session validation"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1091,7 +1035,6 @@ def api_mastered_words(user_id: str, session_token: str = None) -> dict:
         }
 def api_new_word(user_id: str, session_token: str = None) -> dict:
-    """API endpoint to retrieve a new, unshown word"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1126,7 +1069,6 @@ def api_new_word(user_id: str, session_token: str = None) -> dict:
         }
 def api_new_idiom(user_id: str, session_token: str = None) -> dict:
-    """API endpoint to retrieve a new, unshown idiom"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1161,7 +1103,6 @@ def api_new_idiom(user_id: str, session_token: str = None) -> dict:
         }
 def api_learning_words(user_id: str, session_token: str = None, page: int = 1, page_size: int = 10) -> dict:
-    """API endpoint for all words in learning phase with pagination"""
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
@@ -1212,773 +1153,6 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
             ]
         )
-    with gr.Tab("📖 API Documentation"):
-        gr.Markdown("""
-        ## API Endpoints for Flutter Integration
-        ### Base URL: `https://huggingface.co/spaces/GuestUser33/kazakh-learning-api`
-        ### Authentication Flow:
-        1. **Login** to get a session token
-        2. **Use session token** for subsequent API calls
-        3. **Session tokens expire** after inactivity
-        ### Available Endpoints:
-        #### 1. Login API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id"],
-        "fn_index": 0
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "session_token": "uuid-string",
-            "user_id": "user_id",
-            "message": "Login successful"
-            }
-        ]
-        }
-        ```
-        #### 2. Chat API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["message", "user_id", "session_token", "English"],
-        "fn_index": 1
-        }
-        ```
-        **Parameters**:
-        - `message`: The user's query (e.g., "сәлем деген не?" or "/progress")
-        - `user_id`: Unique identifier for the user
-        - `session_token`: Session token from login (use empty string "" if no token)
-        - `target_language`: Language for responses ("English", "Kazakh", or "Russian")
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "response": "response_text",
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 3. Progress API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token"],
-        "fn_index": 2
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "progress_text": "progress_report",
-            "progress_data": {
-                "category_stats": {
-                "word": {"count": number, "average_mastery": number},
-                "idiom": {"count": number, "average_mastery": number}
-                },
-                "recent_activity": number,
-                "daily_activity": [{"date": "YYYY-MM-DD", "daily_count": number}, ...],
-                "total_words": number
-            },
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 4. Recommendations API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token"],
-        "fn_index": 3
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "recommendations_text": "recommendations",
-            "recommendations_list": ["recommendation1", "recommendation2", ...],
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 5. Review Words API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token"],
-        "fn_index": 4
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "review_text": "review_words",
-            "review_data": [
-                {
-                "word": "word",
-                "definition": "definition",
-                "category": "word|idiom",
-                "mastery_level": number,
-                "last_reviewed": "YYYY-MM-DDTHH:MM:SS",
-                "encounter_count": number
-                },
-                ...
-            ],
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 6. Mastered Words API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token"],
-        "fn_index": 5
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "mastered_text": "mastered_words",
-            "mastered_data": [
-                {
-                "word": "word",
-                "definition": "definition",
-                "category": "word|idiom",
-                "mastery_level": number,
-                "encounter_count": number
-                },
-                ...
-            ],
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 7. New Word API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token"],
-        "fn_index": 6
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "word": "new_word",
-            "definition": "definition",
-            "category": "word",
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 8. New Idiom API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token"],
-        "fn_index": 7
-        }
-        ```
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "word": "new_idiom",
-            "definition": "definition",
-            "category": "idiom",
-            "user_id": "user_id"
-            }
-        ]
-        }
-        ```
-        #### 9. Learning Words API
-        ```
-        POST /api/predict
-        Content-Type: application/json
-        {
-        "data": ["user_id", "session_token", page, page_size],
-        "fn_index": 8
-        }
-        ```
-        **Parameters**:
-        - `user_id`: Unique identifier for the user
-        - `session_token`: Session token from login (use empty string "" if no token)
-        - `page`: Page number for pagination (default: 1)
-        - `page_size`: Number of items per page (default: 10)
-        **Response**:
-        ```json
-        {
-        "data": [
-            {
-            "success": true,
-            "learning_text": "learning_words",
-            "learning_data": [
-                {
-                "word": "word",
-                "definition": "definition",
-                "category": "word|idiom",
-                "mastery_level": number,
-                "encounter_count": number
-                },
-                ...
-            ],
-            "user_id": "user_id",
-            "page": number,
-            "page_size": number
-            }
-        ]
-        }
-        ```
-        ### Flutter Integration Example:
-        ```dart
-        import 'dart:convert';
-        import 'package:http/http.dart' as http;
-        class KazakhLearningAPI {
-        static const String baseUrl = 'https://huggingface.co/spaces/GuestUser33/kazakh-learning-api';
-        String? sessionToken;
-        String? userId;
-        // Login and get session token
-        Future<bool> login(String userId) async {
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId],
-                'fn_index': 0
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                this.userId = userId;
-                this.sessionToken = result['data'][0]['session_token'];
-                return true;
-                }
-            }
-            } catch (e) {
-            print('Login error: $e');
-            }
-            return false;
-        }
-        // Send chat message
-        Future<String?> sendMessage(String message, {String targetLanguage = 'English'}) async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [message, userId, sessionToken ?? "", targetLanguage],
-                'fn_index': 1
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0]['response'];
-                }
-            }
-            } catch (e) {
-            print('Send message error: $e');
-            }
-            return null;
-        }
-        // Get user progress
-        Future<Map?> getProgress() async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? ""],
-                'fn_index': 2
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0]['progress_data'];
-                }
-            }
-            } catch (e) {
-            print('Get progress error: $e');
-            }
-            return null;
-        }
-        // Get recommendations
-        Future<List?> getRecommendations() async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? ""],
-                'fn_index': 3
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return List.from(result['data'][0]['recommendations_list'] ?? []);
-                }
-            }
-            } catch (e) {
-            print('Get recommendations error: $e');
-            }
-            return null;
-        }
-        // Get words to review
-        Future<List?> getReviewWords() async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? ""],
-                'fn_index': 4
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0]['review_data'];
-                }
-            }
-            } catch (e) {
-            print('Get review words error: $e');
-            }
-            return null;
-        }
-        // Get mastered words
-        Future<List?> getMasteredWords() async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? ""],
-                'fn_index': 5
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0]['mastered_data'];
-                }
-            }
-            } catch (e) {
-            print('Get mastered words error: $e');
-            }
-            return null;
-        }
-        // Get new word
-        Future<Map?> getNewWord() async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? ""],
-                'fn_index': 6
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0];
-                }
-            }
-            } catch (e) {
-            print('Get new word error: $e');
-            }
-            return null;
-        }
-        // Get new idiom
-        Future<Map?> getNewIdiom() async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? ""],
-                'fn_index': 7
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0];
-                }
-            }
-            } catch (e) {
-            print('Get new idiom error: $e');
-            }
-            return null;
-        }
-        // Get learning words
-        Future<Map?> getLearningWords({int page = 1, int pageSize = 10}) async {
-            if (userId == null) return null;
-            try {
-            final response = await http.post(
-                Uri.parse('$baseUrl/api/predict'),
-                headers: {'Content-Type': 'application/json'},
-                body: jsonEncode({
-                'data': [userId, sessionToken ?? "", page, pageSize],
-                'fn_index': 8
-                }),
-            );
-            if (response.statusCode == 200) {
-                final result = jsonDecode(response.body);
-                if (result['data'] != null && result['data'][0]['success'] == true) {
-                return result['data'][0];
-                }
-            }
-            } catch (e) {
-            print('Get learning words error: $e');
-            }
-            return null;
-        }
-        // Helper method to check if session is valid
-        bool get isLoggedIn => userId != null;
-        // Logout method
-        void logout() {
-            userId = null;
-            sessionToken = null;
-        }
-        }
-        ```
-        ### Key Features:
-        - ✅ **Multi-User Support**: Each user has separate learning progress
-        - ✅ **Session Management**: Secure session tokens for authentication
-        - ✅ **Personalized Tracking**: Individual progress tracking per user using RAG model
-        - ✅ **Multi-Language Support**: Responses in English, Kazakh, or Russian
-        - ✅ **API Ready**: All endpoints ready for mobile app integration
-        - ✅ **Session Validation**: Automatic session validation and expiry
-        ### Usage Notes:
-        - Always call **login** first to get a session token
-        - Use **empty string ""** for session_token if no token is available
-        - Specify `target_language` ("English", "Kazakh", "Russian") for responses
-        - Handle **session expiry** by re-logging in
-        - Use **unique user_id** for each user (e.g., email, username)
-        - Commands like `/progress`, `/recommendations`, `/review`, `/mastered`, `/newword`, `/newidiom`, `/learning`, `/help` are supported
-        - **Error handling** is crucial - always check for success field and handle exceptions
-        ### Error Handling:
-        All API responses include a `success` field. If `success: false`, check the `error` field for details:
-        ```json
-        {
-        "data": [
-            {
-            "success": false,
-            "error": "Error message here"
-            }
-        ]
-        }
-        ```
-        """
-        )
-    with gr.Tab("🔌 API Testing"):
-        gr.Markdown("## Test API Endpoints")
-        gr.Markdown("### Use these endpoints programmatically:")
-        gr.Markdown("""
-        **API Endpoints:**
-        - **Login:** `/api/predict` with `fn_index=0`
-        - **Chat:** `/api/predict` with `fn_index=1`
-        - **Progress:** `/api/predict` with `fn_index=2`
-        - **Recommendations:** `/api/predict` with `fn_index=3`
-        - **Review Words:** `/api/predict` with `fn_index=4`
-        - **Mastered Words:** `/api/predict` with `fn_index=5`
-        - **New Word:** `/api/predict` with `fn_index=6`
-        - **New Idiom:** `/api/predict` with `fn_index=7`
-        - **Learning Words:** `/api/predict` with `fn_index=8`
-        """)
-        with gr.Row():
-            with gr.Column():
-                user_id_input = gr.Textbox(label="User ID", value="test_user", placeholder="Enter unique user ID")
-                session_token_input = gr.Textbox(label="Session Token", placeholder="Session token (get from login)")
-                message_input = gr.Textbox(label="Message", placeholder="Enter your message in Kazakh or English")
-                target_language_api = gr.Dropdown(label="Explanation Language", choices=["English", "Kazakh", "Russian"], value="English")
-                page_input = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
-                page_size_input = gr.Number(label="Page Size", value=10, minimum=1, precision=0)
-        with gr.Row():
-            login_btn = gr.Button("🔑 Test Login API")
-            chat_btn = gr.Button("💬 Test Chat API")
-            progress_btn = gr.Button("📊 Test Progress API")
-            recommendations_btn = gr.Button("💡 Test Recommendations API")
-            review_btn = gr.Button("📚 Test Review Words API")
-            mastered_btn = gr.Button("🏆 Test Mastered Words API")
-            new_word_btn = gr.Button("📝 Test New Word API")
-            new_idiom_btn = gr.Button("🎭 Test New Idiom API")
-            learning_btn = gr.Button("📖 Test Learning Words API")
-        api_output = gr.JSON(label="API Response")
-        login_interface = gr.Interface(
-            fn=api_login,
-            inputs=gr.Textbox(label="User ID"),
-            outputs=gr.JSON(label="Response"),
-            title="Login API",
-            description="Login endpoint",
-            allow_flagging="never"
-        )
-        chat_api_interface = gr.Interface(
-            fn=api_chat,
-            inputs=[
-                gr.Textbox(label="Message"),
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token"),
-                gr.Dropdown(label="Target Language", choices=["English", "Kazakh", "Russian"])
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="Chat API",
-            description="Chat endpoint",
-            allow_flagging="never"
-        )
-        progress_interface = gr.Interface(
-            fn=api_progress,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="Progress API",
-            description="Progress endpoint",
-            allow_flagging="never"
-        )
-        recommendations_interface = gr.Interface(
-            fn=api_recommendations,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="Recommendations API",
-            description="Recommendations endpoint",
-            allow_flagging="never"
-        )
-        review_interface = gr.Interface(
-            fn=api_review_words,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="Review Words API",
-            description="Review words endpoint",
-            allow_flagging="never"
-        )
-        mastered_interface = gr.Interface(
-            fn=api_mastered_words,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="Mastered Words API",
-            description="Mastered words endpoint",
-            allow_flagging="never"
-        )
-        new_word_interface = gr.Interface(
-            fn=api_new_word,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="New Word API",
-            description="New word endpoint",
-            allow_flagging="never"
-        )
-        new_idiom_interface = gr.Interface(
-            fn=api_new_idiom,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="New Idiom API",
-            description="New idiom endpoint",
-            allow_flagging="never"
-        )
-        learning_interface = gr.Interface(
-            fn=api_learning_words,
-            inputs=[
-                gr.Textbox(label="User ID"),
-                gr.Textbox(label="Session Token"),
-                gr.Number(label="Page Number"),
-                gr.Number(label="Page Size")
-            ],
-            outputs=gr.JSON(label="Response"),
-            title="Learning Words API",
-            description="Learning words endpoint",
-            allow_flagging="never"
-        )
-        # Connect buttons to test the APIs
-        login_btn.click(
-            fn=api_login,
-            inputs=user_id_input,
-            outputs=api_output
-        )
-        chat_btn.click(
-            fn=api_chat,
-            inputs=[message_input, user_id_input, session_token_input, target_language_api],
-            outputs=api_output
-        )
-        progress_btn.click(
-            fn=api_progress,
-            inputs=[user_id_input, session_token_input],
-            outputs=api_output
-        )
-        recommendations_btn.click(
-            fn=api_recommendations,
-            inputs=[user_id_input, session_token_input],
-            outputs=api_output
-        )
-        review_btn.click(
-            fn=api_review_words,
-            inputs=[user_id_input, session_token_input],
-            outputs=api_output
-        )
-        mastered_btn.click(
-            fn=api_mastered_words,
-            inputs=[user_id_input, session_token_input],
-            outputs=api_output
-        )
-        new_word_btn.click(
-            fn=api_new_word,
-            inputs=[user_id_input, session_token_input],
-            outputs=api_output
-        )
-        new_idiom_btn.click(
-            fn=api_new_idiom,
-            inputs=[user_id_input, session_token_input],
-            outputs=api_output
-        )
-        learning_btn.click(
-            fn=api_learning_words,
-            inputs=[user_id_input, session_token_input, page_input, page_size_input],
-            outputs=api_output
-        )
 if __name__ == "__main__":
     demo.launch(
         show_api=True,

 from collections import defaultdict
 import re
 import uuid
 import google.generativeai as genai
 from dotenv import load_dotenv
         self.init_database()
     def init_database(self):
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         conn.close()
     def create_user_session(self, user_id: str) -> str:
         session_token = str(uuid.uuid4())
         now = datetime.now().isoformat()
         return session_token
     def validate_session(self, user_id: str, session_token: str) -> bool:
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         return result is not None and result[0] == 1
     def update_session_activity(self, user_id: str, session_token: str):
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         conn.close()
     def start_session(self, user_id: str) -> str:
         session_id = f"{user_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
         session = LearningSession(
             session_id=session_id,
         return session_id
     def end_session(self, session_id: str):
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         cursor.execute('''
         conn.close()
     def update_mastery_level(self, user_id: str, word: str, category: str, correct: bool):
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         conn.close()
     def get_user_progress(self, user_id: str) -> Dict:
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         }
     def get_words_to_review(self, user_id: str, limit: int = 10) -> List[Dict]:
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         return words
     def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         return words
     def get_learning_recommendations(self, user_id: str) -> List[str]:
         progress = self.get_user_progress(user_id)
         recommendations = []
         return recommendations
     def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> List[Dict]:
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         self.user_memories = {}
     def setup_environment(self):
         load_dotenv()
         genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
         self.MODEL = "gemini-1.5-flash"
         self.db_name = "vector_db"
     def setup_vectorstore(self):
         folders = glob.glob("knowledge-base/*")
         text_loader_kwargs = {'encoding': 'utf-8'}
         documents = []
         print(f"Vectorstore created with {self.vectorstore._collection.count()} documents")
     def setup_llm(self, target_language: str = "English"):
         self.system_prompt = f"""
         You are a personalized Kazakh language learning assistant with access to a comprehensive knowledge base and user learning history. Your role is to help users learn Kazakh words and idioms while tracking their progress and providing personalized recommendations. Respond in {target_language}.
         - Always identify the main Kazakh word/idiom for progress tracking.
         - **RAG Usage**:
         - Use Retrieval-Augmented Generation (RAG) only when the query explicitly asks for explanations of specific Kazakh terms or idioms (e.g., "What does сәлем mean?") or when the context strongly suggests a need for knowledge base information (e.g., queries about specific words or idioms).
+        - When using RAG, limit the response to explaining 1-2 distinct terms at most, unless the user explicitly asks for multiple terms (e.g., "List several idioms"). For each term, provide 3-4 relevant examples. Do not list all or many terms or matches from the knowledge base.
         - For general queries (e.g., greetings, procedural questions, or commands like /progress) or grammar-related queries (e.g., "explain me nouns"), rely on your general knowledge and do not use RAG unless the knowledge base contains relevant information.
         - Since the knowledge base contains only words and idioms, grammar explanations (e.g., about nouns, verbs) should be provided using your own knowledge, without relying on RAG, unless the query specifically involves terms in the knowledge base.
         - Be encouraging and supportive.
         )
     def normalize_term(self, term: str) -> str:
         """Return ``term`` lower-cased with every whitespace run collapsed to one space.

         Leading and trailing whitespace is dropped, so differently spaced or
         differently cased spellings of the same term compare equal.
         """
         # split() with no separator already discards leading/trailing
         # whitespace, so the pieces can be re-joined directly.
         pieces = term.lower().split()
         return ' '.join(pieces)
     def extract_kazakh_terms(self, message: str, response: str) -> List[Tuple[str, str, str]]:
             bold_matches = re.findall(bold_pattern, response)
             for term in bold_matches:
+                normalized_term = self.normalize_term(term)
                 if normalized_term in seen_terms or len(normalized_term) <= 2 or len(normalized_term) > 100:
                     print(f"Skipped term {normalized_term}: Invalid length or already seen")
                     continue
+                category = "word"
                 definition = ""
                 term_matched = False
+                original_term = term
                 for known_term in self.known_terms:
                     if normalized_term == self.normalize_term(known_term):
                         term_matched = True
+                        original_term = known_term
                         for doc in retrieved_docs:
                             doc_type = doc.metadata.get('doc_type', '').lower()
                             if normalized_term in self.normalize_term(doc.page_content):
                                     category = "grammar"
                                 definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
                                 break
                         if not definition and len(known_term.split()) > 1:
                             category = "idiom"
                             definition = self.extract_clean_definition(normalized_term, "", response)
                         break
                 if not term_matched:
                     for known_term in self.known_terms:
                         normalized_known = self.normalize_term(known_term)
                         if (normalized_term.startswith(normalized_known) and
                             len(normalized_term) <= len(normalized_known) + 4):
                             term_matched = True
+                            normalized_term = normalized_known
+                            original_term = known_term
                             for doc in retrieved_docs:
                                 if normalized_known in self.normalize_term(doc.page_content):
                                     doc_type = doc.metadata.get('doc_type', '').lower()
                                         category = "grammar"
                                     definition = self.extract_clean_definition(normalized_known, doc.page_content, response)
                                     break
                             if not definition and len(known_term.split()) > 1:
                                 category = "idiom"
                                 definition = self.extract_clean_definition(normalized_known, "", response)
                             break
                 if term_matched and len(original_term.split()) == 1 and any('words' in doc.metadata.get('doc_type', '').lower() for doc in retrieved_docs):
                     category = "word"
             return terms
     def extract_clean_definition(self, term: str, doc_content: str, response: str) -> str:
         normalized_term = self.normalize_term(term)
         retrieved_docs = self.vectorstore.similarity_search(term, k=5)
         for doc in retrieved_docs:
             lines = doc.page_content.replace('\r\n', '\n').replace('\r', '\n').split('\n')
         return f"Definition for {term}"
     def get_user_memory(self, user_id: str):
         if user_id not in self.user_memories:
             self.user_memories[user_id] = ConversationBufferMemory(
                 memory_key='chat_history',
         return self.user_memories[user_id]
     def get_user_chain(self, user_id: str):
         memory = self.get_user_memory(user_id)
         retriever = self.vectorstore.as_retriever()
         return ConversationalRetrievalChain.from_llm(
         )
     def process_message(self, message: str, user_id: str = "default_user", session_token: str = None, target_language: str = "English") -> str:
         if session_token and not self.tracker.validate_session(user_id, session_token):
             return f"Session expired. Please login again in {target_language}."
         if user_id not in self.user_sessions:
             self.user_sessions[user_id] = self.tracker.start_session(user_id)
         self.setup_llm(target_language)
         if message.lower().startswith('/progress'):
             return self.get_progress_report(user_id)
         elif message.lower().startswith('/recommendations'):
         elif message.lower().startswith('/help'):
             return self.get_help_message()
         retrieved_docs = self.vectorstore.similarity_search(message, k=5)
         context = "\n".join([doc.page_content for doc in retrieved_docs])
         memory = self.get_user_memory(user_id)
         chat_history = ""
         for msg in memory.chat_memory.messages[-10:]:
             elif isinstance(msg, AIMessage):
                 chat_history += f"Assistant: {msg.content}\n"
         progress = self.tracker.get_user_progress(user_id)
         words_to_review = self.tracker.get_words_to_review(user_id, 5)
         mastered_words = self.tracker.get_mastered_words(user_id, page=1, page_size=5)
                                         for word in mastered_words])
             )
         full_prompt = f"""
         {self.system_prompt}
         Respond in {target_language}. If explaining a Kazakh word or idiom retrieved from the context, **bold** the term (e.g., **күләпара**) in your response to highlight it. Only bold the main term being explained.
         """
         response = self.llm.generate_content(full_prompt).text
         memory.chat_memory.add_user_message(message)
         memory.chat_memory.add_ai_message(response)
         extracted_terms = self.extract_kazakh_terms(message, response)
         unique_terms = {}
         for term, category, definition in extracted_terms:
         return response
     def get_progress_report(self, user_id: str) -> str:
         progress = self.tracker.get_user_progress(user_id)
         if progress['total_words'] == 0:
         return report
     def get_recommendations(self, user_id: str) -> str:
         recommendations = self.tracker.get_learning_recommendations(user_id)
         if not recommendations:
         return response
     def get_review_words(self, user_id: str) -> str:
         words_to_review = self.tracker.get_words_to_review(user_id, 10)
         if not words_to_review:
         return response
     def get_mastered_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
         mastered_words = self.tracker.get_mastered_words(user_id, page, page_size)
         if not mastered_words:
         return response
     def get_learning_words(self, user_id: str, page: int = 1, page_size: int = 10) -> str:
         learning_words = self.tracker.get_learning_words(user_id, page, page_size)
         if not learning_words:
         return response
     def get_new_word(self, user_id: str) -> Optional[Dict]:
         conn = sqlite3.connect(self.tracker.db_path)
         cursor = conn.cursor()
         return None
     def get_new_idiom(self, user_id: str) -> Optional[Dict]:
         conn = sqlite3.connect(self.tracker.db_path)
         cursor = conn.cursor()
     def get_help_message(self) -> str:
         """Return the static help text shown for the `/help` command.

         The text is a fixed literal listing the supported slash commands,
         short usage notes and example questions; it does not depend on any
         user or session state.
         """
         return """
+        🎓 **Kazakh Learning Assistant Help**
+        **Available Commands**:
+        - `/progress` - View your detailed learning progress
+        - `/recommendations` - Get personalized learning suggestions
+        - `/review` - See words that need review
+        - `/mastered` - See words you've mastered (mastery level > 0)
+        - `/help` - Show this help message
+        **How to Use**:
+        - Ask about any Kazakh word or idiom for definitions and examples
+        - Your progress is automatically tracked as you learn
+        - Regular practice improves your mastery levels
+        - Use commands to monitor your learning journey
+        **Examples**:
+        - "What does 'сәлем' mean?"
+        - "Tell me about Kazakh idioms"
+        - "How do you say 'thank you' in Kazakh?"
+        Start learning by asking about any Kazakh term! 🌟
+        """
     def login_user(self, user_id: str) -> str:
         """Create a session for ``user_id`` through the tracker and return its token.

         Args:
             user_id: Identifier of the user logging in.

         Returns:
             The session token produced by ``self.tracker.create_user_session``.
         """
         return self.tracker.create_user_session(user_id)
 assistant = PersonalizedKazakhAssistant()
 def chat_interface(message, history, target_language):
     try:
         web_user_id = "web_user_default"
         response = assistant.process_message(message, web_user_id, target_language=target_language)
         return f"Sorry, I encountered an error: {str(e)}. Please try again."
 def api_login(user_id: str) -> dict:
     try:
         session_token = assistant.login_user(user_id)
         return {
         }
 def api_chat(message: str, user_id: str, session_token: str = None, target_language: str = "English") -> dict:
     try:
         response = assistant.process_message(message, user_id, session_token, target_language)
         return {
         }
 def api_progress(user_id: str, session_token: str = None) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
         }
 def api_recommendations(user_id: str, session_token: str = None) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
         }
 def api_review_words(user_id: str, session_token: str = None) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
         }
 def api_mastered_words(user_id: str, session_token: str = None) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
         }
 def api_new_word(user_id: str, session_token: str = None) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
         }
 def api_new_idiom(user_id: str, session_token: str = None) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
         }
 def api_learning_words(user_id: str, session_token: str = None, page: int = 1, page_size: int = 10) -> dict:
     try:
         if session_token and not assistant.tracker.validate_session(user_id, session_token):
             return {"success": False, "error": "Invalid session"}
             ]
         )
 if __name__ == "__main__":
     demo.launch(
         show_api=True,