GuestUser33 committed on
Commit
51a5da5
·
verified ·
1 Parent(s): 45ea6b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -89
app.py CHANGED
@@ -527,7 +527,7 @@ class PersonalizedKazakhAssistant:
527
 
528
  try:
529
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
530
- bold_pattern = r'\*\*([А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+(?:[\s-][А-Яа-яӘәҒғҚқҢңӨөҰұҮүҺһІі]+)*)\*\*'
531
  bold_matches = re.findall(bold_pattern, response)
532
 
533
  for term in bold_matches:
@@ -536,31 +536,72 @@ class PersonalizedKazakhAssistant:
536
  print(f"Skipped term {normalized_term}: Invalid length or already seen")
537
  continue
538
 
539
- if normalized_term in self.known_terms:
540
- category = "word" # Default to word
541
- definition = ""
542
- for doc in retrieved_docs:
543
- if normalized_term in self.normalize_term(doc.page_content):
 
 
 
 
 
 
 
 
 
 
 
544
  doc_type = doc.metadata.get('doc_type', '').lower()
545
- # Prioritize folder structure
546
- if 'idioms' in doc_type.lower() or 'тіркес' in doc_type.lower():
547
- category = "idiom"
548
- elif 'words' in doc_type.lower():
549
- category = "word" # Ensure terms from words folder are words
550
- elif 'grammar' in doc_type.lower():
551
- category = "grammar"
552
- definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  break
554
- # Additional check: single-word terms from words folder should not be idioms
555
- if len(term.split()) == 1 and any('words' in doc.metadata.get('doc_type', '').lower() for doc in retrieved_docs):
556
- category = "word"
557
- if not definition:
558
- definition = self.extract_clean_definition(normalized_term, "", response)
559
-
560
- if definition:
561
- terms.append((term, category, definition))
562
- seen_terms.add(normalized_term)
563
- print(f"Added bolded term: {term}, category: {category}, definition: {definition}")
 
 
 
 
 
 
 
564
 
565
  return terms
566
 
@@ -605,8 +646,8 @@ class PersonalizedKazakhAssistant:
605
  memory=memory
606
  )
607
 
608
- def process_message(self, message: str, user_id: str = "default_user", session_token: str = None, use_direct_gemini: bool = False, target_language: str = "English") -> str:
609
- """Process user message with proper user session management and toggle for direct Gemini"""
610
 
611
  if session_token and not self.tracker.validate_session(user_id, session_token):
612
  return f"Session expired. Please login again in {target_language}."
@@ -646,9 +687,6 @@ class PersonalizedKazakhAssistant:
646
  elif message.lower().startswith('/help'):
647
  return self.get_help_message()
648
 
649
- if use_direct_gemini:
650
- return self.process_direct_gemini(message, user_id, target_language)
651
-
652
  # Retrieve relevant documents from vectorstore
653
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
654
  context = "\n".join([doc.page_content for doc in retrieved_docs])
@@ -686,7 +724,7 @@ class PersonalizedKazakhAssistant:
686
  words_to_review=''.join([f" - {word['word']} (Category: {word['category']}, Mastery: {word['mastery_level']}/5, Encounters: {word['encounter_count']})\n"
687
  for word in words_to_review]),
688
  mastered_words=''.join([f" - {word['word']} (Category: {word['category']}, Mastery: {word['mastery_level']}/5, Encounters: {word['encounter_count']})\n"
689
- for word in mastered_words])
690
  )
691
 
692
  # Construct prompt with context, history, and progress
@@ -917,56 +955,14 @@ Start learning by asking about any Kazakh term! 🌟
917
  session_token = self.tracker.create_user_session(user_id)
918
  return session_token
919
 
920
- def process_direct_gemini(self, message: str, user_id: str, target_language: str = "English") -> str:
921
- """Process message using direct Gemini with conversation memory for context."""
922
- try:
923
- memory = self.get_user_memory(user_id)
924
- chat_history = ""
925
- for msg in memory.chat_memory.messages[-10:]:
926
- if isinstance(msg, HumanMessage):
927
- chat_history += f"User: {msg.content}\n"
928
- elif isinstance(msg, AIMessage):
929
- chat_history += f"Assistant: {msg.content}\n"
930
-
931
- direct_prompt = """
932
- You are a friendly and supportive Kazakh language learning assistant. Your role is to help users learn Kazakh vocabulary, grammar, and idioms in a clear, concise, and engaging way. Respond in the user's primary language, inferred from their input, unless a specific language (English, Kazakh, or Russian) is requested. Provide practical examples and explanations tailored to language learners. Keep responses concise (under 200 words) and encouraging. Use your internal knowledge to ensure accuracy and relevance, focusing exclusively on Kazakh language learning.
933
-
934
- Previous conversation context:
935
- {chat_history}
936
- """
937
-
938
- if target_language != "English" and not any(
939
- keyword in message.lower() for keyword in ['kazakh', 'қазақша', 'қазақ тілінде', 'russian', 'русский', 'орысша']
940
- ):
941
- modified_message = f"Explain in {target_language}: {message}"
942
- else:
943
- modified_message = message
944
-
945
- direct_model = genai.GenerativeModel(
946
- model_name=self.MODEL,
947
- system_instruction=direct_prompt.format(chat_history=chat_history),
948
- generation_config={
949
- "temperature": 0.7,
950
- "max_output_tokens": 200
951
- }
952
- )
953
-
954
- response = direct_model.generate_content(modified_message).text
955
-
956
- memory.chat_memory.add_user_message(message)
957
- memory.chat_memory.add_ai_message(response)
958
-
959
- return response
960
- except Exception as e:
961
- return f"Error processing direct Gemini request: {str(e)}"
962
 
963
  assistant = PersonalizedKazakhAssistant()
964
 
965
- def chat_interface(message, history, use_direct_gemini, target_language):
966
- """Chat interface for Gradio with toggle for direct Gemini mode"""
967
  try:
968
  web_user_id = "web_user_default"
969
- response = assistant.process_message(message, web_user_id, use_direct_gemini=use_direct_gemini, target_language=target_language)
970
  return response
971
  except Exception as e:
972
  return f"Sorry, I encountered an error: {str(e)}. Please try again."
@@ -987,10 +983,10 @@ def api_login(user_id: str) -> dict:
987
  "error": str(e)
988
  }
989
 
990
- def api_chat(message: str, user_id: str, session_token: str = None, use_direct_gemini: bool = False, target_language: str = "English") -> dict:
991
- """API endpoint for chat functionality with proper user session and direct Gemini toggle"""
992
  try:
993
- response = assistant.process_message(message, user_id, session_token, use_direct_gemini, target_language)
994
  return {
995
  "success": True,
996
  "response": response,
@@ -1186,9 +1182,8 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1186
  gr.Markdown("### Multi-User Chat Interface + API Endpoints for Mobile Integration")
1187
 
1188
  with gr.Tab("💬 Chat Interface"):
1189
- gr.Markdown("Toggle **Direct Gemini Mode** to learn Kazakh grammar without RAG. Select the language for explanations.")
1190
  with gr.Row():
1191
- use_direct_gemini = gr.Checkbox(label="Direct Gemini Mode (No RAG/Tracking)", value=False)
1192
  target_language = gr.Dropdown(
1193
  label="Explanation Language",
1194
  choices=["English", "Kazakh", "Russian"],
@@ -1196,17 +1191,17 @@ with gr.Blocks(title="🇰🇿 Kazakh Learning API") as demo:
1196
  )
1197
  chat_interface_component = gr.ChatInterface(
1198
  fn=chat_interface,
1199
- additional_inputs=[use_direct_gemini, target_language],
1200
  type="messages",
1201
  examples=[
1202
- ["сәлем деген не?", False, "English"],
1203
- ["күләпара не үшін керек?", False, "English"],
1204
- ["/progress", False, "English"],
1205
- ["/recommendations", False, "English"],
1206
- ["/review", False, "English"],
1207
- ["/mastered", False, "English"],
1208
- ["Explain Kazakh noun cases in Russian", True, "Russian"],
1209
- ["Teach me Kazakh verb conjugation in English", True, "English"]
1210
  ]
1211
  )
1212
 
 
527
 
528
  try:
529
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
530
+ bold_pattern = r'\*\*([^\*]+)\*\*' # Match any bolded text
531
  bold_matches = re.findall(bold_pattern, response)
532
 
533
  for term in bold_matches:
 
536
  print(f"Skipped term {normalized_term}: Invalid length or already seen")
537
  continue
538
 
539
+ # Initialize category and definition
540
+ category = "word" # Default to word
541
+ definition = ""
542
+ term_matched = False
543
+ original_term = term # Preserve original case for tracking
544
+
545
+ # Check if term is multi-word (likely an idiom)
546
+ if len(term.split()) > 1:
547
+ category = "idiom"
548
+
549
+ # Check for exact match in known terms
550
+ for known_term in self.known_terms:
551
+ if normalized_term == self.normalize_term(known_term):
552
+ term_matched = True
553
+ original_term = known_term
554
+ for doc in retrieved_docs:
555
  doc_type = doc.metadata.get('doc_type', '').lower()
556
+ if normalized_term in self.normalize_term(doc.page_content):
557
+ if 'idioms' in doc_type or 'тіркес' in doc_type:
558
+ category = "idiom"
559
+ elif 'words' in doc_type:
560
+ category = "word"
561
+ elif 'grammar' in doc_type:
562
+ category = "grammar"
563
+ definition = self.extract_clean_definition(normalized_term, doc.page_content, response)
564
+ break
565
+ break
566
+
567
+ # If no exact match, try fuzzy matching for idioms with suffixes
568
+ if not term_matched:
569
+ for known_term in self.known_terms:
570
+ normalized_known = self.normalize_term(known_term)
571
+ if (normalized_term.startswith(normalized_known) and
572
+ len(normalized_term) <= len(normalized_known) + 4):
573
+ term_matched = True
574
+ normalized_term = normalized_known
575
+ original_term = known_term
576
+ for doc in retrieved_docs:
577
+ if normalized_known in self.normalize_term(doc.page_content):
578
+ doc_type = doc.metadata.get('doc_type', '').lower()
579
+ if 'idioms' in doc_type or 'тіркес' in doc_type:
580
+ category = "idiom"
581
+ elif 'words' in doc_type:
582
+ category = "word"
583
+ elif 'grammar' in doc_type:
584
+ category = "grammar"
585
+ definition = self.extract_clean_definition(normalized_known, doc.page_content, response)
586
+ break
587
  break
588
+
589
+ # If term is multi-word and not matched, assume it's an idiom
590
+ if not term_matched and len(term.split()) > 1:
591
+ category = "idiom"
592
+ definition = self.extract_clean_definition(normalized_term, "", response)
593
+
594
+ # Single-word terms from words folder should be categorized as words
595
+ if term_matched and len(original_term.split()) == 1:
596
+ for doc in retrieved_docs:
597
+ if 'words' in doc.metadata.get('doc_type', '').lower():
598
+ category = "word"
599
+ break
600
+
601
+ if definition:
602
+ terms.append((original_term, category, definition))
603
+ seen_terms.add(normalized_term)
604
+ print(f"Added bolded term: {original_term}, category: {category}, definition: {definition}")
605
 
606
  return terms
607
 
 
646
  memory=memory
647
  )
648
 
649
+ def process_message(self, message: str, user_id: str = "default_user", session_token: str = None, target_language: str = "English") -> str:
650
+ """Process user message with proper user session management"""
651
 
652
  if session_token and not self.tracker.validate_session(user_id, session_token):
653
  return f"Session expired. Please login again in {target_language}."
 
687
  elif message.lower().startswith('/help'):
688
  return self.get_help_message()
689
 
 
 
 
690
  # Retrieve relevant documents from vectorstore
691
  retrieved_docs = self.vectorstore.similarity_search(message, k=5)
692
  context = "\n".join([doc.page_content for doc in retrieved_docs])
 
724
  words_to_review=''.join([f" - {word['word']} (Category: {word['category']}, Mastery: {word['mastery_level']}/5, Encounters: {word['encounter_count']})\n"
725
  for word in words_to_review]),
726
  mastered_words=''.join([f" - {word['word']} (Category: {word['category']}, Mastery: {word['mastery_level']}/5, Encounters: {word['encounter_count']})\n"
727
+ for word in mastered_words])
728
  )
729
 
730
  # Construct prompt with context, history, and progress
 
955
  session_token = self.tracker.create_user_session(user_id)
956
  return session_token
957
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
958
 
959
  assistant = PersonalizedKazakhAssistant()
960
 
961
+ def chat_interface(message, history, target_language):
962
+ """Chat interface for Gradio"""
963
  try:
964
  web_user_id = "web_user_default"
965
+ response = assistant.process_message(message, web_user_id, target_language=target_language)
966
  return response
967
  except Exception as e:
968
  return f"Sorry, I encountered an error: {str(e)}. Please try again."
 
983
  "error": str(e)
984
  }
985
 
986
+ def api_chat(message: str, user_id: str, session_token: str = None, target_language: str = "English") -> dict:
987
+ """API endpoint for chat functionality with proper user session"""
988
  try:
989
+ response = assistant.process_message(message, user_id, session_token, target_language)
990
  return {
991
  "success": True,
992
  "response": response,
 
1182
  gr.Markdown("### Multi-User Chat Interface + API Endpoints for Mobile Integration")
1183
 
1184
  with gr.Tab("💬 Chat Interface"):
1185
+ gr.Markdown("Select the language for explanations.")
1186
  with gr.Row():
 
1187
  target_language = gr.Dropdown(
1188
  label="Explanation Language",
1189
  choices=["English", "Kazakh", "Russian"],
 
1191
  )
1192
  chat_interface_component = gr.ChatInterface(
1193
  fn=chat_interface,
1194
+ additional_inputs=[target_language],
1195
  type="messages",
1196
  examples=[
1197
+ ["сәлем деген не?", "English"],
1198
+ ["күләпара не үшін керек?", "English"],
1199
+ ["/progress", "English"],
1200
+ ["/recommendations", "English"],
1201
+ ["/review", "English"],
1202
+ ["/mastered", "English"],
1203
+ ["Explain Kazakh noun cases in Russian", "Russian"],
1204
+ ["Teach me Kazakh verb conjugation in English", "English"]
1205
  ]
1206
  )
1207