# Static chat blueprint: matches user questions about English tenses to a
# prepared Q&A database (TF-IDF with a keyword fallback) plus scripted scenarios.
| from flask import Blueprint, request, jsonify, current_app | |
| import json | |
| import re | |
| import numpy as np | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import os | |
| import string | |
| import tempfile | |
| from datetime import datetime | |
# Defer heavy optional import (whisper) to optional load so import-time does not crash app
MODEL_NAME = "base"  # Whisper model size to load (e.g. tiny/base/small)
model = None  # loaded Whisper model instance, or None when speech support is off
MODEL_AVAILABLE = False  # True only when whisper imported AND the model loaded
try:
    import whisper
    try:
        model = whisper.load_model(MODEL_NAME)
        MODEL_AVAILABLE = True
        print(f"Whisper model '{MODEL_NAME}' loaded successfully")
    except Exception as ex:
        # Whisper is installed but the weights failed to load (e.g. no cached
        # download); keep the app importable and run without transcription.
        print(f"Whisper installed but failed to load model '{MODEL_NAME}': {ex}")
        model = None
        MODEL_AVAILABLE = False
except Exception as ex:
    # whisper (or a transitive dependency) is not installed at all.
    print(f"Whisper not available: {ex}")
    model = None
    MODEL_AVAILABLE = False
# Add SymSpell for spell checking
try:
    from symspellpy import SymSpell, Verbosity
    import pkg_resources  # used below to locate symspellpy's bundled dictionaries
    SYMSPELL_AVAILABLE = True
except ImportError:
    # Spell correction becomes a no-op (see correct_spelling) when missing.
    print("SymSpell not available. Please install: pip install symspellpy")
    SYMSPELL_AVAILABLE = False
# Flask blueprint exposing the static-chat endpoints; registered by the app factory.
staticchat_bp = Blueprint("staticchat", __name__)
# NOTE: Blueprints do not have a config dict. MAX_CONTENT_LENGTH must be set on the Flask app.
# If you want to enforce max content size, set app.config["MAX_CONTENT_LENGTH"] when creating the Flask app.
# Initialize SymSpell if available
sym_spell = None  # shared SymSpell instance used by correct_spelling(); None when disabled
if SYMSPELL_AVAILABLE:
    try:
        sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
        # The frequency dictionaries ship inside the symspellpy package itself.
        dictionary_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_dictionary_en_82_765.txt"
        )
        bigram_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_bigramdictionary_en_243_342.txt"
        )
        # Load dictionaries
        sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)
        sym_spell.load_bigram_dictionary(bigram_path, term_index=0, count_index=1)
        print("SymSpell spell checker initialized successfully")
    except Exception as e:
        print(f"Failed to initialize SymSpell: {e}")
        # Disable spell checking entirely when dictionaries fail to load.
        SYMSPELL_AVAILABLE = False
# Try to import NLTK with fallback
try:
    import nltk
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
    from nltk.stem import WordNetLemmatizer
    # Download required NLTK resources
    # Each resource is probed first so downloads happen only once per machine.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt', quiet=True)
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords', quiet=True)
    try:
        nltk.data.find('corpora/wordnet')
    except LookupError:
        nltk.download('wordnet', quiet=True)
    NLTK_AVAILABLE = True
except Exception as e:
    # Any import/download failure falls back to the simple tokenizer path
    # inside preprocess_text().
    print(f"NLTK not available, using simple text processing: {e}")
    NLTK_AVAILABLE = False
| # Enhanced Scenario configurations | |
| SCENARIOS = { | |
| "greeting": { | |
| "keywords": ["good morning", "good afternoon", "good evening", "hello", "hi", "hey", "greetings"], | |
| "message": { | |
| "morning": "Good morning! Let's begin our lesson on tenses. You can ask me any question about tenses", | |
| "afternoon": "Good afternoon! Let's begin our lesson on tenses. You can ask me any question about tenses", | |
| "evening": "Good evening! Let's begin our lesson on tenses. You can ask me any question about tenses", | |
| "general": "Hello! Welcome to the English Tenses Learning Assistant. How can I help you with tenses today?" | |
| }, | |
| "audio_url": "assets/staticchat/intro.mp3", | |
| "video_url": "assets/staticchat/intro.mp4", | |
| "story_url": "", | |
| "detail_url": "", | |
| "example_url": "", | |
| "type": "scenario" | |
| }, | |
| "thanks": { | |
| "keywords": ["thank you", "thanks", "thank you very much", "appreciate it", "thanks a lot"], | |
| "message": "You're welcome! Do you have any other questions?", | |
| "audio_url": "assets/staticchat/you_are_welcome.mp3", | |
| "video_url": "assets/staticchat/you_are_welcome.mp4", | |
| "story_url": "", | |
| "detail_url": "", | |
| "example_url": "", | |
| "type": "scenario" | |
| }, | |
| "farewell": { | |
| "keywords": ["bye", "goodbye", "see you", "farewell", "take care", "bye bye"], | |
| "message": "Goodbye! Keep practicing your English tenses. Remember, practice makes perfect!", | |
| "audio_url": "assets/staticchat/bye.mp3", | |
| "video_url": "assets/staticchat/bye.mp4", | |
| "story_url": "", | |
| "detail_url": "", | |
| "example_url": "", | |
| "type": "scenario" | |
| }, | |
| "not_available": { | |
| "message": "I don't have the answer for that. Let's not available in my lesson today.", | |
| "suggestions": [ | |
| "Try asking about common tenses like present simple or past perfect", | |
| "Ask me about tense structures or examples", | |
| "Check if your question is specifically about English verb tenses" | |
| ], | |
| "audio_url": "assets/staticchat/no_db.mp3", | |
| "video_url": "assets/staticchat/no_db.mp4", | |
| "story_url": "", | |
| "detail_url": "", | |
| "example_url": "", | |
| "type": "scenario" | |
| }, | |
| "out_of_syllabus": { | |
| "keywords": [ | |
| # sports | |
| "sports", "sport", "cricket", "ipl", "match", "score", "wicket", "runs", "bat", "bowling", | |
| "football", "basketball", "tennis", "hockey", | |
| # other non-tense topics | |
| "weather", "rain", "sunny", "temperature", | |
| "food", "pizza", "burger", "restaurant", "cooking", | |
| "movie", "music", "song", "artist", "film", | |
| "history", "science", "math", "politics", "geography", "economics", "physics", | |
| # general grammar (NOT tenses) | |
| "noun", "pronoun", "adjective", "adverb", "preposition", "conjunction", | |
| "punctuation", "comma", "full stop", "spelling", "vocabulary", "synonym", "antonym", | |
| "phonetics", "pronunciation" | |
| ], | |
| "message": "That's not part of our tense lesson. Let's stay on our topic.", | |
| "audio_url": "assets/staticchat/out_of_topic.mp3", | |
| "video_url": "assets/staticchat/out_of_topic.mp4", | |
| "story_url": "", | |
| "detail_url": "", | |
| "example_url": "", | |
| "type": "scenario" | |
| }, | |
| "not_understandable": { | |
| "message": "I don't understand your question. Can you ask it again more simply?", | |
| "suggestions": [ | |
| "Try using simpler words", | |
| "Ask about specific tenses like 'What is present tense?'", | |
| "Ask for examples of tenses", | |
| "Check your spelling and grammar" | |
| ], | |
| "audio_url": "assets/staticchat/not_understand.mp3", | |
| "video_url": "assets/staticchat/not_understand.mp4", | |
| "story_url": "", | |
| "detail_url": "", | |
| "example_url": "", | |
| "type": "scenario" | |
| } | |
| } | |
# Load questions from JSON file
def load_questions(path='assets/qa.json'):
    """Load the Q&A database from a JSON file.

    Parameters:
        path: JSON file holding a list of {"question": ..., "answer": ...}
              records. Defaults to the bundled 'assets/qa.json' so existing
              zero-argument callers are unaffected.

    Returns:
        list: the parsed records, or [] when the file is missing or invalid.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        print(f"Loaded {len(data)} questions from {path}")
        # Debug: tally the present-tense sub-categories found in the database.
        tense_categories = {}
        for item in data:
            q = item.get('question', '').lower()  # tolerate records missing 'question'
            if 'present' in q:
                if 'continuous' in q or 'progressive' in q:
                    tense_categories['present_continuous'] = tense_categories.get('present_continuous', 0) + 1
                elif 'perfect' in q:
                    tense_categories['present_perfect'] = tense_categories.get('present_perfect', 0) + 1
                elif 'simple' in q:
                    tense_categories['present_simple'] = tense_categories.get('present_simple', 0) + 1
                else:
                    tense_categories['present_general'] = tense_categories.get('present_general', 0) + 1
        print(f"Tense categories in database: {tense_categories}")
        return data
    except FileNotFoundError:
        print(f"Error: {path} not found")
        return []
    except json.JSONDecodeError as e:
        print(f"Error parsing {path}: {e}")
        return []
# Spell correction function
def correct_spelling(text):
    """Return *text* with SymSpell-based spelling corrections applied.

    Falls back to returning the input unchanged when SymSpell is not
    installed/initialized, or when any lookup raises.
    """
    if not SYMSPELL_AVAILABLE or sym_spell is None:
        return text
    try:
        fixed = []
        for token in text.split():
            # Very short tokens are left alone; "corrections" there are noise.
            if len(token) <= 2:
                fixed.append(token)
                continue
            hits = sym_spell.lookup(token, Verbosity.CLOSEST, max_edit_distance=2)
            if hits and hits[0].term != token:
                fixed.append(hits[0].term)
                print(f"Corrected '{token}' to '{hits[0].term}'")
            else:
                fixed.append(token)
        word_level = ' '.join(fixed)
        # Compound lookup catches errors that span word boundaries (bigrams).
        compound = sym_spell.lookup_compound(text, max_edit_distance=2)
        if compound and compound[0].term != word_level:
            print(f"Bigram correction: '{text}' -> '{compound[0].term}'")
            return compound[0].term
        return word_level
    except Exception as e:
        print(f"Spell correction error: {e}")
        return text
# Enhanced text preprocessing
def preprocess_text(text):
    """Normalize a question for similarity matching.

    Pipeline: optional spell correction, lowercasing, punctuation removal,
    then stopword filtering (full NLTK pipeline with verb lemmatization when
    available, a simple keep/drop word filter otherwise). Auxiliary verbs
    that signal tense ("am", "have", "will", ...) are always preserved.
    """
    if SYMSPELL_AVAILABLE:
        text = correct_spelling(text)
    # Lowercase, replace punctuation with spaces, collapse whitespace runs.
    text = ' '.join(re.sub(r'[^\w\s]', ' ', text.lower()).split())
    if NLTK_AVAILABLE:
        try:
            keep = {'am', 'is', 'are', 'was', 'were', 'have', 'has', 'had',
                    'do', 'does', 'did', 'will', 'shall', 'would', 'could', 'should'}
            # Standard stopword list minus the auxiliaries carrying tense info.
            removable = set(stopwords.words('english')) - keep
            tokens = [t for t in word_tokenize(text) if t not in removable]
            lemmatizer = WordNetLemmatizer()
            # Lemmatize as verbs so inflected forms collapse to one stem.
            return ' '.join(lemmatizer.lemmatize(t, pos='v') for t in tokens)
        except Exception as e:
            print(f"Error in NLP processing: {e}")
            # Fall back to the already-cleaned text on any NLTK failure.
            return text
    # Simple fallback path: keep tense vocabulary, drop a basic stopword list.
    keep = {'tense', 'tenses', 'present', 'past', 'future',
            'continuous', 'perfect', 'simple', 'progressive',
            'am', 'is', 'are', 'was', 'were', 'have', 'has', 'had',
            'do', 'does', 'did', 'will', 'shall', 'would', 'could', 'should'}
    drop = {'a', 'an', 'the', 'of', 'in', 'on', 'at', 'by', 'for',
            'with', 'about', 'against', 'between', 'into', 'through',
            'during', 'before', 'after', 'above', 'below', 'to', 'from',
            'up', 'down', 'out', 'off', 'over', 'under', 'again',
            'further', 'then', 'once', 'here', 'there', 'when', 'where',
            'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',
            'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only',
            'own', 'same', 'so', 'than', 'too', 'very', 'can', 'may',
            'might', 'must', 'ought', 'shall', 'should', 'will', 'would'}
    return ' '.join(w for w in text.split() if w in keep or w not in drop)
def _scenario_response(name, message):
    """Build the uniform response payload for the scenario named *name*."""
    cfg = SCENARIOS[name]
    return {
        "scenario": name,
        "message": message,
        "audio_url": cfg["audio_url"],
        "video_url": cfg["video_url"],
        "story_url": cfg.get("story_url", ""),
        "detail_url": cfg.get("detail_url", ""),
        "example_url": cfg.get("example_url", "")
    }

def _contains_keyword(text, keyword):
    """True when *keyword* occurs in *text* as whole word(s).

    Word boundaries fix the false positives of plain substring matching
    (e.g. the greeting keyword 'hi' used to match 'this' or 'which').
    """
    return re.search(rf"\b{re.escape(keyword)}\b", text) is not None

def detect_scenario(user_question):
    """Detect if the user input matches any special scenario.

    Returns a response dict (scenario name, message, media URLs) or None
    when the question should go through the normal Q&A matching pipeline.
    """
    question_lower = user_question.lower().strip()
    # Greetings, thanks, and farewell have the highest priority.
    if any(_contains_keyword(question_lower, kw) for kw in SCENARIOS["greeting"]["keywords"]):
        # Pick a time-of-day-appropriate greeting variant.
        current_hour = datetime.now().hour
        if current_hour < 12:
            greeting_type = "morning"
        elif current_hour < 17:
            greeting_type = "afternoon"
        else:
            greeting_type = "evening"
        return _scenario_response("greeting", SCENARIOS["greeting"]["message"][greeting_type])
    if any(_contains_keyword(question_lower, kw) for kw in SCENARIOS["thanks"]["keywords"]):
        return _scenario_response("thanks", SCENARIOS["thanks"]["message"])
    if any(_contains_keyword(question_lower, kw) for kw in SCENARIOS["farewell"]["keywords"]):
        return _scenario_response("farewell", SCENARIOS["farewell"]["message"])
    # Out-of-syllabus: trigger only when off-topic words dominate tense words.
    out_of_syllabus_keywords = set(SCENARIOS["out_of_syllabus"]["keywords"])
    if any(_contains_keyword(question_lower, kw) for kw in out_of_syllabus_keywords):
        tense_keywords = ['tense', 'tenses', 'present', 'past', 'future',
                          'continuous', 'perfect', 'simple', 'progressive',
                          'verb', 'verbs', 'grammar', 'am', 'is', 'are',
                          'was', 'were', 'have', 'has', 'had']
        if any(_contains_keyword(question_lower, w) for w in tense_keywords):
            tense_count = sum(1 for w in tense_keywords if _contains_keyword(question_lower, w))
            out_count = sum(1 for w in out_of_syllabus_keywords if _contains_keyword(question_lower, w))
            # More tense-related words -> treat it as an on-topic question.
            if tense_count >= out_count:
                return None
        return _scenario_response("out_of_syllabus", SCENARIOS["out_of_syllabus"]["message"])
    # Too short to mean anything once punctuation is removed.
    clean_text = re.sub(r'[^\w\s]', '', question_lower)
    if len(clean_text.strip()) < 2:
        return _scenario_response("not_understandable", SCENARIOS["not_understandable"]["message"])
    # Gibberish heuristic: implausibly long average word length.
    words = clean_text.split()
    if words:
        avg_word_len = sum(len(word) for word in words) / len(words)
        if avg_word_len > 15:
            return _scenario_response("not_understandable", SCENARIOS["not_understandable"]["message"])
    return None
def check_topic_relevance(user_question):
    """Return True only if the question is about English tenses (not general topics)."""
    q = user_question.lower().strip()
    tense_word = re.compile(r"\btense(s)?\b")

    def has_word(word):
        # Whole-word containment test against the normalized question.
        return re.search(rf"\b{re.escape(word)}\b", q) is not None

    # Off-topic vocabulary without an explicit mention of "tense" -> reject.
    off_topic = SCENARIOS["out_of_syllabus"].get("keywords", [])
    if any(has_word(w) for w in off_topic) and not tense_word.search(q):
        return False
    # Saying "tense(s)" at all is a strong signal of intent.
    if tense_word.search(q):
        return True
    # Naming a tense directly counts as on-topic.
    named_tenses = (
        "present simple", "past simple", "future simple",
        "present continuous", "past continuous", "future continuous",
        "present perfect", "past perfect", "future perfect",
        "present perfect continuous", "past perfect continuous", "future perfect continuous",
    )
    if any(phrase in q for phrase in named_tenses):
        return True
    # A time word combined with an aspect word reads like a tense question.
    if any(has_word(t) for t in ("present", "past", "future")) and \
       any(has_word(a) for a in ("simple", "continuous", "perfect", "progressive")):
        return True
    # Usage/rules/structure questions about auxiliary verbs stay on-topic.
    auxiliaries = ("am", "is", "are", "was", "were", "have", "has", "had",
                   "do", "does", "did", "will", "shall", "would", "could", "should")
    intents = ("use", "using", "when", "rule", "rules", "structure", "form",
               "difference", "between", "meaning", "example", "examples")
    if any(has_word(h) for h in auxiliaries) and any(has_word(i) for i in intents):
        return True
    # Otherwise, not a tense question.
    return False
# Initialize questions data
# Loaded once at import time; the TF-IDF index below is built from these lists.
questions_data = load_questions()
question_texts = [item['question'] for item in questions_data]
preprocessed_questions = [preprocess_text(q) for q in question_texts]
# Initialize TF-IDF vectorizer
vectorizer = TfidfVectorizer(ngram_range=(1, 2)) # Use unigrams and bigrams
if preprocessed_questions: # Only fit if we have questions
    tfidf_matrix = vectorizer.fit_transform(preprocessed_questions)
    print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
else:
    # Empty database: calculate_similarity() returns an empty array in this case.
    tfidf_matrix = None
def calculate_similarity(user_question):
    """Cosine-similarity scores of *user_question* against every stored question.

    Returns a 1-D array aligned with questions_data, or an empty array when
    the question database failed to load.
    """
    if not preprocessed_questions:
        return np.array([])
    # Normalize the query the same way the corpus was normalized.
    cleaned = preprocess_text(user_question)
    query_vec = vectorizer.transform([cleaned])
    return cosine_similarity(query_vec, tfidf_matrix)[0]
def keyword_match(user_question, questions):
    """Rank *questions* by shared keywords with *user_question* (fallback matcher).

    Returns a list of {'index', 'score', 'common_words'} dicts sorted by
    descending score, where score = |overlap| / max(|user words|, |question words|).
    """
    user_words = set(preprocess_text(user_question).split())
    matches = []
    for idx, entry in enumerate(questions):
        entry_words = set(preprocess_text(entry['question']).split())
        overlap = user_words & entry_words
        if not overlap:
            continue
        # Normalize by the longer side so short questions aren't over-scored.
        matches.append({
            'index': idx,
            'score': len(overlap) / max(len(user_words), len(entry_words)),
            'common_words': list(overlap),
        })
    return sorted(matches, key=lambda m: m['score'], reverse=True)
def verify_match_relevance(user_q, matched_q, matched_answer):
    """Sanity-check that a matched Q/A pair actually answers the user.

    Heuristics (in order):
      * "difference/compare/versus" questions must get a comparing answer;
      * "how to / how do" questions must get an instructional answer;
      * a specific question must not be satisfied by a generic definition
        unless the answer mentions the user's specific terms;
      * otherwise accept when the two questions share >= 2 meaningful words,
        or trust the upstream similarity score that selected the match.

    Returns True when the match looks relevant, False to reject it.

    Note: earlier versions also computed preprocess_text() term sets and a
    'must_have_keywords' list that were never used; that dead code (and a
    redundant double .lower()) has been removed with no behavior change.
    """
    user_q_lower = user_q.lower()
    matched_q_lower = matched_q.lower()
    matched_answer_lower = matched_answer.lower()
    # Classify both questions by their interrogative opener (what/how/when/why).
    question_starters = {
        'what': ['what is', 'what are', 'what does', 'what do'],
        'how': ['how to', 'how do', 'how does'],
        'when': ['when to', 'when do', 'when does'],
        'why': ['why do', 'why does', 'why is']
    }
    user_starter = None
    matched_starter = None
    for starter_type, starters in question_starters.items():
        for starter in starters:
            if starter in user_q_lower:
                user_starter = starter_type
            if starter in matched_q_lower:
                matched_starter = starter_type
    if user_starter and matched_starter and user_starter == matched_starter:
        # Same question type (e.g. both "what" questions): log it, but keep
        # running the remaining checks rather than accepting outright.
        print(f"Both are {user_starter} questions - accepting match")
    # Comparison questions need a comparing answer.
    if 'difference' in user_q_lower or 'compare' in user_q_lower or 'versus' in user_q_lower:
        if not ('difference' in matched_answer_lower or 'compare' in matched_answer_lower or 'vs' in matched_answer_lower):
            print("User asked for differences but answer doesn't compare - rejecting")
            return False
    # "How to" questions need an instructional answer.
    if ('how to' in user_q_lower or 'how do' in user_q_lower) and 'how' not in matched_answer_lower:
        instruction_words = ['step', 'first', 'second', 'then', 'next', 'finally', 'process']
        if not any(word in matched_answer_lower for word in instruction_words):
            print("User asked 'how to' but answer is not instructional - rejecting")
            return False
    # A generic definition should not satisfy a specific question unless the
    # answer mentions the terms that follow the user's asking phrase.
    generic_questions = ['what is', 'what are', 'what does', 'what do']
    specific_questions = ['difference between', 'how to use', 'when to use',
                          'compare', 'explain the difference', 'give example of']
    user_is_specific = any(phrase in user_q_lower for phrase in specific_questions)
    match_is_generic = any(phrase in matched_q_lower for phrase in generic_questions)
    if user_is_specific and match_is_generic:
        user_specific_terms = []
        for phrase in specific_questions:
            if phrase in user_q_lower:
                # Take up to three words following the asking phrase.
                idx = user_q_lower.find(phrase) + len(phrase)
                user_specific_terms = user_q_lower[idx:].strip().split()[:3]
                break
        if user_specific_terms:
            if not any(term in matched_answer_lower for term in user_specific_terms if len(term) > 2):
                print("User asked specific, match is generic - likely wrong")
                return False
    # Accept when the two questions share at least two meaningful words.
    stopwords_set = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
    common_core = {word for word in set(user_q_lower.split()) & set(matched_q_lower.split())
                   if word not in stopwords_set and len(word) > 2}
    if len(common_core) >= 2:
        print(f"Common core words: {common_core} - accepting match")
        return True
    # The upstream TF-IDF/keyword score was already above threshold; trust it.
    return True
def verify_tense_specificity(user_q, matched_q, matched_answer):
    """Keep general tense questions from being answered with a sub-tense.

    When the user asks about e.g. "present tense" in general (no mention of
    continuous/perfect) but the match is a specific sub-tense question
    ("present continuous"/"present perfect"), search the database for the
    general question instead.

    Returns the index into questions_data of the general question to use,
    or None when the original match can stand. *matched_answer* is accepted
    for interface compatibility but is not inspected.

    Note: the original three copy-pasted present/past/future branches are
    folded into one loop that preserves the elif-chain semantics (only the
    first qualifying time word is considered).
    """
    user_q_lower = user_q.lower()
    matched_q_lower = matched_q.lower()
    for time_word in ('present', 'past', 'future'):
        general = f'{time_word} tense'
        # User asked about the general tense (no sub-aspect mentioned)...
        if general in user_q_lower and 'continuous' not in user_q_lower and 'perfect' not in user_q_lower:
            # ...but the match is a specific sub-tense of that time word.
            if f'{time_word} continuous' in matched_q_lower or f'{time_word} perfect' in matched_q_lower:
                for i, q_data in enumerate(questions_data):
                    q_text = q_data['question'].lower()
                    if general in q_text and 'continuous' not in q_text and 'perfect' not in q_text:
                        return i  # index of the general-tense question
            break  # mirror the original if/elif chain: stop at the first hit
    return None  # No need to override
| def search_question(): | |
| try: | |
| data = request.get_json() | |
| original_question = data.get('question', '').strip() | |
| if not original_question: | |
| return jsonify({ | |
| 'success': False, | |
| 'message': 'Please provide a question' | |
| }), 400 | |
| print(f"\n=== Processing: '{original_question}' ===") | |
| # First, check for special scenarios | |
| scenario_result = detect_scenario(original_question) | |
| if scenario_result: | |
| print(f"Detected scenario: {scenario_result['scenario']}") # Debug log | |
| return jsonify({ | |
| 'success': True, | |
| 'scenario': scenario_result['scenario'], | |
| 'message': scenario_result['message'], | |
| 'audio_url': scenario_result.get('audio_url', ''), | |
| 'video_url': scenario_result.get('video_url', ''), | |
| 'story_url': scenario_result.get('story_url', ''), | |
| 'detail_url': scenario_result.get('detail_url', ''), | |
| 'example_url': scenario_result.get('example_url', ''), | |
| 'user_question': original_question, | |
| 'matching_method': 'scenario' | |
| }) | |
| print("No scenario detected, checking topic relevance...") # Debug log | |
| # Check if question is related to tenses | |
| is_topic_relevant = check_topic_relevance(original_question) | |
| print(f"Topic relevant: {is_topic_relevant}") # Debug log | |
| if not is_topic_relevant: | |
| # If not relevant and not caught by out_of_syllabus scenario | |
| return jsonify({ | |
| 'success': True, | |
| 'scenario': 'out_of_syllabus', | |
| 'message': SCENARIOS['out_of_syllabus']['message'], | |
| 'audio_url': SCENARIOS['out_of_syllabus']['audio_url'], | |
| 'video_url': SCENARIOS['out_of_syllabus']['video_url'], | |
| 'story_url': SCENARIOS['out_of_syllabus'].get('story_url', ''), | |
| 'detail_url': SCENARIOS['out_of_syllabus'].get('detail_url', ''), | |
| 'example_url': SCENARIOS['out_of_syllabus'].get('example_url', ''), | |
| 'user_question': original_question, | |
| 'matching_method': 'scenario' | |
| }) | |
| # Calculate similarity if we have questions | |
| if not preprocessed_questions: | |
| return jsonify({ | |
| 'success': True, | |
| 'scenario': 'not_available', | |
| 'message': SCENARIOS['not_available']['message'], | |
| 'suggestions': SCENARIOS['not_available']['suggestions'], | |
| 'audio_url': SCENARIOS['not_available']['audio_url'], | |
| 'video_url': SCENARIOS['not_available']['video_url'], | |
| 'story_url': SCENARIOS['not_available'].get('story_url', ''), | |
| 'detail_url': SCENARIOS['not_available'].get('detail_url', ''), | |
| 'example_url': SCENARIOS['not_available'].get('example_url', ''), | |
| 'user_question': original_question, | |
| 'matching_method': 'scenario' | |
| }) | |
| similarity_scores = calculate_similarity(original_question) | |
| if len(similarity_scores) == 0: # No questions loaded | |
| return jsonify({ | |
| 'success': True, | |
| 'scenario': 'not_available', | |
| 'message': SCENARIOS['not_available']['message'], | |
| 'suggestions': SCENARIOS['not_available']['suggestions'], | |
| 'audio_url': SCENARIOS['not_available']['audio_url'], | |
| 'video_url': SCENARIOS['not_available']['video_url'], | |
| 'story_url': SCENARIOS['not_available'].get('story_url', ''), | |
| 'detail_url': SCENARIOS['not_available'].get('detail_url', ''), | |
| 'example_url': SCENARIOS['not_available'].get('example_url', ''), | |
| 'user_question': original_question, | |
| 'matching_method': 'scenario' | |
| }) | |
| # Get the best match | |
| best_match_idx = similarity_scores.argmax() | |
| best_score = similarity_scores[best_match_idx] | |
| print(f"Best TF-IDF score: {best_score:.3f}") # Debug log | |
| print(f"Matched to question #{best_match_idx + 1}: {questions_data[best_match_idx]['question']}") # Debug log | |
| # Check if we need to override for tense specificity | |
| override_idx = verify_tense_specificity( | |
| original_question, | |
| questions_data[best_match_idx]['question'], | |
| questions_data[best_match_idx]['answer'] | |
| ) | |
| if override_idx is not None: | |
| best_match_idx = override_idx | |
| best_score = 0.9 # Set high score for exact match | |
| print(f"Overriding to general tense question: {questions_data[best_match_idx]['question']}") | |
| # Set higher threshold for matching - INCREASED to prevent wrong matches | |
| tfidf_threshold = 0.35 # Increased from 0.2 to 0.35 | |
| keyword_threshold = 0.25 # Increased from 0.1 to 0.25 | |
| if best_score > tfidf_threshold: | |
| # Verify the match is actually relevant | |
| matched_question = questions_data[best_match_idx] | |
| is_relevant = verify_match_relevance(original_question, | |
| matched_question['question'], | |
| matched_question['answer']) | |
| if is_relevant: | |
| # Good match found with TF-IDF | |
| return jsonify({ | |
| 'success': True, | |
| 'matched_question': matched_question['question'], | |
| 'answer': matched_question['answer'], | |
| 'sno': matched_question['sno'], | |
| 'audio_url': matched_question.get('audio_url', ''), | |
| 'video_url': matched_question.get('video_url', ''), | |
| 'story_url': matched_question.get('story_url', ''), | |
| 'detail_url': matched_question.get('detail_url', ''), | |
| 'example_url': matched_question.get('example_url', ''), | |
| 'confidence_score': float(best_score), | |
| 'user_question': original_question, | |
| 'matching_method': 'tfidf', | |
| 'spell_corrected': original_question if SYMSPELL_AVAILABLE else 'not_available' | |
| }) | |
| else: | |
| # Match is not actually relevant | |
| print(f"Match verification failed. Score: {best_score:.3f}") | |
| # Fall through to not_available | |
| else: | |
| # Score below threshold | |
| print(f"Score below threshold. Score: {best_score:.3f}, Threshold: {tfidf_threshold}") | |
| # Try keyword matching as fallback (with higher threshold) | |
| keyword_matches = keyword_match(original_question, questions_data) | |
| print(f"Keyword matches found: {len(keyword_matches)}") # Debug log | |
| if keyword_matches: | |
| print(f"Best keyword score: {keyword_matches[0]['score']:.3f}") # Debug log | |
| if keyword_matches and keyword_matches[0]['score'] > keyword_threshold: | |
| best_keyword_match = keyword_matches[0] | |
| matched_question = questions_data[best_keyword_match['index']] | |
| # Verify keyword match too | |
| is_relevant = verify_match_relevance(original_question, | |
| matched_question['question'], | |
| matched_question['answer']) | |
| if is_relevant: | |
| return jsonify({ | |
| 'success': True, | |
| 'matched_question': matched_question['question'], | |
| 'answer': matched_question['answer'], | |
| 'sno': matched_question['sno'], | |
| 'audio_url': matched_question.get('audio_url', ''), | |
| 'video_url': matched_question.get('video_url', ''), | |
| 'story_url': matched_question.get('story_url', ''), | |
| 'detail_url': matched_question.get('detail_url', ''), | |
| 'example_url': matched_question.get('example_url', ''), | |
| 'confidence_score': float(best_keyword_match['score']), | |
| 'user_question': original_question, | |
| 'matching_method': 'keyword', | |
| 'common_words': best_keyword_match['common_words'] | |
| }) | |
| else: | |
| print("Keyword match verification failed") | |
| # No good match found but question is tense-related | |
| return jsonify({ | |
| 'success': True, | |
| 'scenario': 'not_available', | |
| 'message': SCENARIOS['not_available']['message'], | |
| 'suggestions': SCENARIOS['not_available']['suggestions'], | |
| 'audio_url': SCENARIOS['not_available']['audio_url'], | |
| 'video_url': SCENARIOS['not_available']['video_url'], | |
| 'story_url': SCENARIOS['not_available'].get('story_url', ''), | |
| 'detail_url': SCENARIOS['not_available'].get('detail_url', ''), | |
| 'example_url': SCENARIOS['not_available'].get('example_url', ''), | |
| 'user_question': original_question, | |
| 'matching_method': 'scenario', | |
| 'debug_info': { | |
| 'best_tfidf_score': float(best_score) if len(similarity_scores) > 0 else 0, | |
| 'best_keyword_score': keyword_matches[0]['score'] if keyword_matches else 0 | |
| } | |
| }) | |
| except Exception as e: | |
| print(f"Error in search_question: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({ | |
| 'success': False, | |
| 'message': f'Error processing request: {str(e)}' | |
| }), 500 | |
def get_all_questions():
    """Return every stored question as {sno, question} pairs (for autocomplete).

    Responds with JSON containing `questions` and their `count` on success,
    or a 500 payload carrying the error message on failure.
    """
    try:
        trimmed = [
            {'sno': item['sno'], 'question': item['question']}
            for item in load_questions()
        ]
        payload = {
            'success': True,
            'questions': trimmed,
            'count': len(trimmed),
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'success': False, 'message': str(e)}), 500
def get_question_by_sno(sno):
    """Look up a single question record by its serial number.

    Returns the full matching record on success, a 404 payload when no
    record carries the given sno, and a 500 payload on unexpected failure.
    """
    try:
        # Scan for the first record whose serial number matches.
        for record in load_questions():
            if record['sno'] == sno:
                return jsonify({'success': True, 'question': record})
        return jsonify({
            'success': False,
            'message': f'Question with SNO {sno} not found'
        }), 404
    except Exception as e:
        return jsonify({'success': False, 'message': str(e)}), 500
def get_suggestions():
    """Return up to `count` randomly chosen question texts as suggestions.

    Query params:
        count (int, default 5): desired number of suggestions; clamped to
            the range [0, len(questions_data)].

    Returns JSON with `suggestions` and `count`; an empty-database notice
    when no questions are loaded; a 500 payload on unexpected failure.
    """
    try:
        if not questions_data:
            return jsonify({
                'success': False,
                'message': "No questions available.",
                'suggestions': []
            })
        # Get parameter for number of suggestions
        count = request.args.get('count', default=5, type=int)
        # Clamp so random.sample never sees a negative or oversized k —
        # e.g. ?count=-3 previously raised ValueError and returned a 500.
        k = max(0, min(count, len(questions_data)))
        # Get random questions for suggestions
        import random
        random_questions = random.sample(questions_data, k)
        suggestions = [q['question'] for q in random_questions]
        return jsonify({
            'success': True,
            'suggestions': suggestions,
            'count': len(suggestions)
        })
    except Exception as e:
        print(f"Error in get_suggestions: {str(e)}")
        return jsonify({
            'success': False,
            'message': str(e),
            'suggestions': []
        }), 500
def get_scenarios():
    """Summarize each configured scenario: type, media availability, keywords."""
    try:
        summary = {
            name: {
                "type": data.get("type", "scenario"),
                "has_audio": bool(data.get("audio_url")),
                "has_video": bool(data.get("video_url")),
                "keywords": data.get("keywords", []),
            }
            for name, data in SCENARIOS.items()
        }
        return jsonify({
            'success': True,
            'scenarios': summary,
            'count': len(summary)
        })
    except Exception as e:
        return jsonify({'success': False, 'message': str(e)}), 500
def transcribe():
    """Transcribe an uploaded audio file with the local Whisper model.

    Expects multipart form data with a `file` field and an optional
    `language` field (e.g. en / hi / ta — passed straight to Whisper).
    Returns {"text": ...} on success, an error payload with 400/500/503
    otherwise.
    """
    # Fail fast with a clear status when the optional Whisper model never
    # loaded (see module init: `model` stays None on import/load failure);
    # previously this fell through to model.transcribe and produced an
    # opaque AttributeError-driven 500.
    if not MODEL_AVAILABLE or model is None:
        return jsonify({"error": "Speech model is not available on this server"}), 503
    if "file" not in request.files:
        return jsonify({"error": "No file field named 'file'"}), 400
    f = request.files["file"]
    if not f:
        return jsonify({"error": "No file uploaded"}), 400
    # Optional language from client: en / hi / ta
    language = request.form.get("language")  # may be None
    tmp_path = None
    try:
        # Keep a suffix so ffmpeg/whisper detects the container format better
        suffix = os.path.splitext(f.filename or "")[1].lower()
        if not suffix:
            suffix = ".webm"  # safe default for browser uploads
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
        f.save(tmp_path)
        # Run local whisper
        result = model.transcribe(
            tmp_path,
            language=language if language else None,
            fp16=False  # CPU-only: must be False
        )
        text = (result.get("text") or "").strip()
        return jsonify({"text": text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort temp-file cleanup; swallow only filesystem errors,
        # not everything (the old bare `except:` also ate KeyboardInterrupt).
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass