"""CodeBasics FAQ System.

Smart FAQ retrieval using TF-IDF and cosine similarity.

The FAQ data is read from a CSV file that must provide a ``prompt`` column
(the question) and a ``response`` column (the answer).
"""

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class FAQLoadError(Exception):
    """Raised when the FAQ CSV cannot be decoded with any known encoding."""


class CodeBasicsFAQ:
    """Answer free-text questions by matching them against a list of FAQs.

    Questions are vectorized with TF-IDF (lowercased, English stop words
    removed, unigrams and bigrams) and a query is matched to the FAQ question
    with the highest cosine similarity.

    Attributes:
        questions: FAQ questions, taken from the ``prompt`` column.
        answers: FAQ answers, taken from the ``response`` column; aligned
            index-for-index with ``questions``.
        vectorizer: The fitted ``TfidfVectorizer``.
        question_vectors: TF-IDF matrix of ``questions``.
    """

    # Tried in order. 'latin-1' maps every possible byte, so it must come
    # last: any encoding listed after it could never be reached.
    _ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    def __init__(self, csv_path='codebasics_faqs.csv'):
        """Initialize FAQ system from CSV file.

        Args:
            csv_path: Path of the CSV file holding the FAQs.

        Raises:
            FAQLoadError: If the file cannot be decoded with any of the
                supported encodings.
            KeyError: If the ``prompt`` or ``response`` column is missing.
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        df = self._load_csv(csv_path)

        missing = [col for col in ('prompt', 'response') if col not in df.columns]
        if missing:
            raise KeyError(f"FAQ CSV is missing required column(s): {missing}")

        # A row without a question cannot be matched against, and a missing
        # answer would otherwise surface as a float NaN that breaks the
        # string operations in search_keyword().
        df = df.dropna(subset=['prompt'])
        self.questions = df['prompt'].astype(str).tolist()
        self.answers = df['response'].fillna('').astype(str).tolist()
        print(f"✅ Loaded {len(self.questions)} FAQs")

        # Create TF-IDF vectorizer
        self.vectorizer = TfidfVectorizer(
            lowercase=True,
            stop_words='english',
            ngram_range=(1, 2),
        )

        # Fit on all questions
        self.question_vectors = self.vectorizer.fit_transform(self.questions)
        print("✅ FAQ System ready!")

    @classmethod
    def _load_csv(cls, csv_path):
        """Read the CSV, falling back through ``_ENCODINGS`` on decode errors."""
        last_error = None
        for encoding in cls._ENCODINGS:
            try:
                return pd.read_csv(csv_path, encoding=encoding)
            except UnicodeDecodeError as err:
                # Only a decoding failure justifies retrying with another
                # encoding; a missing or malformed file fails the same way
                # every time, so those errors are allowed to propagate.
                last_error = err
        raise FAQLoadError("Could not load FAQ CSV") from last_error

    def find_best_match(self, query, threshold=0.2):
        """Find best matching FAQ.

        Args:
            query: The user's question.
            threshold: Minimum cosine similarity for a match to be accepted.

        Returns:
            A dict with keys ``question``, ``answer`` and ``confidence`` (the
            cosine similarity as a float), or ``None`` when the best score is
            below ``threshold``.
        """
        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.question_vectors)[0]

        # Convert numpy scalars to built-in types so the result is a plain,
        # serializable dict.
        best_idx = int(np.argmax(similarities))
        best_score = float(similarities[best_idx])

        if best_score < threshold:
            return None
        return {
            'question': self.questions[best_idx],
            'answer': self.answers[best_idx],
            'confidence': best_score,
        }

    def answer(self, query):
        """Get answer for a query.

        Returns:
            On a match, a dict with ``status='success'``, ``confidence``
            (a percentage string such as ``'87.5%'``), ``matched_question``
            and ``answer``. Otherwise a dict with ``status='no_match'`` and
            a ``message``.
        """
        result = self.find_best_match(query)
        if result is None:
            return {
                'status': 'no_match',
                'message': 'No matching FAQ found. Try rephrasing your question.',
            }
        return {
            'status': 'success',
            'confidence': f"{result['confidence'] * 100:.1f}%",
            'matched_question': result['question'],
            'answer': result['answer'],
        }

    def search_keyword(self, keyword):
        """Search FAQs by keyword.

        The search is a case-insensitive substring test against both the
        question and the answer.

        Returns:
            A list of ``{'question': ..., 'answer': ...}`` dicts, in FAQ
            order. A blank keyword yields an empty list.
        """
        # An empty string is a substring of everything; treat a blank
        # keyword as "nothing to search for" rather than returning all FAQs.
        if not keyword.strip():
            return []

        keyword_lower = keyword.lower()
        return [
            {'question': question, 'answer': reply}
            for question, reply in zip(self.questions, self.answers)
            if keyword_lower in question.lower() or keyword_lower in reply.lower()
        ]

    def list_all_questions(self):
        """Return all FAQ questions.

        A copy is returned so callers cannot accidentally put the question
        list out of step with the fitted TF-IDF matrix.
        """
        return list(self.questions)


# ============================================================================
# USAGE EXAMPLE
# ============================================================================

def main():
    """Run a few sample queries, then an interactive question loop."""
    # Initialize
    faq = CodeBasicsFAQ('codebasics_faqs.csv')

    # Example questions
    test_questions = [
        "Can I take this bootcamp without programming experience?",
        "Why should I trust Codebasics?",
        "What are the prerequisites?",
        "Do I need a laptop?",
    ]

    print("\n" + "=" * 70)
    print("TESTING FAQ SYSTEM")
    print("=" * 70 + "\n")

    for question in test_questions:
        print(f"❓ {question}")
        result = faq.answer(question)

        if result['status'] == 'success':
            print(f"✅ Match: {result['confidence']}")
            print(f"📝 Q: {result['matched_question']}")
            print(f"💡 A: {result['answer'][:100]}...\n")
        else:
            print(f"❌ {result['message']}\n")

    # Interactive mode
    print("\n" + "=" * 70)
    print("INTERACTIVE MODE")
    print("=" * 70)
    print("Type 'quit' to exit\n")

    while True:
        try:
            user_q = input("❓ Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C (or exhausted piped input) ends the session
            # the same way as typing 'quit', without a traceback.
            print("\n👋 Goodbye!")
            break

        if user_q.lower() in ['quit', 'exit', 'q']:
            print("👋 Goodbye!")
            break

        if not user_q:
            continue

        result = faq.answer(user_q)

        if result['status'] == 'success':
            print(f"\n[Confidence: {result['confidence']}]")
            print(f"\n📌 {result['matched_question']}")
            print(f"\n✨ {result['answer']}\n")
        else:
            print(f"\n❌ {result['message']}\n")


if __name__ == "__main__":
    main()