File size: 4,631 Bytes
ba29aa6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# CodeBasics FAQ System
# Smart FAQ retrieval using TF-IDF and cosine similarity
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
class CodeBasicsFAQ:
    """FAQ lookup over a CSV with ``prompt`` and ``response`` columns.

    Every stored question is turned into a TF-IDF vector (lower-cased,
    English stop words removed, unigrams and bigrams). A query is answered
    with the FAQ whose question has the highest cosine similarity to it.
    """

    # Encodings tried in order when reading the CSV. 'latin-1' decodes any
    # byte sequence, so it has to be last: anything listed after it would
    # never be reached.
    _ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    # Columns the CSV must provide.
    _REQUIRED_COLUMNS = ('prompt', 'response')

    def __init__(self, csv_path='codebasics_faqs.csv'):
        """Load the FAQ pairs from *csv_path* and fit the TF-IDF index.

        Args:
            csv_path: Path of the CSV file holding the FAQ pairs.

        Raises:
            KeyError: If the ``prompt`` or ``response`` column is missing.
            ValueError: If the file cannot be decoded with any supported
                encoding, or contains no complete question/answer pair.
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        self.questions, self.answers = self._load_faq_pairs(csv_path)
        if not self.questions:
            raise ValueError(
                f"Could not load FAQ CSV: no complete FAQ rows in {csv_path!r}"
            )
        print(f"✅ Loaded {len(self.questions)} FAQs")

        # Create TF-IDF vectorizer
        self.vectorizer = TfidfVectorizer(
            lowercase=True,
            stop_words='english',
            ngram_range=(1, 2),
        )

        # Fit on all questions; row i of the matrix belongs to questions[i].
        self.question_vectors = self.vectorizer.fit_transform(self.questions)
        print("✅ FAQ System ready!")

    @classmethod
    def _load_faq_pairs(cls, csv_path):
        """Read *csv_path* and return ``(questions, answers)`` as string lists.

        Only decoding failures trigger a retry with the next encoding; any
        other error (missing file, malformed CSV) propagates unchanged so the
        real cause stays visible. Rows lacking a prompt or a response are
        dropped, because a NaN cell can neither be vectorised nor searched.
        """
        df = None
        last_error = None
        for encoding in cls._ENCODINGS:
            try:
                df = pd.read_csv(csv_path, encoding=encoding)
            except UnicodeDecodeError as err:
                last_error = err
                continue
            break
        if df is None:
            raise ValueError(
                f"Could not load FAQ CSV: {csv_path!r} is not decodable as "
                f"any of {', '.join(cls._ENCODINGS)}"
            ) from last_error

        missing = [col for col in cls._REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise KeyError(
                f"FAQ CSV is missing required column(s): {', '.join(missing)}"
            )

        complete = df.dropna(subset=list(cls._REQUIRED_COLUMNS))
        # astype(str) also covers columns pandas parsed as numbers.
        questions = complete['prompt'].astype(str).tolist()
        answers = complete['response'].astype(str).tolist()
        return questions, answers

    def find_best_match(self, query, threshold=0.2):
        """Return the FAQ most similar to *query*, or ``None``.

        Args:
            query: The user's question.
            threshold: Minimum cosine similarity the best candidate must
                reach to count as a match.

        Returns:
            A dict with keys ``question``, ``answer`` and ``confidence``
            (the similarity score as a float), or ``None`` when the query is
            blank / not a string or the best score is below *threshold*.
        """
        # A blank query has nothing to match on; skip the vectoriser.
        if not isinstance(query, str) or not query.strip():
            return None

        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.question_vectors)[0]
        best_idx = int(np.argmax(similarities))
        best_score = float(similarities[best_idx])
        if best_score < threshold:
            return None
        return {
            'question': self.questions[best_idx],
            'answer': self.answers[best_idx],
            'confidence': best_score,
        }

    def answer(self, query):
        """Answer *query* with a status dict.

        Returns:
            On a match: ``status`` = ``'success'``, ``confidence`` (percentage
            string with one decimal), ``matched_question`` and ``answer``.
            Otherwise: ``status`` = ``'no_match'`` and a ``message``.
        """
        result = self.find_best_match(query)
        if not result:
            return {
                'status': 'no_match',
                'message': 'No matching FAQ found. Try rephrasing your question.',
            }
        return {
            'status': 'success',
            'confidence': f"{result['confidence']*100:.1f}%",
            'matched_question': result['question'],
            'answer': result['answer'],
        }

    def search_keyword(self, keyword):
        """Return every FAQ whose question or answer contains *keyword*.

        The comparison is a case-insensitive substring test, so an empty
        keyword matches every FAQ.

        Returns:
            A list of dicts with keys ``question`` and ``answer``, in the
            order the FAQs are stored.
        """
        keyword_lower = keyword.lower()
        return [
            {'question': question, 'answer': answer}
            for question, answer in zip(self.questions, self.answers)
            if keyword_lower in question.lower() or keyword_lower in answer.lower()
        ]

    def list_all_questions(self):
        """Return all FAQ questions.

        A copy is returned so callers cannot change the stored list, whose
        positions must stay aligned with the fitted question vectors.
        """
        return list(self.questions)
# ============================================================================
# USAGE EXAMPLE
# ============================================================================
def main():
    """Run the canned demo questions, then an interactive question loop.

    Loads ``codebasics_faqs.csv`` from the working directory, prints the best
    match for a few sample questions, and then answers questions typed on
    stdin until the user enters ``quit``, ``exit`` or ``q`` (or sends
    EOF / Ctrl-C).
    """
    # NOTE(review): the emoji in this block were restored from mis-decoded
    # text; the check mark and light bulb are well supported by the garbled
    # bytes, the other glyphs are best guesses -- confirm against the
    # original file.

    # Initialize
    faq = CodeBasicsFAQ('codebasics_faqs.csv')

    # Example questions
    test_questions = [
        "Can I take this bootcamp without programming experience?",
        "Why should I trust Codebasics?",
        "What are the prerequisites?",
        "Do I need a laptop?",
    ]

    banner = "=" * 70
    print("\n" + banner)
    print("TESTING FAQ SYSTEM")
    print(banner + "\n")

    for question in test_questions:
        print(f"❓ {question}")
        result = faq.answer(question)
        if result['status'] == 'success':
            print(f"✅ Match: {result['confidence']}")
            print(f"📌 Q: {result['matched_question']}")
            print(f"💡 A: {result['answer'][:100]}...\n")
        else:
            print(f"❌ {result['message']}\n")

    # Interactive mode
    print("\n" + banner)
    print("INTERACTIVE MODE")
    print(banner)
    print("Type 'quit' to exit\n")

    while True:
        try:
            user_q = input("❓ Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the loop instead of a traceback.
            print("\n👋 Goodbye!")
            break

        if user_q.lower() in ('quit', 'exit', 'q'):
            print("👋 Goodbye!")
            break
        if not user_q:
            continue

        result = faq.answer(user_q)
        if result['status'] == 'success':
            print(f"\n[Confidence: {result['confidence']}]")
            print(f"\n📌 {result['matched_question']}")
            print(f"\n✨ {result['answer']}\n")
        else:
            print(f"\n❌ {result['message']}\n")


if __name__ == "__main__":
    main()
|