# good / faq_system.py
# Uploaded by callidus with huggingface_hub (commit ba29aa6, verified).
# CodeBasics FAQ System
# Smart FAQ retrieval using TF-IDF and cosine similarity
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
class CodeBasicsFAQ:
    """FAQ lookup backed by TF-IDF vectors and cosine similarity.

    The FAQ data is a CSV file with a ``prompt`` column (questions) and a
    ``response`` column (answers). Questions are vectorized once when the
    object is built; each query is then compared against all of them and
    the closest question's answer is returned.
    """

    # Encodings tried in order when decoding the CSV. 'latin-1' can decode
    # any byte sequence, so the entries after it only act as a safety net.
    _ENCODINGS = ('utf-8', 'latin-1', 'iso-8859-1', 'cp1252')
    _REQUIRED_COLUMNS = ('prompt', 'response')

    def __init__(self, csv_path='codebasics_faqs.csv'):
        """Load the FAQ CSV and fit the TF-IDF model on its questions.

        Args:
            csv_path: Path to the FAQ CSV file.

        Raises:
            KeyError: If the CSV lacks a ``prompt`` or ``response`` column.
            ValueError: If no row has both a question and an answer.
            Exception: If the file cannot be decoded with any of the
                fallback encodings. Other errors raised while reading the
                file (for example a missing file) propagate unchanged.
        """
        df = self._read_faq_csv(csv_path)
        print(f"✅ Loaded {len(df)} FAQs")

        # Coerce to str so numeric-looking cells do not break the
        # vectorizer or the .lower() calls in search_keyword().
        self.questions = df['prompt'].astype(str).tolist()
        self.answers = df['response'].astype(str).tolist()
        if not self.questions:
            raise ValueError("FAQ CSV contains no usable question/answer rows")

        # Unigrams + bigrams, lowercased, with English stop words removed.
        self.vectorizer = TfidfVectorizer(
            lowercase=True,
            stop_words='english',
            ngram_range=(1, 2),
        )
        # Fit on all questions; the resulting matrix is reused per query.
        self.question_vectors = self.vectorizer.fit_transform(self.questions)
        print("✅ FAQ System ready!")

    @classmethod
    def _read_faq_csv(cls, csv_path):
        """Read the FAQ CSV and return only rows with a question and answer.

        Each encoding in ``_ENCODINGS`` is tried in turn; only a decoding
        failure moves on to the next one, so unrelated errors are not hidden.
        """
        last_error = None
        for encoding in cls._ENCODINGS:
            try:
                df = pd.read_csv(csv_path, encoding=encoding)
            except UnicodeDecodeError as exc:
                last_error = exc
                continue
            break
        else:
            raise Exception("Could not load FAQ CSV") from last_error

        missing = [col for col in cls._REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise KeyError(
                f"FAQ CSV is missing required column(s): {', '.join(missing)}"
            )
        # Empty cells are read as NaN; such rows cannot be matched or shown.
        return df.dropna(subset=list(cls._REQUIRED_COLUMNS))

    def find_best_match(self, query, threshold=0.2):
        """Return the FAQ whose question is most similar to ``query``.

        Args:
            query: Free-text user question.
            threshold: Minimum cosine similarity for a match to count.

        Returns:
            A dict with ``question``, ``answer`` and ``confidence`` (the
            cosine similarity as a float), or ``None`` when the query is
            empty or the best similarity is below ``threshold``.
        """
        # A blank query has no terms to compare; skip the vectorizer.
        if not query or not query.strip():
            return None

        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.question_vectors)[0]
        best_idx = int(np.argmax(similarities))
        best_score = float(similarities[best_idx])
        if best_score < threshold:
            return None
        return {
            'question': self.questions[best_idx],
            'answer': self.answers[best_idx],
            'confidence': best_score,
        }

    def answer(self, query):
        """Answer ``query`` with the best matching FAQ entry.

        Returns:
            On a match, a dict with ``status`` set to ``'success'`` plus
            ``confidence`` (a percentage string), ``matched_question`` and
            ``answer``. Otherwise a dict with ``status`` set to
            ``'no_match'`` and a ``message`` for the user.
        """
        result = self.find_best_match(query)
        if result is None:
            return {
                'status': 'no_match',
                'message': 'No matching FAQ found. Try rephrasing your question.',
            }
        return {
            'status': 'success',
            'confidence': f"{result['confidence']*100:.1f}%",
            'matched_question': result['question'],
            'answer': result['answer'],
        }

    def search_keyword(self, keyword):
        """Return every FAQ whose question or answer contains ``keyword``.

        The comparison is a case-insensitive substring test.

        Returns:
            A list of dicts with ``question`` and ``answer`` keys, in the
            order the FAQs were loaded.
        """
        keyword_lower = keyword.lower()
        return [
            {'question': question, 'answer': reply}
            for question, reply in zip(self.questions, self.answers)
            if keyword_lower in question.lower() or keyword_lower in reply.lower()
        ]

    def list_all_questions(self):
        """Return a copy of all FAQ questions.

        A copy is returned so callers cannot accidentally desynchronize the
        stored questions from the fitted TF-IDF matrix.
        """
        return list(self.questions)
# ============================================================================
# USAGE EXAMPLE
# ============================================================================
def _run_demo(faq):
    """Print the best FAQ match for a fixed set of sample questions."""
    test_questions = [
        "Can I take this bootcamp without programming experience?",
        "Why should I trust Codebasics?",
        "What are the prerequisites?",
        "Do I need a laptop?",
    ]
    print("\n" + "="*70)
    print("TESTING FAQ SYSTEM")
    print("="*70 + "\n")
    for question in test_questions:
        print(f"❓ {question}")
        result = faq.answer(question)
        if result['status'] != 'success':
            print(f"❌ {result['message']}\n")
            continue
        answer_text = result['answer']
        # Only mark the preview as truncated when text was actually cut.
        preview = answer_text if len(answer_text) <= 100 else answer_text[:100] + "..."
        print(f"✅ Match: {result['confidence']}")
        print(f"📝 Q: {result['matched_question']}")
        print(f"💡 A: {preview}\n")


def _run_interactive(faq):
    """Answer questions typed by the user until they quit."""
    print("\n" + "="*70)
    print("INTERACTIVE MODE")
    print("="*70)
    print("Type 'quit' to exit\n")
    while True:
        try:
            user_q = input("❓ Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C or a closed stdin: leave without a traceback.
            print("\n👋 Goodbye!")
            break
        if user_q.lower() in ['quit', 'exit', 'q']:
            print("👋 Goodbye!")
            break
        if not user_q:
            continue
        result = faq.answer(user_q)
        if result['status'] == 'success':
            print(f"\n[Confidence: {result['confidence']}]")
            print(f"\n📌 {result['matched_question']}")
            print(f"\n✨ {result['answer']}\n")
        else:
            print(f"\n❌ {result['message']}\n")


def main():
    """Load the FAQ data, run the sample queries, then start the prompt loop."""
    faq = CodeBasicsFAQ('codebasics_faqs.csv')
    _run_demo(faq)
    _run_interactive(faq)


if __name__ == "__main__":
    main()