|
|
|
|
|
|
|
|
|
|
|
import pandas as pd |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
import numpy as np |
|
|
|
|
|
class CodeBasicsFAQ:
    """TF-IDF based FAQ matcher backed by a CSV of prompt/response pairs.

    The CSV must provide a ``prompt`` column (the FAQ question) and a
    ``response`` column (its answer). All questions are vectorised once at
    construction time; queries are then matched by cosine similarity.
    """

    # Tried in order. ``latin-1`` can decode any byte sequence, so it has to
    # come last: anything listed after it would never be attempted.
    _ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    def __init__(self, csv_path='codebasics_faqs.csv'):
        """Initialize FAQ system from CSV file.

        Args:
            csv_path: Path to the FAQ CSV file.

        Raises:
            FileNotFoundError: If ``csv_path`` does not exist.
            KeyError: If the ``prompt`` or ``response`` column is missing.
            ValueError: If the file holds no usable FAQ rows.
            Exception: If the file cannot be decoded with any known encoding.
        """
        self.questions, self.answers = self._load_faq_pairs(csv_path)
        print(f"✅ Loaded {len(self.questions)} FAQs")

        self.vectorizer = TfidfVectorizer(
            lowercase=True,
            stop_words='english',
            ngram_range=(1, 2),
        )
        self.question_vectors = self.vectorizer.fit_transform(self.questions)
        print("✅ FAQ System ready!")

    @classmethod
    def _load_faq_pairs(cls, csv_path):
        """Read the CSV and return parallel ``(questions, answers)`` lists."""
        df = None
        last_error = None
        for encoding in cls._ENCODINGS:
            try:
                df = pd.read_csv(csv_path, encoding=encoding)
            except UnicodeDecodeError as err:
                # Only a decoding failure justifies retrying with another
                # encoding; a missing or malformed file must surface as-is.
                last_error = err
            else:
                break

        if df is None:
            raise Exception("Could not load FAQ CSV") from last_error

        missing = [col for col in ('prompt', 'response') if col not in df.columns]
        if missing:
            raise KeyError(f"FAQ CSV is missing required column(s): {missing}")

        # Rows with an empty question or answer come back as NaN, which breaks
        # both the vectorizer and the string operations in search_keyword.
        df = df.dropna(subset=['prompt', 'response'])
        questions = df['prompt'].astype(str).tolist()
        answers = df['response'].astype(str).tolist()
        if not questions:
            raise ValueError("FAQ CSV contains no usable prompt/response rows")
        return questions, answers

    def find_best_match(self, query, threshold=0.2):
        """Find best matching FAQ.

        Args:
            query: Free-text user question.
            threshold: Minimum cosine similarity required for a match.

        Returns:
            A dict with ``question``, ``answer`` and ``confidence`` (the
            cosine similarity as a float), or ``None`` when the query is
            blank, not a string, or scores below ``threshold``.
        """
        if not isinstance(query, str) or not query.strip():
            return None

        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.question_vectors)[0]

        best_idx = int(np.argmax(similarities))
        best_score = float(similarities[best_idx])
        if best_score < threshold:
            return None

        return {
            'question': self.questions[best_idx],
            'answer': self.answers[best_idx],
            'confidence': best_score,
        }

    def answer(self, query):
        """Get answer for a query.

        Returns:
            On a match, a dict with ``status`` set to ``'success'`` plus
            ``confidence`` (percentage string), ``matched_question`` and
            ``answer``. Otherwise a dict with ``status`` set to
            ``'no_match'`` and a user-facing ``message``.
        """
        result = self.find_best_match(query)
        if not result:
            return {
                'status': 'no_match',
                'message': 'No matching FAQ found. Try rephrasing your question.',
            }

        return {
            'status': 'success',
            'confidence': f"{result['confidence']*100:.1f}%",
            'matched_question': result['question'],
            'answer': result['answer'],
        }

    def search_keyword(self, keyword):
        """Search FAQs by keyword.

        Performs a case-insensitive substring search over both questions and
        answers. An empty keyword is a substring of everything and therefore
        matches every FAQ.

        Returns:
            A list of ``{'question': ..., 'answer': ...}`` dicts in file order.
        """
        keyword_lower = keyword.lower()
        return [
            {'question': question, 'answer': answer}
            for question, answer in zip(self.questions, self.answers)
            if keyword_lower in question.lower() or keyword_lower in answer.lower()
        ]

    def list_all_questions(self):
        """Return all FAQ questions (the internal list itself, not a copy)."""
        return self.questions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo entry point: run a few canned questions, then an interactive loop.
    faq = CodeBasicsFAQ('codebasics_faqs.csv')

    test_questions = [
        "Can I take this bootcamp without programming experience?",
        "Why should I trust Codebasics?",
        "What are the prerequisites?",
        "Do I need a laptop?",
    ]

    print("\n" + "="*70)
    print("TESTING FAQ SYSTEM")
    print("="*70 + "\n")

    for question in test_questions:
        print(f"❓ {question}")
        result = faq.answer(question)

        if result['status'] == 'success':
            print(f"✅ Match: {result['confidence']}")
            print(f"📋 Q: {result['matched_question']}")
            # Truncate long answers so the smoke test output stays readable.
            print(f"💡 A: {result['answer'][:100]}...\n")
        else:
            print(f"❌ {result['message']}\n")

    print("\n" + "="*70)
    print("INTERACTIVE MODE")
    print("="*70)
    print("Type 'quit' to exit\n")

    while True:
        try:
            user_q = input("❓ Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed/piped stdin or Ctrl-C: leave the loop without a traceback.
            print("\n👋 Goodbye!")
            break

        if user_q.lower() in ['quit', 'exit', 'q']:
            print("👋 Goodbye!")
            break

        if not user_q:
            continue

        result = faq.answer(user_q)

        if result['status'] == 'success':
            print(f"\n[Confidence: {result['confidence']}]")
            print(f"\n📋 {result['matched_question']}")
            print(f"\n✨ {result['answer']}\n")
        else:
            print(f"\n❌ {result['message']}\n")
|
|
|