Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 2 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 3 |
+
import numpy as np
|
| 4 |
+
import gradio as gr
|
| 5 |
+
|
| 6 |
+
# Step 1: Collect FAQs.
# Maps each canonical question (the text that gets vectorized) to the answer
# returned to the user when that question is the best match.
faqs = {
    "What is CodeAlpha?": "CodeAlpha is a tech platform offering free virtual internships in AI, Web Development, and Data Science to help students gain practical experience.",
    "How long is CodeAlpha internship?": "CodeAlpha internships last 1 month. You must complete minimum 2 out of 4 tasks to receive the completion certificate and Letter of Recommendation.",
    "When is CodeAlpha submission deadline?": "Submission window: 10 May 2026 to 10 June 2026. Last date: 10 June 2026. Certificates issued: 11 June 2026.",
    "Is CodeAlpha certificate valuable?": "Yes, CodeAlpha certificates are recognized and the LOR helps in job applications. It's a verified virtual internship experience.",
    "How to contact CodeAlpha?": "Website: www.codealpha.tech | WhatsApp: +91 9336576683 | Tasks are shared via WhatsApp group.",
    "What is TF-IDF?": "TF-IDF stands for Term Frequency-Inverse Document Frequency. It's an NLP technique used to convert text to numerical vectors for similarity matching.",
    "What is cosine similarity?": "Cosine similarity measures the cosine of angle between two vectors. Used in NLP to find how similar two text documents are. Score from 0 to 1.",
    "What is NLTK used for?": "NLTK is Natural Language Toolkit. Used for text preprocessing: tokenization, stemming, removing stopwords, and cleaning text data.",
}

# Parallel lists: questions[i] is answered by answers[i]. A dict iterates in
# insertion order, so the keys and the values stay index-aligned.
questions = list(faqs)
answers = list(faqs.values())
|
| 20 |
+
|
| 21 |
+
# Step 2: Preprocess using TF-IDF (the vectorizer tokenizes and cleans the
# text automatically).
# English stop words are dropped, so a query made up only of stop words
# becomes an all-zero vector.
vectorizer = TfidfVectorizer(stop_words='english')

# One TF-IDF row per entry of `questions`, in the same order; the vocabulary
# is learned from the stored FAQ questions only.
tfidf_matrix = vectorizer.fit_transform(questions)
|
| 24 |
+
|
| 25 |
+
# Step 3: Match user questions using cosine similarity.
# Minimum cosine similarity the best-matching FAQ question must exceed before
# its answer is returned; at or below this the fallback message is used.
_MIN_CONFIDENCE = 0.3


def chatbot_response(message, history):
    """Return the FAQ answer whose question best matches ``message``.

    Args:
        message: The user's text. ``None``, empty, or whitespace-only input
            gets a prompt to ask a question instead of a lookup.
        history: Prior chat turns as passed by the chat UI. Unused here;
            each message is matched on its own.

    Returns:
        The matched answer followed by its match score as a percentage, or
        a fallback message when no stored question is similar enough.
    """
    # Guard against None as well as blank text so .strip() cannot raise.
    if not message or not message.strip():
        return "Please ask me a question about CodeAlpha or NLP concepts!"

    user_vec = vectorizer.transform([message])
    # Only one query is vectorized, so take row 0: one score per FAQ question.
    scores = cosine_similarity(user_vec, tfidf_matrix)[0]
    idx = int(np.argmax(scores))
    confidence = scores[idx]

    # Step 4: Display best matching answer.
    if confidence > _MIN_CONFIDENCE:
        return f"🤖 {answers[idx]}\n\nMatch Score: {confidence:.0%}"
    return "🤖 I don't have info on that. Try asking about CodeAlpha internship, TF-IDF, or cosine similarity."
|
| 40 |
+
|
| 41 |
+
# Step 5: Create simple chat UI.
# `demo` is kept as a module-level name so it can be imported or picked up by
# tooling without starting a server.
demo = gr.ChatInterface(
    fn=chatbot_response,
    title="CodeAlpha FAQ Chatbot - Task 2",
    description="Ask me about CodeAlpha internship or NLP concepts! Uses TF-IDF + Cosine Similarity",
    examples=["What is CodeAlpha?", "When is submission deadline?", "What is TF-IDF?"],
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    # Start the server only when run as a script, not on import.
    # share=True is intentionally not passed: this app is hosted as a Space,
    # which already serves it publicly, and share links are not supported
    # there. Pass share=True manually for a temporary public link when
    # running on a local machine.
    demo.launch()
|