zineb36 committed on
Commit
9b0f954
·
verified ·
1 Parent(s): 623abf6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import numpy as np
4
+ import gradio as gr
5
+
6
# Step 1: Collect FAQs
# Maps each known question to the canned answer returned for it.
faqs = {
    "What is CodeAlpha?": "CodeAlpha is a tech platform offering free virtual internships in AI, Web Development, and Data Science to help students gain practical experience.",
    "How long is CodeAlpha internship?": "CodeAlpha internships last 1 month. You must complete minimum 2 out of 4 tasks to receive the completion certificate and Letter of Recommendation.",
    "When is CodeAlpha submission deadline?": "Submission window: 10 May 2026 to 10 June 2026. Last date: 10 June 2026. Certificates issued: 11 June 2026.",
    "Is CodeAlpha certificate valuable?": "Yes, CodeAlpha certificates are recognized and the LOR helps in job applications. It's a verified virtual internship experience.",
    "How to contact CodeAlpha?": "Website: www.codealpha.tech | WhatsApp: +91 9336576683 | Tasks are shared via WhatsApp group.",
    "What is TF-IDF?": "TF-IDF stands for Term Frequency-Inverse Document Frequency. It's an NLP technique used to convert text to numerical vectors for similarity matching.",
    "What is cosine similarity?": "Cosine similarity measures the cosine of angle between two vectors. Used in NLP to find how similar two text documents are. Score from 0 to 1.",
    "What is NLTK used for?": "NLTK is Natural Language Toolkit. Used for text preprocessing: tokenization, stemming, removing stopwords, and cleaning text data.",
}

# Parallel lists: questions[i] is answered by answers[i]. Both are derived
# from the same dict, so they share its insertion order.
questions = list(faqs)
answers = list(faqs.values())
20
+
21
# Step 2: Preprocess using TF-IDF.
# The vectorizer handles tokenizing itself and drops English stop words.
# It is fitted once on the FAQ questions, so row i of tfidf_matrix is the
# vector for questions[i].
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(questions)
24
+
25
+ # Step 3: Match user questions using cosine similarity
26
def chatbot_response(message, history):
    """Return the stored FAQ answer that best matches ``message``.

    The message is vectorized with the module-level ``vectorizer`` and
    compared against every FAQ question in ``tfidf_matrix`` using cosine
    similarity. The answer for the highest-scoring question is returned
    when that score exceeds 0.3; otherwise a fallback message is returned.

    Args:
        message: The text the user typed. ``None``, non-string values and
            blank strings all produce the "please ask" prompt.
        history: Prior conversation turns supplied by the chat UI. Unused;
            every message is answered independently.

    Returns:
        The reply text to show in the chat.
    """
    # Anything that is not non-blank text cannot be matched; without the
    # isinstance check a None/dict payload would raise on .strip().
    if not isinstance(message, str) or not message.strip():
        return "Please ask me a question about CodeAlpha or NLP concepts!"

    user_vec = vectorizer.transform([message])
    # A single query row is compared against all questions, so take row 0
    # to get a flat array with one score per FAQ question.
    scores = cosine_similarity(user_vec, tfidf_matrix)[0]
    idx = int(np.argmax(scores))
    confidence = float(scores[idx])

    # Step 4: Display best matching answer
    if confidence > 0.3:
        return f"🤖 {answers[idx]}\n\nMatch Score: {confidence:.0%}"
    return "🤖 I don't have info on that. Try asking about CodeAlpha internship, TF-IDF, or cosine similarity."
40
+
41
# Step 5: Create simple chat UI
# chatbot_response is called with (message, history) for each user turn.
demo = gr.ChatInterface(
    fn=chatbot_response,
    title="CodeAlpha FAQ Chatbot - Task 2",
    description="Ask me about CodeAlpha internship or NLP concepts! Uses TF-IDF + Cosine Similarity",
    examples=["What is CodeAlpha?", "When is submission deadline?", "What is TF-IDF?"],
    theme=gr.themes.Soft(),
)

# Only start the server when run as a script, so importing this module
# (e.g. for tests or a reloader) does not launch anything.
if __name__ == "__main__":
    # NOTE(review): share=True requests a public link; hosted platforms such
    # as Hugging Face Spaces reportedly ignore it with a warning. Confirm
    # whether it is still wanted.
    demo.launch(share=True)