File size: 8,030 Bytes
00c9605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a16b513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00c9605
 
a16b513
00c9605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9750d0
00c9605
 
a16b513
00c9605
 
 
 
 
 
 
 
 
 
 
 
 
b3cbca4
00c9605
 
 
e13afc2
448d1e3
cdf90e7
 
448d1e3
 
37efbb7
 
 
 
 
448d1e3
37efbb7
cdf90e7
 
e13afc2
448d1e3
a16b513
6f7517a
 
e13afc2
 
 
 
 
 
 
 
 
6f7517a
 
37efbb7
 
 
 
 
 
 
 
 
 
448d1e3
2e06daf
448d1e3
 
37efbb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f7517a
 
448d1e3
6f7517a
 
2e06daf
37efbb7
5f37ae1
 
60d8889
 
448d1e3
5f37ae1
 
 
 
 
60d8889
 
5f37ae1
 
 
e4414d4
 
2e06daf
5f37ae1
 
 
e4414d4
 
448d1e3
 
60d8889
00c9605
448d1e3
 
 
 
 
37efbb7
 
 
 
 
 
00c9605
 
 
 
 
 
 
a16b513
00c9605
 
b3cbca4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
"""
Vrushket AI Assistant - Portfolio Chatbot
A RAG-powered chatbot that answers questions about Vrushket More
"""

import os
import gradio as gr
from groq import Groq
import chromadb
from chromadb.utils import embedding_functions
from pathlib import Path

# Initialize the Groq client. The API key is read from the GROQ_API_KEY
# environment variable (os.environ.get yields None when it is unset).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Embedding function backed by a sentence-transformers model; it is attached
# to the collection below so documents and query texts share one embedding.
EMBED_MODEL = "all-MiniLM-L6-v2"
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=EMBED_MODEL)

# Create (or reuse) the ChromaDB collection, configured for cosine distance.
# get_or_create_collection is used instead of create_collection so that
# executing this module a second time in the same process does not fail
# because a collection with this name already exists.
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(
    name="vrushket_knowledge",
    embedding_function=embedding_func,
    metadata={"hnsw:space": "cosine"},
)

# System prompt for the chatbot.
# chat() sends this text as the first "system" message of every request; it
# sets the tone and length rules and lists the contact details the assistant
# may share.
SYSTEM_PROMPT = """
You are Vrushket's AI assistant on his portfolio website. Be helpful, friendly, and CONCISE.

RESPONSE GUIDELINES:
- Keep responses to 2-4 sentences for simple questions
- Use bullet points for lists (max 4-5 items)
- Be warm but professional - avoid excessive enthusiasm or exclamation marks
- Answer what's asked directly, don't over-explain
- If asked about opportunities: Yes, actively looking! Suggest reaching out via email.

CONTACT INFO (share when relevant):
- Email: vmore2@binghamton.edu  
- LinkedIn: linkedin.com/in/vrushketmore
- GitHub: github.com/vmore2

You represent Vrushket - a passionate AI/ML engineer. Stay authentic but brief.
"""


def load_knowledge_base() -> int:
    """Load all markdown files from the knowledge_base folder into ChromaDB.

    Every ``*.md`` file in the ``knowledge_base`` directory next to this
    module is split into chunks at lines that start with ``"## "``; text
    before the first such heading forms its own chunk. Chunks whose stripped
    length is 50 characters or less are skipped. The remaining chunks are
    added to the module-level ``collection`` with ``{"source": <file name>}``
    metadata and sequential ids ``doc_0``, ``doc_1``, ...

    Files are processed in sorted path order so that a given chunk receives
    the same id on every run.

    Returns:
        The number of chunks added. 0 when the folder is missing or contains
        no chunk long enough to index.
    """
    knowledge_dir = Path(__file__).parent / "knowledge_base"
    if not knowledge_dir.is_dir():
        return 0

    documents = []
    metadatas = []

    # Path.glob gives no ordering guarantee; sorting keeps the ids stable.
    for md_file in sorted(knowledge_dir.glob("*.md")):
        content = md_file.read_text(encoding="utf-8")

        # Split into chunks (by "## " sections), collecting lines in a list
        # and joining once per section instead of growing a string.
        sections = []
        current_lines = []
        for line in content.split("\n"):
            if line.startswith("## ") and current_lines:
                sections.append("\n".join(current_lines).strip())
                current_lines = [line]
            else:
                current_lines.append(line)
        if current_lines:
            sections.append("\n".join(current_lines).strip())

        for section in sections:
            if len(section) > 50:  # Skip very short chunks
                documents.append(section)
                metadatas.append({"source": md_file.name})

    # Add to ChromaDB in a single call; ids follow the document order.
    if documents:
        collection.add(
            documents=documents,
            metadatas=metadatas,
            ids=[f"doc_{index}" for index in range(len(documents))],
        )

    return len(documents)

def retrieve_context(query: str, n_results: int = 5) -> str:
    """Retrieve relevant context from the knowledge base.

    Queries the module-level ChromaDB ``collection`` with ``query`` and joins
    the returned document chunks with a ``---`` separator line.

    Args:
        query: The user's question; used as the single query text.
        n_results: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunks joined by ``"\\n\\n---\\n\\n"``, or an empty
        string when the query is blank, ``n_results`` is not positive, the
        collection is empty, or the query returns no documents.
    """
    # Nothing meaningful to search for: skip the embedding/query round trip.
    if not query or not query.strip() or n_results <= 0:
        return ""

    # Never ask for more results than the collection holds.
    # NOTE(review): some ChromaDB releases warn or raise when n_results
    # exceeds the number of stored items - confirm against the pinned version.
    available = collection.count()
    if available == 0:
        return ""

    results = collection.query(
        query_texts=[query],
        n_results=min(n_results, available),
    )

    if results and results["documents"]:
        # One query text was sent, so the matches are in the first entry.
        return "\n\n---\n\n".join(results["documents"][0])
    return ""

def _history_to_messages(history) -> list:
    """Convert Gradio chat history into role/content message dicts.

    Accepts both history layouts: a list of ``(user, assistant)`` pairs and a
    list of ``{"role": ..., "content": ...}`` dicts. Entries that are empty,
    ``None``, or not plain strings are skipped.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
            continue

        try:
            human, assistant = turn
        except (TypeError, ValueError):
            # Not a two-item pair; ignore rather than fail the whole request.
            continue
        if isinstance(human, str) and human:
            converted.append({"role": "user", "content": human})
        if isinstance(assistant, str) and assistant:
            converted.append({"role": "assistant", "content": assistant})
    return converted


def chat(message: str, history: list) -> str:
    """Main chat function with RAG.

    Retrieves knowledge-base context for ``message``, builds the message list
    (system prompt, retrieved context, prior turns, current message) and asks
    the Groq chat completion API for a reply.

    Args:
        message: The user's current message.
        history: Prior conversation turns, either as ``(user, assistant)``
            pairs or as ``{"role", "content"}`` dicts.

    Returns:
        The model's reply text. If retrieval, message building or the API call
        fails, an apology string containing the contact email and the error
        text is returned instead of raising.
    """
    # Everything runs inside one error boundary so that a failure in
    # retrieval or history handling also produces the fallback reply.
    try:
        # Retrieve relevant context
        context = retrieve_context(message)

        # Build messages for Groq
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "system", "content": f"CONTEXT FROM KNOWLEDGE BASE:\n\n{context}"},
        ]

        # Add conversation history, then the current message
        messages.extend(_history_to_messages(history))
        messages.append({"role": "user", "content": message})

        # Call Groq API
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0.7,
            max_tokens=350,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"I apologize, but I'm having trouble responding right now. Please try again or contact Vrushket directly at vmore2@binghamton.edu. Error: {str(e)}"

# Load knowledge base on startup.
# This runs at import time: load_knowledge_base() indexes the markdown chunks
# into the ChromaDB collection and returns how many chunks were added, which
# is printed as a progress message.
print("Loading knowledge base...")
num_docs = load_knowledge_base()
print(f"Loaded {num_docs} document chunks into knowledge base")

# Text constants for the Gradio interface.
# NOTE(review): TITLE and DESCRIPTION are not referenced by the
# gr.ChatInterface(...) call in the visible part of this file - confirm
# whether they are meant to be passed to it or are leftovers.
TITLE = "💬 Chat with Vrushket's AI"
DESCRIPTION = """
Hey! 👋 Ask me anything about Vrushket's skills, projects, or experience!
"""

# Stylesheet passed to gr.ChatInterface via its css argument.
# It forces a dark background (#0a0f1a), white chat text, dark bordered
# message bubbles, inputs and buttons, light-blue links, and hides the
# footer, labels and avatars. Every rule uses !important.
CUSTOM_CSS = """
/* DARK BACKGROUND EVERYWHERE */
:root, *, body, .gradio-container, .wrap, .main, .contain {
    --body-background-fill: #0a0f1a !important;
    --background-fill-primary: #0a0f1a !important;
    --background-fill-secondary: #0a0f1a !important;
    --block-background-fill: #0a0f1a !important;
    --neutral-50: #ffffff !important;
    --neutral-100: #f1f5f9 !important;
    --neutral-200: #e2e8f0 !important;
    --body-text-color: #ffffff !important;
    --body-text-color-subdued: #cbd5e1 !important;
    background-color: #0a0f1a !important;
    color: #ffffff !important;
}

/* OUTER MESSAGE CONTAINER - NO BORDER */
.message, .message.bot, .message.user, [data-testid="bot"], [data-testid="user"] {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    padding: 0 !important;
}

/* INNER BUBBLE - THE DARK BOX WE KEEP */
.message-bubble-border, [class*="bubble-border"] {
    background: #1e293b !important;
    border: 1px solid #334155 !important;
    border-radius: 12px !important;
    padding: 12px 16px !important;
}

/* TEXT - WHITE - COMPREHENSIVE COVERAGE */
.message p, .message span, .message div, 
.prose p, .prose span, .prose div,
.markdown, .markdown p, .markdown span, .markdown div,
.chatbot p, .chatbot span, .chatbot div,
[class*="message"] p, [class*="message"] span, [class*="message"] div,
[class*="bot"] p, [class*="bot"] span, [class*="bot"] div,
[class*="user"] p, [class*="user"] span, [class*="user"] div,
.message-bubble-border p, .message-bubble-border span, .message-bubble-border div,
[class*="bubble"] p, [class*="bubble"] span, [class*="bubble"] div {
    color: #ffffff !important;
    font-size: 15px !important;
    line-height: 1.5 !important;
    background: transparent !important;
    -webkit-text-fill-color: #ffffff !important;
}

/* FORCE ALL TEXT WHITE IN CHAT */
.chatbot, .chatbot *, [class*="chatbot"], [class*="chatbot"] *,
.prose, .prose *, .markdown, .markdown * {
    color: #ffffff !important;
    -webkit-text-fill-color: #ffffff !important;
}

/* BOT MESSAGE SPECIFIC */
[data-testid="bot"] *, .bot *, .message.bot * {
    color: #ffffff !important;
    -webkit-text-fill-color: #ffffff !important;
}

/* USER MESSAGE SPECIFIC */
[data-testid="user"] *, .user *, .message.user * {
    color: #ffffff !important;
    -webkit-text-fill-color: #ffffff !important;
}

/* INPUT */
textarea, input, [class*="textbox"] {
    background: #1e293b !important;
    color: #f1f5f9 !important;
    -webkit-text-fill-color: #f1f5f9 !important;
    border: 1px solid #334155 !important;
    border-radius: 12px !important;
}

/* BUTTONS */
button {
    background: #1e293b !important;
    color: #94a3b8 !important;
    border: 1px solid #334155 !important;
    border-radius: 8px !important;
}

button:hover {
    background: #334155 !important;
    color: #f1f5f9 !important;
}

button.primary, [class*="primary"] {
    background: #0891b2 !important;
    color: white !important;
    border: none !important;
}

/* HIDE JUNK */
footer, .built-with, .label-wrap, label span, .avatar-container, .avatar {
    display: none !important;
}

/* CHATBOT AREA */
.chatbot, [class*="chatbot"] {
    background: #0a0f1a !important;
}

/* LINKS */
a, .prose a, .markdown a {
    color: #38bdf8 !important;
    -webkit-text-fill-color: #38bdf8 !important;
}
"""

# Build the chat UI: chat(message, history) is the response function and
# CUSTOM_CSS is the stylesheet. No title or description is passed here.
demo = gr.ChatInterface(
    fn=chat,
    css=CUSTOM_CSS,
)


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()