afouda committed on
Commit
8d433bb
·
verified ·
1 Parent(s): dac8618

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +271 -171
app.py CHANGED
@@ -1,177 +1,277 @@
1
- import json
2
- import weaviate
3
- import fitz
4
- import docx
5
  import os
 
 
 
 
 
6
  import gradio as gr
7
  from openai import OpenAI
8
- from weaviate.classes.init import Auth
9
- from weaviate.classes.config import Property, DataType
10
- from sklearn.metrics.pairwise import cosine_similarity
11
-
12
- # --- Config ---
13
- WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
14
- WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "YOUR_KEY")
15
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_KEY")
16
-
17
- # --- Clients ---
18
- client = weaviate.WeaviateClient(
19
- url=WEAVIATE_URL,
20
- auth_client_secret=Auth.api_key(WEAVIATE_API_KEY),
 
 
 
 
21
  )
22
- openai_client = OpenAI(api_key=OPENAI_API_KEY)
23
-
24
- # --- Ensure Collections ---
25
- def ensure_collections():
26
- collections = {
27
- "Job": [Property(name="title", data_type=DataType.TEXT), Property(name="description", data_type=DataType.TEXT)],
28
- "Application": [Property(name="name", data_type=DataType.TEXT), Property(name="email", data_type=DataType.TEXT)],
29
- "Memory": [Property(name="content", data_type=DataType.TEXT)],
30
- "Opportunities": [Property(name="details", data_type=DataType.TEXT)],
31
- "Project": [Property(name="name", data_type=DataType.TEXT), Property(name="description", data_type=DataType.TEXT)],
32
- "Team": [Property(name="member", data_type=DataType.TEXT), Property(name="role", data_type=DataType.TEXT)],
33
- }
34
- for cname, props in collections.items():
35
- if not client.collections.exists(cname):
36
- client.collections.create(name=cname, properties=props)
37
-
38
- ensure_collections()
39
-
40
- # --- Embeddings ---
41
- def get_embedding(text):
42
- resp = openai_client.embeddings.create(input=text, model="text-embedding-3-small")
43
- return resp.data[0].embedding
44
-
45
def recommend_jobs_by_embedding(cv_text, jobs, top_n=3):
    """Return up to *top_n* jobs whose descriptions best match *cv_text*.

    Each job is a mapping with a "description" key. The CV and every job
    description are embedded with ``get_embedding`` and ranked by cosine
    similarity to the CV, highest first.

    Returns an empty list when *jobs* is empty or *top_n* is not positive.
    """
    # Guard first: an empty job list would make cosine_similarity fail on an
    # empty matrix, and a negative top_n would slice "all but the last k".
    # Returning early also avoids paying for embeddings nobody will use.
    if not jobs or top_n <= 0:
        return []

    cv_embedding = get_embedding(cv_text)
    job_embeddings = [get_embedding(job["description"]) for job in jobs]
    sims = cosine_similarity([cv_embedding], job_embeddings)[0]
    # Sort on the score only so the job dicts themselves are never compared.
    ranked = sorted(zip(jobs, sims), key=lambda pair: pair[1], reverse=True)
    return [job for job, _ in ranked[:top_n]]
51
-
52
- # --- File Upload Handling ---
53
def process_uploaded_file(file_path):
    """Extract the text of an uploaded PDF, DOCX or TXT file.

    The format is chosen from the file extension (case-insensitive). Any
    other extension yields an empty string. The result is stripped of
    leading and trailing whitespace.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == ".pdf":
        with fitz.open(file_path) as pdf:
            extracted = "".join(page.get_text() for page in pdf)
    elif suffix == ".docx":
        # Each paragraph is followed by a newline, including the last one.
        paragraphs = docx.Document(file_path).paragraphs
        extracted = "".join(para.text + "\n" for para in paragraphs)
    elif suffix == ".txt":
        with open(file_path, "r", encoding="utf-8") as handle:
            extracted = handle.read()
    else:
        extracted = ""

    return extracted.strip()
68
-
69
- # --- Session Management ---
70
- def initial_session():
71
- return {"state": "idle", "data": {}, "history": []}
72
-
73
- def handle_uploaded_cv_for_session(session, file_path):
74
- text = process_uploaded_file(file_path)
75
- session["data"]["cv_text"] = text
76
- return session
77
-
78
- # --- KB ---
79
- KB_RESPONSES = {
80
- "student_registration": "You can register as a student on the portal...",
81
- "student_internships": "Internships are listed under opportunities section..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
 
84
- # --- RAG Query ---
85
- def rag_query(collection, query_text):
86
- query_embedding = get_embedding(query_text)
87
- results = client.query.get(collection, ["*"]).with_near_vector({"vector": query_embedding}).with_limit(3).do()
88
- return results
89
-
90
- # --- LLM Chat ---
91
- def llm_chat(prompt):
92
- resp = openai_client.chat.completions.create(
93
- model="gpt-4o-mini",
94
- messages=[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": prompt}],
95
- )
96
- return resp.choices[0].message.content
97
-
98
- # --- Flows ---
99
def apply_flow(session, message):
    """Advance the job-application conversation by one step.

    The flow is a small state machine stored in ``session["state"]``:
    any other state -> "apply_name" -> "apply_email" -> "apply_cover" -> "idle".
    In each "apply_*" state, *message* is stored under the matching key of
    ``session["data"]`` ("name", "email", "cover").

    Returns a ``(reply, session)`` tuple. The same session object is always
    returned, updated in place.
    """
    state = session["state"]
    data = session.setdefault("data", {})

    if state == "apply_name":
        data["name"] = message
        session["state"] = "apply_email"
        return "Please provide your email.", session
    if state == "apply_email":
        data["email"] = message
        session["state"] = "apply_cover"
        return "Please provide your cover letter.", session
    if state == "apply_cover":
        data["cover"] = message
        session["state"] = "idle"
        return "Your application has been recorded.", session

    # Starting a new application: reset only the application fields. The
    # session object is kept so its other keys (e.g. "history") and unrelated
    # data (e.g. an uploaded CV's text) are not lost.
    for field in ("name", "email", "cover"):
        data.pop(field, None)
    session["state"] = "apply_name"
    return "Let's start your application. What's your name?", session
114
-
115
- def team_flow(session, message):
116
- return "Team flow triggered. Add member info.", session
117
-
118
- def recommend_flow(session, message):
119
- if "cv_text" in session["data"]:
120
- jobs = [{"title": "AI Intern", "description": "Work on NLP"}, {"title": "ML Engineer", "description": "Build models"}]
121
- recs = recommend_jobs_by_embedding(session["data"]["cv_text"], jobs)
122
- return f"Recommended jobs: {[j['title'] for j in recs]}", session
123
- return "Please upload your CV first.", session
124
-
125
- # --- Main Handler ---
126
- def handle_user_message(session, message):
127
- lower = message.lower()
128
-
129
- # KB check
130
- for key, answer in KB_RESPONSES.items():
131
- if key in lower:
132
- return answer, session
133
-
134
- # RAG check
135
- for collection in ["Job", "Application", "Memory", "Opportunities", "Project", "Team"]:
136
- if collection.lower() in lower:
137
- results = rag_query(collection, message)
138
- return f"RAG Results from {collection}: {json.dumps(results, indent=2)}", session
139
-
140
- # Flow triggers
141
- if "apply" in lower:
142
- return apply_flow(session, message)
143
- if "team" in lower:
144
- return team_flow(session, message)
145
- if "recommend" in lower:
146
- return recommend_flow(session, message)
147
-
148
- # Default LLM
149
- return llm_chat(message), session
150
-
151
- # --- Gradio App ---
152
- session = initial_session()
153
-
154
- def chat_with_bot(message, file=None):
155
- global session
156
- if file is not None:
157
- session = handle_uploaded_cv_for_session(session, file.name)
158
- return "CV uploaded successfully!"
159
- reply, session = handle_user_message(session, message)
160
- return reply
161
-
162
- with gr.Blocks(title="Edunatives Chatbot") as demo:
163
- gr.Markdown("# 🎓 Edunatives Chatbot")
164
- chatbot = gr.Chatbot()
165
- msg = gr.Textbox(placeholder="Type your message here...")
166
- file_upload = gr.File(label="Upload CV (PDF/DOCX/TXT)")
167
- clear = gr.Button("Clear Chat")
168
-
169
- def respond(message, history, file):
170
- response = chat_with_bot(message, file)
171
- history.append((message, response))
172
- return history, ""
173
-
174
- msg.submit(respond, [msg, chatbot, file_upload], [chatbot, msg])
175
- clear.click(lambda: ([], ""), None, [chatbot, msg])
176
-
177
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from __future__ import annotations
 
 
3
  import os
4
+ import json
5
+ import time
6
+ from dataclasses import dataclass
7
+ from typing import List, Dict, Any
8
+ import markdown
9
  import gradio as gr
10
  from openai import OpenAI
11
+
12
+ # --- 1. BACKEND LOGIC & CONFIG (from your first script) ---
13
+
14
# --- LLM Configuration ---
MODEL_NAME = "openai/gpt-oss-120b"
# The key is read from the environment so that no secret is committed to the
# repository. Set DEEPINFRA_API_KEY in the deployment's secret settings.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
BASE_URL = "https://api.deepinfra.com/v1/openai"

if not DEEPINFRA_API_KEY:
    print("[WARN] DEEPINFRA_API_KEY is not set. The chatbot will likely fail.")

# OpenAI-compatible client pointed at the DeepInfra endpoint.
client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
23
+
24
+ # --- Language & Routing Configuration ---
25
+ ARABIC_RANGE = (
26
+ (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
27
+ (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
28
  )
29
+
30
@dataclass
class Route:
    """Routing decision for a single user message."""

    # Audience the reply is tailored to ("student", "university", "company"
    # or "general").
    audience: str
    # Matched intent label, or "general" when no keyword matched.
    intent: str
    # Language code of the message: "ar" or "en".
    language: str
35
+
36
+ KB: Dict[str, Dict[str, str]] = {
37
+ "student_registration": {
38
+ "en": (
39
+ "**How to register / create an account (Student)**\n\n"
40
+ "1. Go to the EduNatives site and choose Sign Up.\n"
41
+ "2. Use your university email if possible and verify it.\n"
42
+ "3. Complete your profile (major, skills, interests).\n"
43
+ "4. Enable notifications for internships/scholarships."
44
+ ),
45
+ "ar": (
46
+ "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
47
+ "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
48
+ "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
49
+ "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
50
+ "٤. فعّل التنبيهات لفرص التدريب والمنح."
51
+ ),
52
+ },
53
+ "student_internships": {
54
+ "en": (
55
+ "**Finding internships & scholarships**\n\n"
56
+ "- Use the search filters: field, location, duration, paid/unpaid.\n"
57
+ "- Follow companies and set up alerts for new opportunities.\n"
58
+ "- Keep your profile and resume updated."
59
+ ),
60
+ "ar": (
61
+ "**كيفية العثور على تدريب أو منحة**\n\n"
62
+ "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
63
+ "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
64
+ "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
65
+ ),
66
+ },
67
+ }
68
+
69
+ KEYS = {
70
+ "student_registration": [
71
+ "register", "sign up", "signup", "create account", "account",
72
+ "تسجيل", "انشاء", "إنشاء", "حساب", "اعمل حساب", "سجل"
73
+ ],
74
+ "student_internships": [
75
+ "intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity",
76
+ "تدريب", "تدريبي", "منحة", "منح", "فرصة", "فرص", "انترنشيب"
77
+ ],
78
+ "student_mentors": [
79
+ "mentor", "advisor", "professor", "supervisor", "faculty", "connect",
80
+ "منتور", "مشرف", "دكتور", "أستاذ", "استاذ", "التواصل", "اكلم"
81
+ ],
82
+ "university_publish": [
83
+ "publish", "paper", "research", "preprint", "conference", "event", "seminar", "webinar",
84
+ "نشر", "أبحاث", "ابحاث", "بحث", "مؤتمر", "فعالية", "فعاليات", "ندوة", "ورشة"
85
+ ],
86
+ "university_connect": [
87
+ "students", "connect with students", "reach students", "collaborate",
88
+ "طلاب", "تواصل مع الطلاب", "التواصل مع الطلاب", "تعاون"
89
+ ],
90
+ "company_post_jobs": [
91
+ "job", "jobs", "post job", "hiring", "hire", "internships", "graduate",
92
+ "وظيفة", "وظائف", "اعلان", "إعلان", "نشر وظيفة", "توظيف", "فرص تدريب", "خريجين"
93
+ ],
94
+ "company_find_talent": [
95
+ "talent", "candidate", "recruit", "search", "find", "pipeline",
96
+ "موهبة", "مواهب", "مرشحين", "تعيين", "تجنيد", "ابحث", "دور على"
97
+ ],
98
+ }
99
+
100
+ AUDIENCE_MAP = {
101
+ "student_registration": "student",
102
+ "student_internships": "student",
103
+ "student_mentors": "student",
104
+ "university_publish": "university",
105
+ "university_connect": "university",
106
+ "company_post_jobs": "company",
107
+ "company_find_talent": "company",
108
  }
109
 
110
+ SYSTEM_PROMPT_BASE = (
111
+ "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
112
+ "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
113
+ "Goals by audience:\n"
114
+ "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors.\n"
115
+ "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
116
+ "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
117
+ "General rules:\n"
118
+ "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
119
+ "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
120
+ "- If information is unavailable, state that clearly and suggest the next best step.\n"
121
+ )
122
+ CONTEXT_INJECT_TEMPLATE = (
123
+ "Context to guide your answer (do not repeat verbatim):\n"
124
+ "- Audience: {audience}\n- Intent: {intent}\n- Extra hints: Keep it practical for this audience."
125
+ )
126
+
127
+ # --- Core Functions ---
128
def is_arabic(text: str) -> bool:
    """Return True if *text* contains at least one Arabic-script character.

    A character qualifies when its code point falls inside any of the
    inclusive (start, end) intervals listed in ``ARABIC_RANGE``.
    """
    return any(
        low <= ord(char) <= high
        for char in text
        for low, high in ARABIC_RANGE
    )
135
+
136
def route_intent(text: str, forced_audience: str | None = None) -> Route:
    """Classify *text* into an audience, an intent and a language.

    The intent is the first label in ``KEYS`` (in dictionary order) having a
    keyword that occurs as a substring of the lower-cased text. With no
    match the intent is "general" and the audience is *forced_audience* if
    given, else "general". With a match the audience comes from
    ``AUDIENCE_MAP`` unless *forced_audience* is one of "student",
    "university" or "company", in which case that value wins.
    """
    lang = "ar" if is_arabic(text) else "en"
    lowered = text.lower()

    # First label with any keyword hit; None when nothing matches.
    match_label = next(
        (label for label, kws in KEYS.items() if any(kw in lowered for kw in kws)),
        None,
    )

    if match_label is None:
        return Route(audience=forced_audience or "general", intent="general", language=lang)

    if forced_audience in {"student", "university", "company"}:
        audience = forced_audience
    else:
        audience = AUDIENCE_MAP.get(match_label, "general")

    return Route(audience=audience, intent=match_label, language=lang)
157
+
158
def call_llm(user_message: str, history: List[Dict[str, str]], route: Route) -> str:
    """Request a chat completion for *user_message*.

    The prompt consists of the base system prompt, a second system message
    carrying the routed audience and intent, the last three turns (six
    messages) of *history*, and finally the new user message.

    Returns the model's reply text, or "" when the reply is empty or the
    request raises (the error is printed).
    """
    max_turns = 3
    context_hint = CONTEXT_INJECT_TEMPLATE.format(audience=route.audience, intent=route.intent)
    messages: List[Dict[str, str]] = [
        {"role": "system", "content": SYSTEM_PROMPT_BASE},
        {"role": "system", "content": context_hint},
        # Keep only the most recent turns to bound the prompt size.
        *history[-max_turns * 2:],
        {"role": "user", "content": user_message},
    ]

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=0.6,
            top_p=0.9,
            max_tokens=800,
        )
        return completion.choices[0].message.content or ""
    except Exception as e:
        # Best effort: report the failure and return an empty answer.
        print(f"[ERROR] LLM call failed: {e}")
        return ""
180
+
181
def kb_fallback(route: Route) -> str:
    """Return a canned answer for *route* without calling the LLM.

    If the intent has an entry in ``KB``, the text for the route's language
    is returned, falling back to the English text and then to "". Otherwise
    a generic "please clarify" message is returned in Arabic or English.
    """
    entry = KB.get(route.intent)
    if entry is not None:
        return entry.get(route.language, entry.get("en", ""))

    if route.language != "ar":
        return (
            "I couldn't find enough info. Please clarify your request or ask about "
            "registration, internships/scholarships, mentors, publishing research, or jobs."
        )

    return (
        "عذراً، لم أجد معلومات كافية. يرجى توضيح طلبك أو السؤال عن أحد المواضيع التالية: "
        "(تسجيل، تدريب/منح، مشرفين، نشر أبحاث، وظائف)."
    )
195
+
196
+
197
+
198
+ with gr.Blocks(css="""
199
+ .chatbot {height: 500px; overflow: auto;}
200
+ .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
201
+ .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
202
+ .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
203
+ /* Basic styling for tables inside the bot bubble */
204
+ .bot-bubble table {border-collapse: collapse; width: 100%;}
205
+ .bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
206
+ .bot-bubble th {background-color: #e9e9e9;}
207
+ """) as demo:
208
+
209
+ gr.Markdown("# 🤖 EduNatives Assistant\nYour smart, bilingual guide for academic and career opportunities.")
210
+
211
+ with gr.Row():
212
+ audience_dd = gr.Dropdown(
213
+ label="Audience",
214
+ choices=["Auto", "Student", "University-Research", "Company"],
215
+ value="Auto",
216
+ interactive=True,
217
+ info="Select your role. 'Auto' detects it from your message."
218
+ )
219
+ clear_btn = gr.Button("🧹 Clear Chat")
220
+
221
+ status = gr.Markdown("Status: Ready.")
222
+ chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
223
+
224
+ chat_history_state = gr.State([])
225
+
226
+ with gr.Row(elem_classes="chatbox-container"):
227
+ msg = gr.Textbox(
228
+ placeholder="اكتب سؤالك هنا... / Ask your question here...",
229
+ lines=2,
230
+ scale=4,
231
+ autofocus=True,
232
+ )
233
+ send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
234
+
235
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat *history* as the HTML for the chat panel.

    Each item is a dict with "role" and "content" keys. "user" messages are
    shown as escaped plain text, "assistant" messages are converted from
    Markdown (with table support) to HTML, and any other role is skipped.

    Returns a single ``<div class='chatbot'>`` element containing one bubble
    per rendered message.
    """
    # Imported locally under a distinct name so it cannot clash with a
    # variable called ``html``.
    from html import escape

    parts = ["<div class='chatbot'>"]
    for message in history:
        role = message["role"]
        content = message["content"]
        if role == "user":
            # User input is untrusted and meant to be shown verbatim, so it
            # must be escaped; otherwise typed markup or scripts would be
            # injected into the page.
            parts.append(f"<div class='user-bubble'>{escape(content)}</div>")
        elif role == "assistant":
            # Convert the bot's Markdown response to HTML.
            # NOTE(review): Python-Markdown does not sanitize raw HTML in its
            # input by default; consider sanitizing model output as well.
            html_content = markdown.markdown(content, extensions=['tables'])
            parts.append(f"<div class='bot-bubble'>{html_content}</div>")
    parts.append("</div>")
    return "".join(parts)
249
+
250
+ # The 'respond' and 'clear_chat' functions remain exactly the same
251
def respond(user_text: str, history: List[Dict[str, str]], audience_choice: str):
    """Handle one submitted message and build the four UI outputs.

    Returns ``(textbox value, chat HTML, history, status text)``. Blank
    input leaves *history* untouched and only updates the status line.
    Otherwise the message is routed, answered by the LLM (or by the
    knowledge-base fallback when the LLM returns nothing), and both the user
    message and the answer are appended to *history* in place.
    """
    if not user_text.strip():
        return "", format_chat_html(history), history, "Status: Please type a message."

    # "Auto" (or any unknown choice) maps to None, i.e. no forced audience.
    audience_by_choice = {
        "Student": "student",
        "University-Research": "university",
        "Company": "company",
    }
    route = route_intent(user_text, forced_audience=audience_by_choice.get(audience_choice))

    # An empty LLM answer triggers the canned fallback.
    answer = call_llm(user_text, history, route) or kb_fallback(route)

    history.extend(
        [
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": answer},
        ]
    )
    status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"
    return "", format_chat_html(history), history, status_text
268
+
269
def clear_chat():
    """Reset the chat UI.

    Returns ``(chat HTML, history, status text)``: an empty chat container,
    an empty history list and the ready status.
    """
    # Return the empty container rather than "" so the styled, fixed-height
    # chat area stays on the page after clearing.
    return "<div class='chatbot'></div>", [], "Status: Ready."
271
+
272
+ send_btn.click(respond, [msg, chat_history_state, audience_dd], [msg, chatbot_html, chat_history_state, status], queue=True)
273
+ msg.submit(respond, [msg, chat_history_state, audience_dd], [msg, chatbot_html, chat_history_state, status], queue=True)
274
+ clear_btn.click(clear_chat, outputs=[chatbot_html, chat_history_state, status], queue=False)
275
+
276
+ if __name__ == "__main__":
277
+ demo.launch()