Fayza38 committed on
Commit
1bb94e9
·
verified ·
1 Parent(s): 3115011

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. main.py +333 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10

# Hugging Face Docker Spaces run the container as UID 1000, so create a
# matching non-root user up front.
RUN useradd -m -u 1000 user

WORKDIR /code

# The service writes temporary .mp3 files into the working directory, so the
# runtime user must own it.
RUN chown user:user /code

# Copy the requirements file and install the dependencies
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the rest of the project files
COPY --chown=user:user . .

USER user

# Run the server on port 7860 (required by Hugging Face)
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================================
2
+ # 1. IMPORTS
3
+ # =========================================
4
+ import asyncio
5
+ import os
6
+ import json
7
+ import uuid
8
+ import cloudinary
9
+ import cloudinary.uploader
10
+ import firebase_admin
11
+ from firebase_admin import credentials, firestore
12
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
13
+ from pydantic import BaseModel
14
+ from gradio_client import Client
15
+ from google.cloud.firestore_v1.base_query import FieldFilter
16
+ import edge_tts
17
+ from dotenv import load_dotenv
18
+
19
+ # =========================================
20
+ # 2. INITIALIZATIONS
21
+ # =========================================
22
# Load variables from a local .env file first, so that every os.getenv() call
# below (including FIREBASE_JSON) can see them. load_dotenv() does not
# override variables that are already set in the real environment.
load_dotenv()

# Initialise the Firebase app only once; the guard keeps a re-import of this
# module from calling initialize_app() a second time.
if not firebase_admin._apps:
    fb_json = os.getenv("FIREBASE_JSON")

    if fb_json:
        # Credentials supplied inline as a JSON string in the environment.
        cred = credentials.Certificate(json.loads(fb_json))
    else:
        # Fall back to a key file next to the application.
        cred = credentials.Certificate("serviceAccountKey.json")

    firebase_admin.initialize_app(cred)

db = firestore.client()

# Cloudinary Configuration
cloudinary.config(
    cloud_name=os.getenv("CLOUD_NAME"),
    api_key=os.getenv("API_KEY"),
    api_secret=os.getenv("API_SECRET"),
    secure=True,
)

app = FastAPI(title="AI Question Service")
HF_SPACE = "Fayza38/Question_and_answer_model"
# Gradio client for HF_SPACE; assigned by the startup handler.
client = None
49
+
50
+ # =========================================
51
+ # 3. MODELS & CONSTANTS
52
+ # =========================================
53
# Track id -> track name. The position in this tuple IS the numeric id used in
# requests and stored in Firestore, so entries must never be reordered.
_TECH_CATEGORY_NAMES = (
    "Security",
    "BackEnd",
    "Networking",
    "FrontEnd",
    "DataEngineering",
    "WebDevelopment",
    "FullStack",
    "VersionControl",
    "SystemDesign",
    "MachineLearning",
    "LanguagesAndFrameworks",
    "DatabaseSystems",
    "ArtificialIntelligence",
    "SoftwareTesting",
    "DistributedSystems",
    "DevOps",
    "LowLevelSystems",
    "DatabaseAndSql",
    "GeneralProgramming",
    "DataStructures",
    "Algorithms",
)
TECH_CATEGORIES = dict(enumerate(_TECH_CATEGORY_NAMES))

# Difficulty id -> label used in the generation prompt.
DIFFICULTY_MAP = dict(enumerate(("Easy", "Intermediate", "Hard")))

# Session type id -> label.
SESSION_TYPE_MAP = dict(enumerate(("Technical", "Behavioral")))
76
+
77
class GenerateSessionRequest(BaseModel):
    """Request body accepted by POST /generate-session."""

    # Caller-supplied identifier, echoed back as "session_id" in the response.
    sessionId: str
    # 0 = Technical, 1 = Behavioral.
    sessionType: int
    # Difficulty id; only used to filter technical sessions.
    difficultyLevel: int = 0
    # Numeric track id (despite the name); only used for technical sessions.
    trackName: int
82
+
83
class CleanupRequest(BaseModel):
    """Request body accepted by POST /cleanup-audio."""

    # URLs of previously uploaded audio files that should be removed.
    audioUrls: list[str]
85
+
86
+ # =========================================
87
+ # 4. STARTUP EVENT
88
+ # =========================================
89
@app.on_event("startup")
async def startup_event():
    """Create the Gradio client for HF_SPACE when the application starts.

    Makes up to ``max_retries`` attempts, sleeping ``retry_delay`` seconds
    between them. If every attempt fails, the global ``client`` stays ``None``
    and the failure is reported on stdout; the application still starts.
    """
    global client
    max_retries = 5
    retry_delay = 10
    print("Connecting to Hugging Face Space...")
    for attempt in range(1, max_retries + 1):
        try:
            client = Client(HF_SPACE)
        except Exception as e:
            if attempt == max_retries:
                # No retry follows the last attempt, so say so instead of
                # announcing a retry that never happens.
                print(f"Connection attempt {attempt} failed: {e}. Giving up after {max_retries} attempts.")
                break
            print(f"Connection attempt {attempt} failed: {e}. Retrying in {retry_delay}s...")
            await asyncio.sleep(retry_delay)
        else:
            print("Connected Successfully!")
            break
103
+
104
+ # =========================================
105
+ # 5. HELPERS
106
+ # =========================================
107
async def generate_audio(text, filename):
    """Synthesize ``text`` to speech, upload it to Cloudinary and return its URL.

    The audio is first written to ``filename`` on local disk; that file is
    always removed again before returning, whether or not the upload worked.

    Args:
        text: The text to speak.
        filename: Local path used for the temporary audio file.

    Returns:
        The ``secure_url`` reported by Cloudinary, or ``None`` if synthesis or
        upload failed (the error is printed).
    """
    try:
        # Rate is set to -15% to make the voice slightly slower and clearer
        communicate = edge_tts.Communicate(text, "en-US-GuyNeural", rate="-15%")
        await communicate.save(filename)

        # The Cloudinary SDK call is synchronous; run it in the default
        # executor so the upload does not block the event loop.
        loop = asyncio.get_running_loop()
        upload_result = await loop.run_in_executor(
            None,
            lambda: cloudinary.uploader.upload(
                filename,
                resource_type="video",
                folder="interview_audio",
            ),
        )
        return upload_result["secure_url"]
    except Exception as e:
        print(f"Audio Generation Error: {e}")
        return None
    finally:
        # Single cleanup point for both the success and the failure path.
        if os.path.exists(filename):
            os.remove(filename)
126
+
127
async def safe_generate(prompt, retries=3, delay=2):
    """Call the Space's ``/generate_questions`` endpoint with retries.

    The blocking ``client.predict`` call runs in the default executor so the
    event loop stays responsive.

    Args:
        prompt: Prompt text forwarded to the model.
        retries: Maximum number of attempts; values below 1 are treated as 1
            so the call is always tried at least once.
        delay: Seconds to wait between failed attempts.

    Returns:
        Whatever ``client.predict`` returns.

    Raises:
        RuntimeError: If the Gradio client has not been initialized.
        Exception: The error of the last attempt, re-raised unchanged.
    """
    if client is None:
        raise RuntimeError("Gradio Client not initialized")

    loop = asyncio.get_running_loop()
    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            return await loop.run_in_executor(
                None,
                lambda: client.predict(prompt=prompt, api_name="/generate_questions"),
            )
        except Exception:
            if attempt == attempts - 1:
                # Bare raise keeps the original traceback intact.
                raise
            await asyncio.sleep(delay)
136
+
137
def parse_question_output(raw_output: str):
    """Extract a (question, answer) pair from raw model output.

    If the output contains the word "assistant", only the text after its last
    occurrence is considered. The question is the text between the first
    "Q:" and the first "A:" that follows it; the answer is everything after
    that "A:", cut at "<|im_end|>" if present.

    Args:
        raw_output: Text returned by the generation model.

    Returns:
        ``(question, answer)`` as stripped strings, or ``(None, None)`` when
        the output is empty, a marker is missing, or either part is empty.
    """
    if not raw_output:
        return None, None

    text = raw_output
    if "assistant" in text:
        text = text.rsplit("assistant", 1)[-1]
    text = text.strip()

    q_pos = text.find("Q:")
    if q_pos == -1:
        return None, None

    # Split only on the first "A:" after the question marker, so an answer
    # that itself contains "A:" is not truncated.
    question, marker, answer = text[q_pos + len("Q:"):].partition("A:")
    if not marker:
        return None, None

    question = question.strip()
    answer = answer.split("<|im_end|>")[0].strip()
    if not question or not answer:
        return None, None
    return question, answer
148
+
149
+ # =========================================
150
+ # 6. REFILL & PREFILL LOGIC
151
+ # =========================================
152
async def refill_specific_pool(track_id: int, difficulty: int, count: int, session_type: int = 0,
                               max_consecutive_failures: int = 10):
    """Generate ``count`` new questions with audio and add them to the pool.

    Each question is generated via ``safe_generate``, parsed with
    ``parse_question_output``, voiced with ``generate_audio`` and stored in
    the ``questions_pool`` Firestore collection.

    Args:
        track_id: Key of TECH_CATEGORIES (ignored for behavioral sessions).
        difficulty: Key of DIFFICULTY_MAP (ignored for behavioral sessions).
        count: Number of questions to add.
        session_type: 1 selects the behavioral prompt; any other value uses
            the technical prompt.
        max_consecutive_failures: Stop after this many attempts in a row that
            stored nothing, so a permanently failing model cannot keep this
            task running forever.

    Raises:
        ValueError: For a technical refill with an unknown track or difficulty.
    """
    # The client is only assigned by the startup handler; if it is still
    # missing here, waiting would never end, so skip the refill instead.
    if client is None:
        print("Refill skipped: Gradio client is not connected.")
        return

    # Technical (0) vs Behavioral (1)
    if session_type == 1:
        prompt = "Generate ONE unique behavioral interview question (soft skills, situational). Format: Q: [Question] A: [Answer]"
        track_text = "Behavioral"
        level_text = "General"
    else:
        track_text = TECH_CATEGORIES.get(track_id)
        level_text = DIFFICULTY_MAP.get(difficulty)
        if track_text is None or level_text is None:
            # Otherwise the prompt would literally ask for a "None" question.
            raise ValueError(f"Unknown track_id={track_id!r} or difficulty={difficulty!r}")
        prompt = f"Generate ONE unique {track_text} interview question for {level_text} level. Format: Q: [Question] A: [Answer]"

    success_count = 0
    consecutive_failures = 0
    while success_count < count:
        stored = False
        try:
            raw_output = await safe_generate(prompt)
            q_text, a_text = parse_question_output(raw_output)

            if q_text and a_text:
                filename = f"{uuid.uuid4()}.mp3"
                audio_url = await generate_audio(q_text, filename)

                if audio_url:
                    db.collection("questions_pool").add({
                        "session_type": session_type,
                        "track_id": track_id if session_type == 0 else -1,
                        "difficulty": difficulty if session_type == 0 else 0,
                        "questionText": q_text,
                        "questionIdealAnswer": a_text,
                        "audio_url": audio_url,
                        "created_at": firestore.SERVER_TIMESTAMP
                    })
                    success_count += 1
                    stored = True
                    print(f"[{success_count}/{count}] Refilled: {track_text}")
            pause = 2
        except Exception as e:
            print(f"Error in refill: {e}")
            pause = 5

        if stored:
            consecutive_failures = 0
        else:
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                print(f"Refill aborted after {consecutive_failures} consecutive failures "
                      f"({success_count}/{count} stored): {track_text}")
                break

        await asyncio.sleep(pause)
192
+
193
+ # =========================================
194
+ # 7. ENDPOINTS
195
+ # =========================================
196
@app.post("/generate-session")
async def generate_session(request: GenerateSessionRequest, background_tasks: BackgroundTasks):
    """Serve up to 10 pooled questions for a session and schedule a refill.

    Served questions are removed from the pool so the next caller gets
    different ones. A background task then tops the matching pool back up to
    50 questions.

    Raises:
        HTTPException: 400 for an unknown session type, track or difficulty;
            503 when the pool holds no usable question for this request.
    """
    t_id, diff = request.trackName, request.difficultyLevel
    s_type = request.sessionType  # 0: Technical, 1: Behavioral

    # Reject unknown ids up front; otherwise the request ends in a misleading
    # 503 and the refill task is started with ids it has no names for.
    if s_type not in SESSION_TYPE_MAP:
        raise HTTPException(status_code=400, detail="Unknown sessionType.")
    if s_type == 0 and (t_id not in TECH_CATEGORIES or diff not in DIFFICULTY_MAP):
        raise HTTPException(status_code=400, detail="Unknown trackName or difficultyLevel.")

    # Query based on the new session types (0 or 1)
    query = db.collection("questions_pool").where(filter=FieldFilter("session_type", "==", s_type))

    if s_type == 0:  # Technical
        query = query.where(filter=FieldFilter("track_id", "==", t_id)) \
                     .where(filter=FieldFilter("difficulty", "==", diff))

    docs_query = query.limit(10).get()

    final_questions = []
    fetched = 0
    # Collect all deletions in one batch and commit it after the response
    # payload is built, so a failure while reading a document cannot leave
    # questions deleted but never served.
    batch = db.batch()
    for doc in docs_query:
        fetched += 1
        data = doc.to_dict() or {}
        # Every fetched document leaves the pool: good ones to keep questions
        # unique for the next users, malformed ones so they stop reappearing.
        batch.delete(doc.reference)

        q_text = data.get("questionText")
        a_text = data.get("questionIdealAnswer")
        if not q_text or not a_text:
            print(f"Discarding malformed pool document: {doc.id}")
            continue

        final_questions.append({
            "question_id": len(final_questions) + 1,
            "text": q_text,
            "expected_answer": a_text,
            "audio_url": data.get("audio_url", "")
        })

    if fetched:
        batch.commit()

    # Maintenance task to keep the pool full
    async def maintain_stock():
        agg_query = query.count()
        current_count = agg_query.get()[0][0].value

        target = 50
        if current_count < target:
            await refill_specific_pool(t_id, diff, target - current_count, session_type=s_type)

    # Scheduled before the emptiness check on purpose: an empty pool is
    # exactly the case that needs refilling.
    background_tasks.add_task(maintain_stock)

    if not final_questions:
        raise HTTPException(status_code=503, detail="Pool empty for this type.")

    return {"session_id": request.sessionId, "questions": final_questions}
237
+
238
@app.get("/system-cleanup")
async def system_cleanup(background_tasks: BackgroundTasks):
    """Scan the pool and delete every question whose audio URL is missing or empty.

    The scan runs as a background task; this endpoint returns immediately and
    the result is only reported on stdout.
    """
    def run_cleanup():
        print("Starting System Cleanup...")
        pool = db.collection("questions_pool")
        removed = 0

        # Walk every document currently in the pool.
        for snapshot in pool.get():
            # A missing key, None and "" are all falsy, so one check covers them.
            if snapshot.to_dict().get("audio_url"):
                continue
            pool.document(snapshot.id).delete()
            removed += 1

        print(f"Cleanup finished! Deleted {removed} broken questions.")

    background_tasks.add_task(run_cleanup)
    return {"message": "Cleanup started in background. Check your console/logs."}
258
+
259
+
260
@app.post("/cleanup-audio")
async def cleanup_audio(request: CleanupRequest, background_tasks: BackgroundTasks):
    """Delete the given audio files from Cloudinary in the background.

    Each URL is mapped to a public id of the form ``interview_audio/<name>``,
    where ``<name>`` is the last path segment up to its first dot. Deletion is
    best effort: a failure for one URL is printed and the rest still run.
    """
    def delete_job(urls):
        for url in urls:
            public_id = "interview_audio/" + url.split('/')[-1].split('.')[0]
            try:
                cloudinary.uploader.destroy(public_id, resource_type="video")
                print(f"Deleted: {public_id}")
            except Exception as e:
                # Keep going with the remaining URLs, but leave a trace of the
                # failure instead of swallowing it silently.
                print(f"Failed to delete {public_id}: {e}")

    background_tasks.add_task(delete_job, request.audioUrls)
    return {"message": "Cleanup started"}
272
+
273
+ # @app.get("/trigger-full-prefill")
274
+ # async def trigger_full_prefill(background_tasks: BackgroundTasks):
275
+ # """Prefills 30 questions for every track and every difficulty level"""
276
+ # async def full_prefill_task():
277
+ # for t_id in TECH_CATEGORIES.keys():
278
+ # for diff in DIFFICULTY_MAP.keys():
279
+ # print(f"Starting full prefill for Track {t_id}, Level {diff}")
280
+ # await refill_specific_pool(t_id, diff, 30)
281
+
282
+ # background_tasks.add_task(full_prefill_task)
283
+ # return {"message": "Full system prefill started in background (30 questions per track/level)"}
284
+ #?##############################################################################
285
+ # @app.get("/trigger-behavioral-prefill")
286
+ # async def trigger_behavioral_prefill(background_tasks: BackgroundTasks):
287
+ # """Prefills 30 Behavioral questions (No track or difficulty needed)"""
288
+
289
+ # async def run_behavioral_task():
290
+ # print("Starting Behavioral questions prefill...")
291
+ # await refill_specific_pool(track_id=0, difficulty=0, count=30, session_type=2)
292
+ # print("Finished prefilling 30 Behavioral questions!")
293
+
294
+ # background_tasks.add_task(run_behavioral_task)
295
+ # return {"message": "Behavioral prefill (30 questions) started in background."}
296
+
297
@app.get("/health")
async def health():
    """Report that the service is up and whether the Gradio client exists."""
    hf_connected = client is not None
    return {"status": "running", "hf_connected": hf_connected}
299
+
300
+
301
+ #?##########################################################################
302
+
303
+ # @app.get("/final-migration-fix")
304
+ # async def final_migration_fix(background_tasks: BackgroundTasks):
305
+ # def run_fix():
306
+ # print("๐Ÿ”„ Starting Final Data Fix...")
307
+ # docs = db.collection("questions_pool").get()
308
+ # updated_count = 0
309
+
310
+ # for doc in docs:
311
+ # data = doc.to_dict()
312
+ # updates = {}
313
+
314
+ # # 1. Fix session_type (Technical: 0, Behavioral: 1)
315
+ # # If it was 1 (old scheme) make it 0, and if it was 2 (old scheme) make it 1
316
+ # curr_type = data.get("session_type")
317
+ # if curr_type == 1: updates["session_type"] = 0
318
+ # elif curr_type == 2: updates["session_type"] = 1
319
+
320
+ # # 2. Fix difficulty (Easy: 0, Intermediate: 1, Hard: 2)
321
+ # # Old questions used 1, 2 and 3; subtract 1 from each
322
+ # curr_diff = data.get("difficulty")
323
+ # if curr_diff in [1, 2, 3]:
324
+ # updates["difficulty"] = curr_diff - 1
325
+
326
+ # if updates:
327
+ # db.collection("questions_pool").document(doc.id).update(updates)
328
+ # updated_count += 1
329
+
330
+ # print(f"✅ Final Fix Done! Updated {updated_count} questions.")
331
+
332
+ # background_tasks.add_task(run_fix)
333
+ # return {"message": "Final migration started. Your pool will be ready in a minute!"}
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ cloudinary
4
+ firebase-admin
5
+ python-dotenv
6
+ gradio_client
7
+ edge-tts
8
+ pydantic
9
+ google-cloud-firestore