File size: 13,242 Bytes
1bb94e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# =========================================
# 1. IMPORTS
# =========================================
import asyncio
import os
import json
import uuid
import cloudinary
import cloudinary.uploader
import firebase_admin
from firebase_admin import credentials, firestore
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
from gradio_client import Client
from google.cloud.firestore_v1.base_query import FieldFilter
import edge_tts
from dotenv import load_dotenv

# =========================================
# 2. INITIALIZATIONS
# =========================================
# Load environment variables FIRST so FIREBASE_JSON (and the Cloudinary
# credentials below) can come from a local .env file as well as the real
# environment. Previously load_dotenv() ran *after* the Firebase check,
# so a FIREBASE_JSON defined only in .env was silently ignored.
load_dotenv()

if not firebase_admin._apps:
    fb_json = os.getenv("FIREBASE_JSON")

    if fb_json:
        # Credentials supplied inline as a JSON string (e.g. on a PaaS host).
        cred = credentials.Certificate(json.loads(fb_json))
    else:
        # Fall back to a local service-account file for development.
        cred = credentials.Certificate("serviceAccountKey.json")

    firebase_admin.initialize_app(cred)

db = firestore.client()

# Cloudinary Configuration
cloudinary.config(
    cloud_name=os.getenv("CLOUD_NAME"),
    api_key=os.getenv("API_KEY"),
    api_secret=os.getenv("API_SECRET"),
    secure=True
)

app = FastAPI(title="AI Question Service")
HF_SPACE = "Fayza38/Question_and_answer_model"
client = None  # gradio_client.Client; set by the startup event once connected

# =========================================
# 3. MODELS & CONSTANTS
# =========================================
# Track ids used by the pool documents and expected in request bodies.
TECH_CATEGORIES = {
    0: "Security",
    1: "BackEnd",
    2: "Networking",
    3: "FrontEnd",
    4: "DataEngineering",
    5: "WebDevelopment",
    6: "FullStack",
    7: "VersionControl",
    8: "SystemDesign",
    9: "MachineLearning",
    10: "LanguagesAndFrameworks",
    11: "DatabaseSystems",
    12: "ArtificialIntelligence",
    13: "SoftwareTesting",
    14: "DistributedSystems",
    15: "DevOps",
    16: "LowLevelSystems",
    17: "DatabaseAndSql",
    18: "GeneralProgramming",
    19: "DataStructures",
    20: "Algorithms",
}
# Difficulty and session-type codes shared with the client app.
DIFFICULTY_MAP = {0: "Easy", 1: "Intermediate", 2: "Hard"}
SESSION_TYPE_MAP = {0: "Technical", 1: "Behavioral"}

class GenerateSessionRequest(BaseModel):
    """Request body for POST /generate-session."""
    sessionId: str  # client-generated id, echoed back in the response
    sessionType: int  # 0 = Technical, 1 = Behavioral (see SESSION_TYPE_MAP)
    difficultyLevel: int = 0 
    trackName: int  # despite the name, an int key into TECH_CATEGORIES

class CleanupRequest(BaseModel):
    """Request body for POST /cleanup-audio."""
    audioUrls: list[str]  # Cloudinary secure URLs previously returned by generate_audio

# =========================================
# 4. STARTUP EVENT
# =========================================
@app.on_event("startup")
async def startup_event():
    """Connect the module-level Gradio ``client`` to the HF Space.

    Retries up to 5 times with a 10s pause between attempts. If every
    attempt fails, ``client`` stays None (``/health`` reports this and
    ``refill_specific_pool`` waits for it).
    """
    global client
    max_retries = 5
    retry_delay = 10
    print("Connecting to Hugging Face Space...")
    for i in range(max_retries):
        try:
            client = Client(HF_SPACE)
            print("Connected Successfully!")
            break
        except Exception as e:
            # Include the actual error — previously captured but never shown,
            # which made connection failures impossible to diagnose from logs.
            print(f"Connection attempt {i+1} failed: {e}. Retrying in {retry_delay}s...")
            if i < max_retries - 1:
                await asyncio.sleep(retry_delay)
    else:
        # Loop finished without break: all attempts failed.
        print("Could not connect to Hugging Face Space; client remains None.")

# =========================================
# 5. HELPERS
# =========================================
async def generate_audio(text, filename):
    """Synthesize *text* to an mp3 via edge-tts, upload it to Cloudinary,
    and return the secure URL.

    Returns None on any failure. The local temp file is removed in every
    path (previously the cleanup was duplicated in try and except, and a
    failure inside the upload could still leave behavior path-dependent).
    """
    try:
        # Rate -15% makes the voice slightly slower and clearer.
        communicate = edge_tts.Communicate(text, "en-US-GuyNeural", rate="-15%")
        await communicate.save(filename)

        # Cloudinary stores audio under the "video" resource type.
        upload_result = cloudinary.uploader.upload(
            filename,
            resource_type="video",
            folder="interview_audio"
        )
        return upload_result["secure_url"]
    except Exception as e:
        print(f"Audio Generation Error: {e}")
        return None
    finally:
        # Single cleanup point for success and failure alike.
        if os.path.exists(filename):
            os.remove(filename)

async def safe_generate(prompt, retries=3):
    """Call the HF Space's /generate_questions endpoint with retries.

    Runs the blocking gradio call in a worker thread so the event loop
    stays responsive. Raises RuntimeError (a subclass of Exception, so
    existing broad handlers still catch it) if the client never connected;
    re-raises the last prediction error after *retries* attempts.
    """
    if client is None:
        raise RuntimeError("Gradio Client not initialized")
    for attempt in range(retries):
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                None,
                lambda: client.predict(prompt=prompt, api_name="/generate_questions"),
            )
        except Exception:
            if attempt == retries - 1:
                raise  # bare re-raise preserves the original traceback
            await asyncio.sleep(2)

def parse_question_output(raw_output: str):
    """Extract a (question, answer) pair from the model's raw text.

    Expects ``Q: <question> A: <answer>`` optionally preceded by chat
    scaffolding ending in 'assistant' and followed by an '<|im_end|>'
    token. Returns (None, None) when the format is not recognized.
    """
    if not raw_output:
        return None, None
    # Drop any chat-template prefix; keep only the assistant's final turn.
    text = raw_output.split("assistant")[-1].strip() if "assistant" in raw_output else raw_output
    if "Q:" in text and "A:" in text:
        # Split only on the FIRST 'A:' — the old unbounded split truncated
        # any answer that itself contained the substring 'A:'.
        q_part, a_part = text.split("A:", 1)
        q = q_part.replace("Q:", "").strip()
        a = a_part.split("<|im_end|>")[0].strip()
        return q, a
    return None, None

# =========================================
# 6. REFILL & PREFILL LOGIC
# =========================================
async def refill_specific_pool(track_id: int, difficulty: int, count: int, session_type: int = 0):
    """Generate *count* new Q/A pairs (with TTS audio) and add them to the
    Firestore ``questions_pool`` collection.

    session_type 0 = Technical (uses track_id/difficulty); 1 = Behavioral
    (track/difficulty are stored as sentinels -1 / 0). Blocks until the
    Gradio client has connected, and keeps retrying until *count*
    questions have been stored.
    """
    # Wait for the startup event to finish connecting to the HF Space.
    while client is None:
        await asyncio.sleep(5)

    if session_type == 1:
        prompt = ("Generate ONE unique behavioral interview question "
                  "(soft skills, situational). Format: Q: [Question] A: [Answer]")
        track_text = "Behavioral"
        level_text = "General"
    else:
        track_text = TECH_CATEGORIES.get(track_id)
        level_text = DIFFICULTY_MAP.get(difficulty)
        prompt = (f"Generate ONE unique {track_text} interview question for "
                  f"{level_text} level. Format: Q: [Question] A: [Answer]")

    generated = 0
    while generated < count:
        try:
            raw = await safe_generate(prompt)
            question, answer = parse_question_output(raw)
            if not (question and answer):
                continue  # unparseable output: try again immediately

            audio_url = await generate_audio(question, f"{uuid.uuid4()}.mp3")
            if not audio_url:
                continue  # TTS/upload failed: regenerate

            db.collection("questions_pool").add({
                "session_type": session_type,
                "track_id": track_id if session_type == 0 else -1,
                "difficulty": difficulty if session_type == 0 else 0,
                "questionText": question,
                "questionIdealAnswer": answer,
                "audio_url": audio_url,
                "created_at": firestore.SERVER_TIMESTAMP,
            })
            generated += 1
            print(f"[{generated}/{count}] Refilled: {track_text}")
            await asyncio.sleep(2)  # gentle pacing between generations
        except Exception as e:
            print(f"Error in refill: {e}")
            await asyncio.sleep(5)  # back off harder after a hard failure

# =========================================
# 6. ENDPOINTS
# =========================================
@app.post("/generate-session")
async def generate_session(request: GenerateSessionRequest, background_tasks: BackgroundTasks):
    """Pop up to 10 matching questions from the pool for a new session.

    Fetched questions are deleted from the pool so no two sessions ever
    share a question, then a background task tops the pool back up to 50.
    Raises 503 if the pool has no matching questions (the refill task is
    still scheduled so the pool recovers for later requests).
    """
    t_id, diff = request.trackName, request.difficultyLevel
    s_type = request.sessionType # 0: Technical, 1: Behavioral

    # Query based on the new session types (0 or 1)
    query = db.collection("questions_pool").where(filter=FieldFilter("session_type", "==", s_type))
    
    if s_type == 0: # Technical
        # Behavioral questions are not filtered by track/difficulty.
        query = query.where(filter=FieldFilter("track_id", "==", t_id)) \
                    .where(filter=FieldFilter("difficulty", "==", diff))
    
    # Firestore queries are immutable: .limit(10) returns a NEW query, so
    # the unlimited `query` is safely reused by maintain_stock() below.
    docs_query = query.limit(10).get()

    final_questions = []
    for index, doc in enumerate(docs_query, start=1):
        data = doc.to_dict()
        final_questions.append({
            "question_id": index,
            "text": data["questionText"],
            "expected_answer": data["questionIdealAnswer"],
            "audio_url": data.get("audio_url", "")
        })
        # Delete after fetching to ensure questions are unique for next users
        db.collection("questions_pool").document(doc.id).delete()

    # Maintenance task to keep the pool full
    async def maintain_stock():
        # Aggregation count of what is left after the deletions above.
        agg_query = query.count()
        current_count = agg_query.get()[0][0].value
        
        target = 50
        if current_count < target:
            await refill_specific_pool(t_id, diff, target - current_count, session_type=s_type)

    background_tasks.add_task(maintain_stock)

    if not final_questions:
        raise HTTPException(status_code=503, detail="Pool empty for this type.")

    return {"session_id": request.sessionId, "questions": final_questions}

@app.get("/system-cleanup")
async def system_cleanup(background_tasks: BackgroundTasks):
    """Scan and delete all questions with missing or invalid audio URLs"""
    def run_cleanup():
        print("Starting System Cleanup...")
        # Get all documents in the pool
        docs = db.collection("questions_pool").get()
        deleted_count = 0
        
        for doc in docs:
            data = doc.to_dict()
            # Check if audio_url is missing, None, or empty string
            if not data.get("audio_url") or data.get("audio_url") == "":
                db.collection("questions_pool").document(doc.id).delete()
                deleted_count += 1
        
        print(f"Cleanup finished! Deleted {deleted_count} broken questions.")

    background_tasks.add_task(run_cleanup)
    return {"message": "Cleanup started in background. Check your console/logs."}


@app.post("/cleanup-audio")
async def cleanup_audio(request: CleanupRequest, background_tasks: BackgroundTasks):
    def delete_job(urls):
        for url in urls:
            try:
                public_id = "interview_audio/" + url.split('/')[-1].split('.')[0]
                cloudinary.uploader.destroy(public_id, resource_type="video")
                print(f"Deleted: {public_id}")
            except Exception: pass

    background_tasks.add_task(delete_job, request.audioUrls)
    return {"message": "Cleanup started"}

# @app.get("/trigger-full-prefill")
# async def trigger_full_prefill(background_tasks: BackgroundTasks):
#     """Prefills 30 questions for every track and every difficulty level"""
#     async def full_prefill_task():
#         for t_id in TECH_CATEGORIES.keys():
#             for diff in DIFFICULTY_MAP.keys():
#                 print(f"Starting full prefill for Track {t_id}, Level {diff}")
#                 await refill_specific_pool(t_id, diff, 30)
    
#     background_tasks.add_task(full_prefill_task)
#     return {"message": "Full system prefill started in background (30 questions per track/level)"}
#?##############################################################################
# @app.get("/trigger-behavioral-prefill")
# async def trigger_behavioral_prefill(background_tasks: BackgroundTasks):
#     """Prefills 30 Behavioral questions (No track or difficulty needed)"""
    
#     async def run_behavioral_task():
#         print("Starting Behavioral questions prefill...")
#         await refill_specific_pool(track_id=0, difficulty=0, count=30, session_type=2)
#         print("Finished prefilling 30 Behavioral questions!")

#     background_tasks.add_task(run_behavioral_task)
#     return {"message": "Behavioral prefill (30 questions) started in background."}

@app.get("/health")
async def health(): return {"status": "running", "hf_connected": client is not None}


#?##########################################################################

# @app.get("/final-migration-fix")
# async def final_migration_fix(background_tasks: BackgroundTasks):
#     def run_fix():
#         print("๐Ÿ”„ Starting Final Data Fix...")
#         docs = db.collection("questions_pool").get()
#         updated_count = 0
        
#         for doc in docs:
#             data = doc.to_dict()
#             updates = {}
            
#             # 1. ุชุตุญูŠุญ ุงู„ู€ session_type (Technical: 0, Behavioral: 1)
#             # ู„ูˆ ูƒุงู† 1 (ู‚ุฏูŠู…) ุฎู„ูŠู‡ 0ุŒ ูˆู„ูˆ ูƒุงู† 2 (ู‚ุฏูŠู…) ุฎู„ูŠู‡ 1
#             curr_type = data.get("session_type")
#             if curr_type == 1: updates["session_type"] = 0
#             elif curr_type == 2: updates["session_type"] = 1
            
#             # 2. ุชุตุญูŠุญ ุงู„ู€ difficulty (Easy: 0, Intermediate: 1, Hard: 2)
#             # ุงู„ุฃุณุฆู„ุฉ ุงู„ู‚ุฏูŠู…ุฉ ูƒุงู†ุช 1 ูˆ 2 ูˆ 3ุŒ ู‡ู†ู†ู‚ุต ู…ู†ู‡ุง 1
#             curr_diff = data.get("difficulty")
#             if curr_diff in [1, 2, 3]:
#                 updates["difficulty"] = curr_diff - 1
            
#             if updates:
#                 db.collection("questions_pool").document(doc.id).update(updates)
#                 updated_count += 1
                
#         print(f"โœ… Final Fix Done! Updated {updated_count} questions.")

#     background_tasks.add_task(run_fix)
#     return {"message": "Final migration started. Your pool will be ready in a minute!"}