File size: 28,975 Bytes
e224520
 
b0eb554
8957c21
65d6cde
c1ae0f3
e224520
 
8957c21
fb65789
 
 
 
 
 
 
8957c21
e224520
8957c21
 
65d6cde
 
 
8957c21
 
 
 
 
 
 
 
 
e224520
 
 
 
 
 
 
 
 
 
aaaa2c9
e224520
65d6cde
 
 
c7e364e
 
 
 
65d6cde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e364e
 
 
 
65d6cde
c7e364e
65d6cde
 
 
 
 
c7e364e
65d6cde
c7e364e
 
bf83ebc
c7e364e
65d6cde
 
c7e364e
 
65d6cde
 
 
 
 
 
 
 
 
 
 
 
 
 
8957c21
 
ac90996
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e224520
8957c21
2e148ba
e224520
f2963eb
e224520
2e148ba
 
 
 
 
f2963eb
2e148ba
 
 
f2963eb
2e148ba
 
 
 
f2963eb
2e148ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e83e00b
 
2e148ba
 
e83e00b
 
 
 
2e148ba
 
 
 
 
 
 
 
 
 
 
e83e00b
 
 
2e148ba
 
 
e83e00b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e148ba
 
 
 
 
 
 
 
 
 
e224520
ac90996
8957c21
e224520
ac90996
 
 
 
 
f2963eb
 
 
8957c21
e224520
 
f2963eb
 
ac90996
f2963eb
e224520
 
 
f2963eb
8957c21
e224520
 
 
8957c21
 
f2963eb
8957c21
 
f2963eb
ac90996
f2963eb
 
 
 
 
e224520
 
 
8957c21
f2963eb
 
e224520
 
8957c21
 
e224520
ac90996
 
 
 
 
8957c21
e224520
ac90996
e224520
 
 
 
 
 
 
 
 
 
8957c21
e224520
 
 
 
 
 
8957c21
 
 
ac90996
 
e224520
 
 
 
8957c21
e224520
 
8957c21
 
e224520
ac90996
 
 
 
 
8957c21
e224520
 
 
 
 
8957c21
e224520
 
8957c21
 
 
 
 
ac90996
e224520
 
 
8957c21
e224520
 
 
 
 
 
ac90996
bf83ebc
daa9a7e
bf83ebc
 
 
e224520
 
 
 
 
 
ac90996
 
 
 
 
 
 
 
 
7ac6a18
 
ac90996
 
 
 
65d6cde
 
 
 
 
 
 
 
ac90996
e224520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ac6a18
e224520
 
 
 
 
 
 
 
 
 
 
 
ac90996
e224520
8957c21
 
 
e224520
 
8957c21
e224520
8957c21
e224520
 
8957c21
e224520
 
8957c21
e224520
 
 
c1ae0f3
 
 
 
e224520
8957c21
e224520
 
8957c21
 
e224520
99558d9
 
65d6cde
e224520
 
 
99558d9
e224520
 
99558d9
 
 
 
 
 
e224520
8957c21
e224520
c1ae0f3
8957c21
e224520
8957c21
e224520
5482be1
bfb9606
fb65789
 
f9a514e
fb65789
e224520
 
 
 
 
 
 
 
 
 
 
 
bfb9606
e224520
 
 
 
 
 
 
ac90996
c1ae0f3
e224520
 
 
ac90996
 
 
7ac6a18
ac90996
 
 
8957c21
e224520
8957c21
 
e224520
 
 
 
 
8957c21
e224520
 
 
 
 
ac90996
 
 
7ac6a18
ac90996
 
 
8957c21
e224520
8957c21
 
e224520
 
 
 
 
8957c21
e224520
 
 
 
 
ac90996
 
 
7ac6a18
ac90996
 
 
e224520
8957c21
e224520
 
 
 
 
 
 
 
8957c21
e224520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
import gradio as gr
import csv
import os
import re
from datetime import datetime, timedelta
from huggingface_hub import Repository
from RAG_Learning_Assistant_with_Streaming import RAGLearningAssistant

# Configuration for Student Space
# find name of space
def get_space_name():
    """Return the short name of the Space this app runs in.

    Reads the ``SPACE_ID`` environment variable, which usually looks like
    "username/space-name", and returns only the part after the last "/".

    Returns:
        str | None: the trailing path segment, or ``None`` when ``SPACE_ID``
        is unset or empty.
    """
    full_id = os.environ.get("SPACE_ID")
    if not full_id:
        return None
    # rpartition gives the text after the last "/" (or the whole value when
    # there is no "/"), which matches split("/")[-1].
    return full_id.rpartition("/")[2]
# Short name of this Space. get_space_name() returns None when SPACE_ID is
# unset or empty, in which case the f-strings below render the text "None".
STUDENT_SPACE_NAME = get_space_name()  # get space name automatically
DATA_STORAGE_REPO = "CIV3283/Data_Storage"  # Centralized data storage repo
DATA_BRANCH_NAME = "data_branch"  # Revision of the data storage repo that is cloned
LOCAL_DATA_DIR = "temp_data_repo"  # Local clone directory; the log files are written here

# Session timeout configuration (in minutes).
# check_session_validity() blocks a different student while the last logged
# query is no older than this many minutes.
SESSION_TIMEOUT_MINUTES = 30  # Adjust this value as needed

# File names in data storage.
# The first three are checked for existence by init_data_storage_repo();
# VECTORIZER_FILE is not referenced elsewhere in this section of the file.
KNOWLEDGE_FILE = "knowledge_base.md"
VECTOR_DB_FILE = "vector_database.csv"
METADATA_FILE = "vector_metadata.json"
VECTORIZER_FILE = "vectorize_knowledge_base.py"

# Student-specific log files (with space name prefix), stored inside LOCAL_DATA_DIR
QUERY_LOG_FILE = f"{STUDENT_SPACE_NAME}_query_log.csv"
FEEDBACK_LOG_FILE = f"{STUDENT_SPACE_NAME}_feedback_log.csv"

# Environment variables.
# Both secrets are mandatory: importing this module raises ValueError when
# either one is missing.
HF_HUB_TOKEN = os.environ.get("HF_HUB_TOKEN", None)
if HF_HUB_TOKEN is None:
    raise ValueError("Set HF_HUB_TOKEN in Space Settings -> Secrets")

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)
if OPENAI_API_KEY is None:
    raise ValueError("Set OPENAI_API_KEY in Space Settings -> Secrets")

# Model identifier passed to RAGLearningAssistant in main()
MODEL = "gpt-4.1-nano-2025-04-14"

def check_session_validity(check_id):
    """Decide whether ``check_id`` may use the assistant right now.

    The decision is based on the last row of the query log
    (``LOCAL_DATA_DIR/QUERY_LOG_FILE``), whose columns are
    ``[student_space, student_id, timestamp, ...]``:

    1. No log file, only a header, or a last row with fewer than three
       columns -> allow.
    2. The last row's student ID equals ``check_id`` -> allow (same user
       continues).
    3. Otherwise compare the last timestamp with the current local time:
       - at most ``SESSION_TIMEOUT_MINUTES`` old -> block (another user was
         active recently);
       - older -> allow (the assistant has been idle).

    The check fails open: an unparsable timestamp or any unexpected error
    allows the query, so a damaged log never locks students out.

    Args:
        check_id: student identifier taken from the ``check`` URL parameter.

    Returns:
        tuple: ``(is_valid: bool, error_message: str)``; the message is empty
        whenever ``is_valid`` is True.
    """
    try:
        filepath = os.path.join(LOCAL_DATA_DIR, QUERY_LOG_FILE)

        # If no log file exists, this is the first query - allow it
        if not os.path.exists(filepath):
            print(f"[check_session_validity] No existing log file, allowing first query for student {check_id}")
            return True, ""

        # Only the row count and the final row are needed, so stream the file
        # instead of loading every row. newline='' is what the csv module
        # requires so quoted fields containing line breaks (the logged
        # responses) are parsed as a single record.
        row_count = 0
        last_record = None
        with open(filepath, 'r', newline='', encoding='utf-8') as csvfile:
            for row in csv.reader(csvfile):
                row_count += 1
                last_record = row

        # If only header exists, this is effectively the first query
        if row_count <= 1:
            print(f"[check_session_validity] Only header in log file, allowing first query for student {check_id}")
            return True, ""

        # CSV format: [student_space, student_id, timestamp, search_info, query_and_response, thumb_feedback]
        if len(last_record) < 3:
            print("[check_session_validity] Invalid last record format, allowing query")
            return True, ""

        last_student_id = last_record[1]
        last_timestamp_str = last_record[2]

        print(f"[check_session_validity] Last record - Student ID: {last_student_id}, Timestamp: {last_timestamp_str}")
        print(f"[check_session_validity] Current request - Student ID: {check_id}")

        # Same student as the previous query: always allowed to continue
        if last_student_id == check_id:
            print(f"[check_session_validity] Same user, allowing continuation for student {check_id}")
            return True, ""

        # Different student: decide by how long ago the last query was logged
        try:
            last_timestamp = datetime.strptime(last_timestamp_str, '%Y-%m-%d %H:%M:%S')
        except ValueError as e:
            print(f"[check_session_validity] Error parsing timestamp: {e}")
            # If we can't parse the timestamp, allow the query to proceed
            return True, ""

        # Timestamps are written with datetime.now(), so compare in the same
        # naive local time.
        time_diff = datetime.now() - last_timestamp
        idle_seconds = time_diff.total_seconds()
        idle_minutes = idle_seconds / 60

        print(f"[check_session_validity] Different user - Time difference: {idle_seconds} seconds ({idle_minutes:.1f} minutes)")

        # Recent activity by someone else: block this request
        if time_diff <= timedelta(minutes=SESSION_TIMEOUT_MINUTES):
            error_msg = "⚠️ The assistant is currently being used by another user. Please return to the load distributor page."
            print(f"[check_session_validity] Blocking access - Previous user ({last_student_id}) used assistant {idle_minutes:.1f} minutes ago")
            return False, error_msg

        # The assistant has been idle long enough for a new user
        print(f"[check_session_validity] Assistant has been idle for {idle_minutes:.1f} minutes, allowing new user {check_id}")
        return True, ""

    except Exception as e:
        print(f"[check_session_validity] Error checking session validity: {e}")
        import traceback
        print(f"[check_session_validity] Traceback: {traceback.format_exc()}")
        # On error, allow the query to proceed to avoid blocking legitimate users
        return True, ""

def init_data_storage_repo():
    """Clone/open the centralized data storage repository and sync it.

    Clones ``DATA_STORAGE_REPO`` at ``DATA_BRANCH_NAME`` into
    ``LOCAL_DATA_DIR``, sets the git identity used for log commits, pulls the
    latest changes with rebase, and reports whether the knowledge-base files
    are present (missing files only produce a warning).

    Returns:
        Repository | None: the repository handle, or ``None`` when any step
        fails (the error and traceback are printed).
    """
    try:
        data_repo = Repository(
            local_dir=LOCAL_DATA_DIR,
            clone_from=DATA_STORAGE_REPO,
            revision=DATA_BRANCH_NAME,
            repo_type="space",
            use_auth_token=HF_HUB_TOKEN
        )
        # Configure git user. The method signature is
        # (git_user, git_email): the values must be passed as those two
        # arguments, not as ("git_user", value) / ("git_email", value) pairs,
        # which would store the literal key names as the identity.
        data_repo.git_config_username_and_email(
            git_user=f"Student_Space_{STUDENT_SPACE_NAME}",
            git_email=f"{STUDENT_SPACE_NAME}@student.space",
        )

        # Pull latest changes
        print(f"[init_data_storage_repo] Pulling latest changes from {DATA_STORAGE_REPO}...")
        data_repo.git_pull(rebase=True)

        print(f"[init_data_storage_repo] Successfully connected to data storage repo: {DATA_STORAGE_REPO}")
        print(f"[init_data_storage_repo] Local directory: {LOCAL_DATA_DIR}")
        print(f"[init_data_storage_repo] Branch: {DATA_BRANCH_NAME}")

        # Check if required files exist (informational only)
        for file_name in (KNOWLEDGE_FILE, VECTOR_DB_FILE, METADATA_FILE):
            if os.path.exists(os.path.join(LOCAL_DATA_DIR, file_name)):
                print(f"[init_data_storage_repo] Found required file: {file_name}")
            else:
                print(f"[init_data_storage_repo] Warning: Missing required file: {file_name}")

        return data_repo

    except Exception as e:
        print(f"[init_data_storage_repo] Error initializing repository: {e}")
        import traceback
        print(f"[init_data_storage_repo] Traceback: {traceback.format_exc()}")
        return None

def _abort_pending_rebase():
    """Best-effort ``git rebase --abort`` inside the local data checkout."""
    import subprocess
    try:
        # check=False: git exits non-zero when no rebase is in progress,
        # which is the normal case and not an error here.
        subprocess.run(
            ["git", "rebase", "--abort"],
            cwd=LOCAL_DATA_DIR,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as abort_error:
        print(f"[commit_student_logs] Warning: Could not abort rebase: {abort_error}")


def commit_student_logs(commit_message: str):
    """Commit the student log files and push them to the data storage repo.

    The log files are staged and committed locally exactly once. Only the
    network part (``pull --rebase`` followed by ``push``) is retried, up to
    three times with a growing delay, when git reports that the remote moved
    ("rejected ... fetch first") or that the pull could not start
    ("cannot pull with rebase").

    The local commit is never reset: a hard reset would throw away the log
    row that was just written. If every attempt fails the commit stays in the
    local checkout and is pushed together with the next successful call.
    After any failed attempt a half-finished rebase is aborted so the
    checkout stays usable.

    Args:
        commit_message: message for the local commit.

    Returns:
        bool: True when the changes were pushed or there was nothing to
        commit; False when the repository is not initialized, no log file
        exists, or committing/pushing failed.
    """
    import subprocess
    import time

    if repo is None:
        print("[commit_student_logs] Error: Repository not initialized")
        return False

    # Only stage log files that actually exist
    files_to_add = []
    query_log_path = os.path.join(LOCAL_DATA_DIR, QUERY_LOG_FILE)
    if os.path.exists(query_log_path):
        files_to_add.append(QUERY_LOG_FILE)
        print(f"[commit_student_logs] Found query log: {query_log_path}")

    feedback_log_path = os.path.join(LOCAL_DATA_DIR, FEEDBACK_LOG_FILE)
    if os.path.exists(feedback_log_path):
        files_to_add.append(FEEDBACK_LOG_FILE)
        print(f"[commit_student_logs] Found feedback log: {feedback_log_path}")

    if not files_to_add:
        print("[commit_student_logs] No log files to commit")
        return False

    # Stage and commit once. This is deliberately outside the retry loop:
    # after a successful commit the work tree is clean, so re-running this
    # step would wrongly report "nothing to commit" without ever pushing.
    try:
        for file_name in files_to_add:
            print(f"[commit_student_logs] Adding file: {file_name}")
            repo.git_add(pattern=file_name)

        # Check if there are changes to commit
        try:
            result = subprocess.run(
                ["git", "status", "--porcelain"],
                cwd=LOCAL_DATA_DIR,
                capture_output=True,
                text=True,
                check=True
            )
        except Exception as status_error:
            # Not fatal: fall through and let the commit itself decide
            print(f"[commit_student_logs] Warning: Could not check git status: {status_error}")
        else:
            if not result.stdout.strip():
                print("[commit_student_logs] No changes to commit")
                return True
            print(f"[commit_student_logs] Changes detected: {result.stdout.strip()}")

        print(f"[commit_student_logs] Committing locally: {commit_message}")
        repo.git_commit(commit_message)
    except Exception as e:
        print(f"[commit_student_logs] Non-conflict error, not retrying: {e}")
        return False

    max_retries = 3
    for attempt in range(1, max_retries + 1):
        try:
            print(f"[commit_student_logs] Attempt {attempt}/{max_retries}: Pulling latest changes...")
            repo.git_pull(rebase=True)

            print("[commit_student_logs] Pushing to remote...")
            repo.git_push()

            print(f"[commit_student_logs] Success: {commit_message}")
            return True

        except Exception as e:
            error_msg = str(e)
            print(f"[commit_student_logs] Attempt {attempt} failed: {error_msg}")

            # Never leave the checkout in the middle of a rebase, whatever
            # the failure was.
            _abort_pending_rebase()

            # Push rejected because the remote moved, or pull refused to start
            is_conflict = (
                ("rejected" in error_msg and "fetch first" in error_msg)
                or "cannot pull with rebase" in error_msg
            )
            if not is_conflict:
                print(f"[commit_student_logs] Non-conflict error, not retrying: {error_msg}")
                return False

            print("[commit_student_logs] Detected Git conflict, will retry...")
            if attempt == max_retries:
                break

            # Back off so concurrent writers are less likely to collide again
            wait_time = attempt * 2  # 2, 4 seconds
            print(f"[commit_student_logs] Waiting {wait_time} seconds before retry...")
            time.sleep(wait_time)

    print("[commit_student_logs] Max retries reached, giving up")
    return False

def save_student_query_to_csv(query, search_info, response, check_id, thumb_feedback=None):
    """Append one query/response record to the query log and commit it.

    The record is written to ``LOCAL_DATA_DIR/QUERY_LOG_FILE`` (a header row
    is added when the file is new) and then handed to
    ``commit_student_logs``.

    Args:
        query: the student's question.
        search_info: search analysis text shown next to the answer.
        response: the assistant's answer text.
        check_id: student identifier; a falsy value aborts the save.
        thumb_feedback: optional initial feedback value, stored as "" when
            falsy.

    Returns:
        bool: True once the row is written locally, even if the remote
        commit fails; False when ``check_id`` is missing or an error occurs.
    """
    try:
        # A record without a student ID is never written
        if not check_id:
            print("[save_student_query_to_csv] Error: No valid check_id provided")
            return False

        # Make sure the target directory is there before opening the file
        os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

        log_path = os.path.join(LOCAL_DATA_DIR, QUERY_LOG_FILE)
        already_there = os.path.isfile(log_path)

        print(f"[save_student_query_to_csv] Saving to: {log_path}")
        print(f"[save_student_query_to_csv] File exists: {already_there}")
        print(f"[save_student_query_to_csv] Student ID: {check_id}")

        with open(log_path, 'a', newline='', encoding='utf-8') as handle:
            out = csv.writer(handle)
            if not already_there:
                # A brand-new file starts with the column names
                print("[save_student_query_to_csv] Writing header row")
                out.writerow(['student_space', 'student_id', 'timestamp', 'search_info', 'query_and_response', 'thumb_feedback'])

            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            record = [
                STUDENT_SPACE_NAME,
                check_id,
                timestamp,
                search_info,
                f"Query: {query}\nResponse: {response}",
                thumb_feedback or "",
            ]
            out.writerow(record)

        print(f"[save_student_query_to_csv] Query saved to local file: {log_path}")

        # Push the updated log; a failed push does not undo the local save
        print("[save_student_query_to_csv] Attempting to commit to remote repository...")
        if commit_student_logs(f"Add query log from student {check_id} at {timestamp}"):
            print("[save_student_query_to_csv] Successfully committed to remote repository")
        else:
            print("[save_student_query_to_csv] Failed to commit to remote repository")

        return True
    except Exception as e:
        print(f"[save_student_query_to_csv] Error: {e}")
        import traceback
        print(f"[save_student_query_to_csv] Traceback: {traceback.format_exc()}")
        return False

def update_latest_student_query_feedback(feedback_type, check_id):
    """Store thumb feedback on the most recent row of the query log.

    Reads ``LOCAL_DATA_DIR/QUERY_LOG_FILE``, sets the ``thumb_feedback``
    column (index 5) of the last row, rewrites the file and commits it.

    Args:
        feedback_type: value to store, e.g. "thumbs_up" or "thumbs_down".
        check_id: student identifier; used for validation and in the commit
            message. The row that is updated is always the last one in the
            file.

    Returns:
        bool: True when the row was updated locally (the commit result is
        not checked); False when ``check_id`` is missing, the log is absent,
        has no data row, the last row is malformed, or an error occurs.
    """
    thumb_column = 5  # index of 'thumb_feedback' in the query log header

    try:
        # Validate check_id
        if not check_id:
            print("[update_latest_student_query_feedback] Error: No valid check_id provided")
            return False

        filepath = os.path.join(LOCAL_DATA_DIR, QUERY_LOG_FILE)
        if not os.path.exists(filepath):
            print("[update_latest_student_query_feedback] Error: Query log file not found")
            return False

        # newline='' on BOTH the read and the write: the whole file is
        # rewritten below, and reading in universal-newline mode would
        # silently change line endings embedded in every stored response.
        with open(filepath, 'r', newline='', encoding='utf-8') as csvfile:
            rows = list(csv.reader(csvfile))

        # Need at least one data row beyond the header
        if len(rows) <= 1:
            print("[update_latest_student_query_feedback] Error: No query rows to update")
            return False

        latest_row = rows[-1]
        if len(latest_row) <= thumb_column:
            # Refuse to touch a malformed row instead of failing on the index
            print("[update_latest_student_query_feedback] Error: Last row has no thumb_feedback column")
            return False

        latest_row[thumb_column] = feedback_type

        # Write back to file
        with open(filepath, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(rows)

        print(f"[update_latest_student_query_feedback] Updated feedback: {feedback_type}")

        # Commit the update
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        commit_student_logs(f"Update feedback from student {check_id}: {feedback_type} at {timestamp}")
        return True
    except Exception as e:
        print(f"[update_latest_student_query_feedback] Error: {e}")
        return False

def save_student_comment_feedback(comment, check_id):
    """Append a free-text comment to the feedback log and commit it.

    The comment is written to ``LOCAL_DATA_DIR/FEEDBACK_LOG_FILE`` with the
    columns ``student_space, student_id, timestamp, comment`` (a header row
    is added when the file is new), then ``commit_student_logs`` is called.

    Args:
        comment: the comment text.
        check_id: student identifier; a falsy value aborts the save.

    Returns:
        bool: True once the row is written locally (the commit result is not
        checked); False when ``check_id`` is missing or an error occurs.
    """
    try:
        # Validate check_id
        if not check_id:
            print("[save_student_comment_feedback] Error: No valid check_id provided")
            return False

        # Same guard as save_student_query_to_csv: the directory may not
        # exist yet, and open() in append mode does not create it.
        os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

        filepath = os.path.join(LOCAL_DATA_DIR, FEEDBACK_LOG_FILE)
        file_exists = os.path.isfile(filepath)

        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        with open(filepath, 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            if not file_exists:
                writer.writerow(['student_space', 'student_id', 'timestamp', 'comment'])
            writer.writerow([STUDENT_SPACE_NAME, check_id, timestamp, comment])

        print(f"[save_student_comment_feedback] Saved comment to {filepath}")

        # Commit student logs
        commit_student_logs(f"Add comment feedback from student {check_id} at {timestamp}")

        return True
    except Exception as e:
        print(f"[save_student_comment_feedback] Error: {e}")
        return False

def get_url_params(request: gr.Request):
    """Return the page title and the ``check`` URL parameter.

    Args:
        request: the incoming Gradio request; may be falsy.

    Returns:
        tuple: ``(title, check_id)``. The title is always the same text;
        ``check_id`` is the value of the ``check`` query parameter, or
        ``None`` when the request is falsy or the parameter is missing or
        empty.
    """
    title = "RAG Learning Assistant - Student"
    if not request:
        return title, None

    params = dict(request.query_params)
    # A missing or empty 'check' value is reported as None
    return title, params.get('check', None) or None

def chat_response(message, history, search_info_display, check_id, has_query):
    """Stream the assistant's answer to ``message`` into the chat history.

    This is a generator: every update, including the final one, is yielded
    as ``(history, search_info, has_query)``. Values passed to ``return`` in
    a generator are discarded by the caller, so nothing is returned.

    Flow:
        1. An empty/whitespace message yields the inputs unchanged and stops.
        2. A missing ``check_id`` or an invalid session raises ``gr.Error``.
        3. Chunks from ``assistant.generate_response_stream`` are appended to
           the search-info text up to and including the chunk that contains
           "**Response:**"; every later chunk is appended to the assistant
           message.
        4. After streaming, the exchange is saved with
           ``save_student_query_to_csv``; on success ``has_query`` becomes
           True and is yielded with the final state.

    Args:
        message: text entered by the student.
        history: chat history, either messages-format dicts or legacy
            ``[user, assistant]`` pairs (converted to messages format).
        search_info_display: current search-info text; only echoed back for
            an empty message.
        check_id: student identifier from the URL.
        has_query: whether a rateable query already exists.

    Yields:
        tuple: ``(history, search_info_content, has_query)``.

    Raises:
        gr.Error: when ``check_id`` is missing or the session is not valid.
    """
    if not message or not message.strip():
        # Nothing to send: hand the current values back unchanged
        yield history, search_info_display, has_query
        return

    # Check access permission first
    if not check_id:
        print("[chat_response] Access denied: No valid check ID provided")
        # Raise error dialog for access denial
        raise gr.Error(
            "⚠️ Access Restricted\n\n"
            "Please access this system through the link provided in Moodle.\n\n"
            "If you are a student in this course:\n"
            "1. Go to your Moodle course page\n"
            "2. Find the 'CivASK' link\n"
            "3. Click the link to access the system\n\n"
            "If you continue to experience issues, please contact your instructor.",
            duration=8
        )

    # Check session validity before proceeding
    session_valid, error_message = check_session_validity(check_id)
    if not session_valid:
        print(f"[chat_response] Session invalid for student {check_id}")
        raise gr.Error(error_message, duration=10)

    # Valid access and valid session - proceed with normal AI conversation
    print(f"[chat_response] Valid access and session for student ID: {check_id}")

    if history is None:
        history = []
    elif history and isinstance(history[0], list):
        # Convert legacy [user, assistant] pairs to messages format
        messages_history = []
        for user_msg, assistant_msg in history:
            messages_history.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages_history.append({"role": "assistant", "content": assistant_msg})
        history = messages_history

    # Add the user message and an empty assistant message to stream into
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ""})

    search_info_collected = False
    search_info_content = ""
    content_part = ""

    # Process streaming response
    for chunk in assistant.generate_response_stream(message):
        if search_info_collected:
            content_part += chunk
            # Update the last assistant message
            history[-1]["content"] = content_part
        else:
            search_info_content += chunk
            # The chunk carrying the marker is the last search-info chunk
            if "**Response:**" in chunk:
                search_info_collected = True
        yield history, search_info_content, has_query

    # After streaming is complete, save to CSV (only for valid access)
    try:
        print("[chat_response] Saving student query to CSV...")
        print(f"Student Space: {STUDENT_SPACE_NAME}")
        print(f"Student ID: {check_id}")
        print(f"Query: {message}")

        if save_student_query_to_csv(message, search_info_content, content_part, check_id):
            print("[chat_response] Student query saved successfully")
            has_query = True  # Mark that we have a query to rate
        else:
            print("[chat_response] Failed to save student query")

    except Exception as e:
        print(f"[chat_response] Error saving student query: {e}")

    # Emit the final state so the updated has_query reaches the UI state
    yield history, search_info_content, has_query

# Global variables
# Placeholders assigned by main(): `repo` is read by commit_student_logs()
# and `assistant` by chat_response(). Both stay None until main() has run.
repo = None
assistant = None

def main():
    """Initialize the data repo and the RAG assistant, then launch the UI.

    Assigns the module-level ``repo`` and ``assistant`` globals, builds the
    Gradio Blocks interface (chat, thumb feedback, comments, search info),
    wires the event handlers and calls ``interface.launch()``.
    """
    global repo, assistant

    # Initialize data storage repository connection
    repo = init_data_storage_repo()
    if repo is None:
        # The app still starts, but commit_student_logs() will refuse to push
        print("[main] Warning: data storage repository not initialized; logs will not be committed")

    # Initialize RAG assistant with centralized data storage directory
    print(f"[main] Initializing RAG assistant with data directory: {LOCAL_DATA_DIR}")
    print(f"[main] Session timeout set to: {SESSION_TIMEOUT_MINUTES} minutes")
    assistant = RAGLearningAssistant(
        api_key=OPENAI_API_KEY,
        model=MODEL,
        vector_db_path=LOCAL_DATA_DIR  # Pass the data storage repo directory
    )

    print("[main] RAG assistant initialized successfully")
    print(f"[main] Student space: {STUDENT_SPACE_NAME}")
    print(f"[main] Data storage repo: {DATA_STORAGE_REPO}")
    print(f"[main] Query log file: {QUERY_LOG_FILE}")
    print(f"[main] Feedback log file: {FEEDBACK_LOG_FILE}")

    # Raw strings for the backslash delimiters: "\(" and "\[" are invalid
    # escape sequences in a normal string literal.
    latex_delimiters = [
        {"left": "$$", "right": "$$", "display": True},
        {"left": r"\(", "right": r"\)", "display": False},
        {"left": "$", "right": "$", "display": False},
        {"left": r"\[", "right": r"\]", "display": True},
    ]

    # Create interface
    with gr.Blocks(title=f"RAG Assistant - {STUDENT_SPACE_NAME}") as interface:
        check_id_state = gr.State("1")
        has_query_state = gr.State(False)  # Track if there's a query to rate
        title_display = gr.Markdown(f"# RAG Learning Assistant - {STUDENT_SPACE_NAME}", elem_id="title")

        # Only Query Check functionality for students
        with gr.Row():
            with gr.Column(scale=4):
                chatbot = gr.Chatbot(
                    label="Ask Your Questions",
                    height=500,
                    resizable=True,
                    type="messages",
                    render_markdown=True,
                    latex_delimiters=latex_delimiters,
                )
                msg = gr.Textbox(placeholder="Type your message here...", label="Your Message", show_label=True)

                # Feedback buttons row
                with gr.Row():
                    thumbs_up_btn = gr.Button("👍 Good Answer", variant="secondary", size="sm")
                    thumbs_down_btn = gr.Button("👎 Poor Answer", variant="secondary", size="sm")

                feedback_status = gr.Textbox(label="Feedback Status", interactive=False, lines=1)

                # Comment section
                with gr.Row():
                    comment_input = gr.Textbox(placeholder="Share your comments or suggestions...", label="Comments", lines=2)
                    submit_comment_btn = gr.Button("Submit Comment", variant="primary")

            with gr.Column(scale=1):
                search_info = gr.Markdown(label="Search Analysis Information", value="")

        # Event handlers
        def init_from_url(request: gr.Request):
            """Read the student ID from the URL when the page loads."""
            title, check_id = get_url_params(request)
            print(f"[init_from_url] Extracted check_id: {check_id}")
            return f"# {title}", check_id, False  # Reset has_query state

        def rate_latest_query(handler_name, feedback_type, thanks_message, check_id):
            """Shared body of the thumbs-up / thumbs-down handlers."""
            if not check_id:
                raise gr.Error(
                    "⚠️ Access Restricted\n\n"
                    "Please access this system through the CivASK link provided in Moodle to use the feedback features.",
                    duration=5
                )

            print(f"[{handler_name}] Student: {STUDENT_SPACE_NAME}, check_id: {check_id}")

            # Only rate when the log holds a header plus at least one data row
            filepath = os.path.join(LOCAL_DATA_DIR, QUERY_LOG_FILE)
            if not os.path.exists(filepath):
                return "No query to rate yet"

            with open(filepath, 'r', newline='', encoding='utf-8') as csvfile:
                row_count = sum(1 for _ in csv.reader(csvfile))
            # The file is closed here, before the update rewrites it
            if row_count <= 1:
                return "No query to rate yet"

            if update_latest_student_query_feedback(feedback_type, check_id):
                return thanks_message
            return "Failed to save feedback"

        # Feedback handlers (has_query is received from the UI state but not used)
        def handle_thumbs_up(check_id, has_query):
            return rate_latest_query(
                "handle_thumbs_up",
                "thumbs_up",
                "👍 Thank you for your positive feedback!",
                check_id,
            )

        def handle_thumbs_down(check_id, has_query):
            return rate_latest_query(
                "handle_thumbs_down",
                "thumbs_down",
                "👎 Thank you for your feedback. We'll work to improve!",
                check_id,
            )

        def handle_comment_submission(comment, check_id):
            """Save a comment; returns (status text, new comment box value)."""
            if not check_id:
                raise gr.Error(
                    "⚠️ Access Restricted\n\n"
                    "Please access this system through the CivASK link provided in Moodle to submit comments.",
                    duration=5
                )

            if not comment.strip():
                return "Please enter a comment", comment

            if save_student_comment_feedback(comment.strip(), check_id):
                # Clear the comment box after a successful save
                return "💬 Thank you for your comment!", ""
            return "Failed to save comment", comment

        interface.load(fn=init_from_url, outputs=[title_display, check_id_state, has_query_state])

        # Query events: stream the answer, then clear the input box
        msg.submit(
            chat_response,
            [msg, chatbot, search_info, check_id_state, has_query_state],
            [chatbot, search_info, has_query_state]
        ).then(lambda: "", outputs=[msg])

        # Feedback events
        thumbs_up_btn.click(
            handle_thumbs_up,
            inputs=[check_id_state, has_query_state],
            outputs=[feedback_status]
        )

        thumbs_down_btn.click(
            handle_thumbs_down,
            inputs=[check_id_state, has_query_state],
            outputs=[feedback_status]
        )

        submit_comment_btn.click(
            handle_comment_submission,
            inputs=[comment_input, check_id_state],
            outputs=[feedback_status, comment_input]
        )

    interface.launch()

# Start the application only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()