Chia Woon Yap committed on
Commit
8e9c1b4
·
verified ·
1 Parent(s): a8c1d57

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +612 -0
app.py ADDED
@@ -0,0 +1,612 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """app
4
+
5
+ Automatically generated by Colab.
6
+
7
+ Original file is located at
8
+ https://colab.research.google.com/drive/1pwwcBb5Zlw1DA3u5K8W8mjrwBTBWXc1L
9
+ """
10
+
11
+ import gradio as gr
12
+ import numpy as np
13
+ from transformers import pipeline
14
+ import os
15
+ import time
16
+ import groq
17
+ import uuid # For generating unique filenames
18
+
19
+ # Updated imports to address LangChain deprecation warnings:
20
+ from langchain_groq import ChatGroq
21
+ from langchain.schema import HumanMessage
22
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
23
+ from langchain_community.vectorstores import Chroma
24
+ from langchain_community.embeddings import HuggingFaceEmbeddings
25
+ from langchain.docstore.document import Document
26
+
27
+ # Importing chardet (make sure to add chardet to your requirements.txt)
28
+ import chardet
29
+
30
+ import fitz # PyMuPDF for PDFs
31
+ import docx # python-docx for Word files
32
+ import gtts # Google Text-to-Speech library
33
+ from pptx import Presentation # python-pptx for PowerPoint files
34
+ import re
35
+
36
+ # FastAPI imports
37
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
38
+ from fastapi.responses import JSONResponse, FileResponse
39
+ from fastapi.middleware.cors import CORSMiddleware
40
+ import uvicorn
41
+ from typing import Optional
42
+ import io
43
+ import soundfile as sf
44
+ import librosa
45
+
46
# Enhanced Whisper model for speech-to-text, with a fallback if the larger
# checkpoint cannot be downloaded/loaded at startup.
try:
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small.en",  # Upgraded from base to small for better accuracy
        device=-1,  # Use CPU (-1) or GPU (0)
        chunk_length_s=30,   # split long audio into 30 s chunks
        stride_length_s=5,   # 5 s overlap between chunks
        batch_size=8
    )
except Exception as e:
    print(f"Warning: Could not load enhanced Whisper model: {e}")
    # Fallback to basic model (no chunking configuration)
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

# Set API Key (Ensure it's stored securely in an environment variable)
# NOTE(review): this assigns an attribute on the imported `groq` module; the
# value is only actually consumed below via `api_key=groq.api_key`. If
# GROQ_API_KEY is unset this is None — verify ChatGroq fails loudly in that case.
groq.api_key = os.getenv("GROQ_API_KEY")

# Initialize Chat Model
chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key)

# Initialize Embeddings and ChromaDB (persisted on disk under ./chroma_db)
os.makedirs("chroma_db", exist_ok=True)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory="chroma_db"
)

# Short-term memory for the LLM.
# NOTE(review): module-level mutable state shared by all users/sessions —
# concurrent sessions will see each other's history.
chat_memory = []

# Audio processing parameters
AUDIO_SAMPLE_RATE = 16000  # Whisper works best with 16kHz
80
+
81
# Prompt for quiz generation with added remark.
# Sent verbatim to the LLM (with the document text appended) by generate_quiz();
# the "Remove all unnecessary formatting" remark complements clean_response(),
# which strips any formatting the model emits anyway.
quiz_prompt = """
You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.

Generate 20 Questions.

Remove all unnecessary formatting generated by the LLM, including <think> tags, asterisks, markdown formatting, and any bold or italic text, as well as **, ###, ##, and # tags.

For each question:
- Provide 4 answer choices (for MCQs), with only one correct answer.
- Ensure fill-in-the-blank questions focus on key terms, phrases, or concepts from the document.
- Include an answer key for all questions.
- Ensure questions vary in difficulty and encourage comprehension rather than memorization.
- Additionally, implement an instant feedback mechanism:
- When a user selects an answer, indicate whether it is correct or incorrect.
- If incorrect, provide a brief explanation from the document to guide learning.
- Ensure responses are concise and educational to enhance understanding.

Output Example:
1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.

Answer: Agent Core

Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.

2. What is the main limitation of LLM-based applications?
a) Limited token capacity
b) Lack of domain expertise
c) Prone to hallucination
d) All of the above

Answer: d) All of the above

Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.

3. Given the following info, what is the value of P(jam|Rain)?
P(no Rain) = 0.8;
P(no Jam) = 0.2;
P(Rain|Jam) = 0.1

a) 0.016
b) 0.025
c) 0.1
d) 0.4

Answer: d) 0.4

Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
"""
130
+
131
# Function to clean AI response by removing unwanted formatting
def clean_response(response):
    """Strip LLM formatting artifacts from a response string.

    Removes <think>...</think> blocks, asterisks, square brackets,
    leading markdown headers (#, ##, ###, ...), backslashes and "---"
    horizontal rules, then trims surrounding whitespace.

    Args:
        response: Raw text returned by the chat model.

    Returns:
        The cleaned text.
    """
    cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
    cleaned_text = re.sub(r"(\*\*|\*|\[|\])", "", cleaned_text)
    # Bug fix: the prompt asks for ###, ## AND single-# headers to be removed;
    # the original pattern (^##+) left single-# headers intact.
    cleaned_text = re.sub(r"^#+\s*", "", cleaned_text, flags=re.MULTILINE)
    cleaned_text = re.sub(r"\\", "", cleaned_text)
    cleaned_text = re.sub(r"---", "", cleaned_text)
    return cleaned_text.strip()
140
+
141
# Function to generate quiz based on content
def generate_quiz(content):
    """Ask the Groq chat model to build a quiz from `content` and return the cleaned text."""
    full_prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
    raw_reply = chat_model([HumanMessage(content=full_prompt)])
    return clean_response(raw_reply.content)
147
+
148
# Function to retrieve relevant documents from vectorstore based on user query
def retrieve_documents(query):
    """Return the page content of the top-3 documents most similar to `query`."""
    matches = vectorstore.similarity_search(query, k=3)
    return [match.page_content for match in matches]
152
+
153
# Function to convert tuple format to message format
def convert_to_message_format(chat_history):
    """Convert [(user, bot)] pairs into a flat list of role/content dicts.

    Each pair becomes two entries: {"role": "user", ...} then
    {"role": "assistant", ...}, preserving order.
    """
    messages = []
    for user_text, assistant_text in chat_history:
        messages.extend((
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ))
    return messages
161
+
162
# Function to convert message format to tuple format for processing
def convert_to_tuple_format(chat_history):
    """Convert a flat role/content message list back into [(user, bot)] pairs.

    Assumes messages strictly alternate user/assistant; a trailing unpaired
    message is silently dropped.
    """
    pairs = []
    idx = 0
    while idx + 1 < len(chat_history):
        pairs.append((chat_history[idx]["content"], chat_history[idx + 1]["content"]))
        idx += 2
    return pairs
172
+
173
# Function to handle chatbot interactions with short-term memory
def chat_with_groq(user_input, chat_history):
    """Handle one chatbot turn: retrieve context, query the LLM, update history,
    and synthesize the reply as speech.

    Args:
        user_input: The user's new message.
        chat_history: Gradio messages-format history (list of role/content
            dicts); mutated in place and also returned.

    Returns:
        (chat_history, "", audio_path): updated history, an empty string to
        clear the input textbox, and the gTTS audio file path (None on error).
    """
    try:
        # Bug fix: the original computed convert_to_tuple_format(chat_history)
        # into an unused local (`tuple_history`) — dead work, removed.

        # Retrieve relevant documents for additional context
        relevant_docs = retrieve_documents(user_input)
        context = "\n".join(relevant_docs) if relevant_docs else "No relevant documents found."

        # Construct proper prompting with conversation history
        system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
        conversation_history = "\n".join(chat_memory[-10:])  # Keep the last 10 exchanges
        prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"

        # Call the chat model
        response = chat_model([HumanMessage(content=prompt)])

        # Clean response to remove any unwanted formatting
        cleaned_response_text = clean_response(response.content)

        # Append to the module-level short-term memory
        chat_memory.append(f"User: {user_input}")
        chat_memory.append(f"AI: {cleaned_response_text}")

        # Update chat history - add new messages in the correct format
        chat_history.append({"role": "user", "content": user_input})
        chat_history.append({"role": "assistant", "content": cleaned_response_text})

        # Convert response to speech
        audio_file = speech_playback(cleaned_response_text)

        return chat_history, "", audio_file
    except Exception as e:
        # Surface the failure in the chat itself rather than crashing the UI
        error_msg = f"Error: {str(e)}"
        chat_history.append({"role": "user", "content": user_input})
        chat_history.append({"role": "assistant", "content": error_msg})
        return chat_history, "", None
211
+
212
# Function to play response as speech using gTTS
def speech_playback(text):
    """Synthesize `text` to a uniquely-named MP3 file.

    Returns the file path, or None if synthesis fails.
    """
    try:
        # Unique name per call so Gradio never serves a stale file
        output_path = f"output_audio_{uuid.uuid4()}.mp3"
        speech = gtts.gTTS(text, lang='en')
        speech.save(output_path)
        return output_path
    except Exception as e:
        print(f"Error in speech_playback: {e}")
        return None
228
+
229
# Function to detect encoding safely
def detect_encoding(file_path):
    """Guess a file's text encoding from its first 4 KiB; fall back to UTF-8."""
    try:
        with open(file_path, "rb") as handle:
            sample = handle.read(4096)
        guess = chardet.detect(sample)["encoding"]
        return guess or "utf-8"
    except Exception:
        return "utf-8"
239
+
240
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF via PyMuPDF.

    Returns a status string when no text is found or extraction fails.
    """
    try:
        pdf = fitz.open(pdf_path)
        pages = [page.get_text("text") for page in pdf]
        text = "\n".join(pages)
        return text if text.strip() else "No extractable text found."
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
248
+
249
# Function to extract text from Word files (.docx)
def extract_text_from_docx(docx_path):
    """Extract paragraph text from a .docx file via python-docx.

    Returns a status string when no text is found or extraction fails.
    """
    try:
        document = docx.Document(docx_path)
        text = "\n".join(paragraph.text for paragraph in document.paragraphs)
        return text if text.strip() else "No extractable text found."
    except Exception as e:
        return f"Error extracting text from Word document: {str(e)}"
257
+
258
# Function to extract text from PowerPoint files (.pptx)
def extract_text_from_pptx(pptx_path):
    """Extract text from every text-bearing shape on every slide of a .pptx.

    Returns a status string when no text is found or extraction fails.
    """
    try:
        deck = Presentation(pptx_path)
        fragments = []
        for slide in deck.slides:
            for shape in slide.shapes:
                # Only some shape types (text frames, placeholders) carry text
                if hasattr(shape, "text"):
                    fragments.append(shape.text + "\n")
        text = "".join(fragments)
        return text if text.strip() else "No extractable text found."
    except Exception as e:
        return f"Error extracting text from PowerPoint: {str(e)}"
270
+
271
# Function to process documents safely
def process_document(file):
    """Extract text from an uploaded file, index it in the vectorstore,
    and return a generated quiz (or an error message string).

    Dispatches on the file extension; unknown extensions are read as text
    using a detected encoding.
    """
    try:
        file_extension = os.path.splitext(file.name)[-1].lower()
        if file_extension in (".png", ".jpg", ".jpeg"):
            return "Error: Images cannot be processed for text extraction."

        extractors = {
            ".pdf": extract_text_from_pdf,
            ".docx": extract_text_from_docx,
            ".pptx": extract_text_from_pptx,
        }
        extractor = extractors.get(file_extension)
        if extractor is not None:
            content = extractor(file.name)
        else:
            # Fall back to plain-text reading with a best-effort encoding guess
            encoding = detect_encoding(file.name)
            with open(file.name, "r", encoding=encoding, errors="replace") as handle:
                content = handle.read()

        # Chunk and index the document for later retrieval
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = [Document(page_content=piece) for piece in splitter.split_text(content)]
        vectorstore.add_documents(chunks)

        quiz = generate_quiz(content)
        return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
    except Exception as e:
        return f"Error processing document: {str(e)}"
295
+
296
# Enhanced function to handle speech-to-text conversion with audio preprocessing
def preprocess_audio(audio_data, sample_rate):
    """
    Enhanced audio preprocessing for better STT accuracy.

    Pipeline: stereo->mono, cast to float32, peak-normalize, resample to
    16 kHz when needed, then apply a 2nd-order 80 Hz high-pass filter.

    Args:
        audio_data: numpy array of samples; 2-D input is averaged to mono.
        sample_rate: sample rate of `audio_data` in Hz.

    Returns:
        (audio_data, sample_rate) after processing. On any exception the
        current (possibly partially processed) audio is returned unchanged
        from that point.
    """
    try:
        # Convert to mono if stereo
        # NOTE(review): assumes channels are on axis 1 — TODO confirm Gradio's layout
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)

        # Convert to float32
        audio_data = audio_data.astype(np.float32)

        # Peak-normalize to [-1, 1]; skip silent input to avoid divide-by-zero
        max_val = np.max(np.abs(audio_data))
        if max_val > 0:
            audio_data = audio_data / max_val

        # Resample to 16kHz if needed (Whisper works best with 16kHz)
        if sample_rate != AUDIO_SAMPLE_RATE:
            audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=AUDIO_SAMPLE_RATE)
            sample_rate = AUDIO_SAMPLE_RATE

        # Apply noise reduction (simple high-pass filter) to remove low-frequency rumble
        import scipy.signal as sp
        nyquist = sample_rate / 2
        cutoff = 80  # High-pass filter cutoff frequency in Hz
        b, a = sp.butter(2, cutoff/nyquist, btype='high')
        audio_data = sp.filtfilt(b, a, audio_data)

        return audio_data, sample_rate

    except Exception as e:
        print(f"Audio preprocessing error: {e}")
        # Best-effort fallback: returns the audio as of the failing step,
        # which may already be partially processed (not necessarily the
        # original input).
        return audio_data, sample_rate
332
+
333
def transcribe_audio(audio):
    """
    Speech-to-text transcription with preprocessing and defensive error handling.

    Args:
        audio: Gradio numpy-audio tuple (sample_rate, audio_data), or None.

    Returns:
        The transcribed text, or a human-readable status/error string.
    """
    try:
        if audio is None:
            return "No audio input detected."

        sample_rate, audio_data = audio

        # Preprocess audio (mono, normalized, 16 kHz, high-pass filtered)
        audio_data, sample_rate = preprocess_audio(audio_data, sample_rate)

        # Reject clips too short to transcribe meaningfully.
        # Bug fix: the message now matches the actual 0.5 s threshold
        # (it previously claimed a 1 second minimum).
        if len(audio_data) / sample_rate < 0.5:
            return "Audio too short. Please record at least 0.5 seconds of audio."

        # Truncate overly long clips to prevent timeouts
        max_duration = 30  # seconds
        if len(audio_data) / sample_rate > max_duration:
            # Bug fix: int() guards against a float sample rate producing a
            # float slice index (which would raise TypeError).
            max_samples = int(max_duration * sample_rate)
            audio_data = audio_data[:max_samples]

        # Run Whisper on the raw samples
        result = transcriber({
            "sampling_rate": sample_rate,
            "raw": audio_data
        })

        transcription = result["text"].strip()

        if not transcription:
            return "No speech detected. Please try again with clearer audio."

        return transcription

    except Exception as e:
        error_msg = f"Transcription error: {str(e)}"
        print(error_msg)
        return f"Sorry, I couldn't process the audio. Please try again. Error: {str(e)}"
374
+
375
# FastAPI Application
app = FastAPI(title="Tutor AI API", description="Enhanced Speech-to-Text Tutor AI API")

# CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# overly permissive; the CORS spec disallows credentialed requests with a
# wildcard origin. Restrict origins before production deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
386
+
387
# FastAPI Routes
@app.get("/")
async def root():
    """Liveness message for the API root."""
    payload = {"message": "Tutor AI API is running", "version": "1.0"}
    return payload
391
+
392
+ @app.post("/api/transcribe")
393
+ async def api_transcribe_audio(file: UploadFile = File(...)):
394
+ """
395
+ Enhanced API endpoint for speech-to-text transcription
396
+ """
397
+ try:
398
+ # Check if file is audio
399
+ if not file.content_type.startswith('audio/'):
400
+ raise HTTPException(status_code=400, detail="File must be an audio file")
401
+
402
+ # Read audio file
403
+ contents = await file.read()
404
+
405
+ # Convert to numpy array using soundfile
406
+ audio_io = io.BytesIO(contents)
407
+ audio_data, sample_rate = sf.read(audio_io)
408
+
409
+ # Transcribe
410
+ transcription = transcribe_audio((sample_rate, audio_data))
411
+
412
+ return JSONResponse({
413
+ "success": True,
414
+ "transcription": transcription,
415
+ "audio_duration": len(audio_data) / sample_rate if audio_data is not None else 0
416
+ })
417
+
418
+ except Exception as e:
419
+ return JSONResponse({
420
+ "success": False,
421
+ "error": str(e)
422
+ }, status_code=500)
423
+
424
+ @app.post("/api/chat")
425
+ async def api_chat(message: str = Form(...)):
426
+ """
427
+ API endpoint for chat interactions
428
+ """
429
+ try:
430
+ # Simple chat response without memory for API
431
+ prompt = f"You are a helpful AI tutor. Answer the following question accurately and concisely: {message}"
432
+ response = chat_model([HumanMessage(content=prompt)])
433
+ cleaned_response = clean_response(response.content)
434
+
435
+ return JSONResponse({
436
+ "success": True,
437
+ "response": cleaned_response
438
+ })
439
+
440
+ except Exception as e:
441
+ return JSONResponse({
442
+ "success": False,
443
+ "error": str(e)
444
+ }, status_code=500)
445
+
446
+ @app.post("/api/process-document")
447
+ async def api_process_document(file: UploadFile = File(...)):
448
+ """
449
+ API endpoint for document processing
450
+ """
451
+ try:
452
+ # Save uploaded file temporarily
453
+ file_extension = os.path.splitext(file.filename)[-1].lower()
454
+ temp_path = f"temp_{uuid.uuid4()}{file_extension}"
455
+
456
+ with open(temp_path, "wb") as f:
457
+ f.write(await file.read())
458
+
459
+ # Process document based on type
460
+ if file_extension == ".pdf":
461
+ content = extract_text_from_pdf(temp_path)
462
+ elif file_extension == ".docx":
463
+ content = extract_text_from_docx(temp_path)
464
+ elif file_extension == ".pptx":
465
+ content = extract_text_from_pptx(temp_path)
466
+ else:
467
+ # Try text file
468
+ encoding = detect_encoding(temp_path)
469
+ with open(temp_path, "r", encoding=encoding, errors="replace") as f:
470
+ content = f.read()
471
+
472
+ # Clean up temp file
473
+ os.remove(temp_path)
474
+
475
+ # Generate quiz
476
+ quiz = generate_quiz(content)
477
+
478
+ return JSONResponse({
479
+ "success": True,
480
+ "content_preview": content[:500] + "..." if len(content) > 500 else content,
481
+ "quiz": quiz
482
+ })
483
+
484
+ except Exception as e:
485
+ return JSONResponse({
486
+ "success": False,
487
+ "error": str(e)
488
+ }, status_code=500)
489
+
490
+ @app.get("/api/health")
491
+ async def health_check():
492
+ """Health check endpoint"""
493
+ return {"status": "healthy", "timestamp": time.time()}
494
+
495
# Clear chat history function
def clear_chat_history():
    """Wipe the LLM short-term memory and return empty chat/audio outputs for the UI."""
    del chat_memory[:]
    return [], None
499
+
500
def tutor_ai_chatbot():
    """Build and launch the Gradio UI for the Tutor AI Chatbot.

    Three tabs: an AI chat (text + voice input, spoken replies), a
    notes-upload tab that generates a quiz, and an introduction video.
    NOTE(review): launch() at the end blocks until the UI is shut down.
    """
    with gr.Blocks() as gradio_app:
        gr.Markdown("# 📚 AI Tutor - We.(POC)")
        gr.Markdown("An interactive Personal AI Tutor chatbot to help with your learning needs.")

        # Chatbot Tab
        with gr.Tab("AI Chatbot"):
            with gr.Row():
                with gr.Column(scale=3):
                    # messages-type history, matching chat_with_groq's format
                    chatbot = gr.Chatbot(height=500, type="messages")

                with gr.Column(scale=1):
                    audio_playback = gr.Audio(label="Audio Response", type="filepath")

            # Move the input controls here to span full width
            with gr.Row():
                msg = gr.Textbox(
                    label="Ask a question",
                    placeholder="Type your question here...",
                    container=False  # Removes the default container styling
                )
                submit = gr.Button("Send")

            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")

            # Voice recording tips - ONLY in AI Chatbot tab
            with gr.Accordion("🎤 Voice Recording Tips", open=False):
                gr.Markdown("""
                **For better speech recognition accuracy:**
                - 🎙️ Speak clearly and at a moderate pace
                - 🔇 Record in a quiet environment
                - 📍 Keep the microphone close to your mouth (10-15 cm)
                - 🎧 Use a good quality microphone if possible
                - 📝 Review the transcribed text before sending
                - 🔄 If transcription is poor, try recording again or type manually
                """)

            # Clear chat history button
            clear_btn = gr.Button("Clear Chat")

            # Handle chat interaction (button click)
            submit.click(
                chat_with_groq,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg, audio_playback]
            )

            # Clear chat history function
            # NOTE(review): this only resets the UI component; the module-level
            # chat_memory is NOT cleared here (clear_chat_history is unused).
            clear_btn.click(
                lambda: [],  # Return empty list in message format
                inputs=None,
                outputs=[chatbot]
            )

            # Also allow Enter key to submit
            msg.submit(
                chat_with_groq,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg, audio_playback]
            )

            # Add some examples of questions students might ask
            with gr.Accordion("Example Questions", open=False):
                gr.Examples(
                    examples=[
                        "Can you explain the concept of RLHF AI?",
                        "What are AI transformers?",
                        "What is MoE AI?",
                        "What's gate networks AI?",
                        "I am making a switch, please generating baking recipe?"
                    ],
                    inputs=msg
                )

            # Connect audio input to transcription (fills the textbox with the transcript)
            audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=msg)

        # Upload Notes & Generate Quiz Tab
        with gr.Tab("Upload Notes & Generate Quiz"):
            with gr.Row():
                with gr.Column(scale=2):
                    file_input = gr.File(label="Upload Lecture Notes (PDF, DOCX, PPTX)")
                with gr.Column(scale=3):
                    quiz_output = gr.Textbox(label="Generated Quiz", lines=10)

            # Connect file input to document processing
            file_input.change(process_document, inputs=file_input, outputs=quiz_output)

        # Introduction Video Tab - Now with the working video
        with gr.Tab("Introduction Video"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Welcome to the Introduction Video")
                    gr.Markdown("Music from Xu Mengyuan - China-O, musician Xu Mengyuan YUAN! | 徐梦圆 - China-O 音乐人徐梦圆YUAN!")
                    # Use the local video file that's stored in your Space
                    gr.Video("We_not_me_video.mp4", label="Introduction Video")

    # Launch the application (blocking call)
    gradio_app.launch(share=False)
602
+
603
# Run both FastAPI and Gradio
if __name__ == "__main__":
    import threading

    # Start Gradio in a separate thread.
    # NOTE(review): tutor_ai_chatbot() calls gradio_app.launch(), which blocks
    # inside this daemon thread; being a daemon, the Gradio server is killed
    # abruptly when the main (uvicorn) process exits.
    gradio_thread = threading.Thread(target=tutor_ai_chatbot, daemon=True)
    gradio_thread.start()

    # Start FastAPI (blocks the main thread)
    uvicorn.run(app, host="0.0.0.0", port=8000)