Chia Woon Yap commited on
Commit
3a97a58
Β·
verified Β·
1 Parent(s): 42c8ccb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +419 -477
app.py CHANGED
@@ -16,597 +16,539 @@ import time
16
  import groq
17
  import uuid # For generating unique filenames
18
 
19
- # Updated imports to address LangChain deprecation warnings:
20
- from langchain_groq import ChatGroq
21
- from langchain.schema import HumanMessage
22
- from langchain.text_splitter import RecursiveCharacterTextSplitter
23
- from langchain_community.vectorstores import Chroma
24
- from langchain_community.embeddings import HuggingFaceEmbeddings
25
- from langchain.docstore.document import Document
26
-
27
- # Importing chardet (make sure to add chardet to your requirements.txt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  import chardet
29
-
30
  import fitz # PyMuPDF for PDFs
31
  import docx # python-docx for Word files
32
  import gtts # Google Text-to-Speech library
33
  from pptx import Presentation # python-pptx for PowerPoint files
34
  import re
35
 
36
- # FastAPI imports
37
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
38
- from fastapi.responses import JSONResponse, FileResponse
39
- from fastapi.middleware.cors import CORSMiddleware
40
- import uvicorn
41
- from typing import Optional
42
- import io
43
- import soundfile as sf
44
- import librosa
45
 
46
- # Enhanced Whisper model for speech-to-text with better configuration
47
  try:
48
  transcriber = pipeline(
49
- "automatic-speech-recognition",
50
- model="openai/whisper-small.en", # Upgraded from base to small for better accuracy
51
- device=-1, # Use CPU (-1) or GPU (0)
52
- chunk_length_s=30,
53
- stride_length_s=5,
54
- batch_size=8
55
  )
 
56
  except Exception as e:
57
- print(f"Warning: Could not load enhanced Whisper model: {e}")
58
- # Fallback to basic model
59
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
60
-
61
- # Set API Key (Ensure it's stored securely in an environment variable)
62
- groq.api_key = os.getenv("GROQ_API_KEY")
63
 
64
- # Initialize Chat Model
65
- chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- # Initialize Embeddings and chromaDB
68
- os.makedirs("chroma_db", exist_ok=True)
69
- embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
70
- vectorstore = Chroma(
71
- embedding_function=embedding_model,
72
- persist_directory="chroma_db"
73
- )
 
 
 
 
 
 
 
 
74
 
75
- # Short-term memory for the LLM
76
  chat_memory = []
77
 
78
- # Audio processing parameters
79
- AUDIO_SAMPLE_RATE = 16000 # Whisper works best with 16kHz
80
-
81
- # Prompt for quiz generation with added remark
82
  quiz_prompt = """
83
- You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.
84
-
85
- Generate 20 Questions.
86
-
87
- Remove all unnecessary formatting generated by the LLM, including <think> tags, asterisks, markdown formatting, and any bold or italic text, as well as **, ###, ##, and # tags.
88
-
89
- For each question:
90
- - Provide 4 answer choices (for MCQs), with only one correct answer.
91
- - Ensure fill-in-the-blank questions focus on key terms, phrases, or concepts from the document.
92
- - Include an answer key for all questions.
93
- - Ensure questions vary in difficulty and encourage comprehension rather than memorization.
94
- - Additionally, implement an instant feedback mechanism:
95
- - When a user selects an answer, indicate whether it is correct or incorrect.
96
- - If incorrect, provide a brief explanation from the document to guide learning.
97
- - Ensure responses are concise and educational to enhance understanding.
98
-
99
- Output Example:
100
- 1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.
101
-
102
- Answer: Agent Core
103
-
104
- Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.
105
-
106
- 2. What is the main limitation of LLM-based applications?
107
- a) Limited token capacity
108
- b) Lack of domain expertise
109
- c) Prone to hallucination
110
- d) All of the above
111
-
112
- Answer: d) All of the above
113
-
114
- Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.
115
-
116
- 3. Given the following info, what is the value of P(jam|Rain)?
117
- P(no Rain) = 0.8;
118
- P(no Jam) = 0.2;
119
- P(Rain|Jam) = 0.1
120
-
121
- a) 0.016
122
- b) 0.025
123
- c) 0.1
124
- d) 0.4
125
-
126
- Answer: d) 0.4
127
-
128
- Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
129
  """
130
 
131
- # Function to clean AI response by removing unwanted formatting
132
  def clean_response(response):
133
- """Removes <think> tags, asterisks, and markdown formatting."""
134
- cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
135
- cleaned_text = re.sub(r"(\*\*|\*|\[|\])", "", cleaned_text)
136
- cleaned_text = re.sub(r"^##+\s*", "", cleaned_text, flags=re.MULTILINE)
137
- cleaned_text = re.sub(r"\\", "", cleaned_text)
138
- cleaned_text = re.sub(r"---", "", cleaned_text)
139
- return cleaned_text.strip()
140
 
141
- # Function to generate quiz based on content
142
  def generate_quiz(content):
143
- prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
144
- response = chat_model([HumanMessage(content=prompt)])
145
- cleaned_response = clean_response(response.content)
146
- return cleaned_response
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Function to retrieve relevant documents from vectorstore based on user query
149
  def retrieve_documents(query):
150
- results = vectorstore.similarity_search(query, k=3)
151
- return [doc.page_content for doc in results]
152
-
153
- # Function to convert tuple format to message format
154
- def convert_to_message_format(chat_history):
155
- """Convert from [(user, bot)] format to [{"role": "user", "content": user}, {"role": "assistant", "content": bot}] format"""
156
- message_format = []
157
- for user_msg, bot_msg in chat_history:
158
- message_format.append({"role": "user", "content": user_msg})
159
- message_format.append({"role": "assistant", "content": bot_msg})
160
- return message_format
161
-
162
- # Function to convert message format to tuple format for processing
163
- def convert_to_tuple_format(chat_history):
164
- """Convert from message format back to tuple format for processing"""
165
- tuple_format = []
166
- for i in range(0, len(chat_history), 2):
167
- if i+1 < len(chat_history):
168
- user_msg = chat_history[i]["content"]
169
- bot_msg = chat_history[i+1]["content"]
170
- tuple_format.append((user_msg, bot_msg))
171
- return tuple_format
172
 
173
- # Function to handle chatbot interactions with short-term memory
174
  def chat_with_groq(user_input, chat_history):
 
175
  try:
176
- # Convert message format to tuple format for processing
177
- tuple_history = convert_to_tuple_format(chat_history)
178
-
179
- # Retrieve relevant documents for additional context
180
- relevant_docs = retrieve_documents(user_input)
181
- context = "\n".join(relevant_docs) if relevant_docs else "No relevant documents found."
182
-
183
- # Construct proper prompting with conversation history
184
- system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
185
- conversation_history = "\n".join(chat_memory[-10:]) # Keep the last 10 exchanges
186
- prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"
187
 
188
- # Call the chat model
 
 
 
 
 
 
 
 
189
  response = chat_model([HumanMessage(content=prompt)])
190
-
191
- # Clean response to remove any unwanted formatting
192
- cleaned_response_text = clean_response(response.content)
193
-
194
- # Append conversation history
195
- chat_memory.append(f"User: {user_input}")
196
- chat_memory.append(f"AI: {cleaned_response_text}")
197
-
198
- # Update chat history - add new messages in the correct format
199
  chat_history.append({"role": "user", "content": user_input})
200
- chat_history.append({"role": "assistant", "content": cleaned_response_text})
201
-
202
- # Convert response to speech
203
- audio_file = speech_playback(cleaned_response_text)
204
-
205
  return chat_history, "", audio_file
 
206
  except Exception as e:
207
- error_msg = f"Error: {str(e)}"
208
  chat_history.append({"role": "user", "content": user_input})
209
  chat_history.append({"role": "assistant", "content": error_msg})
210
  return chat_history, "", None
211
 
212
- # Function to play response as speech using gTTS
213
  def speech_playback(text):
 
214
  try:
215
- # Generate a unique filename for each audio file
216
- unique_id = str(uuid.uuid4())
217
- audio_file = f"output_audio_{unique_id}.mp3"
218
-
219
- # Convert text to speech
220
- tts = gtts.gTTS(text, lang='en')
 
 
 
 
 
221
  tts.save(audio_file)
222
-
223
- # Return the path to the audio file
224
  return audio_file
225
  except Exception as e:
226
- print(f"Error in speech_playback: {e}")
227
  return None
228
 
229
- # Function to detect encoding safely
230
  def detect_encoding(file_path):
 
231
  try:
232
  with open(file_path, "rb") as f:
233
  raw_data = f.read(4096)
234
  detected = chardet.detect(raw_data)
235
- encoding = detected["encoding"]
236
- return encoding if encoding else "utf-8"
237
  except Exception:
238
  return "utf-8"
239
 
240
- # Function to extract text from PDF
241
  def extract_text_from_pdf(pdf_path):
 
242
  try:
243
  doc = fitz.open(pdf_path)
244
- text = "\n".join([page.get_text("text") for page in doc])
245
- return text if text.strip() else "No extractable text found."
 
 
246
  except Exception as e:
247
- return f"Error extracting text from PDF: {str(e)}"
248
 
249
- # Function to extract text from Word files (.docx)
250
  def extract_text_from_docx(docx_path):
 
251
  try:
252
  doc = docx.Document(docx_path)
253
- text = "\n".join([para.text for para in doc.paragraphs])
254
- return text if text.strip() else "No extractable text found."
255
  except Exception as e:
256
- return f"Error extracting text from Word document: {str(e)}"
257
 
258
- # Function to extract text from PowerPoint files (.pptx)
259
  def extract_text_from_pptx(pptx_path):
 
260
  try:
261
- presentation = Presentation(pptx_path)
262
  text = ""
263
- for slide in presentation.slides:
264
  for shape in slide.shapes:
265
- if hasattr(shape, "text"):
266
  text += shape.text + "\n"
267
- return text if text.strip() else "No extractable text found."
268
  except Exception as e:
269
- return f"Error extracting text from PowerPoint: {str(e)}"
270
 
271
- # Function to process documents safely
272
  def process_document(file):
 
273
  try:
274
- file_extension = os.path.splitext(file.name)[-1].lower()
275
- if file_extension in [".png", ".jpg", ".jpeg"]:
276
- return "Error: Images cannot be processed for text extraction."
277
- if file_extension == ".pdf":
278
- content = extract_text_from_pdf(file.name)
279
- elif file_extension == ".docx":
280
- content = extract_text_from_docx(file.name)
281
- elif file_extension == ".pptx":
282
- content = extract_text_from_pptx(file.name)
283
- else:
284
- encoding = detect_encoding(file.name)
285
- with open(file.name, "r", encoding=encoding, errors="replace") as f:
 
 
 
 
 
 
286
  content = f.read()
287
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
288
- documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
289
- vectorstore.add_documents(documents)
290
-
291
- quiz = generate_quiz(content)
292
- return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
293
- except Exception as e:
294
- return f"Error processing document: {str(e)}"
295
-
296
- # Enhanced function to handle speech-to-text conversion with audio preprocessing
297
- def preprocess_audio(audio_data, sample_rate):
298
- """
299
- Enhanced audio preprocessing for better STT accuracy
300
- """
301
- try:
302
- # Convert to mono if stereo
303
- if audio_data.ndim > 1:
304
- audio_data = np.mean(audio_data, axis=1)
305
 
306
- # Convert to float32
307
- audio_data = audio_data.astype(np.float32)
308
 
309
- # Normalize audio
310
- max_val = np.max(np.abs(audio_data))
311
- if max_val > 0:
312
- audio_data = audio_data / max_val
 
 
 
 
 
 
 
 
313
 
314
- # Resample to 16kHz if needed (Whisper works best with 16kHz)
315
- if sample_rate != AUDIO_SAMPLE_RATE:
316
- audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=AUDIO_SAMPLE_RATE)
317
- sample_rate = AUDIO_SAMPLE_RATE
318
 
319
- # Apply noise reduction (simple high-pass filter)
320
- import scipy.signal as sp
321
- nyquist = sample_rate / 2
322
- cutoff = 80 # High-pass filter cutoff frequency in Hz
323
- b, a = sp.butter(2, cutoff/nyquist, btype='high')
324
- audio_data = sp.filtfilt(b, a, audio_data)
 
 
 
 
 
325
 
326
- return audio_data, sample_rate
327
-
328
  except Exception as e:
329
- print(f"Audio preprocessing error: {e}")
330
- # Return original audio if preprocessing fails
331
- return audio_data, sample_rate
332
 
333
  def transcribe_audio(audio):
334
- """
335
- Enhanced speech-to-text transcription with better error handling and preprocessing
336
- """
337
  try:
338
  if audio is None:
339
- return "No audio input detected."
 
 
 
340
 
341
  sample_rate, audio_data = audio
342
 
343
- # Preprocess audio
344
- audio_data, sample_rate = preprocess_audio(audio_data, sample_rate)
345
-
346
- # Ensure audio is not too short
347
- if len(audio_data) / sample_rate < 0.5: # Less than 0.5 seconds
348
- return "Audio too short. Please record at least 1 second of audio."
349
-
350
- # Ensure audio is not too long (to prevent timeouts)
351
- max_duration = 30 # seconds
352
- if len(audio_data) / sample_rate > max_duration:
353
- # Truncate audio
354
- max_samples = max_duration * sample_rate
355
- audio_data = audio_data[:max_samples]
356
-
357
- # Use Whisper with better configuration
358
- result = transcriber({
359
- "sampling_rate": sample_rate,
360
- "raw": audio_data
361
- })
362
-
363
- transcription = result["text"].strip()
364
-
365
- if not transcription:
366
- return "No speech detected. Please try again with clearer audio."
367
-
368
- return transcription
369
 
370
- except Exception as e:
371
- error_msg = f"Transcription error: {str(e)}"
372
- print(error_msg)
373
- return f"Sorry, I couldn't process the audio. Please try again. Error: {str(e)}"
374
-
375
- # FastAPI Application
376
- app = FastAPI(title="Tutor AI API", description="Enhanced Speech-to-Text Tutor AI API")
377
-
378
- # CORS middleware
379
- app.add_middleware(
380
- CORSMiddleware,
381
- allow_origins=["*"],
382
- allow_credentials=True,
383
- allow_methods=["*"],
384
- allow_headers=["*"],
385
- )
386
-
387
- # FastAPI Routes
388
- @app.get("/")
389
- async def root():
390
- return {"message": "Tutor AI API is running", "version": "1.0"}
391
-
392
- @app.post("/api/transcribe")
393
- async def api_transcribe_audio(file: UploadFile = File(...)):
394
- """
395
- Enhanced API endpoint for speech-to-text transcription
396
- """
397
- try:
398
- # Check if file is audio
399
- if not file.content_type.startswith('audio/'):
400
- raise HTTPException(status_code=400, detail="File must be an audio file")
401
 
402
- # Read audio file
403
- contents = await file.read()
 
 
404
 
405
- # Convert to numpy array using soundfile
406
- audio_io = io.BytesIO(contents)
407
- audio_data, sample_rate = sf.read(audio_io)
 
 
 
408
 
409
  # Transcribe
410
- transcription = transcribe_audio((sample_rate, audio_data))
411
-
412
- return JSONResponse({
413
- "success": True,
414
- "transcription": transcription,
415
- "audio_duration": len(audio_data) / sample_rate if audio_data is not None else 0
416
- })
417
-
418
- except Exception as e:
419
- return JSONResponse({
420
- "success": False,
421
- "error": str(e)
422
- }, status_code=500)
423
-
424
- @app.post("/api/chat")
425
- async def api_chat(message: str = Form(...)):
426
- """
427
- API endpoint for chat interactions
428
- """
429
- try:
430
- # Simple chat response without memory for API
431
- prompt = f"You are a helpful AI tutor. Answer the following question accurately and concisely: {message}"
432
- response = chat_model([HumanMessage(content=prompt)])
433
- cleaned_response = clean_response(response.content)
434
-
435
- return JSONResponse({
436
- "success": True,
437
- "response": cleaned_response
438
- })
439
-
440
- except Exception as e:
441
- return JSONResponse({
442
- "success": False,
443
- "error": str(e)
444
- }, status_code=500)
445
-
446
- @app.post("/api/process-document")
447
- async def api_process_document(file: UploadFile = File(...)):
448
- """
449
- API endpoint for document processing
450
- """
451
- try:
452
- # Save uploaded file temporarily
453
- file_extension = os.path.splitext(file.filename)[-1].lower()
454
- temp_path = f"temp_{uuid.uuid4()}{file_extension}"
455
-
456
- with open(temp_path, "wb") as f:
457
- f.write(await file.read())
458
-
459
- # Process document based on type
460
- if file_extension == ".pdf":
461
- content = extract_text_from_pdf(temp_path)
462
- elif file_extension == ".docx":
463
- content = extract_text_from_docx(temp_path)
464
- elif file_extension == ".pptx":
465
- content = extract_text_from_pptx(temp_path)
466
- else:
467
- # Try text file
468
- encoding = detect_encoding(temp_path)
469
- with open(temp_path, "r", encoding=encoding, errors="replace") as f:
470
- content = f.read()
471
-
472
- # Clean up temp file
473
- os.remove(temp_path)
474
 
475
- # Generate quiz
476
- quiz = generate_quiz(content)
477
 
478
- return JSONResponse({
479
- "success": True,
480
- "content_preview": content[:500] + "..." if len(content) > 500 else content,
481
- "quiz": quiz
482
- })
483
 
484
  except Exception as e:
485
- return JSONResponse({
486
- "success": False,
487
- "error": str(e)
488
- }, status_code=500)
489
 
490
- @app.get("/api/health")
491
- async def health_check():
492
- """Health check endpoint"""
493
- return {"status": "healthy", "timestamp": time.time()}
494
-
495
- # Clear chat history function
496
- def clear_chat_history():
497
  chat_memory.clear()
498
  return [], None
499
 
500
- def tutor_ai_chatbot():
501
- """Main Gradio interface for the Tutor AI Chatbot."""
502
- with gr.Blocks() as gradio_app:
503
- gr.Markdown("# πŸ“š AI Tutor - We.(POC)")
504
- gr.Markdown("An interactive Personal AI Tutor chatbot to help with your learning needs.")
505
-
506
- # Chatbot Tab
507
- with gr.Tab("AI Chatbot"):
 
 
 
 
 
 
 
 
 
 
 
 
508
  with gr.Row():
509
  with gr.Column(scale=3):
510
- chatbot = gr.Chatbot(height=500, type="messages")
511
-
 
 
 
 
 
 
512
  with gr.Column(scale=1):
513
- audio_playback = gr.Audio(label="Audio Response", type="filepath")
 
 
 
 
 
514
 
515
- # Move the input controls here to span full width
516
  with gr.Row():
517
  msg = gr.Textbox(
518
- label="Ask a question",
519
- placeholder="Type your question here...",
520
- container=False # Removes the default container styling
 
 
521
  )
522
- submit = gr.Button("Send")
523
 
524
  with gr.Row():
525
  with gr.Column(scale=1):
526
- audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
 
 
 
 
 
527
 
528
- # Voice recording tips - ONLY in AI Chatbot tab
529
- with gr.Accordion("🎀 Voice Recording Tips", open=False):
530
  gr.Markdown("""
531
- **For better speech recognition accuracy:**
532
- - πŸŽ™οΈ Speak clearly and at a moderate pace
533
- - πŸ”‡ Record in a quiet environment
534
- - πŸ“ Keep the microphone close to your mouth (10-15 cm)
535
- - 🎧 Use a good quality microphone if possible
536
- - πŸ“ Review the transcribed text before sending
537
- - πŸ”„ If transcription is poor, try recording again or type manually
 
 
 
 
 
 
 
538
  """)
539
 
540
- # Clear chat history button
541
- clear_btn = gr.Button("Clear Chat")
542
-
543
- # Handle chat interaction
544
- submit.click(
545
- chat_with_groq,
546
- inputs=[msg, chatbot],
547
- outputs=[chatbot, msg, audio_playback]
548
- )
549
-
550
- # Clear chat history function
551
- clear_btn.click(
552
- lambda: [], # Return empty list in message format
553
- inputs=None,
554
- outputs=[chatbot]
555
- )
556
-
557
- # Also allow Enter key to submit
558
- msg.submit(
559
- chat_with_groq,
560
- inputs=[msg, chatbot],
561
- outputs=[chatbot, msg, audio_playback]
562
- )
563
-
564
- # Add some examples of questions students might ask
565
- with gr.Accordion("Example Questions", open=False):
566
- gr.Examples(
567
- examples=[
568
- "Can you explain the concept of RLHF AI?",
569
- "What are AI transformers?",
570
- "What is MoE AI?",
571
- "What's gate networks AI?",
572
- "I am making a switch, please generating baking recipe?"
573
- ],
574
- inputs=msg
575
- )
576
-
577
- # Connect audio input to transcription
578
- audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=msg)
579
-
580
- # Upload Notes & Generate Quiz Tab
581
- with gr.Tab("Upload Notes & Generate Quiz"):
582
  with gr.Row():
583
- with gr.Column(scale=2):
584
- file_input = gr.File(label="Upload Lecture Notes (PDF, DOCX, PPTX)")
585
- with gr.Column(scale=3):
586
- quiz_output = gr.Textbox(label="Generated Quiz", lines=10)
587
-
588
- # Connect file input to document processing
589
- file_input.change(process_document, inputs=file_input, outputs=quiz_output)
590
-
591
- # Introduction Video Tab - Now with the working video
592
- with gr.Tab("Introduction Video"):
 
 
593
  with gr.Row():
594
  with gr.Column(scale=1):
595
- gr.Markdown("### Welcome to the Introduction Video")
596
- gr.Markdown("Music from Xu Mengyuan - China-O, musician Xu Mengyuan YUAN! | εΎζ’¦εœ† - China-O ιŸ³δΉδΊΊεΎζ’¦εœ†YUAN!")
597
- # Use the local video file that's stored in your Space
598
- gr.Video("We_not_me_video.mp4", label="Introduction Video")
599
-
600
- # Launch the application
601
- gradio_app.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
 
603
- # Run both FastAPI and Gradio
604
  if __name__ == "__main__":
605
- import threading
606
-
607
- # Start Gradio in a separate thread
608
- gradio_thread = threading.Thread(target=tutor_ai_chatbot, daemon=True)
609
- gradio_thread.start()
610
-
611
- # Start FastAPI
612
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
16
  import groq
17
  import uuid # For generating unique filenames
18
 
19
+ # LangChain imports with compatibility handling
20
+ try:
21
+ from langchain_groq import ChatGroq
22
+ from langchain_core.messages import HumanMessage
23
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
24
+ from langchain_community.vectorstores import Chroma
25
+ from langchain_community.embeddings import HuggingFaceEmbeddings
26
+ from langchain_core.documents import Document
27
+ except ImportError:
28
+ # Fallback for older versions
29
+ try:
30
+ from langchain_groq import ChatGroq
31
+ from langchain.schema import HumanMessage
32
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
33
+ from langchain_community.vectorstores import Chroma
34
+ from langchain_community.embeddings import HuggingFaceEmbeddings
35
+ from langchain.docstore.document import Document
36
+ except ImportError as e:
37
+ print(f"Import warning: {e}")
38
+ # Define fallback classes
39
+ class HumanMessage:
40
+ def __init__(self, content):
41
+ self.content = content
42
+ class Document:
43
+ def __init__(self, page_content):
44
+ self.page_content = page_content
45
+
46
+ # Basic imports
47
  import chardet
 
48
  import fitz # PyMuPDF for PDFs
49
  import docx # python-docx for Word files
50
  import gtts # Google Text-to-Speech library
51
  from pptx import Presentation # python-pptx for PowerPoint files
52
  import re
53
 
54
+ print("πŸš€ Initializing AI Tutor Application...")
 
 
 
 
 
 
 
 
55
 
56
+ # Initialize Whisper for speech-to-text
57
  try:
58
  transcriber = pipeline(
59
+ "automatic-speech-recognition",
60
+ model="openai/whisper-base.en"
 
 
 
 
61
  )
62
+ print("βœ… Whisper model loaded successfully")
63
  except Exception as e:
64
+ print(f"❌ Error loading Whisper: {e}")
65
+ transcriber = None
 
 
 
 
66
 
67
+ # Initialize Groq
68
+ groq_api_key = os.getenv("GROQ_API_KEY")
69
+ if groq_api_key:
70
+ try:
71
+ chat_model = ChatGroq(
72
+ model_name="llama-3.3-70b-versatile",
73
+ api_key=groq_api_key,
74
+ temperature=0.7
75
+ )
76
+ CHAT_MODEL_AVAILABLE = True
77
+ print("βœ… Groq chat model initialized")
78
+ except Exception as e:
79
+ print(f"❌ Error initializing Groq: {e}")
80
+ CHAT_MODEL_AVAILABLE = False
81
+ else:
82
+ print("⚠️ GROQ_API_KEY not found in environment variables")
83
+ CHAT_MODEL_AVAILABLE = False
84
 
85
+ # Initialize Vector Store
86
+ try:
87
+ os.makedirs("chroma_db", exist_ok=True)
88
+ embedding_model = HuggingFaceEmbeddings(
89
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
90
+ )
91
+ vectorstore = Chroma(
92
+ embedding_function=embedding_model,
93
+ persist_directory="chroma_db"
94
+ )
95
+ VECTORSTORE_AVAILABLE = True
96
+ print("βœ… Vector store initialized")
97
+ except Exception as e:
98
+ print(f"❌ Error initializing vector store: {e}")
99
+ VECTORSTORE_AVAILABLE = False
100
 
101
+ # Application state
102
  chat_memory = []
103
 
104
+ # Quiz generation prompt
 
 
 
105
  quiz_prompt = """
106
+ You are an AI assistant specialized in education. Given document content, generate a quiz with 10 questions mixing multiple-choice and fill-in-the-blank.
107
+
108
+ Requirements:
109
+ - 10 total questions
110
+ - Mix of MCQs and fill-in-the-blank
111
+ - Based on key concepts from the document
112
+ - Include answer key
113
+ - Remove all markdown formatting
114
+
115
+ Output format:
116
+ 1. [Question text]
117
+ Options (if MCQ): a) b) c) d)
118
+ Answer: [Correct answer]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  """
120
 
 
121
  def clean_response(response):
122
+ """Clean AI response from unwanted formatting."""
123
+ if not response:
124
+ return ""
125
+
126
+ cleaned = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
127
+ cleaned = re.sub(r"(\*\*|\*|\[|\]|#+|\\)", "", cleaned)
128
+ return cleaned.strip()
129
 
 
130
  def generate_quiz(content):
131
+ """Generate quiz from document content."""
132
+ if not CHAT_MODEL_AVAILABLE:
133
+ return "❌ Chat model not available. Please check GROQ_API_KEY configuration."
134
+
135
+ # Limit content length to avoid token limits
136
+ if len(content) > 8000:
137
+ content = content[:8000] + "... [content truncated for efficiency]"
138
+
139
+ try:
140
+ prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
141
+ response = chat_model([HumanMessage(content=prompt)])
142
+ return clean_response(response.content)
143
+ except Exception as e:
144
+ return f"❌ Error generating quiz: {str(e)}"
145
 
 
146
  def retrieve_documents(query):
147
+ """Retrieve relevant documents for context."""
148
+ if not VECTORSTORE_AVAILABLE or not query.strip():
149
+ return []
150
+
151
+ try:
152
+ results = vectorstore.similarity_search(query, k=2)
153
+ return [doc.page_content for doc in results]
154
+ except Exception as e:
155
+ print(f"Document retrieval error: {e}")
156
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
157
 
 
158
  def chat_with_groq(user_input, chat_history):
159
+ """Handle chat interactions with the AI."""
160
  try:
161
+ if not user_input.strip():
162
+ return chat_history, "", None
163
+
164
+ if not CHAT_MODEL_AVAILABLE:
165
+ error_msg = "πŸ€– Chat service is currently unavailable. Please check your API configuration."
166
+ chat_history.append({"role": "user", "content": user_input})
167
+ chat_history.append({"role": "assistant", "content": error_msg})
168
+ return chat_history, "", None
 
 
 
169
 
170
+ # Get relevant context from documents
171
+ relevant_docs = retrieve_documents(user_input)
172
+ context = "\n".join(relevant_docs) if relevant_docs else "No specific context available."
173
+
174
+ # Build enhanced prompt
175
+ system_msg = "You are a helpful AI tutor. Provide accurate, educational, and concise responses. If you don't know something, admit it honestly."
176
+ prompt = f"{system_msg}\n\nRelevant Context:\n{context}\n\nUser Question: {user_input}\n\nAssistant Response:"
177
+
178
+ # Get AI response
179
  response = chat_model([HumanMessage(content=prompt)])
180
+ cleaned_response = clean_response(response.content)
181
+
182
+ # Update chat history
 
 
 
 
 
 
183
  chat_history.append({"role": "user", "content": user_input})
184
+ chat_history.append({"role": "assistant", "content": cleaned_response})
185
+
186
+ # Generate speech output
187
+ audio_file = speech_playback(cleaned_response)
188
+
189
  return chat_history, "", audio_file
190
+
191
  except Exception as e:
192
+ error_msg = f"❌ Error processing your request: {str(e)}"
193
  chat_history.append({"role": "user", "content": user_input})
194
  chat_history.append({"role": "assistant", "content": error_msg})
195
  return chat_history, "", None
196
 
 
197
  def speech_playback(text):
198
+ """Convert text to speech using gTTS."""
199
  try:
200
+ if not text or len(text.strip()) < 10:
201
+ return None
202
+
203
+ # Limit text length for audio generation
204
+ if len(text) > 400:
205
+ text = text[:400] + "..."
206
+
207
+ unique_id = str(uuid.uuid4())[:8]
208
+ audio_file = f"audio_{unique_id}.mp3"
209
+
210
+ tts = gtts.gTTS(text=text, lang='en', slow=False)
211
  tts.save(audio_file)
212
+
 
213
  return audio_file
214
  except Exception as e:
215
+ print(f"πŸ”‡ TTS Error: {e}")
216
  return None
217
 
 
218
  def detect_encoding(file_path):
219
+ """Detect file encoding."""
220
  try:
221
  with open(file_path, "rb") as f:
222
  raw_data = f.read(4096)
223
  detected = chardet.detect(raw_data)
224
+ return detected.get("encoding", "utf-8")
 
225
  except Exception:
226
  return "utf-8"
227
 
 
228
  def extract_text_from_pdf(pdf_path):
229
+ """Extract text from PDF files."""
230
  try:
231
  doc = fitz.open(pdf_path)
232
+ text = ""
233
+ for page in doc:
234
+ text += page.get_text()
235
+ return text.strip() if text.strip() else "No extractable text found in PDF."
236
  except Exception as e:
237
+ return f"PDF extraction error: {str(e)}"
238
 
 
239
  def extract_text_from_docx(docx_path):
240
+ """Extract text from Word documents."""
241
  try:
242
  doc = docx.Document(docx_path)
243
+ text = "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
244
+ return text.strip() if text.strip() else "No text found in Word document."
245
  except Exception as e:
246
+ return f"Word extraction error: {str(e)}"
247
 
 
248
  def extract_text_from_pptx(pptx_path):
249
+ """Extract text from PowerPoint files."""
250
  try:
251
+ prs = Presentation(pptx_path)
252
  text = ""
253
+ for slide in prs.slides:
254
  for shape in slide.shapes:
255
+ if hasattr(shape, "text") and shape.text:
256
  text += shape.text + "\n"
257
+ return text.strip() if text.strip() else "No text found in PowerPoint."
258
  except Exception as e:
259
+ return f"PowerPoint extraction error: {str(e)}"
260
 
 
261
def process_document(file):
    """Process an uploaded document and generate a quiz from its content.

    Extracts text according to the file extension, optionally indexes it
    into the vector store for later chat queries, then asks the quiz
    generator to produce questions.

    Args:
        file: A Gradio file object (exposes a ``.name`` path attribute),
            or None when nothing was uploaded.

    Returns:
        str: A formatted status message containing either the generated
        quiz or a user-facing error description.
    """
    try:
        if not file:
            return "πŸ“ Please upload a document file first."

        filename = file.name
        file_ext = os.path.splitext(filename)[-1].lower()

        # Fixed: the log line previously contained a literal "(unknown)"
        # placeholder instead of the actual file path.
        print(f"Processing {file_ext} file: {filename}")

        # Extract text based on file type
        if file_ext == ".pdf":
            content = extract_text_from_pdf(filename)
        elif file_ext == ".docx":
            content = extract_text_from_docx(filename)
        elif file_ext == ".pptx":
            content = extract_text_from_pptx(filename)
        elif file_ext in [".txt", ".md"]:
            encoding = detect_encoding(filename)
            with open(filename, "r", encoding=encoding, errors="ignore") as f:
                content = f.read()
        else:
            return f"❌ Unsupported file type: {file_ext}. Please upload PDF, Word, PowerPoint, or text files."

        # Only the extractors' own sentinel strings count as failures.
        # The previous substring check ("error" / "no text" anywhere in
        # the lower-cased content) rejected valid documents that merely
        # mention the word "error", while letting the "No extractable
        # text found in PDF." sentinel through as real content.
        failure_markers = (
            "PDF extraction error:",
            "Word extraction error:",
            "PowerPoint extraction error:",
            "No extractable text found",
            "No text found",
        )
        if not content or content.startswith(failure_markers):
            return f"❌ Could not extract meaningful content from this file. Error: {content}"

        # Store in vector database for future queries (best effort).
        if VECTORSTORE_AVAILABLE and len(content) > 100:
            try:
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=500,
                    chunk_overlap=50
                )
                texts = text_splitter.split_text(content)
                documents = [Document(page_content=text) for text in texts]
                vectorstore.add_documents(documents)
            except Exception as e:
                # Indexing failure shouldn't block quiz generation.
                print(f"Vector store addition warning: {e}")

        # Generate quiz from content
        quiz = generate_quiz(content)

        success_msg = f"""
βœ… **Document Processed Successfully!**

πŸ“„ **File Type**: {file_ext.upper()}
πŸ“ **Content Preview**: {content[:200]}...

πŸ“‹ **Generated Quiz**:
{quiz}
"""
        return success_msg

    except Exception as e:
        return f"❌ Error processing document: {str(e)}"
 
 
319
 
320
def transcribe_audio(audio):
    """Transcribe recorded audio to text using the Whisper pipeline.

    Args:
        audio: A ``(sample_rate, numpy_array)`` tuple as produced by a
            Gradio Audio component with ``type="numpy"``, or None when
            nothing was recorded.

    Returns:
        str: The transcription prefixed with an emoji marker, or an
        explanatory error/help message.
    """
    try:
        if audio is None:
            return "🎀 No audio detected. Please record or upload audio."

        if transcriber is None:
            return "πŸ”‡ Speech-to-text service is currently unavailable."

        sample_rate, audio_data = audio

        # Basic audio preprocessing: average channels to mono.
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)

        audio_data = audio_data.astype(np.float32)

        # Normalize peak amplitude to the [-1, 1] range Whisper expects;
        # skip silent clips to avoid division by zero.
        max_val = np.max(np.abs(audio_data))
        if max_val > 0:
            audio_data = audio_data / max_val

        # Check audio length. Fixed: the message previously claimed a
        # 1-second minimum while the actual threshold is 0.5 s.
        audio_duration = len(audio_data) / sample_rate
        if audio_duration < 0.5:
            return "⏱️ Audio too short. Please record at least 0.5 seconds."
        if audio_duration > 30:
            return "⏱️ Audio too long. Please keep under 30 seconds."

        # Transcribe
        result = transcriber({"sampling_rate": sample_rate, "raw": audio_data})
        text = result.get("text", "").strip()

        if not text:
            return "πŸ”‡ No speech detected. Please try again with clearer audio."

        return f"🎀 Transcribed: {text}"

    except Exception as e:
        return f"❌ Transcription error: {str(e)}"
 
 
 
360
 
361
def clear_chat():
    """Reset the conversation: wipe stored memory and blank the UI.

    Returns:
        tuple: An empty chat history list and None for the audio widget.
    """
    chat_memory.clear()
    return ([], None)
365
 
366
def create_interface():
    """Create and configure the Gradio interface.

    Builds a three-tab Blocks app (chat, document/quiz, help) and wires
    the UI events to the module-level handler functions.

    Returns:
        gr.Blocks: The fully wired (but not yet launched) Gradio app.
    """
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="AI Tutor - Learning Assistant",
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        """
    ) as app:
        gr.Markdown("""
        # πŸŽ“ AI Tutor Assistant
        *Your personal learning companion with speech-to-text capabilities*
        """)

        # Main chat interface
        with gr.Tab("πŸ’¬ AI Chatbot"):
            gr.Markdown("Chat with your AI tutor using text or voice input!")

            with gr.Row():
                with gr.Column(scale=3):
                    # type="messages" selects the role/content dict format
                    # (the tuple format is deprecated in recent Gradio).
                    chatbot = gr.Chatbot(
                        label="Conversation History",
                        height=500,
                        type="messages",
                        show_copy_button=True,
                        avatar_images=("πŸ‘€", "πŸ€–")
                    )

                with gr.Column(scale=1):
                    # Receives the TTS file path from the chat handler and
                    # autoplays it on update.
                    audio_output = gr.Audio(
                        label="Audio Response",
                        type="filepath",
                        visible=True,
                        autoplay=True
                    )

            with gr.Row():
                msg = gr.Textbox(
                    label="Your message",
                    placeholder="Type your question here or use voice input below...",
                    scale=4,
                    container=False,
                    max_lines=3
                )
                send_btn = gr.Button("πŸš€ Send", scale=1, variant="primary")

            with gr.Row():
                with gr.Column(scale=1):
                    # type="numpy" yields the (sample_rate, ndarray) tuple
                    # that transcribe_audio expects.
                    audio_input = gr.Audio(
                        sources=["microphone"],
                        type="numpy",
                        label="🎀 Record Audio Question",
                        show_download_button=False
                    )

            with gr.Accordion("πŸ’‘ Tips for Better Experience", open=False):
                gr.Markdown("""
                **🎀 Voice Input Tips:**
                - Speak clearly in a quiet environment
                - Keep microphone 10-15 cm from your mouth
                - Record for 2-5 seconds for best results

                **πŸ“š Document Tips:**
                - Upload PDF, Word, or PowerPoint files
                - Clear text documents work best
                - Process documents before asking questions about them

                **πŸ’¬ Chat Tips:**
                - Ask specific questions for better answers
                - Use the clear button to start fresh conversations
                - The AI remembers context from uploaded documents
                """)

            with gr.Row():
                clear_btn = gr.Button("🧹 Clear Chat History", variant="secondary")
                # Full page reload is done client-side via the js hook;
                # the Python fn is a no-op.
                gr.Button("πŸ”„ Refresh Page").click(
                    lambda: None,
                    None,
                    None,
                    js="() => window.location.reload()"
                )

        # Document processing tab
        with gr.Tab("πŸ“š Upload & Generate Quiz"):
            gr.Markdown("Upload your study materials and generate custom quizzes automatically!")

            with gr.Row():
                with gr.Column(scale=1):
                    file_upload = gr.File(
                        label="πŸ“ Upload Study Materials",
                        file_types=[".pdf", ".docx", ".pptx", ".txt", ".md"],
                        file_count="single",
                        height=100
                    )
                    process_btn = gr.Button("⚑ Process & Generate Quiz", variant="primary")

                    gr.Markdown("""
                    **Supported Formats:**
                    - PDF documents
                    - Word documents (.docx)
                    - PowerPoint (.pptx)
                    - Text files (.txt, .md)
                    """)

                with gr.Column(scale=2):
                    quiz_display = gr.Textbox(
                        label="πŸ“‹ Generated Quiz",
                        lines=20,
                        max_lines=25,
                        show_copy_button=True,
                        placeholder="Your generated quiz will appear here after processing a document..."
                    )

        # Instructions tab
        with gr.Tab("ℹ️ How to Use"):
            gr.Markdown("""
            ## πŸŽ“ Getting Started with AI Tutor

            ### 🎀 Using Voice Input
            1. Go to the **AI Chatbot** tab
            2. Click the microphone button
            3. Allow microphone access in your browser
            4. Speak clearly and wait for transcription
            5. Review the text and click Send

            ### πŸ“š Processing Documents
            1. Go to the **Upload & Generate Quiz** tab
            2. Upload your study materials (PDF, Word, PowerPoint)
            3. Click "Process & Generate Quiz"
            4. Get instant quiz questions based on your content
            5. Use the chat to ask questions about your documents

            ### πŸ’¬ Chat Features
            - Ask questions about uploaded documents
            - Get detailed explanations
            - Receive audio responses
            - Clear chat when needed

            ### πŸ”§ Technical Requirements
            - Modern web browser with microphone access
            - Stable internet connection
            - Groq API key (set as environment variable)
            """)

        # Event handlers
        # Both the Send button and pressing Enter in the textbox route
        # through the same chat handler.
        send_btn.click(
            fn=chat_with_groq,
            inputs=[msg, chatbot],
            outputs=[chatbot, msg, audio_output]
        )

        msg.submit(
            fn=chat_with_groq,
            inputs=[msg, chatbot],
            outputs=[chatbot, msg, audio_output]
        )

        # A new recording is transcribed straight into the message box so
        # the user can review/edit before sending.
        audio_input.change(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=[msg]
        )

        process_btn.click(
            fn=process_document,
            inputs=[file_upload],
            outputs=[quiz_display]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot, audio_output]
        )

    return app
543
 
544
+ # Launch the application
545
if __name__ == "__main__":
    # Entry point: build the interface and serve it on all interfaces,
    # port 7860 (the Hugging Face Spaces default).
    print("🌈 Starting AI Tutor Application...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "debug": True,
    }
    create_interface().launch(**launch_options)
+ )