Spaces:
Sleeping
Sleeping
Chia Woon Yap
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,92 +38,17 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-bas
|
|
| 38 |
# Set API Key (Ensure it's stored securely in an environment variable)
|
| 39 |
groq.api_key = os.getenv("GROQ_API_KEY") # Replace with a valid API key
|
| 40 |
|
| 41 |
-
#___________________________________
|
| 42 |
-
|
| 43 |
-
# Authenticate with Hugging Face API using the token
|
| 44 |
-
#hf_token = os.getenv("HF_TOKEN") # Replace with the environment variable containing your Hugging Face token
|
| 45 |
-
#login(token=hf_token)
|
| 46 |
-
|
| 47 |
-
# Load the LLaVA model
|
| 48 |
-
#model_id = "liuhaotian/LLaVA-7B" # You can change the model ID based on what is available
|
| 49 |
-
#processor = AutoProcessor.from_pretrained(model_id)
|
| 50 |
-
#model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.float16).cuda()
|
| 51 |
-
|
| 52 |
-
# Load and preprocess an image
|
| 53 |
-
#image = Image.open("your_image.jpg") # Replace with the path to your image
|
| 54 |
-
#inputs = processor(text="Describe this image", images=image, return_tensors="pt").to("cuda")
|
| 55 |
-
|
| 56 |
-
# Generate output from LLaVA model
|
| 57 |
-
#output = model.generate(**inputs)
|
| 58 |
-
|
| 59 |
-
# Decode and print the output
|
| 60 |
-
#print(processor.decode(output[0]))
|
| 61 |
-
|
| 62 |
-
#___________________________________
|
| 63 |
-
|
| 64 |
# Initialize Chat Model
|
| 65 |
chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key) #DeepSeek-R1-Distill-Llama-70b , llama-3.3-70b-versatile , deepseek-r1-distill-qwen-32b
|
| 66 |
|
| 67 |
-
# Initialize Embeddings and chromaDB
|
| 68 |
-
|
| 69 |
-
embedding_model = HuggingFaceEmbeddings()
|
| 70 |
-
vectorstore = Chroma(embedding_function=embedding_model)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
# -*- coding: utf-8 -*-
|
| 74 |
-
"""app
|
| 75 |
-
|
| 76 |
-
Automatically generated by Colab.
|
| 77 |
-
|
| 78 |
-
Original file is located at
|
| 79 |
-
https://colab.research.google.com/drive/1jdKA4WQJcLb0_aQ3vtGVM46B1wriSsDv
|
| 80 |
-
"""
|
| 81 |
-
|
| 82 |
-
import gradio as gr
|
| 83 |
-
import numpy as np
|
| 84 |
-
from transformers import pipeline
|
| 85 |
-
import os
|
| 86 |
-
import time
|
| 87 |
-
import groq
|
| 88 |
-
import uuid # For generating unique filenames
|
| 89 |
-
|
| 90 |
-
# Updated imports to address LangChain deprecation warnings:
|
| 91 |
-
from langchain_groq import ChatGroq
|
| 92 |
-
from langchain.schema import HumanMessage
|
| 93 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 94 |
-
from langchain_community.vectorstores import Chroma
|
| 95 |
-
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 96 |
-
from langchain.docstore.document import Document
|
| 97 |
-
|
| 98 |
-
# Importing chardet (make sure to add chardet to your requirements.txt)
|
| 99 |
-
import chardet
|
| 100 |
-
|
| 101 |
-
import fitz # PyMuPDF for PDFs
|
| 102 |
-
import docx # python-docx for Word files
|
| 103 |
-
import gtts # Google Text-to-Speech library
|
| 104 |
-
from pptx import Presentation # python-pptx for PowerPoint files
|
| 105 |
-
import re
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
# Initialize Whisper model for speech-to-text
|
| 109 |
-
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
|
| 110 |
-
|
| 111 |
-
# Set API Key (Ensure it's stored securely in an environment variable)
|
| 112 |
-
groq.api_key = os.getenv("GROQ_API_KEY")
|
| 113 |
-
|
| 114 |
-
# Initialize Chat Model
|
| 115 |
-
chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key) #DeepSeek-R1-Distill-Llama-70b | deepseek-r1-distill-qwen-32b
|
| 116 |
-
|
| 117 |
# Initialize Embeddings and chromaDB
|
| 118 |
os.makedirs("chroma_db", exist_ok=True)
|
| 119 |
embedding_model = HuggingFaceEmbeddings()
|
| 120 |
-
#new
|
| 121 |
vectorstore = Chroma(
|
| 122 |
embedding_function=embedding_model,
|
| 123 |
persist_directory="chroma_db" # Set a valid folder name or path
|
| 124 |
)
|
| 125 |
vectorstore.persist()
|
| 126 |
-
#end New
|
| 127 |
|
| 128 |
# Short-term memory for the LLM
|
| 129 |
chat_memory = []
|
|
@@ -171,7 +96,7 @@ d) 0.4
|
|
| 171 |
|
| 172 |
Answer: d) 0.4
|
| 173 |
|
| 174 |
-
Feedback: This question tests understanding of Bayes
|
| 175 |
"""
|
| 176 |
|
| 177 |
# Function to clean AI response by removing unwanted formatting
|
|
@@ -197,7 +122,7 @@ def retrieve_documents(query):
|
|
| 197 |
return [doc.page_content for doc in results]
|
| 198 |
|
| 199 |
# Function to handle chatbot interactions with short-term memory
|
| 200 |
-
def chat_with_groq(user_input):
|
| 201 |
try:
|
| 202 |
# Retrieve relevant documents for additional context
|
| 203 |
relevant_docs = retrieve_documents(user_input)
|
|
@@ -218,14 +143,16 @@ def chat_with_groq(user_input):
|
|
| 218 |
chat_memory.append(f"User: {user_input}")
|
| 219 |
chat_memory.append(f"AI: {cleaned_response_text}")
|
| 220 |
|
|
|
|
|
|
|
|
|
|
| 221 |
# Convert response to speech
|
| 222 |
audio_file = speech_playback(cleaned_response_text)
|
| 223 |
|
| 224 |
-
|
| 225 |
-
return [(user_input, cleaned_response_text)], audio_file
|
| 226 |
except Exception as e:
|
| 227 |
-
|
| 228 |
-
|
| 229 |
|
| 230 |
# Function to play response as speech using gTTS
|
| 231 |
def speech_playback(text):
|
|
@@ -321,37 +248,10 @@ def transcribe_audio(audio):
|
|
| 321 |
y /= np.max(np.abs(y))
|
| 322 |
return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
| 323 |
|
| 324 |
-
#
|
| 325 |
-
def
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
relevant_docs = retrieve_documents(user_input)
|
| 329 |
-
context = "\n".join(relevant_docs) if relevant_docs else "No relevant documents found."
|
| 330 |
-
|
| 331 |
-
# Construct proper prompting with conversation history
|
| 332 |
-
system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
|
| 333 |
-
conversation_history = "\n".join(chat_memory[-10:]) # Keep the last 10 exchanges
|
| 334 |
-
prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"
|
| 335 |
-
|
| 336 |
-
# Call the chat model
|
| 337 |
-
response = chat_model([HumanMessage(content=prompt)])
|
| 338 |
-
|
| 339 |
-
# Clean response to remove any unwanted formatting
|
| 340 |
-
cleaned_response_text = clean_response(response.content)
|
| 341 |
-
|
| 342 |
-
# Append conversation history
|
| 343 |
-
chat_memory.append(f"User: {user_input}")
|
| 344 |
-
chat_memory.append(f"AI: {cleaned_response_text}")
|
| 345 |
-
|
| 346 |
-
# Convert response to speech
|
| 347 |
-
audio_file = speech_playback(cleaned_response_text)
|
| 348 |
-
|
| 349 |
-
# Return both chat response and audio file path
|
| 350 |
-
return [(user_input, cleaned_response_text)], audio_file # Return as a tuple
|
| 351 |
-
except Exception as e:
|
| 352 |
-
return [("Error", str(e))], None
|
| 353 |
-
|
| 354 |
-
#__________________________________________________________________________________________________________________________
|
| 355 |
|
| 356 |
def tutor_ai_chatbot():
|
| 357 |
"""Main Gradio interface for the Tutor AI Chatbot."""
|
|
@@ -363,17 +263,14 @@ def tutor_ai_chatbot():
|
|
| 363 |
with gr.Tab("AI Chatbot"):
|
| 364 |
with gr.Row():
|
| 365 |
with gr.Column(scale=3):
|
| 366 |
-
#chatbot = gr.Chatbot(height=500) # Chatbot display area
|
| 367 |
chatbot = gr.Chatbot(height=500, type="messages")
|
| 368 |
with gr.Row():
|
| 369 |
msg = gr.Textbox(label="Ask a question", placeholder="Type your question here...")
|
| 370 |
submit = gr.Button("Send")
|
| 371 |
|
| 372 |
-
#with gr.Row():
|
| 373 |
with gr.Column(scale=1):
|
| 374 |
audio_input = gr.Audio(type="numpy", label="Record or Upload Audio") # Audio input for speech-to-text
|
| 375 |
|
| 376 |
-
|
| 377 |
with gr.Column(scale=1):
|
| 378 |
audio_playback = gr.Audio(label="Audio Response", type="filepath")
|
| 379 |
|
|
@@ -383,21 +280,18 @@ def tutor_ai_chatbot():
|
|
| 383 |
# Handle chat interaction
|
| 384 |
submit.click(
|
| 385 |
chat_with_groq,
|
| 386 |
-
inputs=[msg],
|
| 387 |
-
outputs=[chatbot, audio_playback]
|
| 388 |
)
|
| 389 |
|
| 390 |
# Clear chat history function
|
| 391 |
-
|
| 392 |
-
return None, None
|
| 393 |
-
|
| 394 |
-
clear_btn.click(clear_chat_history, inputs=None, outputs=[chatbot, audio_playback]) #,audio_input
|
| 395 |
|
| 396 |
# Also allow Enter key to submit
|
| 397 |
msg.submit(
|
| 398 |
chat_with_groq,
|
| 399 |
-
inputs=[msg],
|
| 400 |
-
outputs=[chatbot, audio_playback]
|
| 401 |
)
|
| 402 |
|
| 403 |
# Add some examples of questions students might ask
|
|
@@ -413,35 +307,30 @@ def tutor_ai_chatbot():
|
|
| 413 |
inputs=msg
|
| 414 |
)
|
| 415 |
|
|
|
|
|
|
|
|
|
|
| 416 |
# Upload Notes & Generate Quiz Tab
|
| 417 |
with gr.Tab("Upload Notes & Generate Quiz"):
|
| 418 |
with gr.Row():
|
| 419 |
with gr.Column(scale=2):
|
| 420 |
file_input = gr.File(label="Upload Lecture Notes (PDF, DOCX, PPTX)")
|
| 421 |
-
#generate_btn = gr.Button("Generate Quiz")
|
| 422 |
with gr.Column(scale=3):
|
| 423 |
quiz_output = gr.Textbox(label="Generated Quiz", lines=10)
|
| 424 |
|
|
|
|
|
|
|
| 425 |
|
| 426 |
# Introduction Video
|
| 427 |
with gr.Tab("Introduction Video"):
|
| 428 |
with gr.Row():
|
| 429 |
with gr.Column(scale=1):
|
| 430 |
-
|
| 431 |
-
gr.Markdown("
|
| 432 |
-
gr.Markdown("Music from Xu Mengyuan - China-O, musician Xu Mengyuan YUAN! | 徐梦圆 - China-O 音乐人徐梦圆YUAN! ") # Adding descriptive text
|
| 433 |
-
#gr.Video("https://github.com/lesterchia1/AI_tutor/raw/main/We%20not%20me%20video.mp4", label="Introduction Video")
|
| 434 |
gr.Video("https://huggingface.co/spaces/Lesterchia174/FPOC2_AI-Tutor_Chatbot/raw/main/We%20not%20me%20video.mp4", label="Introduction Video")
|
| 435 |
|
| 436 |
-
|
| 437 |
-
# Connect the button to the document processing function
|
| 438 |
-
audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=msg) # transcribe and fill the msg textbox
|
| 439 |
-
file_input.change(process_document, inputs=file_input, outputs=quiz_output)
|
| 440 |
-
|
| 441 |
-
|
| 442 |
# Launch the application
|
| 443 |
-
app.launch(share=True)
|
| 444 |
-
|
| 445 |
|
| 446 |
# Launch the AI chatbot
|
| 447 |
if __name__ == "__main__":
|
|
|
|
| 38 |
# Set API Key (Ensure it's stored securely in an environment variable)
|
| 39 |
groq.api_key = os.getenv("GROQ_API_KEY") # Replace with a valid API key
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# Initialize Chat Model
|
| 42 |
chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key) #DeepSeek-R1-Distill-Llama-70b , llama-3.3-70b-versatile , deepseek-r1-distill-qwen-32b
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# Initialize Embeddings and chromaDB
|
| 45 |
os.makedirs("chroma_db", exist_ok=True)
|
| 46 |
embedding_model = HuggingFaceEmbeddings()
|
|
|
|
| 47 |
vectorstore = Chroma(
|
| 48 |
embedding_function=embedding_model,
|
| 49 |
persist_directory="chroma_db" # Set a valid folder name or path
|
| 50 |
)
|
| 51 |
vectorstore.persist()
|
|
|
|
| 52 |
|
| 53 |
# Short-term memory for the LLM
|
| 54 |
chat_memory = []
|
|
|
|
| 96 |
|
| 97 |
Answer: d) 0.4
|
| 98 |
|
| 99 |
+
Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
|
| 100 |
"""
|
| 101 |
|
| 102 |
# Function to clean AI response by removing unwanted formatting
|
|
|
|
| 122 |
return [doc.page_content for doc in results]
|
| 123 |
|
| 124 |
# Function to handle chatbot interactions with short-term memory
|
| 125 |
+
def chat_with_groq(user_input, chat_history):
|
| 126 |
try:
|
| 127 |
# Retrieve relevant documents for additional context
|
| 128 |
relevant_docs = retrieve_documents(user_input)
|
|
|
|
| 143 |
chat_memory.append(f"User: {user_input}")
|
| 144 |
chat_memory.append(f"AI: {cleaned_response_text}")
|
| 145 |
|
| 146 |
+
# Update chat history for Gradio
|
| 147 |
+
chat_history.append((user_input, cleaned_response_text))
|
| 148 |
+
|
| 149 |
# Convert response to speech
|
| 150 |
audio_file = speech_playback(cleaned_response_text)
|
| 151 |
|
| 152 |
+
return chat_history, "", audio_file
|
|
|
|
| 153 |
except Exception as e:
|
| 154 |
+
chat_history.append((user_input, f"Error: {str(e)}"))
|
| 155 |
+
return chat_history, "", None
|
| 156 |
|
| 157 |
# Function to play response as speech using gTTS
|
| 158 |
def speech_playback(text):
|
|
|
|
| 248 |
y /= np.max(np.abs(y))
|
| 249 |
return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
| 250 |
|
| 251 |
+
# Clear chat history function
|
| 252 |
+
def clear_chat_history():
|
| 253 |
+
chat_memory.clear()
|
| 254 |
+
return [], None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
def tutor_ai_chatbot():
|
| 257 |
"""Main Gradio interface for the Tutor AI Chatbot."""
|
|
|
|
| 263 |
with gr.Tab("AI Chatbot"):
|
| 264 |
with gr.Row():
|
| 265 |
with gr.Column(scale=3):
|
|
|
|
| 266 |
chatbot = gr.Chatbot(height=500, type="messages")
|
| 267 |
with gr.Row():
|
| 268 |
msg = gr.Textbox(label="Ask a question", placeholder="Type your question here...")
|
| 269 |
submit = gr.Button("Send")
|
| 270 |
|
|
|
|
| 271 |
with gr.Column(scale=1):
|
| 272 |
audio_input = gr.Audio(type="numpy", label="Record or Upload Audio") # Audio input for speech-to-text
|
| 273 |
|
|
|
|
| 274 |
with gr.Column(scale=1):
|
| 275 |
audio_playback = gr.Audio(label="Audio Response", type="filepath")
|
| 276 |
|
|
|
|
| 280 |
# Handle chat interaction
|
| 281 |
submit.click(
|
| 282 |
chat_with_groq,
|
| 283 |
+
inputs=[msg, chatbot],
|
| 284 |
+
outputs=[chatbot, msg, audio_playback]
|
| 285 |
)
|
| 286 |
|
| 287 |
# Clear chat history function
|
| 288 |
+
clear_btn.click(clear_chat_history, inputs=None, outputs=[chatbot, audio_playback])
|
|
|
|
|
|
|
|
|
|
| 289 |
|
| 290 |
# Also allow Enter key to submit
|
| 291 |
msg.submit(
|
| 292 |
chat_with_groq,
|
| 293 |
+
inputs=[msg, chatbot],
|
| 294 |
+
outputs=[chatbot, msg, audio_playback]
|
| 295 |
)
|
| 296 |
|
| 297 |
# Add some examples of questions students might ask
|
|
|
|
| 307 |
inputs=msg
|
| 308 |
)
|
| 309 |
|
| 310 |
+
# Connect audio input to transcription
|
| 311 |
+
audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=msg)
|
| 312 |
+
|
| 313 |
# Upload Notes & Generate Quiz Tab
|
| 314 |
with gr.Tab("Upload Notes & Generate Quiz"):
|
| 315 |
with gr.Row():
|
| 316 |
with gr.Column(scale=2):
|
| 317 |
file_input = gr.File(label="Upload Lecture Notes (PDF, DOCX, PPTX)")
|
|
|
|
| 318 |
with gr.Column(scale=3):
|
| 319 |
quiz_output = gr.Textbox(label="Generated Quiz", lines=10)
|
| 320 |
|
| 321 |
+
# Connect file input to document processing
|
| 322 |
+
file_input.change(process_document, inputs=file_input, outputs=quiz_output)
|
| 323 |
|
| 324 |
# Introduction Video
|
| 325 |
with gr.Tab("Introduction Video"):
|
| 326 |
with gr.Row():
|
| 327 |
with gr.Column(scale=1):
|
| 328 |
+
gr.Markdown("### Welcome to the Introduction Video")
|
| 329 |
+
gr.Markdown("Music from Xu Mengyuan - China-O, musician Xu Mengyuan YUAN! | 徐梦圆 - China-O 音乐人徐梦圆YUAN! ")
|
|
|
|
|
|
|
| 330 |
gr.Video("https://huggingface.co/spaces/Lesterchia174/FPOC2_AI-Tutor_Chatbot/raw/main/We%20not%20me%20video.mp4", label="Introduction Video")
|
| 331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
# Launch the application
|
| 333 |
+
app.launch(share=True)
|
|
|
|
| 334 |
|
| 335 |
# Launch the AI chatbot
|
| 336 |
if __name__ == "__main__":
|