Chia Woon Yap committed
Commit d6f71f7 · verified · 1 Parent(s): 19a0f6f

Update app.py

Files changed (1)
  1. app.py +13 -76
app.py CHANGED
@@ -11,38 +11,25 @@ from transformers import pipeline
 import os
 import time
 import groq
-import uuid  # For generating unique filenames
+import uuid
 
-# Add torch imports at the top
-import torch
-import torchaudio
-
-
-# NEW IMPORTS (current):
+# LangChain imports
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
-from langchain_community.document_loaders import TextLoader, PyPDFLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.llms import HuggingFaceHub
-#from langchain_community.chains import RetrievalQA
-#from langchain.chains.retrieval_qa.base import RetrievalQA  # This one might still be in main langchain
-from langchain_community.vectorstores import Chroma  # from old library
+from langchain_community.vectorstores import Chroma
 from langchain_groq import ChatGroq
 
-# Importing chardet (make sure to add chardet to your requirements.txt)
+# Other imports
 import chardet
-
 import fitz  # PyMuPDF for PDFs
 import docx  # python-docx for Word files
 import gtts  # Google Text-to-Speech library
 from pptx import Presentation  # python-pptx for PowerPoint files
 import re
 
-# Initialize Whisper model for speech-to-text
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-
-# Set API Key (Ensure it's stored securely in an environment variable)
+# Set API Key
 groq.api_key = os.getenv("GROQ_API_KEY")
 
 # Initialize Chat Model
@@ -59,7 +46,7 @@ vectorstore = Chroma(
 # Short-term memory for the LLM
 chat_memory = []
 
-# Prompt for quiz generation with added remark
+# Prompt for quiz generation
 quiz_prompt = """
 You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.
 Generate 20 Questions.
@@ -109,13 +96,10 @@ def clean_response(response):
 # Function to generate quiz based on content
 def generate_quiz(content):
     prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
-    #response = chat_model([HumanMessage(content=prompt)])
-    # Use invoke method instead of direct calling
     response = chat_model.invoke([HumanMessage(content=prompt)])
     cleaned_response = clean_response(response.content)
     return cleaned_response
 
-
 # Function to retrieve relevant documents from vectorstore based on user query
 def retrieve_documents(query):
     results = vectorstore.similarity_search(query, k=3)
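
Note: the commented-out lines removed above (and the matching change in chat_with_groq below) capture the migration from LangChain's deprecated direct-call style, chat_model([...]), to .invoke(). A minimal, self-contained sketch of the pattern — the model name is an illustrative assumption, since the actual ChatGroq setup sits outside this diff:

from langchain_core.messages import HumanMessage
from langchain_groq import ChatGroq

# ChatGroq picks up GROQ_API_KEY from the environment by default.
# The model name below is an assumption for illustration only.
chat_model = ChatGroq(model="llama-3.1-8b-instant")

# .invoke() accepts a list of messages and returns an AIMessage;
# the deprecated chat_model([...]) call form did the same thing.
response = chat_model.invoke([HumanMessage(content="Write one MCQ about photosynthesis.")])
print(response.content)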
@@ -123,7 +107,6 @@ def retrieve_documents(query):
 
 # Function to convert tuple format to message format
 def convert_to_message_format(chat_history):
-    #Convert from [(user, bot)] format to [{"role": "user", "content": user}, {"role": "assistant", "content": bot}] format"""
     message_format = []
     for user_msg, bot_msg in chat_history:
         message_format.append({"role": "user", "content": user_msg})
@@ -132,7 +115,6 @@ def convert_to_message_format(chat_history):
 
 # Function to convert message format to tuple format for processing
 def convert_to_tuple_format(chat_history):
-    #Convert from message format back to tuple format for processing"""
     tuple_format = []
     for i in range(0, len(chat_history), 2):
         if i+1 < len(chat_history):
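
The two history converters are only partially visible in this diff. For reference, a sketch of the full pair as reconstructed from the visible context lines; the assistant-side append and both return statements are assumptions based on the docstrings this commit deletes:

def convert_to_message_format(chat_history):
    # [(user, bot), ...] -> [{"role": "user", ...}, {"role": "assistant", ...}, ...]
    message_format = []
    for user_msg, bot_msg in chat_history:
        message_format.append({"role": "user", "content": user_msg})
        message_format.append({"role": "assistant", "content": bot_msg})  # assumed
    return message_format  # assumed

def convert_to_tuple_format(chat_history):
    # message format -> [(user, bot), ...] pairs for processing
    tuple_format = []
    for i in range(0, len(chat_history), 2):
        if i + 1 < len(chat_history):
            tuple_format.append((chat_history[i]["content"],
                                 chat_history[i + 1]["content"]))  # assumed
    return tuple_format  # assumed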
@@ -153,12 +135,11 @@ def chat_with_groq(user_input, chat_history):
 
     # Construct proper prompting with conversation history
     system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
-    conversation_history = "\n".join(chat_memory[-10:])  # Keep the last 10 exchanges
+    conversation_history = "\n".join(chat_memory[-10:])
     prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"
 
     # Call the chat model
-    #response = chat_model([HumanMessage(content=prompt)])
-    response = chat_model.invoke([HumanMessage(content=prompt)])  # Call the chat model using invoke method
+    response = chat_model.invoke([HumanMessage(content=prompt)])
 
     # Clean response to remove any unwanted formatting
     cleaned_response_text = clean_response(response.content)
@@ -167,7 +148,7 @@ def chat_with_groq(user_input, chat_history):
     chat_memory.append(f"User: {user_input}")
     chat_memory.append(f"AI: {cleaned_response_text}")
 
-    # Update chat history - add new messages in the correct format
+    # Update chat history
     chat_history.append({"role": "user", "content": user_input})
     chat_history.append({"role": "assistant", "content": cleaned_response_text})
 
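Taken together, the chat_with_groq changes show the prompt being assembled from three parts: retrieved context, the last ten chat_memory entries, and the new user input. A runnable sketch of that flow with an in-memory Chroma store — the embedding model, collection name, and sample document are illustrative assumptions, since the real vector store setup is outside this diff:

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

# Assumed embedding model and collection name, for illustration only.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(collection_name="demo", embedding_function=embeddings)
vectorstore.add_documents([Document(page_content="Mitochondria produce ATP through cellular respiration.")])

chat_memory = ["User: hello", "AI: Hi! How can I help?"]
user_input = "What do mitochondria do?"

# Same retrieval call as retrieve_documents(): the three nearest chunks.
results = vectorstore.similarity_search(user_input, k=3)
context = "\n".join(doc.page_content for doc in results)  # joining is assumed

system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
conversation_history = "\n".join(chat_memory[-10:])
prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"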
@@ -266,40 +247,6 @@ def process_document(file):
         return f"Error processing document: {str(e)}"
 
 # Function to handle speech-to-text conversion
-
-#def transcribe_audio(audio):
-#    sr, y = audio
-#    if y.ndim > 1:
-#        y = y.mean(axis=1)
-#    y = y.astype(np.float32)
-#    y /= np.max(np.abs(y))
-#    return transcriber({"sampling_rate": sr, "raw": y})["text"]
-
-"""
-# Real-time Whisper setup - cache the model
-#@gr.cache_resource
-#def load_realtime_whisper():
-#    """Load optimized Whisper model for real-time transcription"""
-#    device = "cuda" if torch.cuda.is_available() else "cpu"
-#    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-#    # Use tiny model for real-time speed
-#    realtime_transcriber = pipeline(
-#        "automatic-speech-recognition",
-#        model="openai/whisper-tiny.en",
-#        device=device,
-#        torch_dtype=torch_dtype,
-#    )
-
-#    return realtime_transcriber
-
-
-
-
-
-
-# Load model at startup
-# Function to handle speech-to-text conversion
 def transcribe_audio(audio):
     """Simple working transcription"""
     if audio is None:
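
Everything removed above is dead code: a commented-out numpy transcriber and an unused real-time whisper-tiny loader (the surviving transcribe_audio body is not shown in this diff). For reference, a runnable sketch assembled from those removed comments; the function name transcribe_numpy is mine:

import numpy as np
from transformers import pipeline

# whisper-base.en matches the transcriber this commit removes from the imports section.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def transcribe_numpy(audio):
    """Transcribe a Gradio-style (sample_rate, samples) tuple."""
    if audio is None:
        return ""
    sr, y = audio
    if y.ndim > 1:
        y = y.mean(axis=1)          # downmix stereo to mono
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))          # normalize to [-1, 1]
    return transcriber({"sampling_rate": sr, "raw": y})["text"]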
@@ -336,20 +283,10 @@ def clear_chat_history():
     chat_memory.clear()
     return [], None
 
-
-
-# the remaining is the same
-
-
-# Clear chat history function
-def clear_chat_history():
-    chat_memory.clear()
-    return [], None
-
 def tutor_ai_chatbot():
     """Main Gradio interface for the Tutor AI Chatbot."""
     with gr.Blocks() as app:
-        gr.Markdown("# 📚 AI Tutor - We.(POC)")
+        gr.Markdown("# AI Tutor - We.(POC)")
         gr.Markdown("An interactive Personal AI Tutor chatbot to help with your learning needs.")
 
         # Chatbot Tab
@@ -366,7 +303,7 @@ def tutor_ai_chatbot():
                 msg = gr.Textbox(
                     label="Ask a question",
                     placeholder="Type your question here...",
-                    container=False  # Removes the default container styling
+                    container=False
                 )
                 submit = gr.Button("Send")
 
@@ -398,7 +335,7 @@ def tutor_ai_chatbot():
 
         # Clear chat history function
         clear_btn.click(
-            lambda: [],  # Return empty list in message format
+            lambda: [],
            inputs=None,
            outputs=[chatbot]
        )
@@ -451,4 +388,4 @@ def tutor_ai_chatbot():
 
 # Launch the AI chatbot
 if __name__ == "__main__":
-    tutor_ai_chatbot()
+    tutor_ai_chatbot()
 