Chia Woon Yap commited on
Commit
8aef97e
·
verified ·
1 Parent(s): 8b814a3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -0
app.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1pwwcBb5Zlw1DA3u5K8W8mjrwBTBWXc1L
8
+ """
9
+
10
+ import gradio as gr
11
+ import numpy as np
12
+ from transformers import pipeline
13
+ import os
14
+ import time
15
+ import groq
16
+ import uuid # For generating unique filenames
17
+
18
+ # Updated imports to address LangChain deprecation warnings:
19
+ from langchain_groq import ChatGroq
20
+ from langchain.schema import HumanMessage
21
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
22
+ from langchain_community.vectorstores import Chroma
23
+ from langchain_community.embeddings import HuggingFaceEmbeddings
24
+ from langchain.docstore.document import Document
25
+
26
+ # Importing chardet (make sure to add chardet to your requirements.txt)
27
+ import chardet
28
+
29
+ import fitz # PyMuPDF for PDFs
30
+ import docx # python-docx for Word files
31
+ import gtts # Google Text-to-Speech library
32
+ from pptx import Presentation # python-pptx for PowerPoint files
33
+ import re
34
+
35
# Initialize Whisper model for speech-to-text
# (downloaded from the Hugging Face hub on first run; English-only base model)
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

# Set API Key (Ensure it's stored securely in an environment variable)
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset, so ChatGroq
# below would receive api_key=None — confirm the Space defines this secret.
groq.api_key = os.getenv("GROQ_API_KEY")

# Initialize Chat Model
chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key)

# Initialize Embeddings and chromaDB
# The directory must exist before Chroma persists into it.
os.makedirs("chroma_db", exist_ok=True)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory="chroma_db"
)

# Short-term memory for the LLM: flat list of "User: ..." / "AI: ..." strings,
# appended by chat_with_groq and cleared by clear_chat_history.
chat_memory = []
54
+
55
# Prompt for quiz generation with added remark.
# Prepended verbatim to the uploaded document's text by generate_quiz();
# the "remove formatting" instruction is backstopped by clean_response().
quiz_prompt = """
You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.
Generate 20 Questions.
Remove all unnecessary formatting generated by the LLM, including <think> tags, asterisks, markdown formatting, and any bold or italic text, as well as **, ###, ##, and # tags.
For each question:
- Provide 4 answer choices (for MCQs), with only one correct answer.
- Ensure fill-in-the-blank questions focus on key terms, phrases, or concepts from the document.
- Include an answer key for all questions.
- Ensure questions vary in difficulty and encourage comprehension rather than memorization.
- Additionally, implement an instant feedback mechanism:
- When a user selects an answer, indicate whether it is correct or incorrect.
- If incorrect, provide a brief explanation from the document to guide learning.
- Ensure responses are concise and educational to enhance understanding.
Output Example:
1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.

Answer: Agent Core

Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.

2. What is the main limitation of LLM-based applications?
a) Limited token capacity
b) Lack of domain expertise
c) Prone to hallucination
d) All of the above

Answer: d) All of the above

Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.

3. Given the following info, what is the value of P(jam|Rain)?
P(no Rain) = 0.8;
P(no Jam) = 0.2;
P(Rain|Jam) = 0.1

a) 0.016
b) 0.025
c) 0.1
d) 0.4

Answer: d) 0.4

Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
"""
100
+
101
# Function to clean AI response by removing unwanted formatting
def clean_response(response):
    """Strip LLM formatting artifacts from *response* and return plain text.

    Removes <think>...</think> blocks, asterisks and square brackets,
    Markdown heading markers, backslashes, and '---' rules, then trims
    surrounding whitespace.
    """
    cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
    cleaned_text = re.sub(r"(\*\*|\*|\[|\])", "", cleaned_text)
    # Fix: the quiz prompt asks for '#', '##' and '###' markers to be removed,
    # but the previous pattern (^##+) only matched two or more hashes, so
    # single-'#' headings leaked through. Match one or more hashes instead.
    cleaned_text = re.sub(r"^#+\s*", "", cleaned_text, flags=re.MULTILINE)
    cleaned_text = re.sub(r"\\", "", cleaned_text)
    cleaned_text = re.sub(r"---", "", cleaned_text)
    return cleaned_text.strip()
110
+
111
# Function to generate quiz based on content
def generate_quiz(content):
    """Wrap *content* in the quiz prompt, query the LLM, and return cleaned text."""
    full_prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
    llm_reply = chat_model([HumanMessage(content=full_prompt)])
    return clean_response(llm_reply.content)
117
+
118
# Function to retrieve relevant documents from vectorstore based on user query
def retrieve_documents(query):
    """Return the page content of the 3 chunks most similar to *query*."""
    hits = vectorstore.similarity_search(query, k=3)
    return [hit.page_content for hit in hits]
122
+
123
# Function to handle chatbot interactions with short-term memory
def chat_with_groq(user_input, chat_history):
    """Answer *user_input* with RAG context plus recent conversation memory.

    Returns a 3-tuple for the Gradio outputs: (updated chat history,
    empty string to clear the textbox, path to the TTS audio file or None).
    """
    try:
        # Retrieve relevant documents for additional context
        relevant_docs = retrieve_documents(user_input)
        context = "\n".join(relevant_docs) if relevant_docs else "No relevant documents found."

        # Construct proper prompting with conversation history
        system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
        conversation_history = "\n".join(chat_memory[-10:])  # Keep the last 10 exchanges
        prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"

        # Call the chat model
        response = chat_model([HumanMessage(content=prompt)])

        # Clean response to remove any unwanted formatting
        cleaned_response_text = clean_response(response.content)

        # Append conversation history
        chat_memory.append(f"User: {user_input}")
        chat_memory.append(f"AI: {cleaned_response_text}")

        # Fix: the UI declares gr.Chatbot(type="messages"), which expects
        # openai-style {"role", "content"} dicts; appending (user, bot)
        # tuples breaks rendering under that mode.
        chat_history.append({"role": "user", "content": user_input})
        chat_history.append({"role": "assistant", "content": cleaned_response_text})

        # Convert response to speech
        audio_file = speech_playback(cleaned_response_text)

        return chat_history, "", audio_file
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        chat_history.append({"role": "user", "content": user_input})
        chat_history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        return chat_history, "", None
155
+
156
# Function to play response as speech using gTTS
def speech_playback(text):
    """Synthesize *text* to a uniquely named MP3; return its path, or None on error."""
    try:
        # Unique filename per utterance so Gradio never serves a stale file.
        out_path = f"output_audio_{uuid.uuid4()}.mp3"
        gtts.gTTS(text, lang='en').save(out_path)
        return out_path
    except Exception as e:
        print(f"Error in speech_playback: {e}")
        return None
172
+
173
# Function to detect encoding safely
def detect_encoding(file_path):
    """Sniff the text encoding of the first 4 KiB of *file_path*; fall back to utf-8."""
    try:
        with open(file_path, "rb") as fh:
            sample = fh.read(4096)
        guess = chardet.detect(sample)["encoding"]
        return guess or "utf-8"
    except Exception:
        return "utf-8"
183
+
184
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return newline-joined page text from *pdf_path*, or an error/empty notice."""
    try:
        pdf = fitz.open(pdf_path)
        pages = [page.get_text("text") for page in pdf]
        text = "\n".join(pages)
        if text.strip():
            return text
        return "No extractable text found."
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
192
+
193
# Function to extract text from Word files (.docx)
def extract_text_from_docx(docx_path):
    """Return newline-joined paragraph text from *docx_path*, or an error/empty notice."""
    try:
        document = docx.Document(docx_path)
        paragraphs = [para.text for para in document.paragraphs]
        text = "\n".join(paragraphs)
        if text.strip():
            return text
        return "No extractable text found."
    except Exception as e:
        return f"Error extracting text from Word document: {str(e)}"
201
+
202
# Function to extract text from PowerPoint files (.pptx)
def extract_text_from_pptx(pptx_path):
    """Collect the text of every text-bearing shape on every slide of *pptx_path*."""
    try:
        pieces = []
        for slide in Presentation(pptx_path).slides:
            for shape in slide.shapes:
                # Not all shapes (pictures, charts) carry a .text attribute.
                if hasattr(shape, "text"):
                    pieces.append(shape.text + "\n")
        text = "".join(pieces)
        if text.strip():
            return text
        return "No extractable text found."
    except Exception as e:
        return f"Error extracting text from PowerPoint: {str(e)}"
214
+
215
# Function to process documents safely
def process_document(file):
    """Extract text from the uploaded *file*, index it, and return a generated quiz.

    Dispatches on file extension (PDF/DOCX/PPTX, else plain text with sniffed
    encoding), chunks the text into the vectorstore, then asks the LLM for a quiz.
    """
    try:
        file_extension = os.path.splitext(file.name)[-1].lower()
        if file_extension in [".png", ".jpg", ".jpeg"]:
            return "Error: Images cannot be processed for text extraction."

        extractors = {
            ".pdf": extract_text_from_pdf,
            ".docx": extract_text_from_docx,
            ".pptx": extract_text_from_pptx,
        }
        extractor = extractors.get(file_extension)
        if extractor is not None:
            content = extractor(file.name)
        else:
            # Unknown extension: treat as plain text with best-effort decoding.
            with open(file.name, "r", encoding=detect_encoding(file.name), errors="replace") as fh:
                content = fh.read()

        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_text(content)
        vectorstore.add_documents([Document(page_content=chunk) for chunk in chunks])

        quiz = generate_quiz(content)
        return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
    except Exception as e:
        return f"Error processing document: {str(e)}"
239
+
240
# Function to handle speech-to-text conversion
def transcribe_audio(audio):
    """Transcribe a Gradio (sample_rate, samples) numpy audio tuple with Whisper.

    Downmixes multi-channel input to mono and peak-normalizes to [-1, 1]
    before handing the raw waveform to the ASR pipeline.
    """
    sr, y = audio
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    # Fix: guard the normalization — on silent (all-zero) input the previous
    # unconditional `y /= np.max(np.abs(y))` divided by zero and produced NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
248
+
249
# Clear chat history function
def clear_chat_history():
    """Empty the short-term memory and reset the chatbot display and audio output."""
    del chat_memory[:]
    return [], None
253
+
254
def tutor_ai_chatbot():
    """Main Gradio interface for the Tutor AI Chatbot.

    Builds three tabs (chat, quiz-from-notes, intro video placeholder) and
    launches the app. Blocks until the Gradio server stops.
    """
    with gr.Blocks() as app:
        gr.Markdown("# 📚 AI Tutor - We.(POC)")
        gr.Markdown("An interactive Personal AI Tutor chatbot to help with your learning needs.")

        # Chatbot Tab: text + voice Q&A backed by chat_with_groq
        with gr.Tab("AI Chatbot"):
            with gr.Row():
                with gr.Column(scale=3):
                    # type="messages" expects openai-style role/content history
                    chatbot = gr.Chatbot(height=500, type="messages")
                    with gr.Row():
                        msg = gr.Textbox(label="Ask a question", placeholder="Type your question here...")
                        submit = gr.Button("Send")

                with gr.Column(scale=1):
                    # Microphone / file input; delivers (sample_rate, ndarray)
                    audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")

                with gr.Column(scale=1):
                    # Plays the MP3 path returned by speech_playback
                    audio_playback = gr.Audio(label="Audio Response", type="filepath")

            # Clear chat history button
            clear_btn = gr.Button("Clear Chat")

            # Handle chat interaction (Send button)
            submit.click(
                chat_with_groq,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg, audio_playback]
            )

            # Clear chat history function
            clear_btn.click(clear_chat_history, inputs=None, outputs=[chatbot, audio_playback])

            # Also allow Enter key to submit
            msg.submit(
                chat_with_groq,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg, audio_playback]
            )

            # Add some examples of questions students might ask
            with gr.Accordion("Example Questions", open=False):
                gr.Examples(
                    examples=[
                        "Can you explain the concept of RLHF AI?",
                        "What are AI transformers?",
                        "What is MoE AI?",
                        "What's gate networks AI?",
                        "I am making a switch, please generating baking recipe?"
                    ],
                    inputs=msg
                )

            # Connect audio input to transcription (result fills the question box)
            audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=msg)

        # Upload Notes & Generate Quiz Tab
        with gr.Tab("Upload Notes & Generate Quiz"):
            with gr.Row():
                with gr.Column(scale=2):
                    file_input = gr.File(label="Upload Lecture Notes (PDF, DOCX, PPTX)")
                with gr.Column(scale=3):
                    quiz_output = gr.Textbox(label="Generated Quiz", lines=10)

            # Connect file input to document processing
            file_input.change(process_document, inputs=file_input, outputs=quiz_output)

        # Introduction Video Tab - Fixed with a placeholder
        with gr.Tab("Introduction Video"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Welcome to the Introduction Video")
                    gr.Markdown("Music from Xu Mengyuan - China-O, musician Xu Mengyuan YUAN! | 徐梦圆 - China-O 音乐人徐梦圆YUAN!")
                    gr.Markdown("**Video coming soon!**")
                    gr.Markdown("To add a video, please upload it to your Hugging Face Space files and update the video path.")

                    # Alternative: You can upload a video file to your Space and use:
                    # gr.Video("we_not_me_video.mp4", label="Introduction Video")

    # Launch the application
    # Remove share=True for Hugging Face Spaces
    app.launch(share=False)
337
+
338
# Launch the AI chatbot when executed as a script (Hugging Face Spaces entry point)
if __name__ == "__main__":
    tutor_ai_chatbot()