Chia Woon Yap committed on
Commit
6024ffc
·
verified ·
1 Parent(s): 8b326b1

Rename apptbc.py to app3.py

Files changed (1)
  1. apptbc.py → app3.py +68 -30
apptbc.py → app3.py RENAMED
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 """app
-
 Automatically generated by Colab.
-
 Original file is located at
     https://colab.research.google.com/drive/1pwwcBb5Zlw1DA3u5K8W8mjrwBTBWXc1L
 """
@@ -15,13 +13,35 @@ import time
 import groq
 import uuid # For generating unique filenames
 
-# Updated imports to address LangChain deprecation warnings:
-from langchain_groq import ChatGroq
-from langchain.schema import HumanMessage
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
+# OLD imports, kept for reference (an earlier fix for LangChain deprecation warnings):
+#from langchain_groq import ChatGroq
+#*from langchain.schema import HumanMessage
+#from langchain_core.messages import HumanMessage
+#*from langchain_text_splitters import RecursiveCharacterTextSplitter
+#try:
+#    # For newer versions
+#    from langchain_text_splitters import RecursiveCharacterTextSplitter
+#except ImportError:
+#    # For older versions
+#    from langchain.text_splitter import RecursiveCharacterTextSplitter
+#from langchain_community.vectorstores import Chroma
+#from langchain_community.embeddings import HuggingFaceEmbeddings
+#*from langchain.docstore.document import Document
+#from langchain_core.documents import Document
+#from langchain.chains import RetrievalQA # This one might still be in main langchain
+
+
+# NEW IMPORTS (current):
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+from langchain_community.document_loaders import TextLoader, PyPDFLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.docstore.document import Document
+from langchain_community.llms import HuggingFaceHub
+#from langchain_community.chains import RetrievalQA
+#from langchain.chains.retrieval_qa.base import RetrievalQA # This one might still be in main langchain
+from langchain_community.vectorstores import Chroma # from old library
+from langchain_groq import ChatGroq
 
 # Importing chardet (make sure to add chardet to your requirements.txt)
 import chardet
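
An editorial aside on the import block above: the new paths imply a split-package LangChain install. A sketch of the matching requirements.txt, inferred from the imports shown and from code used later in this file; package names are the usual PyPI ones, and the list and unpinned versions are assumptions, not taken from the repo:

# requirements.txt (illustrative, unpinned)
langchain-core
langchain-community
langchain-text-splitters
langchain-groq
groq
chromadb              # backend used by the Chroma vectorstore
sentence-transformers # needed by HuggingFaceEmbeddings
transformers          # provides pipeline() for Whisper transcription
torch
numpy
gradio
chardet               # encoding detection, as the comment above notes
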
@@ -68,33 +88,24 @@ For each question:
 - Ensure responses are concise and educational to enhance understanding.
 Output Example:
 1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.
-
 Answer: Agent Core
-
 Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.
-
 2. What is the main limitation of LLM-based applications?
 a) Limited token capacity
 b) Lack of domain expertise
 c) Prone to hallucination
 d) All of the above
-
 Answer: d) All of the above
-
 Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.
-
 3. Given the following info, what is the value of P(Jam|Rain)?
 P(no Rain) = 0.8;
 P(no Jam) = 0.2;
 P(Rain|Jam) = 0.1
-
 a) 0.016
 b) 0.025
 c) 0.1
 d) 0.4
-
 Answer: d) 0.4
-
 Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
 """
 
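A quick editorial check of question 3, since the prompt asserts d) as correct: by the complement rule, P(Rain) = 1 - 0.8 = 0.2 and P(Jam) = 1 - 0.2 = 0.8, so Bayes' theorem gives P(Jam|Rain) = P(Rain|Jam) * P(Jam) / P(Rain) = 0.1 * 0.8 / 0.2 = 0.4, confirming answer d). Verified in Python:

# Verify quiz question 3 with Bayes' theorem
p_rain = 1 - 0.8   # from P(no Rain) = 0.8
p_jam = 1 - 0.2    # from P(no Jam) = 0.2
p_rain_given_jam = 0.1
print(p_rain_given_jam * p_jam / p_rain)  # 0.4 -> answer d)
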
@@ -111,10 +122,13 @@ def clean_response(response):
 # Function to generate quiz based on content
 def generate_quiz(content):
     prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
-    response = chat_model([HumanMessage(content=prompt)])
+    #response = chat_model([HumanMessage(content=prompt)])
+    # Use invoke method instead of direct calling
+    response = chat_model.invoke([HumanMessage(content=prompt)])
     cleaned_response = clean_response(response.content)
     return cleaned_response
 
+
 # Function to retrieve relevant documents from vectorstore based on user query
 def retrieve_documents(query):
     results = vectorstore.similarity_search(query, k=3)
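
For readers tracking the API change here and in `chat_with_groq` below: LangChain deprecated calling a chat model directly in favor of the Runnable `invoke` method, which returns an `AIMessage` whose text lives in `.content` (hence the `response.content` access that follows). A self-contained sketch; the model name and environment variable are illustrative assumptions, since the commit does not show how `chat_model` is constructed:

import os
from langchain_core.messages import HumanMessage
from langchain_groq import ChatGroq

# Hypothetical setup; the app builds its chat_model elsewhere in this file.
chat_model = ChatGroq(
    model="llama-3.1-8b-instant",        # assumed model name, illustration only
    api_key=os.environ["GROQ_API_KEY"],  # assumed credential source
)

response = chat_model.invoke([HumanMessage(content="Write one quiz question.")])
print(response.content)  # the AIMessage's generated text
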
@@ -156,7 +170,8 @@ def chat_with_groq(user_input, chat_history):
     prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"
 
     # Call the chat model
-    response = chat_model([HumanMessage(content=prompt)])
+    #response = chat_model([HumanMessage(content=prompt)])
+    response = chat_model.invoke([HumanMessage(content=prompt)]) # Call the chat model using invoke method
 
     # Clean response to remove any unwanted formatting
     cleaned_response_text = clean_response(response.content)
@@ -273,33 +288,44 @@ def process_document(file):
 # y /= np.max(np.abs(y))
 # return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
+
 #Quick Fixes You Can Try First:
 
 def transcribe_audio(audio):
+    """Real-time optimized transcription"""
+    if audio is None:
+        return ""
+
     sr, y = audio
+
+    # Quick preprocessing
     if y.ndim > 1:
         y = y.mean(axis=1)
-    y = y.astype(np.float32)
 
-    # Improved normalization
+    y = y.astype(np.float32)
     max_val = np.max(np.abs(y))
     if max_val > 0:
-        y /= max_val
+        y = y / max_val
 
-    # Use better model
-    better_transcriber = pipeline(
+    # Use tiny model for real-time speed
+    realtime_transcriber = pipeline(
         "automatic-speech-recognition",
-        model="openai/whisper-small.en", # More accurate
-        chunk_length_s=30
+        model="openai/whisper-tiny.en", # Fastest model
+        device="cuda" if torch.cuda.is_available() else "cpu",
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        generate_kwargs={
+            "language": "english",
+            "task": "transcribe",
+            "temperature": 0.0, # More deterministic
+            "no_repeat_ngram_size": 2
+        }
     )
 
-    return better_transcriber({"sampling_rate": sr, "raw": y})["text"]
+    return realtime_transcriber({"sampling_rate": sr, "raw": y})["text"]
 
     # the remaining is the same
 
 
-
-
 # Clear chat history function
 def clear_chat_history():
     chat_memory.clear()
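
A performance aside on the function above, not part of the commit itself: because `pipeline(...)` is created inside `transcribe_audio`, the Whisper model is reloaded on every call. A minimal sketch of a cached variant under the same transformers API; `get_transcriber` and `transcribe_audio_cached` are hypothetical names for illustration:

import numpy as np
from functools import lru_cache
from transformers import pipeline

@lru_cache(maxsize=1)
def get_transcriber():
    # Load the ASR model once; later calls reuse the same object.
    return pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")

def transcribe_audio_cached(audio):
    if audio is None:
        return ""
    sr, y = audio
    if y.ndim > 1:
        y = y.mean(axis=1)  # downmix stereo to mono
    y = y.astype(np.float32)
    max_val = np.max(np.abs(y))
    if max_val > 0:
        y = y / max_val  # peak-normalize to [-1, 1]
    return get_transcriber()({"sampling_rate": sr, "raw": y})["text"]
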
@@ -333,6 +359,18 @@ def tutor_ai_chatbot():
         with gr.Column(scale=1):
             audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
 
+            # Voice recording tips - ONLY in AI Chatbot tab
+            with gr.Accordion("🎤 Voice Recording Tips", open=False):
+                gr.Markdown("""
+                **For better speech recognition accuracy:**
+                - 🎙️ Speak clearly and at a moderate pace
+                - 🔇 Record in a quiet environment
+                - 📏 Keep the microphone close to your mouth (10-15 cm)
+                - 🎧 Use a good quality microphone if possible
+                - 📝 Review the transcribed text before sending
+                - 🔄 If transcription is poor, try recording again or type manually
+                """)
+
         # Clear chat history button
         clear_btn = gr.Button("Clear Chat")
 
@@ -398,4 +436,4 @@ def tutor_ai_chatbot():
 
 # Launch the AI chatbot
 if __name__ == "__main__":
-    tutor_ai_chatbot()
+    tutor_ai_chatbot()