Update app.py
app.py CHANGED
@@ -9,38 +9,46 @@ from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import HuggingFaceEndpoint
 
-# Load Whisper model
+# Load Whisper model (you can use "base", "small", "medium", or "large")
 model = whisper.load_model("tiny")
 
-#
+# Model config for Hugging Face Inference API
+hub = {
+    "HF_MODEL_ID": "mistralai/Mistral-7B-Instruct-v0.2",  # Must be Inference API compatible
+    "HF_TASK": "text-generation",
+    "HF_API_TOKEN": os.environ["HUGGING_FACE_READ_TOKEN"]
+}
+
+# Global state
 vector_db = None
 qa_chain = None
 
-# Function to transcribe and initialize RAG
+# Function to transcribe and initialize RAG pipeline
 def transcribe_and_setup(audio_file_path):
     global vector_db, qa_chain
 
     if audio_file_path is None:
         return "No audio uploaded.", None, None, ""
 
+    # Transcribe with Whisper
     result = model.transcribe(audio_file_path)
-    transcript = result[
+    transcript = result["text"]
 
-    #
+    # Split and embed transcript
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
     splits = text_splitter.create_documents([transcript])
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_db = FAISS.from_documents(splits, embeddings)
 
-    # Create QA chain
+    # Create retriever + LLM QA chain
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     retriever = vector_db.as_retriever()
     llm = HuggingFaceEndpoint(
-        repo_id="
-
+        repo_id=hub["HF_MODEL_ID"],
+        task=hub["HF_TASK"],
+        huggingfacehub_api_token=hub["HF_API_TOKEN"],
         temperature=0.5,
-        max_new_tokens=512
-        task="text-generation"
+        max_new_tokens=512
     )
     qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
 
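The new `hub` block calls `os.environ["HUGGING_FACE_READ_TOKEN"]`, so the import header above line 9 (hidden context in this diff) must provide `os` along with the other modules this file uses. A plausible reconstruction of that header, inferred from the names referenced below; the exact contents of lines 1-8 are an assumption:

# Assumed reconstruction of the hidden header (lines 1-8); inferred, not part of the diff.
import os                 # required by the new hub config (os.environ lookup)
import whisper            # openai-whisper; provides whisper.load_model
import gradio as gr       # UI layer for the gr.Blocks section below

from langchain.text_splitter import RecursiveCharacterTextSplitter   # import path may vary by version
from langchain_community.embeddings import HuggingFaceEmbeddings     # likewise version-dependent
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain            # confirmed by the hunk header above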
@@ -50,14 +58,14 @@ def transcribe_and_setup(audio_file_path):
 def answer_question(question):
     global qa_chain
     if qa_chain is None:
-        return "Please upload an audio file
+        return "Please upload and process an audio file first."
     response = qa_chain.invoke({"question": question, "chat_history": []})
-    return response[
+    return response["answer"]
 
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Monochrome(), css="footer {display:none !important;}") as demo:
     gr.Markdown("## 🎙️ **Audio Intelligence Assistant**")
-    gr.Markdown("Upload an audio file, get the transcript, and ask questions about
+    gr.Markdown("Upload an audio file, get the transcript, and ask questions about its content!")
 
     with gr.Row():
         with gr.Column(scale=1):
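Because the chain is built with `ConversationBufferMemory`, the memory supplies `chat_history` itself (in recent LangChain versions the memory's stored value takes precedence over explicitly passed inputs), so the empty list passed to `invoke` above is likely redundant. A minimal usage sketch, assuming `transcribe_and_setup` has already populated `qa_chain`:

# Usage sketch; assumes qa_chain was initialized by transcribe_and_setup.
result = qa_chain.invoke({"question": "What topics does the recording cover?"})
print(result["answer"])  # the chain returns a dict with an "answer" key, as used above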
@@ -66,7 +74,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css="footer {display:none !importan
             status_output = gr.Textbox(label="🛠️ Status", interactive=False)
             transcript_output = gr.Textbox(label="📄 Transcript", lines=10, interactive=False)
         with gr.Column(scale=1):
-            question_input = gr.Textbox(label="❓ Ask a question about the audio", placeholder="What
+            question_input = gr.Textbox(label="❓ Ask a question about the audio", placeholder="e.g., What was discussed?")
             ask_button = gr.Button("💬 Ask")
             answer_output = gr.Textbox(label="🤖 Answer", lines=5)
 
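The event wiring lives in the hidden context between this hunk and the next (new lines 81-90). A typical Gradio binding for these components, purely illustrative since the actual handlers are not shown:

# Illustrative only; the real bindings are in the unshown part of the diff.
ask_button.click(fn=answer_question, inputs=question_input, outputs=answer_output)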
@@ -83,3 +91,4 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css="footer {display:none !importan
     )
 
 demo.launch()
+
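Note that `os.environ["HUGGING_FACE_READ_TOKEN"]` raises a bare KeyError at import time if the secret is not set on the Space. An alternative lookup with a clearer failure message (a sketch, not what this commit does):

# Alternative sketch: fail with an explicit message instead of a bare KeyError.
token = os.environ.get("HUGGING_FACE_READ_TOKEN")
if token is None:
    raise RuntimeError("Set the HUGGING_FACE_READ_TOKEN secret before launching this Space.")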