Commit a38c567 · Parent: e63867f

Added Audio Support

Files changed:
- __pycache__/utils.cpython-312.pyc (+0 -0)
- app.py (+42 -11)
- requirements.txt (+4 -1)
- utils.py (+19 -1)
__pycache__/utils.cpython-312.pyc CHANGED

Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ
app.py CHANGED

@@ -7,7 +7,8 @@ from utils import (
     retrieve_context_approx,
     build_prompt,
     ask_gemini,
-    load_documents_gradio,
+    load_documents_gradio,
+    transcribe
 )
 
 client = authenticate()

@@ -37,20 +38,50 @@ def handle_question(query):
     answer = ask_gemini(prompt, client)
     return f"### My Insights :\n\n{answer.strip()}"
 
-#gr.Image(value="bg.JPG", visible=True)
-…  (the other nine removed lines of the old UI block are not rendered on the page)
+def route_question(text_input, audio_input):
+    if text_input.strip():
+        return handle_question(text_input)
+    elif audio_input is not None:
+        transcribed = transcribe(audio_input)
+        return handle_question(transcribed)
+    else:
+        return "Please provide a question by typing or speaking."
 
+def show_audio():
+    return gr.update(visible=True)
 
+css = """
+#micbttn {
+    background-color: #FFCCCB;
+    font-size: 30px;
+    height: 59px;
+}
 
+#micINP {
+    background-color: #FFCCCB;
+}
+"""
 
+with gr.Blocks(css=css, theme='NoCrypt/miku') as demo:
+    gr.Markdown("## Ask Questions from Your Uploaded Documents")
+    file_input = gr.File(label="Upload Your File", file_types=['.pdf', '.txt', '.docx', '.csv', '.json', '.pptx', '.xml', '.xlsx'], file_count='multiple')
 
+    process_btn = gr.Button("Process Document")
+    status = gr.Textbox(label="Processing Status")
+
+    gr.Markdown("### Ask your question (type or speak):")
+
+    with gr.Row():
+        text_question = gr.Textbox(placeholder="Type your question...", scale=9, show_label=False)
+        mic_btn = gr.Button("🎤", scale=1, elem_id="micbttn")
+
+    audio_input = gr.Audio(sources=["microphone"], type="numpy", visible=False, label=None, elem_id="micINP")
+
+    submit_btn = gr.Button("Submit")
+    answer = gr.Markdown()
+
+    process_btn.click(upload_and_process, inputs=file_input, outputs=status)
+    mic_btn.click(show_audio, outputs=audio_input)
+    submit_btn.click(route_question, inputs=[text_question, audio_input], outputs=answer)
 
 demo.launch(share=True)  # Or demo.deploy(hf_space="your-username/your-space-name")
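A note on how the new wiring behaves: with `type="numpy"`, Gradio delivers microphone input as a `(sample_rate, samples)` tuple, and `route_question` gives typed text priority over audio. Below is a minimal sketch of that routing in isolation, with `handle_question` and `transcribe` stubbed out (the real ones live in app.py and utils.py):

```python
import numpy as np

# Stubs standing in for the app's real functions (assumption: signatures match the diff).
def handle_question(query):
    return f"### My Insights :\n\n(answer for: {query})"

def transcribe(audio):
    sr, y = audio
    return f"(transcript of {len(y) / sr:.1f} s of audio)"

def route_question(text_input, audio_input):
    if text_input.strip():                 # typed text wins when both inputs are present
        return handle_question(text_input)
    elif audio_input is not None:          # otherwise fall back to speech
        return handle_question(transcribe(audio_input))
    else:
        return "Please provide a question by typing or speaking."

mic = (48000, np.zeros(48000, dtype=np.int16))     # 1 s of silence, shaped like Gradio mic output
print(route_question("What is in the PDF?", mic))  # routed to the text path
print(route_question("", mic))                     # routed through transcription
print(route_question("", None))                    # neither input given
```

One quirk worth noting: the 🎤 button only reveals the hidden gr.Audio component via gr.update(visible=True); the recorded audio is still sent through the shared Submit button.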
requirements.txt CHANGED

@@ -11,4 +11,7 @@ unstructured[pdf]
 unstructured[docx]
 unstructured[ppt]
 unstructured[excel]
-unstructured[xml]
+unstructured[xml]
+torch
+torchaudio
+transformers
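The three added packages form the speech stack: transformers supplies the ASR pipeline, with torch/torchaudio as its backend. No versions are pinned in the diff. A quick smoke test that the checkpoint referenced in utils.py resolves (this downloads the model on first run):

```python
from transformers import pipeline

# Assumption: unpinned, reasonably recent torch/transformers versions.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
print(type(asr).__name__)  # AutomaticSpeechRecognitionPipeline
```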
utils.py CHANGED

@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore")
 from google import genai
 from google.genai import types
 from sentence_transformers import SentenceTransformer
+from transformers import pipeline
 from langchain_community.document_loaders import (
     UnstructuredPDFLoader,
     TextLoader,

@@ -138,4 +139,21 @@ def ask_gemini(prompt, client):
         contents=[prompt],
         config=types.GenerateContentConfig(max_output_tokens=2048, temperature=0.5, seed=42),
     )
-    return response.text
+    return response.text
+
+# Speech2Text:
+def transcribe(audio, model="openai/whisper-base.en"):
+    if audio is None:
+        raise ValueError("No audio detected!")
+
+    transcriber = pipeline("automatic-speech-recognition", model=model)
+    sr, y = audio  # sampling rate in Hz, y = amplitude array
+
+    if y.ndim > 1:  # convert to mono (1 channel) if stereo (2 channels: L & R)
+        y = y.mean(1)
+
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))  # normalize the amplitude values into the range [-1, 1]
+
+    result = transcriber({"sampling_rate": sr, "raw": y})
+    return result["text"]
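To make the `{"sampling_rate": ..., "raw": ...}` contract concrete, here is a sketch that feeds transcribe a synthetic stereo signal, exercising both the mono downmix and the normalization (it assumes numpy is already imported as np inside utils.py, which the function body relies on):

```python
import numpy as np
from utils import transcribe  # assumption: run next to utils.py

sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
tone = (0.25 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)  # 1 s, 440 Hz
stereo = np.stack([tone, tone], axis=1)  # shape (sr, 2) -> triggers the y.mean(1) downmix

print(repr(transcribe((sr, stereo))))  # whatever Whisper makes of a pure tone
```

Two things a careful reader might flag in the committed function: the pipeline is rebuilt on every call (caching it at module level would avoid reloading Whisper per question), and y /= np.max(np.abs(y)) divides by zero on perfectly silent input.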