Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,6 +26,21 @@ from langgraph.prebuilt import ToolNode, tools_condition
|
|
| 26 |
from sentence_transformers import SentenceTransformer
|
| 27 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 28 |
# from langchain.agents import create_tool_calling_agent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# (Keep Constants as is)
|
| 31 |
# --- Constants ---
|
|
@@ -964,6 +979,36 @@ safe_workflow.add_edge("tool_executor", "safety")
|
|
| 964 |
|
| 965 |
safe_app = safe_workflow.compile()
|
| 966 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 967 |
# --------------------------
|
| 968 |
# Define user query function
|
| 969 |
# --------------------------
|
|
@@ -975,10 +1020,20 @@ def answer_question(user_question):
|
|
| 975 |
agent_answer = result["output"]
|
| 976 |
return agent_answer
|
| 977 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
# --------------------------
|
| 979 |
# Gradio UI
|
| 980 |
# --------------------------
|
| 981 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 982 |
gr.Markdown("# Ask the Main Agent")
|
| 983 |
|
| 984 |
user_input = gr.Textbox(
|
|
@@ -986,12 +1041,20 @@ with gr.Blocks() as demo:
|
|
| 986 |
placeholder="Type any question here...",
|
| 987 |
lines=2
|
| 988 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
|
| 990 |
answer_output = gr.Textbox(
|
| 991 |
label="Agent Response"
|
| 992 |
)
|
| 993 |
|
| 994 |
-
|
|
|
|
| 995 |
|
| 996 |
submit_btn.click(
|
| 997 |
fn=answer_question,
|
|
@@ -999,4 +1062,10 @@ with gr.Blocks() as demo:
|
|
| 999 |
outputs=answer_output
|
| 1000 |
)
|
| 1001 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1002 |
demo.launch()
|
|
|
|
| 26 |
from sentence_transformers import SentenceTransformer
|
| 27 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 28 |
# from langchain.agents import create_tool_calling_agent
|
| 29 |
+
import torch
|
| 30 |
+
import gradio as gr
|
| 31 |
+
from transformers import pipeline
|
| 32 |
+
|
| 33 |
+
# Speech-to-text checkpoint; update with your own model id if needed.
audio_model_id = "Sandiago21/whisper-large-v2-greek"

# Automatic-speech-recognition pipeline, constructed once at module import.
audio_pipe = pipeline(
    task="automatic-speech-recognition",
    model=audio_model_id,
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# title = "Automatic Speech Recognition (ASR)"
|
| 38 |
+
# description = """
|
| 39 |
+
# Demo for automatic speech recognition in Greek. Demo uses [Sandiago21/whisper-large-v2-greek](https://huggingface.co/Sandiago21/whisper-large-v2-greek) checkpoint, which is based on OpenAI's
|
| 40 |
+
# [Whisper](https://huggingface.co/openai/whisper-large-v2) model and is fine-tuned in Greek Audio dataset
|
| 41 |
+
# ")
|
| 42 |
+
# """
|
| 43 |
+
|
| 44 |
|
| 45 |
# (Keep Constants as is)
|
| 46 |
# --- Constants ---
|
|
|
|
| 979 |
|
| 980 |
safe_app = safe_workflow.compile()
|
| 981 |
|
| 982 |
+
|
| 983 |
+
|
| 984 |
+
def transcribe_speech(filepath):
    """Transcribe an audio recording to text with the ASR pipeline.

    Args:
        filepath: Path to the audio file to transcribe (the Gradio audio
            components in this file are configured with ``type="filepath"``).

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    # `audio_pipe` is the automatic-speech-recognition pipeline created at
    # module level; it is the object that must receive the audio file.
    output = audio_pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            # Update with the language you've fine-tuned on.
            "language": "greek",
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]
|
| 996 |
+
|
| 997 |
+
|
| 998 |
+
# Standalone microphone-transcription interface.
# NOTE(review): the only visible `title` / `description` assignments in this
# file are commented out, so the texts are passed here as literals to avoid a
# NameError at import time — confirm no other definition exists elsewhere.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    # List form, consistent with the other gr.Audio component in this file.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(),
    title="Automatic Speech Recognition (ASR)",
    description=(
        "Demo for automatic speech recognition in Greek. Demo uses "
        "[Sandiago21/whisper-large-v2-greek]"
        "(https://huggingface.co/Sandiago21/whisper-large-v2-greek) "
        "checkpoint, which is based on OpenAI's "
        "[Whisper](https://huggingface.co/openai/whisper-large-v2) model "
        "and is fine-tuned in Greek Audio dataset"
    ),
)
|
| 1005 |
+
|
| 1006 |
+
|
| 1007 |
+
|
| 1008 |
+
|
| 1009 |
+
|
| 1010 |
+
|
| 1011 |
+
|
| 1012 |
# --------------------------
|
| 1013 |
# Define user query function
|
| 1014 |
# --------------------------
|
|
|
|
| 1020 |
agent_answer = result["output"]
|
| 1021 |
return agent_answer
|
| 1022 |
|
| 1023 |
+
def answer_from_audio(audio):
    """Transcribe a spoken question and return the agent's answer to it.

    Args:
        audio: Path to the recorded audio file, or ``None`` / empty when the
            user submitted without recording anything.

    Returns:
        The result of ``answer_question`` for the transcribed text, or a short
        notice when no audio was provided.
    """
    # Without a recording there is nothing to transcribe; return a readable
    # notice instead of passing an empty value into the speech pipeline.
    if not audio:
        return "No audio received. Please record your question and try again."

    text = transcribe_speech(audio)
    return answer_question(text)
|
| 1026 |
+
|
| 1027 |
+
|
| 1028 |
# --------------------------
|
| 1029 |
# Gradio UI
|
| 1030 |
# --------------------------
|
| 1031 |
with gr.Blocks() as demo:
|
| 1032 |
+
# gr.TabbedInterface(
|
| 1033 |
+
# [mic_transcribe, file_transcribe],
|
| 1034 |
+
# ["Transcribe Microphone", "Transcribe Audio File"],
|
| 1035 |
+
# )
|
| 1036 |
+
|
| 1037 |
gr.Markdown("# Ask the Main Agent")
|
| 1038 |
|
| 1039 |
user_input = gr.Textbox(
|
|
|
|
| 1041 |
placeholder="Type any question here...",
|
| 1042 |
lines=2
|
| 1043 |
)
|
| 1044 |
+
|
| 1045 |
+
# 🎤 AUDIO INPUT (new)
|
| 1046 |
+
audio_input = gr.Audio(
|
| 1047 |
+
sources=["microphone"],
|
| 1048 |
+
type="filepath",
|
| 1049 |
+
label="Or speak your question"
|
| 1050 |
+
)
|
| 1051 |
|
| 1052 |
answer_output = gr.Textbox(
|
| 1053 |
label="Agent Response"
|
| 1054 |
)
|
| 1055 |
|
| 1056 |
+
submit_text_btn = gr.Button("Ask (Text)")
|
| 1057 |
+
submit_audio_btn = gr.Button("Ask (Voice)")
|
| 1058 |
|
| 1059 |
submit_btn.click(
|
| 1060 |
fn=answer_question,
|
|
|
|
| 1062 |
outputs=answer_output
|
| 1063 |
)
|
| 1064 |
|
| 1065 |
+
submit_audio_btn.click(
|
| 1066 |
+
fn=answer_from_audio,
|
| 1067 |
+
inputs=audio_input,
|
| 1068 |
+
outputs=answer_output
|
| 1069 |
+
)
|
| 1070 |
+
|
| 1071 |
demo.launch()
|