Sandiago21 committed on
Commit
34d05cf
·
verified ·
1 Parent(s): 30237ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -1
app.py CHANGED
@@ -26,6 +26,21 @@ from langgraph.prebuilt import ToolNode, tools_condition
26
  from sentence_transformers import SentenceTransformer
27
  from sklearn.metrics.pairwise import cosine_similarity
28
  # from langchain.agents import create_tool_calling_agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # (Keep Constants as is)
31
  # --- Constants ---
@@ -964,6 +979,36 @@ safe_workflow.add_edge("tool_executor", "safety")
964
 
965
  safe_app = safe_workflow.compile()
966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  # --------------------------
968
  # Define user query function
969
  # --------------------------
@@ -975,10 +1020,20 @@ def answer_question(user_question):
975
  agent_answer = result["output"]
976
  return agent_answer
977
 
 
 
 
 
 
978
  # --------------------------
979
  # Gradio UI
980
  # --------------------------
981
  with gr.Blocks() as demo:
 
 
 
 
 
982
  gr.Markdown("# Ask the Main Agent")
983
 
984
  user_input = gr.Textbox(
@@ -986,12 +1041,20 @@ with gr.Blocks() as demo:
986
  placeholder="Type any question here...",
987
  lines=2
988
  )
 
 
 
 
 
 
 
989
 
990
  answer_output = gr.Textbox(
991
  label="Agent Response"
992
  )
993
 
994
- submit_btn = gr.Button("Ask")
 
995
 
996
  submit_btn.click(
997
  fn=answer_question,
@@ -999,4 +1062,10 @@ with gr.Blocks() as demo:
999
  outputs=answer_output
1000
  )
1001
 
 
 
 
 
 
 
1002
  demo.launch()
 
26
  from sentence_transformers import SentenceTransformer
27
  from sklearn.metrics.pairwise import cosine_similarity
28
  # from langchain.agents import create_tool_calling_agent
29
import torch
import gradio as gr
from transformers import pipeline

# ASR checkpoint for Greek speech-to-text; update with your own model id if needed.
audio_model_id = "Sandiago21/whisper-large-v2-greek"
audio_pipe = pipeline("automatic-speech-recognition", model=audio_model_id)

# BUG FIX: `title` and `description` are referenced by the ASR gr.Interface
# defined further down; they were only present here as commented-out code,
# which raises NameError at import time. Define them for real.
title = "Automatic Speech Recognition (ASR)"
description = """
Demo for automatic speech recognition in Greek. Demo uses [Sandiago21/whisper-large-v2-greek](https://huggingface.co/Sandiago21/whisper-large-v2-greek) checkpoint, which is based on OpenAI's
[Whisper](https://huggingface.co/openai/whisper-large-v2) model and is fine-tuned in Greek Audio dataset
![Automatic Speech Recognition (ASR)"](https://datasets-server.huggingface.co/assets/huggingface-course/audio-course-images/--/huggingface-course--audio-course-images/train/2/image/image.png "Diagram of Automatic Speech Recognition (ASR)")
"""
 
45
  # (Keep Constants as is)
46
  # --- Constants ---
 
979
 
980
  safe_app = safe_workflow.compile()
981
 
982
+
983
+
984
def transcribe_speech(filepath):
    """Transcribe a Greek audio file to text.

    Parameters
    ----------
    filepath : str
        Path to an audio file, as produced by ``gr.Audio(type="filepath")``.

    Returns
    -------
    str
        The transcribed text.
    """
    # BUG FIX: the ASR pipeline is created above as `audio_pipe`; the original
    # body called the undefined name `pipe`, raising NameError on first use.
    output = audio_pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "greek",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]
996
+
997
+
998
# Standalone ASR demo interface (microphone -> Greek transcription).
# BUG FIX: the original passed `title=title, description=description`, but
# those names only exist in commented-out code above, so building this
# Interface raised NameError at import time. Use literal strings instead.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(),
    title="Automatic Speech Recognition (ASR)",
    description=(
        "Demo for automatic speech recognition in Greek, using the "
        "Sandiago21/whisper-large-v2-greek checkpoint fine-tuned from "
        "OpenAI's Whisper large-v2 model."
    ),
)
1005
+
1006
+
1007
+
1008
+
1009
+
1010
+
1011
+
1012
  # --------------------------
1013
  # Define user query function
1014
  # --------------------------
 
1020
  agent_answer = result["output"]
1021
  return agent_answer
1022
 
1023
def answer_from_audio(audio):
    """Transcribe a spoken question and route it through the main agent.

    Parameters
    ----------
    audio : str | None
        Path to the recorded audio file, or ``None`` when the user clicked
        the button without recording anything.

    Returns
    -------
    str
        The agent's answer, or a prompt to record audio when input is empty.
    """
    # Robustness: gr.Audio passes None when no recording was made; the
    # original would hand None to the ASR pipeline and crash.
    if audio is None:
        return "Please record a question first."
    text = transcribe_speech(audio)
    return answer_question(text)
1026
+
1027
+
1028
  # --------------------------
1029
  # Gradio UI
1030
  # --------------------------
1031
# --------------------------
# Gradio UI: text or voice questions, answered by the main agent.
# --------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Ask the Main Agent")

    user_input = gr.Textbox(
        label="Your Question",  # NOTE(review): original label hidden in diff fold — confirm
        placeholder="Type any question here...",
        lines=2
    )

    # Audio input: lets the user speak the question instead of typing it.
    audio_input = gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Or speak your question"
    )

    answer_output = gr.Textbox(
        label="Agent Response"
    )

    submit_text_btn = gr.Button("Ask (Text)")
    submit_audio_btn = gr.Button("Ask (Voice)")

    # BUG FIX: this commit removed/renamed `submit_btn` to `submit_text_btn`,
    # but the click hook still referenced the old `submit_btn` name, raising
    # NameError at import time.
    submit_text_btn.click(
        fn=answer_question,
        inputs=user_input,  # NOTE(review): inputs line hidden in diff fold — confirm
        outputs=answer_output
    )

    submit_audio_btn.click(
        fn=answer_from_audio,
        inputs=audio_input,
        outputs=answer_output
    )

demo.launch()