Spaces:
Sleeping
Sleeping
Update app.py
#1
by
Jason0829
- opened
app.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
| 1 |
#!/usr/bin/env python
|
| 2 |
# coding: utf-8
|
| 3 |
|
| 4 |
-
# In[7]:
|
| 5 |
-
|
| 6 |
-
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
from sentence_transformers import SentenceTransformer, util
|
| 10 |
import torch
|
|
|
|
| 11 |
|
| 12 |
# 載入語義搜索模型
|
| 13 |
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
|
|
@@ -20,7 +18,7 @@ data = pd.read_json(data_path)
|
|
| 20 |
|
| 21 |
# 載入嵌入文件
|
| 22 |
embeddings_path = 'corpus_embeddings.pt'
|
| 23 |
-
corpus_embeddings = torch.load(embeddings_path
|
| 24 |
|
| 25 |
def semantic_search(query):
|
| 26 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
|
@@ -33,21 +31,28 @@ def semantic_search(query):
|
|
| 33 |
|
| 34 |
return "\n".join(results)
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
iface = gr.Interface(
|
| 37 |
fn=semantic_search,
|
| 38 |
-
inputs="text",
|
| 39 |
outputs="text",
|
| 40 |
title="語義搜索應用",
|
| 41 |
-
description="
|
|
|
|
| 42 |
)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
if __name__ == "__main__":
    # Start the Gradio app and request a public share link.
    # NOTE (translated from the original author's remark): the web page
    # does not come up.
    iface.launch(share=True)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
# In[ ]:
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
| 1 |
#!/usr/bin/env python
|
| 2 |
# coding: utf-8
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
from sentence_transformers import SentenceTransformer, util
|
| 7 |
import torch
|
| 8 |
+
import openai # New import for Whisper API
|
| 9 |
|
| 10 |
# 載入語義搜索模型
|
| 11 |
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
|
|
|
|
| 18 |
|
| 19 |
# 載入嵌入文件
|
| 20 |
embeddings_path = 'corpus_embeddings.pt'
|
| 21 |
+
corpus_embeddings = torch.load(embeddings_path)
|
| 22 |
|
| 23 |
def semantic_search(query):
|
| 24 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
|
|
|
| 31 |
|
| 32 |
return "\n".join(results)
|
| 33 |
|
| 34 |
+
# New function to transcribe audio using Whisper API
|
| 35 |
+
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file to text with the OpenAI Whisper API.

    Args:
        audio_file: The value handed over by the Gradio ``"file"`` input.
            Presumably either a filesystem path or a temp-file wrapper that
            exposes the path as ``.name`` (depends on the installed Gradio
            version -- TODO confirm against the deployed version).

    Returns:
        The transcription, read from the ``'text'`` field of the response.
    """
    # Resolve to a path: use ``.name`` when the object has one, otherwise
    # treat the value itself as the path.
    audio_path = getattr(audio_file, "name", audio_file)
    # The legacy ``openai.Audio.transcribe`` call needs an open binary file
    # object (it reads the file's ``name`` to build the upload); the raw
    # bytes passed previously are rejected.
    with open(audio_path, "rb") as audio_stream:
        response = openai.Audio.transcribe("whisper-1", audio_stream)
    return response['text']
|
| 39 |
+
|
| 40 |
+
# Modified interface to include audio input
|
| 41 |
def handle_input(input_text, audio_file):
    """Run the semantic search on typed text or on transcribed audio.

    Args:
        input_text: Query typed into the text box.
        audio_file: Uploaded audio file, or ``None`` when nothing was uploaded.

    Returns:
        Whatever ``semantic_search`` returns for the chosen query.
    """
    # An uploaded audio file takes precedence over the typed query.
    if audio_file is not None:
        input_text = transcribe_audio(audio_file)
    return semantic_search(input_text)


# Built after handle_input so the callback name exists at construction time.
iface = gr.Interface(
    # Two inputs are declared, so the callback must accept two arguments;
    # semantic_search on its own only takes the query.
    fn=handle_input,
    inputs=["text", "file"],  # typed query + optional audio upload
    outputs="text",
    title="語義搜索應用",
    description="輸入一個查詢或上傳一個音頻文件,然後模型將返回最相似的結果。",
    # With more than one input component, examples must be a nested list:
    # one inner list per example, one value per input.
    examples=[["", "example_audio.wav"]],
)
|
| 55 |
+
|
| 56 |
if __name__ == "__main__":
    # Start the Gradio app and request a public share link.
    # NOTE (translated from the original author's remark): the web page
    # does not come up.
    iface.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|