Spaces:
Sleeping
Sleeping
File size: 9,662 Bytes
0457db3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
import gradio as gr
import torch
import os
import requests ,json
from dotenv import load_dotenv
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, WhisperProcessor, WhisperForConditionalGeneration
import re
import librosa
# Pick the best available accelerator: Apple MPS first, then CUDA, then CPU.
device = (
torch.device("mps") if torch.backends.mps.is_available() else
torch.device("cuda") if torch.cuda.is_available() else
torch.device("cpu")
)
# Multilingual sentence-embedding model used to retrieve corpus sentences for RAG.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
model = AutoModel.from_pretrained("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
model = model.to(device)
# Whisper speech-to-text model; note it is NOT moved to `device` here, so it runs on CPU.
model_id = "openai/whisper-small"
processor_transcribe = WhisperProcessor.from_pretrained(model_id)
model_transcribe = WhisperForConditionalGeneration.from_pretrained(model_id)
# LLM endpoint credentials are read from a .env file (MY_SECRET_KEY, invoke_url).
load_dotenv()
secret_key = os.getenv("MY_SECRET_KEY")
invoke_url = os.getenv("invoke_url")
# When True, gen_llm consumes the response as a server-sent-event stream.
stream = True
headers = {
"Authorization": secret_key,  # NOTE(review): many APIs expect "Bearer <key>" — confirm the stored value includes the scheme
"Accept": "text/event-stream" if stream else "application/json"
}
def gen_llm(text):
    """Send *text* to the LLM chat-completions endpoint and return the reply.

    When the module-level ``stream`` flag is True, the server-sent-event
    response is consumed line by line and the concatenated content deltas are
    returned as a single string; on a malformed stream line an error
    description string is returned instead. When ``stream`` is False, the raw
    parsed JSON payload is returned unchanged.
    """
    payload = {
        "model": "meta/llama-4-maverick-17b-128e-instruct",
        "messages": [{"role": "user", "content": text}],
        "max_tokens": 512,
        "temperature": 1.00,
        "top_p": 1.00,
        "frequency_penalty": 0.00,
        "presence_penalty": 0.00,
        "stream": stream,
    }
    response = requests.post(invoke_url, headers=headers, json=payload, stream=stream)
    # Guard clause: non-streaming mode hands back the parsed body as-is.
    if not stream:
        return response.json()
    full_text = []
    for line in response.iter_lines():
        if not line or not line.strip():
            continue  # skip SSE keep-alive blank lines
        try:
            line_str = line.decode('utf-8')
            # SSE frames are prefixed with "data: "; strip it before parsing.
            if line_str.startswith("data: "):
                line_str = line_str[6:]
            # "[DONE]" is the end-of-stream sentinel, not JSON.
            if line_str.strip() == "[DONE]":
                break
            line_dict = json.loads(line_str)
            content = line_dict["choices"][0]["delta"].get("content")
            if content is not None:
                full_text.append(content)
        except json.JSONDecodeError as e:
            return (f"JSON decode error: {e}")
        except (KeyError, IndexError, TypeError) as e:
            return (f"Malformed line or missing fields: {e}")
    # FIX: removed the dead `joined_text = []` list initialisation that was
    # immediately overwritten by this string join.
    return ''.join(full_text)
def embed_text(texts):
    """Embed a list of strings into L2-normalised sentence vectors.

    FIX: the original took ``last_hidden_state[:, 0, :]`` (CLS pooling), but
    sentence-transformers MiniLM checkpoints are trained with attention-mask
    mean pooling; CLS pooling degrades similarity quality for these models.

    Returns a (len(texts), hidden_dim) tensor on `device`, unit-norm rows so
    a dot product equals cosine similarity.
    """
    inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state                      # (batch, seq, dim)
    mask = inputs["attention_mask"].unsqueeze(-1).type_as(token_embeddings)
    # Mean over real (non-padding) tokens only; clamp avoids divide-by-zero.
    embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
    embeddings = F.normalize(embeddings, p=2, dim=1)  # normalise for cosine similarity
    return embeddings
def check_and_read_txt(file):
    """Read an uploaded text file, embed its corpus, and reveal the chat UI.

    Returns 8 values matching the ``file_input.change`` outputs list:
    (output_text visibility, output_text value, btn, chatbot, chat_input,
    transcribe_button, transcribe_dropdown, audio_input).
    """
    hidden = gr.update(visible=False)
    shown = gr.update(visible=True)
    if file is None:
        # BUG FIX: this branch previously returned only 2 values while the
        # event wiring expects 8 outputs, so clearing the file raised in Gradio.
        return hidden, "", hidden, hidden, hidden, hidden, hidden, hidden
    try:
        with open(file.name, 'r', encoding="utf-8") as f:
            content = f.read()
        corpus_list = [s.strip() for s in re.split(r'\.\s+', content) if s.strip()]
        global corpus
        # NOTE(review): only the FIRST sentence-chunk is split into lines here,
        # mirroring the original logic — confirm the rest of the file is
        # intentionally discarded from the retrieval corpus.
        corpus = corpus_list[0].split('\n')
        corpus_embeddings = embed_text(corpus)
        global state_global
        state_global = corpus_embeddings
        return shown, content, shown, shown, shown, shown, shown, shown
    except Exception as e:
        # An empty file (IndexError on corpus_list[0]) also lands here.
        return shown, f"Error reading file: {str(e)}", shown, shown, shown, shown, shown, shown
def save_text(input_text):
    """Re-embed the (possibly hand-edited) corpus text and echo it back.

    Splits the text on sentence boundaries, keeps the newline-separated lines
    of the first chunk as the retrieval corpus, and refreshes the module-level
    embedding matrix used by `respond`.
    """
    global corpus, state_global
    chunks = [piece.strip() for piece in re.split(r'\.\s+', input_text) if piece.strip()]
    corpus = chunks[0].split('\n')
    state_global = embed_text(corpus)
    return input_text
# Languages accepted by Whisper's multilingual tokenizer, shown in the UI dropdown.
transcribe_list = ['english', 'chinese', 'german', 'spanish', 'russian', 'korean', 'french', 'japanese', 'portuguese', 'turkish', 'polish', 'catalan', 'dutch', 'arabic', 'swedish', 'italian', 'indonesian', 'hindi', 'finnish', 'vietnamese', 'hebrew', 'ukrainian', 'greek', 'malay', 'czech', 'romanian', 'danish', 'hungarian', 'tamil', 'norwegian', 'thai', 'urdu', 'croatian', 'bulgarian', 'lithuanian', 'latin', 'maori', 'malayalam', 'welsh', 'slovak', 'telugu', 'persian', 'latvian', 'bengali', 'serbian', 'azerbaijani', 'slovenian', 'kannada', 'estonian', 'macedonian', 'breton', 'basque', 'icelandic', 'armenian', 'nepali', 'mongolian', 'bosnian', 'kazakh', 'albanian', 'swahili', 'galician', 'marathi', 'punjabi', 'sinhala', 'khmer', 'shona', 'yoruba', 'somali', 'afrikaans', 'occitan', 'georgian', 'belarusian', 'tajik', 'sindhi', 'gujarati', 'amharic', 'yiddish', 'lao', 'uzbek', 'faroese', 'haitian creole', 'pashto', 'turkmen', 'nynorsk', 'maltese', 'sanskrit', 'luxembourgish', 'myanmar', 'tibetan', 'tagalog', 'malagasy', 'assamese', 'tatar', 'hawaiian', 'lingala', 'hausa', 'bashkir', 'javanese', 'sundanese', 'cantonese', 'burmese', 'valencian', 'flemish', 'haitian', 'letzeburgesch', 'pushto', 'panjabi', 'moldavian', 'moldovan', 'sinhalese', 'castilian', 'mandarin']
# NOTE(review): `global` at module scope is a no-op — kept byte-identical here.
global selected_lang
# Default transcription language until the dropdown handler overrides it.
selected_lang = "english"
def transcribe(audio):
    """Transcribe a recorded audio clip with Whisper in the selected language.

    `audio` is a filepath produced by the Gradio microphone component (or
    None when nothing was recorded). Returns the decoded transcription string.
    """
    if audio is None:
        return "No audio recorded."
    # FIX: removed leftover debug print of selected_lang, and the unnecessary
    # `global` declaration (the function only reads the module-level value).
    # Force Whisper to decode in the language chosen from the dropdown.
    forced_decoder_ids = processor_transcribe.get_decoder_prompt_ids(
        language=selected_lang, task="transcribe"
    )
    # Whisper expects 16 kHz mono input; librosa resamples on load.
    audio_array, sampling_rate = librosa.load(audio, sr=16000)
    inputs = processor_transcribe(audio_array, sampling_rate=16000, return_tensors="pt")
    input_features = inputs.input_features.to(model_transcribe.device)
    predicted_ids = model_transcribe.generate(input_features, forced_decoder_ids=forced_decoder_ids)
    transcription = processor_transcribe.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
def select_lang(language):
    """Remember the transcription language picked in the dropdown.

    Stores the value in the module-level `selected_lang` read by `transcribe`;
    returns None (the dropdown's change event has no outputs).
    """
    global selected_lang
    selected_lang = language
# UI components are created up front (mostly hidden) and rendered into the
# Blocks layout below; file upload reveals the rest of the interface.
chatbot = gr.Chatbot(label="💬 Chat",type="messages", visible=False)
chat_input = gr.Textbox(label="Enter text here:", visible=False)
file_input = gr.File(label="Upload File", file_types=[".txt", "*"])
output_text = gr.Textbox(label="Result", lines=20, interactive=True, visible=False)
btn = gr.Button("Update Information", visible=False)
transcribe_button = gr.Button("Transcribe", visible=False)
# NOTE(review): label typo "Transcibe" — user-facing string left unchanged here.
transcribe_dropdown = gr.Dropdown(choices=transcribe_list, label="Transcibe Language", visible=False)
audio_input = gr.Audio(sources="microphone", type="filepath", label="Record Audio", visible=False)
state_text = gr.State("")
# Module-level mutable state shared by the handlers above: the corpus
# embedding matrix and the corpus sentences themselves.
# NOTE(review): `global` at module scope is a no-op — kept byte-identical.
global state_global
state_global = ""
global corpus
corpus = []
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: file upload, editable corpus text, and re-embed button.
        with gr.Column(scale=1):
            file_input.render()
            output_text.render()
            btn.render()
            file_input.change(
                fn=check_and_read_txt,
                inputs=file_input,
                outputs=[output_text, output_text, btn, chatbot, chat_input, transcribe_button, transcribe_dropdown, audio_input]
            )
            btn.click(
                fn=save_text,
                inputs=output_text,
                outputs=state_text
            )
        # Right column: chat window plus microphone transcription controls.
        with gr.Column(scale=2):
            chatbot.render()
            chat_input.render()
            with gr.Row():
                audio_input.render()
                transcribe_button.render()
                transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=chat_input)
                transcribe_dropdown.render()
                transcribe_dropdown.change(fn=select_lang, inputs=transcribe_dropdown)

    def respond(message, history):
        """Chat handler: retrieve top corpus sentences and ask the LLM.

        Returns the updated messages-format history and an empty string to
        clear the input textbox.
        """
        if history is None:
            history = []
        query_embedding = embed_text([message])
        # reshape(-1) keeps scores 1-D even for a single-sentence corpus,
        # where the original squeeze() produced a non-iterable 0-D tensor.
        cosine_scores = torch.matmul(query_embedding, state_global.T).reshape(-1)
        # BUG FIX: topk(k=3) raised when the corpus held fewer than 3 entries.
        k = min(3, cosine_scores.numel())
        top_k_indices = torch.topk(cosine_scores, k=k).indices
        context = [corpus[i] for i in top_k_indices]
        prompt = (
            f"Use the following information to answer the user's question : '{context}'\n"
            f"This is chat history : {history}\n"
            f"Then answer this question : '{message}'\n"
            f"Respond clearly and helpfully as a assistant. Keep your response focused and informative, but not overly brief. No need to explain how you got the answer.\n"
            f"Answer in the same language as the user input. If the answer is not in the context, say something like: "
            f"'I do not quite understand. Could you please try rephrasing or describing it differently?'\n"
        )
        bot_ans = gen_llm(prompt)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_ans})
        return history, ""  # updated history and a cleared input box

    chat_input.submit(fn=respond, inputs=[chat_input, chatbot], outputs=[chatbot, chat_input])

if __name__ == "__main__":
    demo.launch()
|