File size: 9,662 Bytes
0457db3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import gradio as gr
import torch
import os
import requests ,json
from dotenv import load_dotenv
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, WhisperProcessor, WhisperForConditionalGeneration
import re
import librosa

# --- Model / device setup (runs once at import time) ---------------------
# Prefer Apple-Silicon MPS, then CUDA, then CPU.
device = (
    torch.device("mps") if torch.backends.mps.is_available() else
    torch.device("cuda") if torch.cuda.is_available() else
    torch.device("cpu")
)
# Multilingual sentence-embedding model used for retrieval (see embed_text).
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
model = AutoModel.from_pretrained("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
model = model.to(device)

# Whisper model for speech-to-text (see transcribe). Note: kept on its
# default device; only the embedder is moved to `device` above.
model_id = "openai/whisper-small"
processor_transcribe = WhisperProcessor.from_pretrained(model_id)
model_transcribe = WhisperForConditionalGeneration.from_pretrained(model_id)

# LLM endpoint credentials/URL come from the environment (.env file).
load_dotenv()
secret_key = os.getenv("MY_SECRET_KEY")
invoke_url = os.getenv("invoke_url")
# Toggle between SSE streaming and a single JSON response in gen_llm.
stream = True

# NOTE(review): the key is sent verbatim; many OpenAI-compatible endpoints
# expect "Bearer <key>" — confirm MY_SECRET_KEY already includes the scheme.
headers = {
  "Authorization": secret_key,
  "Accept": "text/event-stream" if stream else "application/json"
}

def gen_llm(text):
  """Send *text* to the configured LLM endpoint and return the reply.

  When the module-level `stream` flag is True, consumes the server-sent
  event stream line by line and concatenates the delta contents; otherwise
  returns the endpoint's parsed JSON response as-is.

  Returns:
      str: the assembled completion text (streaming mode), or an error
      description string if a stream line cannot be parsed.
      dict: the raw JSON payload (non-streaming mode).
  """
  payload = {
      "model": "meta/llama-4-maverick-17b-128e-instruct",
      "messages": [{"role": "user", "content": text}],
      "max_tokens": 512,
      "temperature": 1.00,
      "top_p": 1.00,
      "frequency_penalty": 0.00,
      "presence_penalty": 0.00,
      "stream": stream
  }

  response = requests.post(invoke_url, headers=headers, json=payload, stream=stream)

  if not stream:
      return response.json()

  full_text = []
  for line in response.iter_lines():
      # iter_lines yields bytes; skip keep-alive blanks.
      if not line.strip():
          continue
      try:
          line_str = line.decode('utf-8')
          # SSE frames are prefixed with "data: ".
          if line_str.startswith("data: "):
              line_str = line_str[6:]
          # "[DONE]" marks end of stream.
          if line_str.strip() == "[DONE]":
              break
          line_dict = json.loads(line_str)
          content = line_dict["choices"][0]["delta"].get("content")
          if content is not None:
              full_text.append(content)
      except json.JSONDecodeError as e:
          # Error text is surfaced to the chat UI rather than raised.
          return f"JSON decode error: {e}"
      except (KeyError, IndexError, TypeError) as e:
          return f"Malformed line or missing fields: {e}"

  return ''.join(full_text)

def embed_text(texts):
    """Embed a list of strings into L2-normalized sentence vectors.

    Args:
        texts: list[str] (or a single str) to embed with the module-level
            MiniLM tokenizer/model.

    Returns:
        torch.Tensor of shape (len(texts), hidden_dim), unit-norm rows, so
        a dot product equals cosine similarity.
    """
    inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        # Fix: this sentence-transformers checkpoint is trained for
        # attention-mask-weighted MEAN pooling (per its model card), not the
        # [CLS] token; CLS pooling degrades retrieval quality here.
        mask = inputs['attention_mask'].unsqueeze(-1).to(outputs.last_hidden_state.dtype)
        summed = (outputs.last_hidden_state * mask).sum(dim=1)
        counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid div-by-zero on all-pad rows
        embeddings = F.normalize(summed / counts, p=2, dim=1)  # unit vectors for cosine sim
    return embeddings

def check_and_read_txt(file):
    """Read an uploaded text file, embed its lines, and reveal the chat UI.

    Wired to `file_input.change` with 8 outputs:
    (output_text visibility, output_text value, btn, chatbot, chat_input,
    transcribe_button, transcribe_dropdown, audio_input).

    Side effects: sets module globals `corpus` (list of lines from the text
    before the first sentence break) and `state_global` (their embeddings).
    """
    if file is None:
        # Fix: this branch must return one value per declared output (8);
        # the original returned only 2, which errors at runtime. Hide the
        # whole chat UI again when the file is cleared.
        return (gr.update(visible=False), "", gr.update(visible=False),
                gr.update(visible=False), gr.update(visible=False),
                gr.update(visible=False), gr.update(visible=False),
                gr.update(visible=False))
    try:
        with open(file.name, 'r', encoding='utf-8') as f:
            content = f.read()
            # Split on sentence boundaries; only the text before the first
            # ". " is used, then broken into lines as the retrieval corpus.
            corpus_list = [s.strip() for s in re.split(r'\.\s+', content) if s.strip()]
            global corpus
            corpus = corpus_list[0].split('\n')
            corpus_embeddings = embed_text(corpus)
            global state_global
            state_global = corpus_embeddings
        return gr.update(visible=True), content, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
    except Exception as e:
        # Show the error in the textbox but still reveal the UI.
        return gr.update(visible=True), f"Error reading file: {str(e)}", gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
    
def save_text(input_text):
    """Re-embed the (possibly edited) corpus text and cache it in module state.

    Mirrors check_and_read_txt: keeps only the text before the first
    sentence break, splits it into lines, and stores the lines plus their
    embeddings in the `corpus` / `state_global` globals. Returns the input
    unchanged so the bound gr.State receives it.
    """
    global corpus, state_global
    segments = [piece.strip() for piece in re.split(r'\.\s+', input_text) if piece.strip()]
    corpus = segments[0].split('\n')
    state_global = embed_text(corpus)
    return input_text

# Languages offered in the transcription dropdown (Whisper's multilingual set).
transcribe_list = ['english', 'chinese', 'german', 'spanish', 'russian', 'korean', 'french', 'japanese', 'portuguese', 'turkish', 'polish', 'catalan', 'dutch', 'arabic', 'swedish', 'italian', 'indonesian', 'hindi', 'finnish', 'vietnamese', 'hebrew', 'ukrainian', 'greek', 'malay', 'czech', 'romanian', 'danish', 'hungarian', 'tamil', 'norwegian', 'thai', 'urdu', 'croatian', 'bulgarian', 'lithuanian', 'latin', 'maori', 'malayalam', 'welsh', 'slovak', 'telugu', 'persian', 'latvian', 'bengali', 'serbian', 'azerbaijani', 'slovenian', 'kannada', 'estonian', 'macedonian', 'breton', 'basque', 'icelandic', 'armenian', 'nepali', 'mongolian', 'bosnian', 'kazakh', 'albanian', 'swahili', 'galician', 'marathi', 'punjabi', 'sinhala', 'khmer', 'shona', 'yoruba', 'somali', 'afrikaans', 'occitan', 'georgian', 'belarusian', 'tajik', 'sindhi', 'gujarati', 'amharic', 'yiddish', 'lao', 'uzbek', 'faroese', 'haitian creole', 'pashto', 'turkmen', 'nynorsk', 'maltese', 'sanskrit', 'luxembourgish', 'myanmar', 'tibetan', 'tagalog', 'malagasy', 'assamese', 'tatar', 'hawaiian', 'lingala', 'hausa', 'bashkir', 'javanese', 'sundanese', 'cantonese', 'burmese', 'valencian', 'flemish', 'haitian', 'letzeburgesch', 'pushto', 'panjabi', 'moldavian', 'moldovan', 'sinhalese', 'castilian', 'mandarin']
# Currently selected transcription language, updated by select_lang.
# NOTE(review): `global` at module level is a no-op statement — the plain
# assignment below is what creates the module attribute.
global selected_lang
selected_lang = "english"

def transcribe(audio):
    """Transcribe a recorded audio file with Whisper.

    Args:
        audio: filepath from the gr.Audio microphone component, or None
            when nothing was recorded.

    Returns:
        str: the transcription in the currently selected language, or a
        placeholder message when no audio was captured.
    """
    if audio is None:
        return "No audio recorded."
    global selected_lang
    print("selected_lang", selected_lang)
    # Force Whisper to decode in the dropdown-selected language.
    prompt_ids = processor_transcribe.get_decoder_prompt_ids(language=selected_lang, task="transcribe")
    waveform, _sr = librosa.load(audio, sr=16000)  # Whisper expects 16 kHz mono
    features = processor_transcribe(waveform, sampling_rate=16000, return_tensors="pt").input_features
    features = features.to(model_transcribe.device)
    generated_ids = model_transcribe.generate(features, forced_decoder_ids=prompt_ids)
    return processor_transcribe.batch_decode(generated_ids, skip_special_tokens=True)[0]

def select_lang(language):
    """Store the dropdown's language choice for subsequent transcribe calls."""
    global selected_lang
    selected_lang = language

# --- UI components, created hidden and revealed once a file is uploaded ---
chatbot = gr.Chatbot(label="💬 Chat",type="messages",  visible=False)
chat_input = gr.Textbox(label="Enter text here:", visible=False)

file_input = gr.File(label="Upload File", file_types=[".txt", "*"])
output_text = gr.Textbox(label="Result", lines=20, interactive=True, visible=False)
btn = gr.Button("Update Information", visible=False)
transcribe_button = gr.Button("Transcribe", visible=False)
transcribe_dropdown = gr.Dropdown(choices=transcribe_list, label="Transcibe Language", visible=False)
audio_input = gr.Audio(sources="microphone", type="filepath", label="Record Audio", visible=False)

# Per-session Gradio state holding the last saved corpus text.
state_text = gr.State("")
# Module-level retrieval state shared by all handlers: embeddings tensor
# and the corresponding list of corpus lines. (`global` here is a no-op at
# module level; the assignments create the attributes.)
global state_global
state_global = ""
global corpus
corpus = []

# Layout and event wiring: left column = file upload/edit, right column =
# chat with optional voice input.
with gr.Blocks() as demo:
    # gr.Markdown("### Upload a file to check if it's a .txt file")
    with gr.Row():
        with gr.Column(scale=1):
            file_input.render()
            output_text.render()
            btn.render()

            # NOTE(review): output_text appears twice on purpose — the first
            # return value sets its visibility, the second sets its value.
            file_input.change(
                fn=check_and_read_txt,
                inputs=file_input,
                outputs=[output_text, output_text, btn, chatbot, chat_input, transcribe_button,transcribe_dropdown,audio_input]
            )

            # Re-embed the (possibly hand-edited) text on button click.
            btn.click(
                fn=save_text,
                inputs=output_text,
                outputs=state_text
            )
        with gr.Column(scale=2):
            chatbot.render()
            chat_input.render()
            with gr.Row():
                audio_input.render()
                transcribe_button.render()
                # Transcription result is dropped into the chat textbox.
                transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=chat_input)
                transcribe_dropdown.render()
                transcribe_dropdown.change(fn=select_lang, inputs=transcribe_dropdown)

            def respond(message, history):
                """RAG chat handler: retrieve top-3 corpus lines, ask the LLM,
                append the turn to the chat history, and clear the input box.

                NOTE(review): assumes a file/text has already been embedded —
                state_global starts as "" and topk(k=3) requires the corpus to
                have at least 3 lines; confirm the hidden-until-upload UI
                actually guarantees both.
                """
                if history is None:
                    history = []
                query_embedding = embed_text([message])
                # Rows of state_global are unit-norm, so matmul = cosine sim.
                cosine_scores = torch.matmul(query_embedding, state_global.T).squeeze()
                top_k_indices = torch.topk(cosine_scores, k=3).indices
                context = [corpus[i] for i in top_k_indices]
                prompt = (
                          f"Use the following information to answer the user's question : '{context}'\n"
                          f"This is chat history : {history}\n"
                          f"Then answer this question : '{message}'\n"
                          f"Respond clearly and helpfully as a assistant. Keep your response focused and informative, but not overly brief. No need to explain how you got the answer.\n"
                          f"Answer in the same language as the user input. If the answer is not in the context, say something like: "
                          f"'I do not quite understand. Could you please try rephrasing or describing it differently?'\n"
                        )
                # print(prompt)
                bot_ans = gen_llm(prompt)
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": bot_ans})
                return history, ""   # Return history and clear input

            chat_input.submit(fn=respond, inputs=[chat_input, chatbot], outputs=[chatbot, chat_input])

# Script entry point: start the Gradio server.
if __name__ == "__main__":
    demo.launch()