File size: 10,589 Bytes
2dc4fb9
 
a971d1c
d6e4129
2dc4fb9
990db9b
3683c2c
 
 
a971d1c
2dc4fb9
 
7ac8cfa
2dc4fb9
7ac8cfa
 
a971d1c
990db9b
 
 
 
 
 
2dc4fb9
 
990db9b
d6e4129
2dc4fb9
a971d1c
3683c2c
2dc4fb9
d6e4129
 
990db9b
d6e4129
990db9b
d6e4129
7ac8cfa
 
2dc4fb9
7ac8cfa
d6e4129
 
a971d1c
7ac8cfa
 
 
2dc4fb9
 
 
3683c2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a971d1c
 
8dc383f
 
a971d1c
8dc383f
a971d1c
 
8dc383f
a971d1c
 
 
 
8dc383f
a971d1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8dc383f
 
 
 
 
 
 
 
 
 
a971d1c
 
 
 
 
 
 
 
 
 
8dc383f
a971d1c
 
8dc383f
 
a971d1c
8dc383f
a971d1c
 
2dc4fb9
 
f81cf03
2dc4fb9
7ac8cfa
d6e4129
7ac8cfa
 
a971d1c
2dc4fb9
 
 
7ac8cfa
a971d1c
2dc4fb9
 
 
f81cf03
2dc4fb9
a971d1c
2dc4fb9
 
a971d1c
2dc4fb9
a971d1c
 
 
 
 
 
 
 
 
2dc4fb9
a971d1c
f81cf03
 
a971d1c
f81cf03
 
 
a971d1c
f81cf03
a971d1c
 
7ac8cfa
a971d1c
 
7ac8cfa
a971d1c
7ac8cfa
 
 
 
 
 
 
 
 
 
 
 
 
a971d1c
7ac8cfa
2dc4fb9
 
f81cf03
 
2dc4fb9
a971d1c
2dc4fb9
 
7ac8cfa
 
 
 
 
 
 
 
 
 
 
a971d1c
7ac8cfa
2dc4fb9
 
f81cf03
2dc4fb9
 
a971d1c
2dc4fb9
7ac8cfa
 
 
 
 
a971d1c
 
7ac8cfa
 
 
 
82c886b
3683c2c
 
a971d1c
3683c2c
a971d1c
3683c2c
 
 
 
 
a971d1c
3683c2c
a971d1c
3683c2c
a971d1c
3683c2c
 
 
a971d1c
3683c2c
 
 
 
 
427e302
3683c2c
f81cf03
3683c2c
f81cf03
3683c2c
a971d1c
3683c2c
a971d1c
3683c2c
427e302
f81cf03
427e302
 
 
f81cf03
 
427e302
f81cf03
 
427e302
 
 
 
 
 
f81cf03
 
a971d1c
f81cf03
 
a971d1c
427e302
3683c2c
2dc4fb9
7ac8cfa
a971d1c
 
 
 
 
 
2dc4fb9
 
990db9b
7ac8cfa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import gradio as gr
from faster_whisper import WhisperModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from duckduckgo_search import DDGS
import time
import torch
import base64
import tempfile
import os
from datetime import datetime

# Initialize models
# Everything in this section runs once, at import time. It defines the
# module-level names `whisper_model`, `tokenizer`, `model` and `ddgs` that the
# handler functions further down read as globals.
print("Loading Whisper model...")
# Speech-to-text model: the "tiny" checkpoint, on CPU with int8 compute.
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")

print("Loading LLM...")
model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # Upgraded to 1.5B for better quality
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Causal LM loaded in float32 and mapped to the CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True
)

# Initialize DuckDuckGo Search
# A single shared client, reused by every search_web() call.
# NOTE(review): timeout=3 is presumably in seconds — confirm against the library.
ddgs = DDGS(timeout=3)

def search_web(query, max_results=3):
    """Search DuckDuckGo and format the hits as numbered source snippets.

    Args:
        query: Search text forwarded to the shared module-level ``ddgs`` client.
        max_results: Upper bound on how many hits are requested and formatted.

    Returns:
        A string with one ``[Source N] <title>`` line followed by the hit's
        body per result, ``"No search results found."`` when there are no
        hits, or ``"Search failed: <reason>"`` when the lookup raised.
        Failures are reported as text rather than raised, so callers can embed
        the return value directly in a prompt.
    """
    from itertools import islice

    try:
        results = ddgs.text(
            keywords=query,
            region='wt-wt',
            safesearch='moderate',
            timelimit='m',
            max_results=max_results
        )

        # islice instead of results[:max_results]: it caps the count whether
        # the client hands back a list or a lazy iterator, and `or ()` treats
        # a None result as "no hits" instead of a failure.
        hits = islice(results or (), max_results)

        context = "".join(
            f"\n[Source {i}] {hit.get('title', '')}\n{hit.get('body', '')}\n"
            for i, hit in enumerate(hits, 1)
        )

        return context.strip() if context else "No search results found."

    except Exception as e:
        # Best-effort lookup: surface the reason as text instead of crashing
        # the request that triggered the search.
        return f"Search failed: {str(e)}"

def transcribe_audio_base64(audio_base64):
    """Transcribe audio from base64 string (for Pluely STT endpoint).

    The decoded bytes are written to a temporary ``.wav`` file, which is handed
    to the module-level ``whisper_model`` and removed again afterwards, whether
    or not transcription succeeded.

    Args:
        audio_base64: Base64-encoded audio payload.

    Returns:
        ``{"text": <transcription>}`` on success, otherwise
        ``{"error": "Transcription failed: <reason>"}``. Nothing is raised.
    """
    if not audio_base64:
        return {"error": "Transcription failed: no audio data provided"}

    temp_path = None
    try:
        audio_bytes = base64.b64decode(audio_base64)

        # delete=False because the file is reopened by path after this handle
        # is closed; removal is handled in the `finally` below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_path = temp_audio.name
            temp_audio.write(audio_bytes)

        segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
        # Join the segments while the temp file still exists.
        transcription = " ".join(seg.text for seg in segments)

        return {"text": transcription.strip()}

    except Exception as e:
        return {"error": f"Transcription failed: {str(e)}"}

    finally:
        # Always clean up, including when decoding or transcription failed;
        # previously a failure left the temp file behind.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass

def generate_answer(text_input, search_results=None):
    """Generate complete answer with context.

    Builds a chat prompt from the question, today's date and web search
    snippets, runs the module-level ``model`` on it and returns the decoded
    continuation.

    Args:
        text_input: The user's question.
        search_results: Optional pre-fetched search context. When ``None``
            (the default, and what the API endpoint uses) the search is run
            here via ``search_web``; callers that already searched can pass
            their result to avoid a second network round trip.

    Returns:
        The generated answer, ``"No input provided"`` for empty or
        whitespace-only input, or ``"Error: <reason>"`` if anything raised.
    """
    try:
        if not text_input or not text_input.strip():
            return "No input provided"

        # Get current date for context
        current_date = datetime.now().strftime("%B %d, %Y")

        # Web search for current information (skipped when the caller
        # already supplied the results)
        if search_results is None:
            search_results = search_web(text_input, max_results=3)

        # Enhanced prompt for comprehensive responses
        messages = [
            {"role": "system", "content": f"""You are a knowledgeable assistant providing comprehensive, well-researched answers. Today's date is {current_date}.

When answering:
1. Provide the direct answer first
2. Add relevant context and background information
3. Include recent developments or current status when applicable
4. Be informative but concise (150-200 words)
5. Use the web search results to ensure accuracy and currency"""},
            {"role": "user", "content": f"""Based on these current web search results:

{search_results}

Question: {text_input}

Provide a comprehensive answer that includes:
- Direct answer to the question
- Relevant context and background
- Recent developments (as of {current_date})
- Key points the user should know

Answer:"""}
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        inputs = tokenizer([text], return_tensors="pt").to("cpu")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=250,  # Increased from 80 to 250
                temperature=0.3,     # Slightly higher for more natural responses
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id
            )

        # Skip the first `prompt_length` tokens so only the continuation
        # after the prompt is decoded.
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        return f"Error: {str(e)}"


def process_audio(audio_path, question_text):
    """Main pipeline - returns tuple (answer, time).

    Transcribes ``audio_path`` when one is given, otherwise uses
    ``question_text`` as the question; then searches the web once and feeds
    those results to ``generate_answer``.

    Args:
        audio_path: Path to an audio file, or a falsy value to use the text.
        question_text: Typed question, used only when ``audio_path`` is falsy.

    Returns:
        ``(answer_with_timing_footer, total_seconds)``. On a transcription
        error or empty input the first element is an error message and the
        second is ``0.0``.
    """
    start_time = time.time()

    # Transcribe if audio provided
    if audio_path:
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
            question = " ".join(seg.text for seg in segments)
        except Exception as e:
            return f"❌ Transcription error: {str(e)}", 0.0
    else:
        question = question_text

    if not question or not question.strip():
        return "❌ No input provided", 0.0

    transcription_time = time.time() - start_time

    # Web search
    search_start = time.time()
    search_results = search_web(question, max_results=3)
    search_time = time.time() - search_start

    # Generate answer, reusing the results fetched above so the search is not
    # repeated inside generate_answer and the LLM timing covers generation only.
    llm_start = time.time()
    answer = generate_answer(question, search_results=search_results)
    llm_time = time.time() - llm_start

    total_time = time.time() - start_time
    time_emoji = "🟢" if total_time < 5.0 else "🟡" if total_time < 7.0 else "🔴"

    timing_info = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"

    return answer + timing_info, total_time

# Wrapper functions
def audio_handler(audio_path):
    """Run the pipeline for a recorded or uploaded audio file, with no typed question."""
    return process_audio(audio_path=audio_path, question_text=None)

def text_handler(text_input):
    """Run the pipeline for a typed question, with no audio file."""
    return process_audio(audio_path=None, question_text=text_input)

# Gradio interface
# Layout: a header, three tabs (audio input, text input, Pluely integration)
# and a footer. Each click handler is also given an `api_name`.
with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎯 Enhanced Political Q&A System
    **Comprehensive answers with context** - Powered by Qwen2.5-1.5B
    
    **Features:** Whisper-tiny + Qwen2.5-1.5B + DuckDuckGo + Rich contextual responses
    """)
    
    # Tab 1: record or upload audio; audio_handler returns (answer, seconds),
    # which map onto the two output components in order.
    with gr.Tab("🎙️ Audio Input"):
        with gr.Row():
            with gr.Column():
                # type="filepath": the handler receives a path, which is what
                # process_audio passes to the Whisper model.
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or upload audio"
                )
                audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
            
            with gr.Column():
                audio_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
                audio_time = gr.Number(label="Response Time (seconds)", precision=2)
        
        audio_submit.click(
            fn=audio_handler,
            inputs=[audio_input],
            outputs=[audio_output, audio_time],
            api_name="audio_query"
        )
    
    # Tab 2: typed question; same (answer, seconds) output pair as the audio tab.
    with gr.Tab("✍️ Text Input"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Who is the current US president?",
                    lines=3
                )
                text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
            
            with gr.Column():
                text_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
                text_time = gr.Number(label="Response Time (seconds)", precision=2)
        
        text_submit.click(
            fn=text_handler,
            inputs=[text_input],
            outputs=[text_output, text_time],
            api_name="text_query"
        )
        
        # Example questions wired to the text box above.
        gr.Examples(
            examples=[
                ["Who won the 2024 US presidential election?"],
                ["What is the current inflation rate in India?"],
                ["Who is the prime minister of UK and what are their key policies?"],
                ["Explain the latest developments in AI regulation"]
            ],
            inputs=text_input
        )
    
    # API endpoints for Pluely
    # NOTE(review): the curl commands and response shapes in the Markdown below
    # are static documentation text; verify them against the Gradio version
    # actually deployed.
    with gr.Tab("🔌 Pluely Integration"):
        gr.Markdown("""
        ## API Endpoints for Pluely
        
        ### STT Endpoint (Audio Transcription)
        ```
        curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
          -H "Content-Type: application/json" \\
          -d '{"data": ["BASE64_AUDIO_DATA"]}'
        ```
        **Response:** `{"data": [{"text": "transcribed text"}]}`
        
        ### AI Endpoint (Enhanced Responses)
        ```
        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
          -H "Content-Type: application/json" \\
          -d '{"data": ["Your question here"]}'
        ```
        **Response:** `{"data": ["Comprehensive answer with context"]}`
        
        ## Pluely Configuration
        
        ### Custom STT Provider:
        ```
        curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
        ```
        **Response Path:** `data[0].text` | **Streaming:** OFF
        
        ### Custom AI Provider:
        ```
        curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
        ```
        **Response Path:** `data[0]` | **Streaming:** OFF
        """)
        
        # Hidden components for API endpoints
        # They exist only to give the two handlers below an input and an
        # output component; the row and both buttons are created invisible.
        with gr.Row(visible=False):
            stt_input = gr.Textbox()
            stt_output = gr.JSON()
            ai_input = gr.Textbox()
            ai_output = gr.Textbox()
        
        # Base64 audio string in, {"text": ...} or {"error": ...} dict out.
        stt_btn = gr.Button("STT", visible=False)
        stt_btn.click(
            fn=transcribe_audio_base64,
            inputs=[stt_input],
            outputs=[stt_output],
            api_name="transcribe_stt"
        )
        
        # Question text in, answer string out (no timing footer on this path).
        ai_btn = gr.Button("AI", visible=False)
        ai_btn.click(
            fn=generate_answer,
            inputs=[ai_input],
            outputs=[ai_output],
            api_name="answer_ai"
        )
    
    # Footer. The .format() call runs once, while the interface is being built,
    # so the date shown is the date the app started, not the date of a request.
    gr.Markdown("""
    ---
    **Model:** Qwen2.5-1.5B-Instruct (3x larger for better answers)  
    **Output:** 150-200 words with context and background  
    **Date-aware:** Responses reference current date ({})  
    
    🟢 = Under 5s | 🟡 = 5-7s | 🔴 = Over 7s
    """.format(datetime.now().strftime("%B %d, %Y")))

if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    # NOTE(review): max_size=5 presumably caps the number of queued requests —
    # confirm against the Gradio queue documentation.
    demo.queue(max_size=5)
    demo.launch()