Update app.py
Browse files
app.py
CHANGED
|
@@ -128,35 +128,12 @@ def generate_answer(text_input):
|
|
| 128 |
if not text_input or text_input.strip() == "":
|
| 129 |
return "No input provided"
|
| 130 |
|
| 131 |
-
#
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
messages = [
|
| 136 |
-
{"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
|
| 137 |
-
{"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
|
| 138 |
-
]
|
| 139 |
|
| 140 |
-
|
| 141 |
-
messages,
|
| 142 |
-
tokenize=False,
|
| 143 |
-
add_generation_prompt=True
|
| 144 |
-
)
|
| 145 |
-
|
| 146 |
-
inputs = tokenizer([text], return_tensors="pt").to("cpu")
|
| 147 |
-
|
| 148 |
-
with torch.no_grad():
|
| 149 |
-
outputs = model.generate(
|
| 150 |
-
**inputs,
|
| 151 |
-
max_new_tokens=80,
|
| 152 |
-
temperature=0.2,
|
| 153 |
-
do_sample=True,
|
| 154 |
-
top_p=0.85,
|
| 155 |
-
pad_token_id=tokenizer.eos_token_id
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
|
| 159 |
-
return response.strip()
|
| 160 |
|
| 161 |
except Exception as e:
|
| 162 |
return f"Error: {str(e)}"
|
|
@@ -255,26 +232,26 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
|
|
| 255 |
inputs=text_input
|
| 256 |
)
|
| 257 |
|
| 258 |
-
# API endpoints for Pluely
|
| 259 |
with gr.Tab("🔌 Pluely Integration"):
|
| 260 |
gr.Markdown("""
|
| 261 |
## Dedicated Endpoints for Pluely
|
| 262 |
|
| 263 |
-
### 1. STT Endpoint (Audio Transcription)
|
| 264 |
```
|
| 265 |
curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
|
| 266 |
-H "Content-Type: application/json" \\
|
| 267 |
-d '{"data": ["BASE64_AUDIO_DATA"]}'
|
| 268 |
```
|
| 269 |
-
**
|
| 270 |
|
| 271 |
-
### 2. AI Endpoint
|
| 272 |
```
|
| 273 |
curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai_stream \\
|
| 274 |
-H "Content-Type: application/json" \\
|
| 275 |
-d '{"data": ["Your question here"]}'
|
| 276 |
```
|
| 277 |
-
**
|
| 278 |
|
| 279 |
---
|
| 280 |
|
|
@@ -283,29 +260,43 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
|
|
| 283 |
### Custom STT Provider:
|
| 284 |
**Curl Command:**
|
| 285 |
```
|
| 286 |
-
curl
|
| 287 |
-
--header 'Content-Type: application/json' \\
|
| 288 |
-
--data '{"data": ["{{AUDIO_BASE64}}"]}'
|
| 289 |
```
|
| 290 |
**Response Content Path:** `data[0].text`
|
| 291 |
-
**Streaming:** OFF
|
| 292 |
|
| 293 |
### Custom AI Provider (Streaming):
|
| 294 |
**Curl Command:**
|
| 295 |
```
|
| 296 |
-
curl
|
| 297 |
-
--header 'Content-Type: application/json' \\
|
| 298 |
-
--data '{"data": ["{{TEXT}}"]}'
|
| 299 |
```
|
| 300 |
-
**Response Content Path:**
|
| 301 |
-
**Streaming:**
|
| 302 |
-
|
| 303 |
-
### Benefits:
|
| 304 |
-
- ⚡ Instant feedback as answer generates
|
| 305 |
-
- 🎯 Better user experience - see words appear in real-time
|
| 306 |
-
- ⏱️ Perceived latency reduced by 50%+
|
| 307 |
-
- 🔄 No actual performance penalty
|
| 308 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
gr.Markdown("""
|
| 311 |
---
|
|
@@ -314,25 +305,6 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
|
|
| 314 |
**Streaming Mode:** Words appear as they're generated - much faster perceived response!
|
| 315 |
""")
|
| 316 |
|
| 317 |
-
# Register API endpoints
|
| 318 |
-
@demo.api(api_name="transcribe_stt")
|
| 319 |
-
def api_transcribe(audio_base64: str):
|
| 320 |
-
"""API endpoint for audio transcription (Pluely STT) - Non-streaming"""
|
| 321 |
-
result = transcribe_audio_base64(audio_base64)
|
| 322 |
-
return result
|
| 323 |
-
|
| 324 |
-
@demo.api(api_name="answer_ai")
|
| 325 |
-
def api_answer(text: str):
|
| 326 |
-
"""API endpoint for text-to-answer (Pluely AI) - Non-streaming fallback"""
|
| 327 |
-
answer = generate_answer(text)
|
| 328 |
-
return answer
|
| 329 |
-
|
| 330 |
-
@demo.api(api_name="answer_ai_stream")
|
| 331 |
-
def api_answer_stream(text: str):
|
| 332 |
-
"""API endpoint for streaming text-to-answer (Pluely AI) - Streaming enabled"""
|
| 333 |
-
for chunk in generate_answer_stream(text):
|
| 334 |
-
yield chunk
|
| 335 |
-
|
| 336 |
if __name__ == "__main__":
|
| 337 |
demo.queue(max_size=5)
|
| 338 |
demo.launch()
|
|
|
|
| 128 |
if not text_input or text_input.strip() == "":
|
| 129 |
return "No input provided"
|
| 130 |
|
| 131 |
+
# Drain the stream, keeping only the most recent chunk; the last one yielded is returned as the answer
|
| 132 |
+
final_answer = ""
|
| 133 |
+
for chunk in generate_answer_stream(text_input):
|
| 134 |
+
final_answer = chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
return final_answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
except Exception as e:
|
| 139 |
return f"Error: {str(e)}"
|
|
|
|
| 232 |
inputs=text_input
|
| 233 |
)
|
| 234 |
|
| 235 |
+
# API endpoints for Pluely - Using Gradio's built-in interface functions
|
| 236 |
with gr.Tab("🔌 Pluely Integration"):
|
| 237 |
gr.Markdown("""
|
| 238 |
## Dedicated Endpoints for Pluely
|
| 239 |
|
| 240 |
+
### 1. STT Endpoint (Audio Transcription)
|
| 241 |
```
|
| 242 |
curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
|
| 243 |
-H "Content-Type: application/json" \\
|
| 244 |
-d '{"data": ["BASE64_AUDIO_DATA"]}'
|
| 245 |
```
|
| 246 |
+
**Response Format:** `{"data": [{"text": "transcribed text"}]}`
|
| 247 |
|
| 248 |
+
### 2. AI Endpoint - Streaming
|
| 249 |
```
|
| 250 |
curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai_stream \\
|
| 251 |
-H "Content-Type: application/json" \\
|
| 252 |
-d '{"data": ["Your question here"]}'
|
| 253 |
```
|
| 254 |
+
**Response Format:** Streaming text chunks
|
| 255 |
|
| 256 |
---
|
| 257 |
|
|
|
|
| 260 |
### Custom STT Provider:
|
| 261 |
**Curl Command:**
|
| 262 |
```
|
| 263 |
+
curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
|
|
|
|
|
|
|
| 264 |
```
|
| 265 |
**Response Content Path:** `data[0].text`
|
| 266 |
+
**Streaming:** OFF
|
| 267 |
|
| 268 |
### Custom AI Provider (Streaming):
|
| 269 |
**Curl Command:**
|
| 270 |
```
|
| 271 |
+
curl https://archcoder-basic-app.hf.space/call/answer_ai_stream -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
|
|
|
|
|
|
|
| 272 |
```
|
| 273 |
+
**Response Content Path:** `data`
|
| 274 |
+
**Streaming:** ON ✅
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
""")
|
| 276 |
+
|
| 277 |
+
# Hidden components that serve as the inputs/outputs of the API event handlers below
|
| 278 |
+
with gr.Row(visible=False):
|
| 279 |
+
stt_input = gr.Textbox()
|
| 280 |
+
stt_output = gr.JSON()
|
| 281 |
+
ai_input = gr.Textbox()
|
| 282 |
+
ai_output = gr.Textbox()
|
| 283 |
+
|
| 284 |
+
# The api_name on each hidden button's click handler exposes it as the /call/transcribe_stt and /call/answer_ai_stream endpoints
|
| 285 |
+
stt_button = gr.Button("STT", visible=False)
|
| 286 |
+
stt_button.click(
|
| 287 |
+
fn=transcribe_audio_base64,
|
| 288 |
+
inputs=[stt_input],
|
| 289 |
+
outputs=[stt_output],
|
| 290 |
+
api_name="transcribe_stt"
|
| 291 |
+
)
|
| 292 |
+
|
| 293 |
+
ai_button = gr.Button("AI", visible=False)
|
| 294 |
+
ai_button.click(
|
| 295 |
+
fn=generate_answer_stream,
|
| 296 |
+
inputs=[ai_input],
|
| 297 |
+
outputs=[ai_output],
|
| 298 |
+
api_name="answer_ai_stream"
|
| 299 |
+
)
|
| 300 |
|
| 301 |
gr.Markdown("""
|
| 302 |
---
|
|
|
|
| 305 |
**Streaming Mode:** Words appear as they're generated - much faster perceived response!
|
| 306 |
""")
|
| 307 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
if __name__ == "__main__":
|
| 309 |
demo.queue(max_size=5)
|
| 310 |
demo.launch()
|