Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -249,23 +249,62 @@ def generate_qwen3(prompt: str) -> (str, str):
|
|
| 249 |
else:
|
| 250 |
return "", generated_text.strip()
|
| 251 |
|
| 252 |
-
def generate_qwen3_gguf(prompt: str, max_tokens: int = 256) -> tuple[str, str]:
    """Run a single-turn chat completion against the GGUF Qwen3 model.

    Args:
        prompt: User text sent as the sole chat message.
        max_tokens: Cap on generated tokens (default 256) so the total
            token budget stays bounded.

    Returns:
        ``(reasoning, answer)``: if the model emitted a ``</think>``
        block, ``reasoning`` is that block (terminator included) and
        ``answer`` is the text after it; otherwise ``reasoning`` is
        ``""`` and ``answer`` is the full stripped output.
    """
    messages = [
        {"role": "user", "content": prompt}
    ]
    # NOTE(review): the original line read "max_tokens=" with no value —
    # a syntax error. Bounded here via the keyword parameter instead.
    response = qwen3_gguf_llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )
    generated_text = response['choices'][0]['message']['content']

    # Split the optional chain-of-thought block from the final answer.
    if "</think>" in generated_text:
        reasoning_content, content = generated_text.split("</think>", 1)
        return reasoning_content.strip() + "</think>", content.strip()
    else:
        return "", generated_text.strip()
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
|
| 271 |
|
|
|
|
| 249 |
else:
|
| 250 |
return "", generated_text.strip()
|
| 251 |
|
| 252 |
+
def generate_qwen3_gguf(prompt: str, max_tokens: int = 256) -> tuple[str, str]:
    """Generate a chat completion from the GGUF Qwen3 model for *prompt*.

    Args:
        prompt: User text wrapped as a single ``user`` chat message.
        max_tokens: Maximum number of tokens to generate (default 256).

    Returns:
        ``(reasoning, answer)``: when the output contains ``</think>``,
        ``reasoning`` is everything up to and including that marker and
        ``answer`` is the remainder; otherwise ``reasoning`` is ``""``
        and ``answer`` is the whole stripped output.
    """
    # NOTE(review): the original annotated the return as "(str, str)",
    # which is a tuple of classes, not a valid type hint — fixed to
    # tuple[str, str].
    messages = [
        {"role": "user", "content": prompt}
    ]
    response = qwen3_gguf_llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )
    generated_text = response['choices'][0]['message']['content']

    # Separate the model's optional reasoning block from its answer.
    if "</think>" in generated_text:
        reasoning_content, content = generated_text.split("</think>", 1)
        return reasoning_content.strip() + "</think>", content.strip()
    else:
        return "", generated_text.strip()
|
| 266 |
|
| 267 |
+
# --- New summarization endpoint ---


@app.post("/summarize_thread", response_model=SummarizeResponse)
async def summarize_thread(request: SummarizeRequest):
    """Summarize every reply in a thread, then summarize the summaries.

    Only the task string ``"summarisation"`` (case-insensitive) is
    accepted; anything else — and an empty reply list — yields a 400
    JSON error. Each reply is summarized individually via
    ``generate_qwen3_gguf``, the per-reply summaries are joined and
    summarized once more, and the result is returned as a
    ``SummarizeResponse``.
    """
    if request.task.lower() != "summarisation":
        return JSONResponse(
            status_code=400,
            content={"error": "Unsupported task. Only 'summarisation' is supported."}
        )

    # Guard: with no replies the final pass would summarize "" — reject
    # early instead of making a pointless model call.
    if not request.replies:
        return JSONResponse(
            status_code=400,
            content={"error": "No replies provided to summarize."}
        )

    individual_summaries = {}
    combined_reasonings = []
    combined_summaries = []

    # Summarize each reply individually.
    for idx, reply in enumerate(request.replies):
        reasoning, summary = generate_qwen3_gguf(reply, max_tokens=256)
        individual_summaries[idx] = {
            "reasoning": reasoning,
            "summary": summary
        }
        if reasoning:
            combined_reasonings.append(reasoning)
        combined_summaries.append(summary)

    # Combine all individual summaries into one text.
    combined_summary_text = " ".join(combined_summaries)

    # Summarize the combined text once more to obtain the thread-level
    # reasoning and summary.
    final_reasoning, final_summary = generate_qwen3_gguf(combined_summary_text, max_tokens=256)

    # Append the final-pass reasoning to the collected reasonings.
    if final_reasoning:
        combined_reasonings.append(final_reasoning)

    return SummarizeResponse(
        individual_summaries=individual_summaries,
        combined_reasoning="\n\n".join(combined_reasonings).strip(),
        combined_summary=final_summary.strip()
    )
|
| 308 |
|
| 309 |
|
| 310 |
|