Spaces:
Sleeping
Sleeping
1. Added memory management
2. Limited input text length
3. Limited response length
4. Added batch size control
5. Added thread limiting
- src/app.py +17 -4
src/app.py
CHANGED
|
@@ -206,7 +206,12 @@ llm = LlamaCPP(
|
|
| 206 |
model_path=model_path,
|
| 207 |
temperature=0.7,
|
| 208 |
max_new_tokens=256,
|
| 209 |
-
context_window=2048
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
)
|
| 211 |
print("LLM initialized successfully")
|
| 212 |
|
|
@@ -556,31 +561,39 @@ with gr.Blocks(
|
|
| 556 |
)
|
| 557 |
|
| 558 |
def process_text_input(text, history):
|
| 559 |
-
"""Process text input
|
| 560 |
if not text:
|
| 561 |
return history
|
| 562 |
|
| 563 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
# Process the symptoms
|
| 565 |
diagnosis_query = f"""
|
| 566 |
Based on these symptoms: '{text}'
|
| 567 |
Provide relevant ICD-10 codes and diagnostic questions.
|
| 568 |
Focus on clinical implications.
|
|
|
|
| 569 |
"""
|
| 570 |
response = symptom_index.as_query_engine().query(diagnosis_query)
|
|
|
|
|
|
|
|
|
|
| 571 |
|
| 572 |
-
# Format and return chat messages
|
| 573 |
return history + [
|
| 574 |
{"role": "user", "content": text},
|
| 575 |
{"role": "assistant", "content": format_response_for_user({
|
| 576 |
"diagnoses": [],
|
| 577 |
"confidences": [],
|
| 578 |
-
"follow_up": str(response)
|
| 579 |
})}
|
| 580 |
]
|
| 581 |
|
| 582 |
except Exception as e:
|
| 583 |
print(f"Text processing error: {str(e)}")
|
|
|
|
| 584 |
return history
|
| 585 |
|
| 586 |
submit_btn.click(
|
|
|
|
| 206 |
model_path=model_path,
|
| 207 |
temperature=0.7,
|
| 208 |
max_new_tokens=256,
|
| 209 |
+
context_window=2048,
|
| 210 |
+
n_batch=512, # Added batch size limit
|
| 211 |
+
n_ctx=2048, # Explicit context window
|
| 212 |
+
verbose=False, # Reduce logging
|
| 213 |
+
n_threads=4, # Limit threads
|
| 214 |
+
last_n_tokens_size=256 # Limit token history
|
| 215 |
)
|
| 216 |
print("LLM initialized successfully")
|
| 217 |
|
|
|
|
| 561 |
)
|
| 562 |
|
| 563 |
def process_text_input(text, history):
|
| 564 |
+
"""Process text input with memory management."""
|
| 565 |
if not text:
|
| 566 |
return history
|
| 567 |
|
| 568 |
try:
|
| 569 |
+
# Limit input length
|
| 570 |
+
if len(text) > 500:
|
| 571 |
+
text = text[:500] + "..."
|
| 572 |
+
|
| 573 |
# Process the symptoms
|
| 574 |
diagnosis_query = f"""
|
| 575 |
Based on these symptoms: '{text}'
|
| 576 |
Provide relevant ICD-10 codes and diagnostic questions.
|
| 577 |
Focus on clinical implications.
|
| 578 |
+
Limit response to 1000 characters.
|
| 579 |
"""
|
| 580 |
response = symptom_index.as_query_engine().query(diagnosis_query)
|
| 581 |
+
|
| 582 |
+
# Clean up memory
|
| 583 |
+
cleanup_memory()
|
| 584 |
|
|
|
|
| 585 |
return history + [
|
| 586 |
{"role": "user", "content": text},
|
| 587 |
{"role": "assistant", "content": format_response_for_user({
|
| 588 |
"diagnoses": [],
|
| 589 |
"confidences": [],
|
| 590 |
+
"follow_up": str(response)[:1000] # Limit response length
|
| 591 |
})}
|
| 592 |
]
|
| 593 |
|
| 594 |
except Exception as e:
|
| 595 |
print(f"Text processing error: {str(e)}")
|
| 596 |
+
cleanup_memory()
|
| 597 |
return history
|
| 598 |
|
| 599 |
submit_btn.click(
|