Spaces:
Sleeping
Sleeping
1. Added memory management
2. Limited input text length
3. Limited response length
4. Added batch size control
5. Added thread limiting
- src/app.py +17 -4
src/app.py
CHANGED
|
@@ -206,7 +206,12 @@ llm = LlamaCPP(
|
|
| 206 |
model_path=model_path,
|
| 207 |
temperature=0.7,
|
| 208 |
max_new_tokens=256,
|
| 209 |
-
context_window=2048
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
)
|
| 211 |
print("LLM initialized successfully")
|
| 212 |
|
|
@@ -556,31 +561,39 @@ with gr.Blocks(
|
|
| 556 |
)
|
| 557 |
|
| 558 |
def process_text_input(text, history):
|
| 559 |
-
"""Process text input
|
| 560 |
if not text:
|
| 561 |
return history
|
| 562 |
|
| 563 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
# Process the symptoms
|
| 565 |
diagnosis_query = f"""
|
| 566 |
Based on these symptoms: '{text}'
|
| 567 |
Provide relevant ICD-10 codes and diagnostic questions.
|
| 568 |
Focus on clinical implications.
|
|
|
|
| 569 |
"""
|
| 570 |
response = symptom_index.as_query_engine().query(diagnosis_query)
|
|
|
|
|
|
|
|
|
|
| 571 |
|
| 572 |
-
# Format and return chat messages
|
| 573 |
return history + [
|
| 574 |
{"role": "user", "content": text},
|
| 575 |
{"role": "assistant", "content": format_response_for_user({
|
| 576 |
"diagnoses": [],
|
| 577 |
"confidences": [],
|
| 578 |
-
"follow_up": str(response)
|
| 579 |
})}
|
| 580 |
]
|
| 581 |
|
| 582 |
except Exception as e:
|
| 583 |
print(f"Text processing error: {str(e)}")
|
|
|
|
| 584 |
return history
|
| 585 |
|
| 586 |
submit_btn.click(
|
|
|
|
| 206 |
model_path=model_path,
|
| 207 |
temperature=0.7,
|
| 208 |
max_new_tokens=256,
|
| 209 |
+
context_window=2048,
|
| 210 |
+
n_batch=512, # Added batch size limit
|
| 211 |
+
n_ctx=2048, # Explicit context window
|
| 212 |
+
verbose=False, # Reduce logging
|
| 213 |
+
n_threads=4, # Limit threads
|
| 214 |
+
last_n_tokens_size=256 # Limit token history
|
| 215 |
)
|
| 216 |
print("LLM initialized successfully")
|
| 217 |
|
|
|
|
| 561 |
)
|
| 562 |
|
| 563 |
def process_text_input(text, history):
|
| 564 |
+
"""Process text input with memory management."""
|
| 565 |
if not text:
|
| 566 |
return history
|
| 567 |
|
| 568 |
try:
|
| 569 |
+
# Limit input length
|
| 570 |
+
if len(text) > 500:
|
| 571 |
+
text = text[:500] + "..."
|
| 572 |
+
|
| 573 |
# Process the symptoms
|
| 574 |
diagnosis_query = f"""
|
| 575 |
Based on these symptoms: '{text}'
|
| 576 |
Provide relevant ICD-10 codes and diagnostic questions.
|
| 577 |
Focus on clinical implications.
|
| 578 |
+
Limit response to 1000 characters.
|
| 579 |
"""
|
| 580 |
response = symptom_index.as_query_engine().query(diagnosis_query)
|
| 581 |
+
|
| 582 |
+
# Clean up memory
|
| 583 |
+
cleanup_memory()
|
| 584 |
|
|
|
|
| 585 |
return history + [
|
| 586 |
{"role": "user", "content": text},
|
| 587 |
{"role": "assistant", "content": format_response_for_user({
|
| 588 |
"diagnoses": [],
|
| 589 |
"confidences": [],
|
| 590 |
+
"follow_up": str(response)[:1000] # Limit response length
|
| 591 |
})}
|
| 592 |
]
|
| 593 |
|
| 594 |
except Exception as e:
|
| 595 |
print(f"Text processing error: {str(e)}")
|
| 596 |
+
cleanup_memory()
|
| 597 |
return history
|
| 598 |
|
| 599 |
submit_btn.click(
|