kenlkehl committed on
Commit
c4d8a31
·
verified ·
1 Parent(s): b209fb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -336,7 +336,7 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
- gpu_memory_utilization=0.15,
340
  max_model_len=5000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
@@ -679,7 +679,7 @@ Now, write your summary. Do not add preceding text before the abstraction, and d
679
  SamplingParams(
680
  temperature=0.0,
681
  top_k=1,
682
- max_tokens=7500,
683
  repetition_penalty=1.2
684
  )
685
  )
@@ -696,7 +696,7 @@ Now, write your summary. Do not add preceding text before the abstraction, and d
696
  with torch.no_grad():
697
  outputs = state.llm_model.generate(
698
  input_ids,
699
- max_new_tokens=7500,
700
  temperature=0.00,
701
  do_sample=True,
702
  repetition_penalty=1.2
@@ -774,7 +774,7 @@ def extract_trial_spaces(trial_text: str) -> str:
774
  SamplingParams(
775
  temperature=0.0,
776
  top_k=1,
777
- max_tokens=7500,
778
  repetition_penalty=1.3
779
  )
780
  )
@@ -791,7 +791,7 @@ def extract_trial_spaces(trial_text: str) -> str:
791
  with torch.no_grad():
792
  outputs = state.llm_model.generate(
793
  input_ids,
794
- max_new_tokens=7500,
795
  temperature=0.0,
796
  do_sample=False,
797
  repetition_penalty=1.3
 
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
+ gpu_memory_utilization=0.40,
340
  max_model_len=5000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
 
679
  SamplingParams(
680
  temperature=0.0,
681
  top_k=1,
682
+ max_tokens=1500,
683
  repetition_penalty=1.2
684
  )
685
  )
 
696
  with torch.no_grad():
697
  outputs = state.llm_model.generate(
698
  input_ids,
699
+ max_new_tokens=1500,
700
  temperature=0.00,
701
  do_sample=True,
702
  repetition_penalty=1.2
 
774
  SamplingParams(
775
  temperature=0.0,
776
  top_k=1,
777
+ max_tokens=1500,
778
  repetition_penalty=1.3
779
  )
780
  )
 
791
  with torch.no_grad():
792
  outputs = state.llm_model.generate(
793
  input_ids,
794
+ max_new_tokens=1500,
795
  temperature=0.0,
796
  do_sample=False,
797
  repetition_penalty=1.3