kenlkehl committed on
Commit
c4d8a31
·
verified ·
1 Parent(s): b209fb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -336,7 +336,7 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
- gpu_memory_utilization=0.15,
340
  max_model_len=5000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
@@ -679,7 +679,7 @@ Now, write your summary. Do not add preceding text before the abstraction, and d
679
  SamplingParams(
680
  temperature=0.0,
681
  top_k=1,
682
- max_tokens=7500,
683
  repetition_penalty=1.2
684
  )
685
  )
@@ -696,7 +696,7 @@ Now, write your summary. Do not add preceding text before the abstraction, and d
696
  with torch.no_grad():
697
  outputs = state.llm_model.generate(
698
  input_ids,
699
- max_new_tokens=7500,
700
  temperature=0.00,
701
  do_sample=True,
702
  repetition_penalty=1.2
@@ -774,7 +774,7 @@ def extract_trial_spaces(trial_text: str) -> str:
774
  SamplingParams(
775
  temperature=0.0,
776
  top_k=1,
777
- max_tokens=7500,
778
  repetition_penalty=1.3
779
  )
780
  )
@@ -791,7 +791,7 @@ def extract_trial_spaces(trial_text: str) -> str:
791
  with torch.no_grad():
792
  outputs = state.llm_model.generate(
793
  input_ids,
794
- max_new_tokens=7500,
795
  temperature=0.0,
796
  do_sample=False,
797
  repetition_penalty=1.3
 
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
+ gpu_memory_utilization=0.40,
340
  max_model_len=5000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
 
679
  SamplingParams(
680
  temperature=0.0,
681
  top_k=1,
682
+ max_tokens=1500,
683
  repetition_penalty=1.2
684
  )
685
  )
 
696
  with torch.no_grad():
697
  outputs = state.llm_model.generate(
698
  input_ids,
699
+ max_new_tokens=1500,
700
  temperature=0.00,
701
  do_sample=True,
702
  repetition_penalty=1.2
 
774
  SamplingParams(
775
  temperature=0.0,
776
  top_k=1,
777
+ max_tokens=1500,
778
  repetition_penalty=1.3
779
  )
780
  )
 
791
  with torch.no_grad():
792
  outputs = state.llm_model.generate(
793
  input_ids,
794
+ max_new_tokens=1500,
795
  temperature=0.0,
796
  do_sample=False,
797
  repetition_penalty=1.3