kenlkehl committed on
Commit
2a8c032
·
verified ·
1 Parent(s): 64bfa37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -10
app.py CHANGED
@@ -276,7 +276,7 @@ def load_tagger_model(model_path: str) -> Tuple[str, str]:
276
  device=0 if state.device == "cuda" else -1,
277
  truncation=True,
278
  padding="max_length",
279
- max_length=128
280
  )
281
  return f"βœ“ Tagger model loaded from {model_path}", ""
282
  except Exception as e:
@@ -336,8 +336,8 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
- gpu_memory_utilization=0.50,
340
- max_model_len=15000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
343
  return f"βœ“ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
@@ -679,7 +679,7 @@ Now, write your summary. Do not add preceding text before the abstraction, and d
679
  SamplingParams(
680
  temperature=0.0,
681
  top_k=1,
682
- max_tokens=4000,
683
  repetition_penalty=1.2
684
  )
685
  )
@@ -696,7 +696,7 @@ Now, write your summary. Do not add preceding text before the abstraction, and d
696
  with torch.no_grad():
697
  outputs = state.llm_model.generate(
698
  input_ids,
699
- max_new_tokens=4000,
700
  temperature=0.00,
701
  do_sample=True,
702
  repetition_penalty=1.2
@@ -774,7 +774,7 @@ def extract_trial_spaces(trial_text: str) -> str:
774
  SamplingParams(
775
  temperature=0.0,
776
  top_k=1,
777
- max_tokens=4000,
778
  repetition_penalty=1.3
779
  )
780
  )
@@ -791,7 +791,7 @@ def extract_trial_spaces(trial_text: str) -> str:
791
  with torch.no_grad():
792
  outputs = state.llm_model.generate(
793
  input_ids,
794
- max_new_tokens=1500,
795
  temperature=0.0,
796
  do_sample=False,
797
  repetition_penalty=1.3
@@ -936,9 +936,19 @@ def get_trial_details(df: pd.DataFrame, evt: gr.SelectData) -> str:
936
 
937
  row_idx = evt.index[0]
938
  nct_id = df.iloc[row_idx]['nct_id']
 
939
 
940
- # Find in original dataframe
941
- trial_row = state.trial_spaces_df[state.trial_spaces_df['nct_id'] == nct_id].iloc[0]
 
 
 
 
 
 
 
 
 
942
 
943
  # Create clinicaltrials.gov link
944
  ct_gov_link = f"https://clinicaltrials.gov/study/{nct_id}"
@@ -950,7 +960,7 @@ def get_trial_details(df: pd.DataFrame, evt: gr.SelectData) -> str:
950
 
951
  ---
952
 
953
- ## Eligibility Criteria Summary
954
  {trial_row['this_space']}
955
 
956
  ## Full Trial Text
 
276
  device=0 if state.device == "cuda" else -1,
277
  truncation=True,
278
  padding="max_length",
279
+ max_length=512
280
  )
281
  return f"βœ“ Tagger model loaded from {model_path}", ""
282
  except Exception as e:
 
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
+ gpu_memory_utilization=0.20,
340
+ max_model_len=10000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
343
  return f"βœ“ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
 
679
  SamplingParams(
680
  temperature=0.0,
681
  top_k=1,
682
+ max_tokens=7500,
683
  repetition_penalty=1.2
684
  )
685
  )
 
696
  with torch.no_grad():
697
  outputs = state.llm_model.generate(
698
  input_ids,
699
+ max_new_tokens=7500,
700
  temperature=0.00,
701
  do_sample=True,
702
  repetition_penalty=1.2
 
774
  SamplingParams(
775
  temperature=0.0,
776
  top_k=1,
777
+ max_tokens=7500,
778
  repetition_penalty=1.3
779
  )
780
  )
 
791
  with torch.no_grad():
792
  outputs = state.llm_model.generate(
793
  input_ids,
794
+ max_new_tokens=7500,
795
  temperature=0.0,
796
  do_sample=False,
797
  repetition_penalty=1.3
 
936
 
937
  row_idx = evt.index[0]
938
  nct_id = df.iloc[row_idx]['nct_id']
939
+ this_space = df.iloc[row_idx]['this_space']
940
 
941
+ # Find the specific trial space in original dataframe
942
+ # Match both NCT ID and the exact trial space text
943
+ matching_rows = state.trial_spaces_df[
944
+ (state.trial_spaces_df['nct_id'] == nct_id) &
945
+ (state.trial_spaces_df['this_space'] == this_space)
946
+ ]
947
+
948
+ if len(matching_rows) == 0:
949
+ return f"Error: Could not find matching trial space for {nct_id}"
950
+
951
+ trial_row = matching_rows.iloc[0]
952
 
953
  # Create clinicaltrials.gov link
954
  ct_gov_link = f"https://clinicaltrials.gov/study/{nct_id}"
 
960
 
961
  ---
962
 
963
+ ## Eligibility Criteria Summary (Selected Space)
964
  {trial_row['this_space']}
965
 
966
  ## Full Trial Text