jomoll committed on
Commit
6f7793d
·
verified ·
1 Parent(s): 460fce8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -0
README.md CHANGED
@@ -41,6 +41,28 @@ generated_ids = model.generate(
41
  input_ids, attention_mask=attention_mask, max_new_tokens=286, min_new_tokens= 120,decoder_start_token_id=model.config.decoder_start_token_id, num_beams=5, early_stopping=True, max_length=None
42
  )[0]
43
  decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  print(decoded)
45
  ```
46
 
 
41
  input_ids, attention_mask=attention_mask, max_new_tokens=286, min_new_tokens= 120,decoder_start_token_id=model.config.decoder_start_token_id, num_beams=5, early_stopping=True, max_length=None
42
  )[0]
43
  decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
44
+
45
+ # step 4: Postprocess output
46
+ # Remove extra <pad> tokens
47
+ decoded = decoded.replace("<pad>", "").strip()
48
+
49
+ # Start a new line before each known section and organ header
50
+ sections = ["History:", "Technique:", "Comparison:", "Findings:", "Impression:"]
51
+ organs = ['Lungs and Airways:', 'Musculoskeletal and Chest Wall:','Cardiovascular:','Tubes, Catheters, and Support Devices:','Abdominal:','Pleura:','Other:','Hila and Mediastinum:']
52
+ for section in sections:
53
+ decoded = decoded.replace(section, f"\n{section}")
54
+ for organ in organs:
55
+ try:
56
+ decoded = decoded.replace(organ, f"\n{organ}")
57
+ except:
58
+ continue
59
+ # Ensure newlines before bullet points
60
+ decoded = decoded.replace("- ", "\n- ")
61
+ # Ensure newlines before numbered list items (1. through 7.)
62
+ for i in range(1, 8):
63
+ decoded = decoded.replace(f"{i}.", f"\n{i}.")
64
+ # Remove any leading or trailing whitespace
65
+ decoded = decoded.strip()
66
  print(decoded)
67
  ```
68