StanfordAIMI
/

SRR-T5-Base

text2text-generation

text-generation-inference

Model card Files Files and versions

jomoll committed on Jun 4, 2025

Commit

6f7793d

·

verified ·

1 Parent(s): 460fce8

Update README.md

Files changed (1) hide show

README.md +22 -0

README.md CHANGED Viewed

@@ -41,6 +41,28 @@ generated_ids = model.generate(
     input_ids, attention_mask=attention_mask, max_new_tokens=286, min_new_tokens= 120,decoder_start_token_id=model.config.decoder_start_token_id, num_beams=5, early_stopping=True, max_length=None
     )[0]
 decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
 print(decoded)
 ```

     input_ids, attention_mask=attention_mask, max_new_tokens=286, min_new_tokens= 120,decoder_start_token_id=model.config.decoder_start_token_id, num_beams=5, early_stopping=True, max_length=None
     )[0]
 decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
+# step 4: Postprocess output
+# Remove extra <pad> tokens
+decoded = decoded.replace("<pad>", "").strip()
+# Split into sections based on known headers or patterns
+sections = ["History:", "Technique:", "Comparison:", "Findings:", "Impression:"]
+organs = ['Lungs and Airways:', 'Musculoskeletal and Chest Wall:','Cardiovascular:','Tubes, Catheters, and Support Devices:','Abdominal:','Pleura:','Other:','Hila and Mediastinum:']
+for section in sections:
+    decoded = decoded.replace(section, f"\n{section}")
+for organ in organs:
+    try:
+        decoded = decoded.replace(organ, f"\n{organ}")
+    except:
+        continue
+# Ensure newlines after colons and before bullet points
+decoded = decoded.replace("- ", "\n- ")
+# Ensure newlines before numbers
+for i in range(1, 8):
+    decoded = decoded.replace(f"{i}.", f"\n{i}.")
+# Remove any leading or trailing whitespace
+decoded = decoded.strip()
 print(decoded)
 ```