Update README.md
Browse files
README.md
CHANGED
|
@@ -41,6 +41,28 @@ generated_ids = model.generate(
|
|
| 41 |
input_ids, attention_mask=attention_mask, max_new_tokens=286, min_new_tokens= 120,decoder_start_token_id=model.config.decoder_start_token_id, num_beams=5, early_stopping=True, max_length=None
|
| 42 |
)[0]
|
| 43 |
decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
print(decoded)
|
| 45 |
```
|
| 46 |
|
|
|
|
| 41 |
input_ids, attention_mask=attention_mask, max_new_tokens=286, min_new_tokens= 120,decoder_start_token_id=model.config.decoder_start_token_id, num_beams=5, early_stopping=True, max_length=None
|
| 42 |
)[0]
|
| 43 |
decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
|
| 44 |
+
|
| 45 |
+
# Step 4: Postprocess output
# Reformat the decoded report text in place (the `decoded` string produced by
# tokenizer.decode above) so that every known header, bullet point and
# numbered item starts on its own line.
import re

# Remove any literal <pad> markers left in the decoded text
decoded = decoded.replace("<pad>", "").strip()

# Known report section headers and organ-system sub-headers
sections = ["History:", "Technique:", "Comparison:", "Findings:", "Impression:"]
organs = [
    "Lungs and Airways:",
    "Musculoskeletal and Chest Wall:",
    "Cardiovascular:",
    "Tubes, Catheters, and Support Devices:",
    "Abdominal:",
    "Pleura:",
    "Other:",
    "Hila and Mediastinum:",
]

# Start each header on a new line. str.replace is a no-op when the header is
# absent and cannot raise for string arguments, so no try/except is needed.
for header in sections + organs:
    decoded = decoded.replace(header, f"\n{header}")

# Start each bullet point on a new line
decoded = decoded.replace("- ", "\n- ")

# Start each numbered item (1. to 7.) on a new line. The digit must stand
# alone: the lookarounds keep decimals such as "2.5" and multi-digit numbers
# such as "12." intact, which a plain substring replace would split.
decoded = re.sub(r"(?<!\d)([1-7])\.(?!\d)", r"\n\1.", decoded)

# Remove any leading or trailing whitespace
decoded = decoded.strip()
|
| 66 |
print(decoded)
|
| 67 |
```
|
| 68 |
|