"""Quick test of the trained Prothom Alo model."""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
|
def test_model():
    """Smoke-test the fine-tuned Prothom Alo model.

    Loads the tokenizer and model from the local fine-tuning output
    directory, generates text for a few sample prompts, then verifies
    that the exported safetensors checkpoint can be opened.

    Returns:
        bool: always True; a failure in the safetensors check is
        reported but does not abort the run.
    """
    print("🚀 Testing Prothom Alo Fine-tuned Model")
    print("=" * 50)

    model_path = "./prothomalo_model/final_model"
    print(f"Loading model from: {model_path}")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)

    prompts = [
        "The latest news from Bangladesh",
        "In today's opinion piece",
        "Government announces new policy",
    ]

    for i, prompt in enumerate(prompts, 1):
        print(f"\n🧪 Test {i}: {prompt}")
        print("-" * 40)

        inputs = tokenizer(prompt, return_tensors="pt")

        # Inference only: disable gradient tracking to save memory.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=150,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.8,
                # GPT-style models have no pad token; reuse EOS to
                # silence the generate() warning.
                pad_token_id=tokenizer.eos_token_id,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Generated: {generated_text}")

    print("\n🔍 Testing Safetensors Format")
    print("-" * 40)

    try:
        from safetensors import safe_open

        # device="cpu" keeps this check GPU-independent; the original
        # device=0 would try to map tensors onto CUDA device 0 and fail
        # on machines without a GPU.
        with safe_open(
            "./prothomalo_model.safetensors", framework="pt", device="cpu"
        ) as f:
            keys = list(f.keys())
            print("✅ Safetensors loaded successfully!")
            print(f"📊 Contains {len(keys)} tensors")
            print("📝 First 3 tensor names:")
            for key in keys[:3]:
                print(f"  - {key}")

    except Exception as e:
        # Best-effort check: report the problem but keep the script's
        # overall run (and exit status) successful.
        print(f"❌ Safetensors test failed: {e}")

    print("\n🎉 Model testing completed!")
    return True
|
|
|
|
|
if __name__ == "__main__":
    test_model()