# Extracted from a Hugging Face Spaces page (Space status: Running).
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import os
class HumanizerEncoder:
    """Wraps a fine-tuned T5 model that rewrites ("humanizes") text.

    Falls back to identity (returning the input unchanged) whenever the
    model or tokenizer could not be loaded, so callers never need to
    special-case a missing checkpoint.
    """

    def __init__(self, model_path, device):
        """Load the tokenizer and model weights from ``model_path``.

        Args:
            model_path: Path to either a state_dict checkpoint or a fully
                pickled model saved with ``torch.save``.
            device: torch device spec (e.g. ``"cpu"``, ``"cuda"``) the
                model should run on.
        """
        self.device = device
        self.model = None
        self.tokenizer = None
        if not os.path.exists(model_path):
            print("[HUMANIZER] No pretrained weights found")
            return
        # ===== Initialize T5 tokenizer & model =====
        try:
            self.tokenizer = T5Tokenizer.from_pretrained("t5-small")
            # NOTE(security): torch.load unpickles arbitrary objects —
            # only load checkpoints from trusted sources.
            checkpoint = torch.load(model_path, map_location=device)
            if isinstance(checkpoint, dict):
                # Many training scripts nest the weights under a wrapper
                # key; unwrap the common variants so load_state_dict sees
                # the actual parameter tensors.
                for wrapper_key in ("state_dict", "model_state_dict"):
                    if wrapper_key in checkpoint and isinstance(
                        checkpoint[wrapper_key], dict
                    ):
                        checkpoint = checkpoint[wrapper_key]
                        break
                self.model = T5ForConditionalGeneration.from_pretrained("t5-small")
                # strict=False tolerates renamed/extra keys, but report
                # them instead of failing silently.
                missing, unexpected = self.model.load_state_dict(
                    checkpoint, strict=False
                )
                if missing or unexpected:
                    print(
                        "[HUMANIZER] Partial load — "
                        f"{len(missing)} missing / {len(unexpected)} unexpected keys"
                    )
            else:
                self.model = checkpoint  # full model saved
            self.model = self.model.to(device)
            self.model.eval()  # disable dropout etc. for inference
            print("[HUMANIZER] Humanizer loaded →", model_path)
        except Exception as e:
            # Best-effort: log and fall back to identity rather than crash.
            print("[HUMANIZER] Failed to load model:", e)
            self.model = None

    # ===== Real text rewriting =====
    def generate(self, text: str, max_length: int = 64) -> str:
        """Rewrite ``text`` with the loaded model.

        Returns ``text`` unchanged when no model/tokenizer is available
        (the identity fallback).

        Args:
            text: Input string to rewrite.
            max_length: Token budget used both to truncate the input and
                to cap the generated output.
        """
        if self.model is None or self.tokenizer is None:
            return text  # fallback: just return original text
        # Tokenize input (truncated to max_length tokens) and move the
        # tensors onto the model's device.
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
        ).to(self.device)
        # Beam-search generation; no_grad avoids building autograd state.
        with torch.no_grad():
            output_ids = self.model.generate(
                **inputs,
                max_length=max_length,
                num_beams=4,
                early_stopping=True,
            )
        # Decode generated token ids back to a plain string.
        rewritten_text = self.tokenizer.decode(
            output_ids[0], skip_special_tokens=True
        )
        return rewritten_text