| """ |
| Authorship verification module. |
| Uses sentence-embedding cosine similarity as a proxy for whether the corrected |
| output could plausibly have been written by the same author as the input. |
| Target: > 0.80 same-author probability. |
| """ |
|
|
| from typing import Tuple |
| from loguru import logger |
| import torch |
| import torch.nn.functional as F |
|
|
|
|
class AuthorshipVerifier:
    """Estimate whether two texts share an author via embedding similarity.

    Wraps a ``SentenceTransformer`` model. If the model cannot be loaded,
    ``self.model`` is set to ``None`` and :meth:`verify` returns the neutral
    score ``0.5`` for every call instead of raising.
    """

    # Score returned whenever no meaningful similarity can be computed.
    NEUTRAL_SCORE = 0.5

    def __init__(self, model_name: str = "roberta-base", device: str = "cpu"):
        """Load the embedding model, falling back to ``None`` on failure.

        Args:
            model_name: Model name or path passed to ``SentenceTransformer``.
            device: Device string passed to ``SentenceTransformer``. Defaults
                to ``"cpu"``, the value that used to be hard-coded.
        """
        try:
            # Imported here so that a missing or broken sentence_transformers
            # install is caught below and degrades to the neutral fallback.
            from sentence_transformers import SentenceTransformer

            self.model = SentenceTransformer(model_name, device=device)
            logger.info(f"AuthorshipVerifier loaded with {model_name}")
        except Exception as e:
            logger.warning(f"Failed to load authorship model: {e}")
            self.model = None

    @staticmethod
    def _is_blank(text) -> bool:
        """Return True for ``None``, empty, or whitespace-only text."""
        return not text or (isinstance(text, str) and not text.strip())

    def verify(self, text_a: str, text_b: str) -> float:
        """Return a same-author score in ``[0.0, 1.0]`` for the two texts.

        The score is the cosine similarity of the two sentence embeddings,
        rescaled from ``[-1, 1]`` to ``[0, 1]``. Higher means more similar.

        Args:
            text_a: First text (e.g. the original input).
            text_b: Second text (e.g. the corrected output).

        Returns:
            The rescaled similarity, or ``0.5`` when no model is loaded,
            either text is empty/whitespace-only, encoding fails, or the
            similarity is not a number.
        """
        if self.model is None:
            return self.NEUTRAL_SCORE

        # Whitespace-only text carries no stylistic signal; treat it as empty.
        if self._is_blank(text_a) or self._is_blank(text_b):
            return self.NEUTRAL_SCORE

        try:
            embeddings = self.model.encode([text_a, text_b], convert_to_tensor=True)
            similarity = F.cosine_similarity(
                embeddings[0].unsqueeze(0),
                embeddings[1].unsqueeze(0),
            ).item()
        except Exception as e:
            logger.warning(f"Authorship verification failed: {e}")
            return self.NEUTRAL_SCORE

        # NaN is the only float that is not equal to itself.
        if similarity != similarity:
            logger.warning("Authorship verification failed: similarity is NaN")
            return self.NEUTRAL_SCORE

        # Floating-point rounding can push cosine similarity marginally past
        # +/-1, so clamp the rescaled value to keep it a valid probability.
        return min(1.0, max(0.0, (similarity + 1.0) / 2.0))
|
|