"""
PIOE Credibility Scorer

Evaluates trustworthiness of sources and authors.
"""

import re
from typing import Optional

from ..models import SourceType


def _keyword_pattern(keywords):
    """Compile a regex matching any of *keywords* at the start of a word.

    Only the leading edge is anchored with ``\\b``: inflected forms such as
    "deadlines" still match, while keywords buried inside a longer word
    ("fyi" in "satisfying", "closes" in "discloses") do not.
    """
    alternatives = "|".join(re.escape(keyword) for keyword in keywords)
    return re.compile(r"\b(?:" + alternatives + r")")


class CredibilityScorer:
    """
    Scores credibility based on source type, author credibility,
    social engagement and content signals.
    """

    # Base credibility scores by source type
    SOURCE_CREDIBILITY = {
        SourceType.ARXIV: 0.95,      # Academic papers - highest trust
        SourceType.GITHUB: 0.8,      # Open source - high trust
        SourceType.RSS: 0.7,         # Varies by feed
        SourceType.SUPERTEAM: 0.85,  # Official platform
        SourceType.REDDIT: 0.5,      # Community - variable
        SourceType.TWITTER: 0.4,     # Social - requires filtering
        SourceType.LINKEDIN: 0.6,    # Professional but noisy
        SourceType.WEB_SCRAPE: 0.5,  # Unknown quality
    }

    # Score used for source types missing from SOURCE_CREDIBILITY.
    DEFAULT_SOURCE_SCORE = 0.5

    # Source types whose score may be boosted by social engagement.
    SOCIAL_SOURCES = (SourceType.REDDIT, SourceType.TWITTER)

    # (strict lower bound on engagement, boost), checked from highest tier down.
    ENGAGEMENT_TIERS = ((100, 0.15), (50, 0.1), (20, 0.05))

    # Relative weight of each content signal in the signal strength.
    SIGNAL_WEIGHTS = {
        "has_deadline": 0.3,
        "has_organization": 0.2,
        "has_action_url": 0.2,
        "is_original": 0.2,
        "has_requirements": 0.1,
    }

    # Keyword patterns are applied to lower-cased text, hence lower-case keywords.
    _DEADLINE_RE = _keyword_pattern(
        ["deadline", "due date", "apply by", "closes"]
    )
    _ACTION_RE = _keyword_pattern(["apply here", "register at", "sign up"])
    _REPOST_RE = _keyword_pattern(
        ["repost", "sharing", "fyi", "icymi", "in case you missed"]
    )
    _REQUIREMENTS_RE = _keyword_pattern(
        ["requirements", "qualifications", "must have"]
    )

    def score_source(self, source_type: SourceType) -> float:
        """Get base credibility score for source type.

        Source types not listed in ``SOURCE_CREDIBILITY`` score 0.5.
        """
        return self.SOURCE_CREDIBILITY.get(
            source_type, self.DEFAULT_SOURCE_SCORE
        )

    def score_content_signals(
        self, text: str, metadata: Optional[dict] = None
    ) -> dict:
        """
        Evaluate content signals that indicate credibility.

        Each signal is derived from a truthy ``metadata`` entry and/or from
        keywords found in ``text`` (case-insensitive, matched at the start
        of a word).

        Args:
            text: Content to inspect; ``None`` or empty is treated as "".
            metadata: Optional dict; the keys read are ``deadline``,
                ``organization``, ``url`` and ``requirements``.

        Returns:
            Dict mapping signal name to its score:
            ``has_deadline``, ``has_action_url``, ``is_original`` and
            ``has_requirements`` are 0.0 or 1.0; ``has_organization`` is
            1.0 when present in metadata and a neutral 0.5 otherwise.
        """
        metadata = metadata or {}
        text_lower = text.lower() if text else ""

        def mentions(pattern) -> bool:
            return pattern.search(text_lower) is not None

        # Official announcements usually state a deadline.
        has_deadline = bool(metadata.get("deadline")) or mentions(
            self._DEADLINE_RE
        )
        # Either an explicit URL or a call-to-action phrase.
        has_action_url = bool(metadata.get("url")) or mentions(self._ACTION_RE)
        # Repost markers indicate this is not the first announcement.
        is_original = not mentions(self._REPOST_RE)
        # Detailed requirements make a posting more credible.
        has_requirements = bool(metadata.get("requirements")) or mentions(
            self._REQUIREMENTS_RE
        )

        return {
            "has_deadline": 1.0 if has_deadline else 0.0,
            "has_organization": 1.0 if metadata.get("organization") else 0.5,
            "has_action_url": 1.0 if has_action_url else 0.0,
            "is_original": 1.0 if is_original else 0.0,
            "has_requirements": 1.0 if has_requirements else 0.0,
        }

    def calculate_signal_strength(self, signals: dict) -> float:
        """
        Calculate overall signal strength from content signals.

        The result is the ``SIGNAL_WEIGHTS``-weighted sum of the signals,
        rounded to 3 decimals. Signals missing from ``signals`` count as 0.
        High signal strength = actionable, official, time-sensitive.
        """
        total = sum(
            signals.get(name, 0) * weight
            for name, weight in self.SIGNAL_WEIGHTS.items()
        )
        return round(total, 3)

    def score(
        self,
        source_type: SourceType,
        text: str = "",
        metadata: Optional[dict] = None,
        author_credibility: float = 0.5,
        social_engagement: int = 0,
    ) -> dict:
        """
        Calculate comprehensive credibility score.

        Args:
            source_type: Where the content came from.
            text: Content text used for keyword signals.
            metadata: Optional metadata dict (see ``score_content_signals``).
            author_credibility: Author score; clamped to [0, 1].
            social_engagement: Engagement count; only affects the score for
                Reddit and Twitter sources.

        Returns dict with:
            - source_score: Base source credibility
            - signal_strength: Content actionability
            - signals: Individual content signal scores
            - credibility_score: Combined score
        """
        source_score = self.score_source(source_type)
        content_signals = self.score_content_signals(text, metadata)
        signal_strength = self.calculate_signal_strength(content_signals)

        # Social engagement boost (for social sources only).
        engagement_boost = 0.0
        if source_type in self.SOCIAL_SOURCES:
            for threshold, boost in self.ENGAGEMENT_TIERS:
                if social_engagement > threshold:
                    engagement_boost = boost
                    break

        # Keep the author term within its 10% share even if a caller passes
        # an out-of-range value (e.g. a percentage).
        author_score = min(max(author_credibility, 0.0), 1.0)

        # Combined credibility:
        # 50% source, 30% signals, 10% author, 10% engagement.
        # The engagement term starts from a neutral 0.5 and is capped at 1.0.
        credibility_score = (
            0.5 * source_score
            + 0.3 * signal_strength
            + 0.1 * author_score
            + 0.1 * min(engagement_boost + 0.5, 1.0)
        )

        return {
            "source_score": round(source_score, 3),
            "signal_strength": signal_strength,
            "signals": content_signals,
            "credibility_score": round(credibility_score, 3),
        }