| """ |
| PIOE Credibility Scorer |
| |
| Evaluates trustworthiness of sources and authors. |
| """ |
| from ..models import SourceType |
|
|
|
|
class CredibilityScorer:
    """
    Scores credibility based on source type, author credibility, and content signals.

    The scorer is stateless: all tuning values live in class-level constants,
    so a single instance can be reused for any number of items.
    """

    # Base trust per source type. Types missing from this table fall back to
    # _DEFAULT_SOURCE_SCORE in score_source().
    SOURCE_CREDIBILITY = {
        SourceType.ARXIV: 0.95,
        SourceType.GITHUB: 0.8,
        SourceType.RSS: 0.7,
        SourceType.SUPERTEAM: 0.85,
        SourceType.REDDIT: 0.5,
        SourceType.TWITTER: 0.4,
        SourceType.LINKEDIN: 0.6,
        SourceType.WEB_SCRAPE: 0.5,
    }

    _DEFAULT_SOURCE_SCORE = 0.5
    _DEFAULT_AUTHOR_CREDIBILITY = 0.5

    # Relative weight of each content signal; the weights sum to 1.0.
    _SIGNAL_WEIGHTS = {
        "has_deadline": 0.3,
        "has_organization": 0.2,
        "has_action_url": 0.2,
        "is_original": 0.2,
        "has_requirements": 0.1,
    }

    # Lower-case phrases searched for (as substrings) in the lower-cased text.
    _DEADLINE_KEYWORDS = ("deadline", "due date", "apply by", "closes")
    _ACTION_KEYWORDS = ("apply here", "register at", "sign up")
    _RESHARE_KEYWORDS = ("repost", "sharing", "fyi", "icymi", "in case you missed")
    _REQUIREMENT_KEYWORDS = ("requirements", "qualifications", "must have")

    # Only these sources receive an engagement boost.
    _ENGAGEMENT_SOURCES = (SourceType.REDDIT, SourceType.TWITTER)
    # (exclusive lower bound, boost), checked from the highest tier down.
    _ENGAGEMENT_TIERS = ((100, 0.15), (50, 0.1), (20, 0.05))

    @staticmethod
    def _contains_any(text_lower: str, keywords) -> bool:
        """Return True if any keyword occurs as a substring of text_lower."""
        return any(kw in text_lower for kw in keywords)

    def score_source(self, source_type: SourceType) -> float:
        """Get base credibility score for source type (0.5 if the type is unknown)."""
        return self.SOURCE_CREDIBILITY.get(source_type, self._DEFAULT_SOURCE_SCORE)

    def score_content_signals(self, text: str, metadata: dict = None) -> dict:
        """
        Evaluate content signals that indicate credibility.

        Args:
            text: Item text; matched case-insensitively. Empty or None is
                treated as "no text".
            metadata: Optional mapping; the keys "deadline", "organization",
                "url" and "requirements" are consulted. None means empty.

        Returns:
            Dict of individual signal scores keyed by "has_deadline",
            "has_organization", "has_action_url", "is_original" and
            "has_requirements".
        """
        metadata = metadata or {}
        text_lower = text.lower() if text else ""
        contains_any = self._contains_any

        signals = {}

        # A deadline from structured metadata or mentioned in the text.
        has_deadline = bool(metadata.get("deadline")) or contains_any(
            text_lower, self._DEADLINE_KEYWORDS
        )
        signals["has_deadline"] = 1.0 if has_deadline else 0.0

        # A missing organization is scored as neutral (0.5), not as zero.
        signals["has_organization"] = 1.0 if metadata.get("organization") else 0.5

        # A URL in metadata or a call-to-action phrase in the text.
        has_action = bool(metadata.get("url")) or contains_any(
            text_lower, self._ACTION_KEYWORDS
        )
        signals["has_action_url"] = 1.0 if has_action else 0.0

        # Re-share markers in the text count against originality.
        is_reshare = contains_any(text_lower, self._RESHARE_KEYWORDS)
        signals["is_original"] = 0.0 if is_reshare else 1.0

        # Requirements from structured metadata or mentioned in the text.
        has_requirements = bool(metadata.get("requirements")) or contains_any(
            text_lower, self._REQUIREMENT_KEYWORDS
        )
        signals["has_requirements"] = 1.0 if has_requirements else 0.0

        return signals

    def calculate_signal_strength(self, signals: dict) -> float:
        """
        Calculate overall signal strength from content signals.

        The result is the weighted sum of the known signals, rounded to three
        decimals. Signals missing from the mapping count as 0; a None mapping
        is treated as empty.
        """
        signals = signals or {}
        total = sum(
            signals.get(name, 0) * weight
            for name, weight in self._SIGNAL_WEIGHTS.items()
        )
        return round(total, 3)

    def score(
        self,
        source_type: SourceType,
        text: str = "",
        metadata: dict = None,
        author_credibility: float = 0.5,
        social_engagement: int = 0
    ) -> dict:
        """
        Calculate comprehensive credibility score.

        Args:
            source_type: Where the item came from.
            text: Item text used for content-signal detection.
            metadata: Optional structured fields (see score_content_signals).
            author_credibility: Author trust; None falls back to 0.5 and
                values outside [0, 1] are clamped into that range.
            social_engagement: Engagement count; only used for Reddit and
                Twitter sources. None is treated as 0.

        Returns dict with:
        - source_score: Base source credibility
        - signal_strength: Content actionability
        - signals: Individual content signal scores
        - credibility_score: Combined score
        """
        source_score = self.score_source(source_type)
        content_signals = self.score_content_signals(text, metadata)
        signal_strength = self.calculate_signal_strength(content_signals)

        # Missing author data must not crash the arithmetic below, and an
        # out-of-range value must not push the combined score outside [0, 1].
        if author_credibility is None:
            author_credibility = self._DEFAULT_AUTHOR_CREDIBILITY
        author_credibility = min(max(author_credibility, 0.0), 1.0)

        # Ordering comparisons against None raise TypeError, so normalise first.
        engagement = social_engagement or 0

        engagement_boost = 0.0
        if source_type in self._ENGAGEMENT_SOURCES:
            for threshold, boost in self._ENGAGEMENT_TIERS:
                if engagement > threshold:
                    engagement_boost = boost
                    break

        # Weights sum to 1.0. The engagement term starts from a 0.5 baseline
        # for every source, so only the boost differentiates social posts.
        credibility_score = (
            0.5 * source_score +
            0.3 * signal_strength +
            0.1 * author_credibility +
            0.1 * min(engagement_boost + 0.5, 1.0)
        )

        return {
            "source_score": round(source_score, 3),
            "signal_strength": signal_strength,
            "signals": content_signals,
            "credibility_score": round(credibility_score, 3)
        }
|
|