PIOE / backend /intelligence /credibility.py
B1acB1rd
PIOE 2.0 ready for deployment
4d92cd5
"""
PIOE Credibility Scorer
Evaluates trustworthiness of sources and authors.
"""
import re
from typing import Optional

from ..models import SourceType
class CredibilityScorer:
    """
    Scores credibility based on source type, author history, and content signals.

    The final score blends four components, each expected in [0, 1]:
    50% source type, 30% content signal strength, 10% author credibility
    and 10% social engagement.
    """

    # Base credibility scores by source type
    SOURCE_CREDIBILITY = {
        SourceType.ARXIV: 0.95,      # Academic papers - highest trust
        SourceType.GITHUB: 0.8,      # Open source - high trust
        SourceType.RSS: 0.7,         # Varies by feed
        SourceType.SUPERTEAM: 0.85,  # Official platform
        SourceType.REDDIT: 0.5,      # Community - variable
        SourceType.TWITTER: 0.4,     # Social - requires filtering
        SourceType.LINKEDIN: 0.6,    # Professional but noisy
        SourceType.WEB_SCRAPE: 0.5,  # Unknown quality
    }

    # Score used for any source type missing from SOURCE_CREDIBILITY
    DEFAULT_SOURCE_SCORE = 0.5

    # Relative weight of each content signal in the signal strength (sums to 1.0)
    SIGNAL_WEIGHTS = {
        "has_deadline": 0.3,
        "has_organization": 0.2,
        "has_action_url": 0.2,
        "is_original": 0.2,
        "has_requirements": 0.1,
    }

    # Source types for which social engagement can raise the score
    _SOCIAL_SOURCES = (SourceType.REDDIT, SourceType.TWITTER)

    # (engagement must exceed, boost) tiers, checked from highest to lowest
    _ENGAGEMENT_BOOSTS = ((100, 0.15), (50, 0.1), (20, 0.05))

    # Keyword patterns, matched against lower-cased text.
    # Each keyword is anchored at a leading word boundary so it cannot fire
    # from the middle of an unrelated word ("satisfying" contains "fyi",
    # "discloses" contains "closes", "design upgrade" contains "sign up").
    # There is deliberately no trailing boundary, so suffixed forms such as
    # "deadlines" or "reposted" still match.
    _DEADLINE_RE = re.compile(r"\b(?:deadline|due date|apply by|closes)")
    _ACTION_URL_RE = re.compile(r"\b(?:apply here|register at|sign up)")
    _REPOST_RE = re.compile(
        r"\b(?:repost|sharing|resharing|fyi|icymi|in case you missed)"
    )
    _REQUIREMENTS_RE = re.compile(r"\b(?:requirements|qualifications|must have)")

    def score_source(self, source_type: SourceType) -> float:
        """Get base credibility score for source type.

        Source types without an entry in SOURCE_CREDIBILITY get
        DEFAULT_SOURCE_SCORE.
        """
        return self.SOURCE_CREDIBILITY.get(source_type, self.DEFAULT_SOURCE_SCORE)

    def score_content_signals(self, text: str, metadata: Optional[dict] = None) -> dict:
        """
        Evaluate content signals that indicate credibility.

        Args:
            text: Content to scan for keywords; None or empty is treated
                as no text.
            metadata: Optional dict; the keys "deadline", "organization",
                "url" and "requirements" are read, and a truthy value sets
                the matching signal regardless of the text.

        Returns:
            Dict mapping each signal name to its score. Every signal is
            1.0 or 0.0, except "has_organization", which is 1.0 or 0.5.
        """
        metadata = metadata or {}
        text_lower = text.lower() if text else ""

        def mentions(pattern) -> bool:
            return pattern.search(text_lower) is not None

        return {
            # Has deadline (official announcements usually have deadlines)
            "has_deadline": (
                1.0 if metadata.get("deadline") or mentions(self._DEADLINE_RE) else 0.0
            ),
            # Has organization/institution (absence is neutral, not negative)
            "has_organization": 1.0 if metadata.get("organization") else 0.5,
            # Contains action URL
            "has_action_url": (
                1.0 if metadata.get("url") or mentions(self._ACTION_URL_RE) else 0.0
            ),
            # Is first announcement (not a repost)
            "is_original": 0.0 if mentions(self._REPOST_RE) else 1.0,
            # Has specific requirements (detailed = more credible)
            "has_requirements": (
                1.0
                if metadata.get("requirements") or mentions(self._REQUIREMENTS_RE)
                else 0.0
            ),
        }

    def calculate_signal_strength(self, signals: dict) -> float:
        """
        Calculate overall signal strength from content signals.

        High signal strength = actionable, official, time-sensitive.

        Args:
            signals: Dict of signal name to score; signals missing from
                the dict count as 0.

        Returns:
            Weighted sum over SIGNAL_WEIGHTS, rounded to 3 decimals.
        """
        total = sum(
            signals.get(name, 0) * weight
            for name, weight in self.SIGNAL_WEIGHTS.items()
        )
        return round(total, 3)

    def score(
        self,
        source_type: SourceType,
        text: str = "",
        metadata: Optional[dict] = None,
        author_credibility: float = 0.5,
        social_engagement: int = 0
    ) -> dict:
        """
        Calculate comprehensive credibility score.

        Args:
            source_type: Where the content came from.
            text: Content to scan for credibility signals.
            metadata: Optional dict passed to score_content_signals.
            author_credibility: Author trust, clamped to [0, 1]; None is
                treated as the neutral default 0.5.
            social_engagement: Engagement count; only affects REDDIT and
                TWITTER sources. None is treated as 0.

        Returns dict with:
            - source_score: Base source credibility
            - signal_strength: Content actionability
            - signals: Individual content signal scores
            - credibility_score: Combined score
        """
        source_score = self.score_source(source_type)
        content_signals = self.score_content_signals(text, metadata)
        signal_strength = self.calculate_signal_strength(content_signals)

        # Keep the author component inside [0, 1] so an out-of-range value
        # cannot push the combined score outside that range.
        if author_credibility is None:
            author_credibility = 0.5
        author_credibility = min(max(author_credibility, 0.0), 1.0)

        # Social engagement boost (for social sources)
        engagement = social_engagement or 0
        engagement_boost = 0.0
        if source_type in self._SOCIAL_SOURCES:
            for threshold, boost in self._ENGAGEMENT_BOOSTS:
                if engagement > threshold:
                    engagement_boost = boost
                    break

        # Combined credibility:
        # 50% source, 30% signals, 10% author, 10% engagement.
        # The engagement component starts at a neutral 0.5 and is only
        # raised by the boost, so non-social sources are not penalized.
        credibility_score = (
            0.5 * source_score +
            0.3 * signal_strength +
            0.1 * author_credibility +
            0.1 * min(engagement_boost + 0.5, 1.0)
        )

        return {
            "source_score": round(source_score, 3),
            "signal_strength": signal_strength,
            "signals": content_signals,
            "credibility_score": round(credibility_score, 3)
        }