Spaces:

Tremick
/

PIOE

Runtime error

PIOE / backend / intelligence / credibility.py

B1acB1rd

PIOE 2.0 ready for deployment

4d92cd5 4 months ago

4.61 kB

	"""
	PIOE Credibility Scorer

	Evaluates trustworthiness of sources and authors.
	"""
	from ..models import SourceType


	class CredibilityScorer:
	    """
	    Estimates how trustworthy an item is.

	    Blends a fixed per-source baseline with keyword/metadata content
	    signals, a caller-supplied author credibility value, and an engagement
	    bonus that only applies to social sources.
	    """

	    # Baseline trust assigned to each known source type.
	    SOURCE_CREDIBILITY = {
	        SourceType.ARXIV: 0.95,  # Academic papers - highest trust
	        SourceType.GITHUB: 0.8,  # Open source - high trust
	        SourceType.RSS: 0.7,  # Varies by feed
	        SourceType.SUPERTEAM: 0.85,  # Official platform
	        SourceType.REDDIT: 0.5,  # Community - variable
	        SourceType.TWITTER: 0.4,  # Social - requires filtering
	        SourceType.LINKEDIN: 0.6,  # Professional but noisy
	        SourceType.WEB_SCRAPE: 0.5,  # Unknown quality
	    }

	    def __init__(self):
	        """Nothing to set up; the scorer keeps no instance state."""

	    def score_source(self, source_type: SourceType) -> float:
	        """Return the baseline score for ``source_type`` (0.5 when unlisted)."""
	        baseline = self.SOURCE_CREDIBILITY.get(source_type, 0.5)
	        return baseline

	    def score_content_signals(self, text: str, metadata: dict = None) -> dict:
	        """
	        Derive individual credibility signals from text and metadata.

	        Every signal is 1.0 when detected and 0.0 otherwise, except
	        ``has_organization``, which falls back to 0.5 when the metadata
	        carries no organization. Keyword checks are case-insensitive
	        substring matches against ``text``; a truthy metadata entry alone
	        is enough to set the corresponding signal.

	        Returns a dict mapping signal name to its score.
	        """
	        if not metadata:
	            metadata = {}
	        haystack = text.lower() if text else ""

	        def mentions(*phrases):
	            # True when at least one phrase occurs in the lowercased text.
	            return any(phrase in haystack for phrase in phrases)

	        # Official announcements usually state a deadline.
	        deadline_found = metadata.get("deadline") or mentions(
	            "deadline", "due date", "apply by", "closes"
	        )

	        # A named organization/institution in the metadata.
	        organization_found = metadata.get("organization")

	        # Something the reader can act on: a URL or a call-to-action phrase.
	        action_found = metadata.get("url") or mentions(
	            "apply here", "register at", "sign up"
	        )

	        # Wording that suggests a repost rather than a first announcement.
	        looks_reposted = mentions(
	            "repost", "sharing", "fyi", "icymi", "in case you missed"
	        )

	        # Spelled-out requirements (detailed = more credible).
	        requirements_found = metadata.get("requirements") or mentions(
	            "requirements", "qualifications", "must have"
	        )

	        return {
	            "has_deadline": 1.0 if deadline_found else 0.0,
	            "has_organization": 1.0 if organization_found else 0.5,
	            "has_action_url": 1.0 if action_found else 0.0,
	            "is_original": 0.0 if looks_reposted else 1.0,
	            "has_requirements": 1.0 if requirements_found else 0.0,
	        }

	    def calculate_signal_strength(self, signals: dict) -> float:
	        """
	        Collapse content signals into one weighted value.

	        Signals missing from ``signals`` contribute 0. The result is
	        rounded to three decimals; a higher value means the content looks
	        more actionable, official, and time-sensitive.
	        """
	        weighting = (
	            ("has_deadline", 0.3),
	            ("has_organization", 0.2),
	            ("has_action_url", 0.2),
	            ("is_original", 0.2),
	            ("has_requirements", 0.1),
	        )
	        weighted_sum = sum(
	            signals.get(name, 0) * weight for name, weight in weighting
	        )
	        return round(weighted_sum, 3)

	    def score(
	        self,
	        source_type: SourceType,
	        text: str = "",
	        metadata: dict = None,
	        author_credibility: float = 0.5,
	        social_engagement: int = 0
	    ) -> dict:
	        """
	        Compute the full credibility breakdown for one item.

	        Returns a dict with:
	        - source_score: baseline credibility of the source type
	        - signal_strength: weighted content actionability
	        - signals: the individual content signal scores
	        - credibility_score: the combined score
	        """
	        base = self.score_source(source_type)
	        signals = self.score_content_signals(text, metadata)
	        strength = self.calculate_signal_strength(signals)

	        # Engagement only earns a bonus on social sources; the first
	        # threshold exceeded (highest first) decides the bonus.
	        boost = 0.0
	        if source_type in (SourceType.REDDIT, SourceType.TWITTER):
	            for floor, bonus in ((100, 0.15), (50, 0.1), (20, 0.05)):
	                if social_engagement > floor:
	                    boost = bonus
	                    break

	        # Blend: 50% source, 30% signals, 10% author, 10% engagement.
	        # The engagement term starts from 0.5 and is capped at 1.0.
	        source_part = 0.5 * base
	        signal_part = 0.3 * strength
	        author_part = 0.1 * author_credibility
	        engagement_part = 0.1 * min(boost + 0.5, 1.0)
	        combined = source_part + signal_part + author_part + engagement_part

	        return {
	            "source_score": round(base, 3),
	            "signal_strength": strength,
	            "signals": signals,
	            "credibility_score": round(combined, 3)
	        }