"""NewsLens analysis pipeline.

Queries the vector store for articles matching a topic, labels each article's
text with the bias predictor, and tallies the predicted labels per source.
"""

from collections import defaultdict

from src.db.vector_store import NewsVectorStore
from src.models.test_inference import BiasPredictor
from src.analysis.source_bias import get_source_bias, get_source_record


class NewsAnalysisPipeline:
    """Combine article retrieval, text-bias prediction and source-bias lookup."""

    def __init__(self):
        """Create the vector store and the bias predictor, printing progress."""
        print("Initializing NewsLens pipeline...")
        self.vector_store = NewsVectorStore()
        self.bias_predictor = BiasPredictor()
        print("Pipeline ready.")

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Analyze the articles retrieved for *topic*.

        Args:
            topic: Query string passed to the vector store.
            top_k: Forwarded to the vector store query as ``top_k``.

        Returns:
            A dict with three keys:

            * ``"topic"``   - the topic as given.
            * ``"results"`` - one dict per article (see ``_build_result``).
            * ``"summary"`` - per-source label counts (see ``_summarize``).

            When the query yields nothing, ``"results"`` is ``[]`` and
            ``"summary"`` is ``{}``.
        """
        articles = self.vector_store.query(topic, top_k=top_k)
        if not articles:
            return {"topic": topic, "results": [], "summary": {}}

        # Predict all texts in a single batch call rather than one by one.
        texts = [article["text"] for article in articles]
        predictions = self.bias_predictor.predict_batch(texts)

        # zip() stops at the shorter sequence, so articles without a matching
        # prediction are silently left out of the results.
        results = [
            self._build_result(article, prediction)
            for article, prediction in zip(articles, predictions)
        ]

        return {
            "topic": topic,
            "results": results,
            "summary": self._summarize(results),
        }

    @staticmethod
    def _build_result(article: dict, prediction: dict) -> dict:
        """Merge one article, its prediction and its source record into a row.

        ``source``, ``url``, ``text`` and ``similarity_score`` are required
        article keys; ``title``, ``description`` and ``publishedAt`` default
        to an empty string when absent.
        """
        source_record = get_source_record(article["source"])
        probabilities = prediction["probabilities"]
        return {
            "source": article["source"],
            "source_bias": source_record["bias"],
            "source_bias_provenance": source_record["provenance"],
            "url": article["url"],
            "title": article.get("title", ""),
            "description": article.get("description", ""),
            "publishedAt": article.get("publishedAt", ""),
            "text": article["text"],
            "text_label": prediction["label"],
            "confidence": prediction["confidence"],
            # NOTE(review): assumes index 0 is the "Not Biased" class and
            # index 1 the "Biased" class - confirm against BiasPredictor.
            "probabilities": {
                "Not Biased": round(probabilities[0], 4),
                "Biased": round(probabilities[1], 4),
            },
            "similarity_score": article["similarity_score"],
        }

    @staticmethod
    def _summarize(results: list) -> dict:
        """Count predicted labels per source.

        Returns a plain dict mapping each source to
        ``{"source_bias", "Biased", "Not Biased", "total"}``.
        """
        summary = defaultdict(
            lambda: {
                "source_bias": "Unknown",
                "Biased": 0,
                "Not Biased": 0,
                "total": 0,
            }
        )
        for row in results:
            entry = summary[row["source"]]
            entry["source_bias"] = row["source_bias"]
            # NOTE(review): a text_label that is not one of the pre-seeded
            # keys raises KeyError here.
            entry[row["text_label"]] += 1
            entry["total"] += 1
        # Hand back a regular dict so later lookups cannot create entries.
        return dict(summary)


def main() -> None:
    """Run a demo analysis for "climate change" and print the outcome."""
    pipeline = NewsAnalysisPipeline()
    output = pipeline.analyze("climate change", top_k=10)

    print(f"\n=== Results for: '{output['topic']}' ===")
    for r in output["results"]:
        print(
            f"[{r['text_label']}] ({r['confidence']:.2f}) | Source lean: "
            f"{r['source_bias']} — {r['source']}: {r['text'][:80]}..."
        )

    print("\n=== Source Summary ===")
    for source, counts in output["summary"].items():
        print(
            f"{source} ({counts['source_bias']}): Biased={counts['Biased']}, "
            f"Not Biased={counts['Not Biased']}, Total={counts['total']}"
        )


if __name__ == "__main__":
    main()