# NewsLens news analysis pipeline
from collections import defaultdict

from src.analysis.source_bias import get_source_bias, get_source_record
from src.db.vector_store import NewsVectorStore
from src.models.test_inference import BiasPredictor
class NewsAnalysisPipeline:
    """End-to-end pipeline: retrieve articles for a topic, classify each
    article's text for bias, and aggregate predicted labels per source."""

    def __init__(self):
        # Heavy setup: loads the vector index and the classifier model.
        print("Initializing NewsLens pipeline...")
        self.vector_store = NewsVectorStore()
        self.bias_predictor = BiasPredictor()
        print("Pipeline ready.")

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Retrieve up to ``top_k`` articles about ``topic`` and classify them.

        Args:
            topic: Free-text query passed to the vector store.
            top_k: Maximum number of articles to retrieve.

        Returns:
            Dict with keys ``"topic"``, ``"results"`` (one dict per article,
            see ``_build_result``) and ``"summary"`` (per-source label counts,
            see ``_summarize``). Shape is stable even when nothing matches.
        """
        articles = self.vector_store.query(topic, top_k=top_k)
        if not articles:
            # No matches: return the same shape so callers need no special case.
            return {"topic": topic, "results": [], "summary": {}}

        # Classify all article texts in a single batch call.
        texts = [article["text"] for article in articles]
        predictions = self.bias_predictor.predict_batch(texts)

        results = [
            self._build_result(article, prediction)
            for article, prediction in zip(articles, predictions)
        ]
        return {
            "topic": topic,
            "results": results,
            "summary": self._summarize(results),
        }

    @staticmethod
    def _build_result(article: dict, prediction: dict) -> dict:
        """Merge one article, its model prediction, and its source's lean record."""
        source_record = get_source_record(article["source"])
        return {
            "source": article["source"],
            "source_bias": source_record["bias"],
            "source_bias_provenance": source_record["provenance"],
            "url": article["url"],
            "title": article.get("title", ""),
            "description": article.get("description", ""),
            "publishedAt": article.get("publishedAt", ""),
            "text": article["text"],
            "text_label": prediction["label"],
            "confidence": prediction["confidence"],
            # Index 0 maps to "Not Biased", index 1 to "Biased"; round for display.
            "probabilities": {
                "Not Biased": round(prediction["probabilities"][0], 4),
                "Biased": round(prediction["probabilities"][1], 4),
            },
            "similarity_score": article["similarity_score"],
        }

    @staticmethod
    def _summarize(results: list) -> dict:
        """Count predicted labels per source.

        Returns ``{source: {"source_bias", "Biased", "Not Biased", "total"}}``.
        """
        summary = defaultdict(lambda: {
            "source_bias": "Unknown",
            "Biased": 0,
            "Not Biased": 0,
            "total": 0,
        })
        for r in results:
            entry = summary[r["source"]]
            entry["source_bias"] = r["source_bias"]
            entry[r["text_label"]] += 1
            entry["total"] += 1
        return dict(summary)
| if __name__ == "__main__": | |
| pipeline = NewsAnalysisPipeline() | |
| output = pipeline.analyze("climate change", top_k=10) | |
| print(f"\n=== Results for: '{output['topic']}' ===") | |
| for r in output["results"]: | |
| print(f"[{r['text_label']}] ({r['confidence']:.2f}) | Source lean: {r['source_bias']} — {r['source']}: {r['text'][:80]}...") | |
| print("\n=== Source Summary ===") | |
| for source, counts in output["summary"].items(): | |
| print(f"{source} ({counts['source_bias']}): Biased={counts['Biased']}, Not Biased={counts['Not Biased']}, Total={counts['total']}") | |