newslens / src /analysis /rag_pipeline.py
Jitender20's picture
Add NewsLens Streamlit app
208266a
from src.db.vector_store import NewsVectorStore
from src.models.test_inference import BiasPredictor
from src.analysis.source_bias import get_source_bias, get_source_record
from collections import defaultdict
class NewsAnalysisPipeline:
    """Query the news vector store for a topic and label each hit for bias.

    ``analyze`` retrieves articles from the vector store, runs their text
    through the bias predictor in one batch, attaches the source-level bias
    record to each article, and tallies the predicted labels per source.
    """

    def __init__(self):
        """Create the vector store and the bias predictor.

        Prints a progress message before and after the two components are
        constructed.
        """
        print("Initializing NewsLens pipeline...")
        self.vector_store = NewsVectorStore()
        self.bias_predictor = BiasPredictor()
        print("Pipeline ready.")

    @staticmethod
    def _build_result(article, prediction):
        """Merge one retrieved article with its prediction into a result row.

        ``article`` must provide "source", "url", "text" and
        "similarity_score"; "title", "description" and "publishedAt" fall
        back to an empty string when absent. ``prediction`` must provide
        "label", "confidence" and an indexable "probabilities" whose item 0
        is reported as "Not Biased" and item 1 as "Biased".
        """
        source_record = get_source_record(article["source"])
        return {
            "source": article["source"],
            "source_bias": source_record["bias"],
            "source_bias_provenance": source_record["provenance"],
            "url": article["url"],
            "title": article.get("title", ""),
            "description": article.get("description", ""),
            "publishedAt": article.get("publishedAt", ""),
            "text": article["text"],
            "text_label": prediction["label"],
            "confidence": prediction["confidence"],
            "probabilities": {
                # Rounded to four decimals for display.
                "Not Biased": round(prediction["probabilities"][0], 4),
                "Biased": round(prediction["probabilities"][1], 4),
            },
            "similarity_score": article["similarity_score"],
        }

    @staticmethod
    def _tally_by_source(results):
        """Count predicted labels per source, in first-seen source order.

        Each entry holds the source's bias (taken from the most recent row
        for that source), a counter for "Biased", a counter for
        "Not Biased", and the total number of rows for the source.
        """
        per_source = {}
        for row in results:
            name = row["source"]
            if name not in per_source:
                per_source[name] = {
                    "source_bias": "Unknown",
                    "Biased": 0,
                    "Not Biased": 0,
                    "total": 0,
                }
            bucket = per_source[name]
            bucket["source_bias"] = row["source_bias"]
            bucket[row["text_label"]] += 1
            bucket["total"] += 1
        return per_source

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Analyze the articles retrieved for ``topic``.

        Args:
            topic: Query string passed to the vector store.
            top_k: Forwarded to the vector store query as ``top_k``.

        Returns:
            A dict with three keys: "topic" (the input topic), "results"
            (one row per article/prediction pair) and "summary" (per-source
            label counts). When the query returns nothing, "results" is an
            empty list and "summary" an empty dict.
        """
        hits = self.vector_store.query(topic, top_k=top_k)
        if not hits:
            return {"topic": topic, "results": [], "summary": {}}

        # One batched predictor call for all retrieved texts.
        verdicts = self.bias_predictor.predict_batch(
            [hit["text"] for hit in hits]
        )
        results = [
            self._build_result(hit, verdict)
            for hit, verdict in zip(hits, verdicts)
        ]

        # Aggregate per source
        return {
            "topic": topic,
            "results": results,
            "summary": self._tally_by_source(results),
        }
if __name__ == "__main__":
    # Manual smoke run: build the pipeline, analyze a fixed topic, and print
    # the per-article labels followed by the per-source tallies.
    pipeline = NewsAnalysisPipeline()
    output = pipeline.analyze("climate change", top_k=10)

    print(f"\n=== Results for: '{output['topic']}' ===")
    for r in output["results"]:
        # Keep a separator between the source's lean and the source name;
        # without it the two fields run together in the output.
        # Only the first 80 characters of the text are shown.
        print(f"[{r['text_label']}] ({r['confidence']:.2f}) | Source lean: {r['source_bias']} | {r['source']}: {r['text'][:80]}...")

    print("\n=== Source Summary ===")
    for source, counts in output["summary"].items():
        print(f"{source} ({counts['source_bias']}): Biased={counts['Biased']}, Not Biased={counts['Not Biased']}, Total={counts['total']}")