| """ |
| main.py — The FastAPI Application |
| |
| Entry point of the NLP service. Exposes the extraction logic as HTTP API endpoints. |
| The Node.js backend will POST claims here, and we return extracted metric/value/year. |
| |
| To run: |
| uvicorn main:app --reload --port 5001 |
| |
| Breakdown: |
| uvicorn → the ASGI server (like nodemon for Python) |
| main:app → "in the file main.py, find the variable called app" |
| --reload → auto-restart on file changes (like nodemon) |
| --port 5001 → listen on port 5001 (backend is on 5000) |
| |
| Swagger docs: http://localhost:5001/docs |
| """ |
| import asyncio |
| import logging |
| import os |
|
|
| from fastapi import FastAPI, HTTPException, Request |
| from fastapi.exceptions import RequestValidationError |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import HTMLResponse, JSONResponse |
| from slowapi import Limiter, _rate_limit_exceeded_handler |
| from slowapi.errors import RateLimitExceeded |
| from slowapi.util import get_remote_address |
| from pydantic import BaseModel, Field |
|
|
| from claim_detector import split_into_sentences, score_claim_probability, detect_claim_language |
| from extractor import extract_all, preprocess_claim |
| from metrics import get_all_metric_names |
| from swagger_ui import get_swagger_html, tags_metadata |
| from verifier.tier1_numeric import tier1_numeric_check |
| from verifier.verdict_router import route_verification, VerificationResult |
|
|
# Process-wide logging: INFO level, timestamp + logger name + level + message.
logging.basicConfig(
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    level=logging.INFO,
)

# Named logger used by the handlers in this module.
logger = logging.getLogger("bware.nlp")

# slowapi limiter; request buckets are keyed by get_remote_address.
limiter = Limiter(key_func=get_remote_address)
| |
| |
| |
|
|
class ClaimRequest(BaseModel):
    """Request body: the raw text a client submits for analysis."""

    # Required field; validated to be 3–2000 characters long.
    text: str = Field(
        ...,
        description="The raw claim text to analyze. Can be a single sentence or a full paragraph.",
        min_length=3,
        max_length=2000,
    )

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "text": "India's GDP growth rate stood at 7.5 percent in 2024",
            },
        },
    }
|
|
|
|
class ExtractionResponse(BaseModel):
    """Response body: the structured fields extracted from one claim."""

    original_text: str
    # Each extracted field is optional and defaults to None when absent.
    metric: str | None = None
    value: float | None = None
    year: int | None = None
    value_type: str | None = None
    # Always required, even when nothing else was extracted.
    confidence: float

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "original_text": "India's GDP growth rate stood at 7.5 percent in 2024",
                "metric": "GDP growth rate",
                "value": 7.5,
                "year": 2024,
                "value_type": "percentage",
                "confidence": 0.9,
            },
        },
    }
|
|
|
|
class HealthResponse(BaseModel):
    """Health check response — includes component readiness."""

    # Overall state plus static service identification.
    status: str
    service: str
    version: str
    # Per-component readiness flags, reported as plain strings
    # (the example below shows "loaded", "configured" and "missing").
    bart_model: str
    gemini_key: str
    newsapi_key: str
    factcheck_key: str

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "status": "healthy",
                "service": "B-ware NLP Service",
                "version": "1.0.0",
                "bart_model": "loaded",
                "gemini_key": "configured",
                "newsapi_key": "missing",
                "factcheck_key": "configured",
            },
        },
    }
|
|
|
|
class MetricsListResponse(BaseModel):
    """Response body listing the metric names the service supports."""

    supported_metrics: list[str]
    count: int

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "supported_metrics": [
                    "GDP growth rate",
                    "inflation rate",
                    "unemployment rate",
                    "fiscal deficit",
                    "literacy rate",
                    "population",
                    "per capita income",
                    "poverty rate",
                    "foreign exchange reserves",
                    "current account deficit",
                ],
                "count": 10,
            },
        },
    }
|
|
|
|
class BatchRequest(BaseModel):
    """Request body for batch extraction: a list of claim texts."""

    # Required; the list itself must hold between 1 and 50 items.
    # NOTE(review): individual strings carry no length constraint here,
    # unlike ClaimRequest.text — confirm whether that is intentional.
    claims: list[str] = Field(
        ...,
        description="List of individual claim texts to analyze. Maximum 50 claims per request.",
        min_length=1,
        max_length=50,
    )

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "claims": [
                    "India's GDP growth rate stood at 7.5 percent in 2024",
                    "Retail CPI inflation fell to 4.8% in January 2024",
                    "India's forex reserves crossed $650 billion in 2024",
                ],
            },
        },
    }
|
|
|
|
class BatchResponse(BaseModel):
    """Response body for batch extraction: one result per input claim."""

    results: list[ExtractionResponse]
    total: int

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "results": [
                    {
                        "original_text": "India's GDP growth rate stood at 7.5 percent in 2024",
                        "metric": "GDP growth rate",
                        "value": 7.5,
                        "year": 2024,
                        "confidence": 0.9,
                    },
                    {
                        "original_text": "Retail CPI inflation fell to 4.8% in January 2024",
                        "metric": "inflation rate",
                        "value": 4.8,
                        "year": 2024,
                        "confidence": 0.9,
                    },
                ],
                "total": 2,
            },
        },
    }
|
|
|
|
| |
| |
| |
|
|
class SentenceAnalysis(BaseModel):
    """A single analyzed sentence paired with its claim probability and extraction."""

    sentence: str
    claim_probability: float
    extraction: ExtractionResponse

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "sentence": "India's GDP growth rate was 7.5% in 2024",
                "claim_probability": 0.95,
                "extraction": {
                    "original_text": "India's GDP growth rate was 7.5% in 2024",
                    "metric": "GDP growth rate",
                    "value": 7.5,
                    "year": 2024,
                    "value_type": "percentage",
                    "confidence": 0.9,
                },
            },
        },
    }
|
|
|
|
class ParagraphResponse(BaseModel):
    """
    Response for POST /analyze.

    Fields:
        total_sentences       — total sentences found in the paragraph
        verified_count        — sentences that scored > 0.5 claim probability
        high_confidence_count — verified claims with extraction confidence ≥ 0.8
        results               — per-claim sentence, probability and extraction data
    """

    total_sentences: int
    verified_count: int
    high_confidence_count: int
    results: list[SentenceAnalysis]

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "total_sentences": 3,
                "verified_count": 2,
                "high_confidence_count": 1,
                "results": [
                    {
                        "sentence": "India's GDP growth rate was 7.5% in 2024",
                        "claim_probability": 0.95,
                        "extraction": {
                            "original_text": "India's GDP growth rate was 7.5% in 2024",
                            "metric": "GDP growth rate",
                            "value": 7.5,
                            "year": 2024,
                            "value_type": "percentage",
                            "confidence": 0.9,
                        },
                    },
                ],
            },
        },
    }
|
|
|
|
class NumericCheckResult(BaseModel):
    """
    Outcome of comparing a claimed value with official World Bank data.

    Embedded in QuickVerificationResult as its `numeric_check` field.
    """

    # Every field is optional and defaults to None.
    official_value: float | None = None
    claimed_value: float | None = None
    percentage_error: float | None = None
    source: str | None = None
    indicator_code: str | None = None
    source_url: str | None = None
    year: int | None = None

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "official_value": 6.49,
                "claimed_value": 7.5,
                "percentage_error": 15.48,
                "source": "World Bank",
                "indicator_code": "NY.GDP.MKTP.KD.ZG",
                "source_url": "https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG?locations=IN",
                "year": 2024,
            },
        },
    }
|
|
|
|
class QuickVerificationResult(BaseModel):
    """
    Response shape for POST /verify/quick (Tier 1 only).

    verdict meanings:
        accurate     — % error < 5%
        misleading   — % error between 5% and 20%
        false        — % error >= 20%
        unverifiable — could not extract metric/value/year OR
                       World Bank has no data for that metric/year
    """

    original_text: str
    tier_used: str = "tier1"
    verdict: str
    confidence: float
    extraction: ExtractionResponse
    # None when no numeric comparison was attempted.
    numeric_check: NumericCheckResult | None = None
    explanation: str

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "original_text": "India's GDP growth rate was 7.5% in 2024",
                "tier_used": "tier1",
                "verdict": "misleading",
                "confidence": 0.78,
                "extraction": {
                    "original_text": "India's GDP growth rate was 7.5% in 2024",
                    "metric": "GDP growth rate",
                    "value": 7.5,
                    "year": 2024,
                    "confidence": 0.9,
                },
                "numeric_check": {
                    "official_value": 6.49,
                    "claimed_value": 7.5,
                    "percentage_error": 15.48,
                    "source": "World Bank",
                    "indicator_code": "NY.GDP.MKTP.KD.ZG",
                    "source_url": "https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG?locations=IN",
                    "year": 2024,
                },
                "explanation": "Claimed 7.5%, official World Bank value is 6.49% (error: 15.48%). Classified as misleading.",
            },
        },
    }
|
|
|
|
| |
| |
| |
|
|
class VerificationEvidenceItem(BaseModel):
    """A single evidence entry included in the full verification response."""

    source: str
    snippet: str
    url: str
    evidence_type: str
    # NLI fields are optional and default to None.
    nli_verdict: str | None = None
    nli_score: float | None = None

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "source": "Reuters",
                "snippet": "India's GDP growth accelerated to 6.5% in fiscal year 2024.",
                "url": "https://reuters.com/article/india-gdp",
                "evidence_type": "news",
                "nli_verdict": "entailment",
                "nli_score": 0.82,
            },
        },
    }
|
|
|
|
class FullVerificationResult(BaseModel):
    """
    Response shape for POST /verify and POST /verify/deep.

    tier_used values:
        tier1 — verdict came from numeric World Bank check alone
        tier2 — verdict came from NLI over news/fact-check evidence
        tier3 — verdict came from Gemini LLM reasoning

    verdict values:
        accurate     — claim matches official/evidence data within 5%
        misleading   — claim is off by 5–20% or weakly contradicted
        false        — claim is off by >20% or strongly contradicted
        unverifiable — insufficient data to decide
    """

    original_text: str
    tier_used: str
    verdict: str
    confidence: float

    # Extraction fields (optional ones default to None).
    extracted_metric: str | None = None
    extracted_value: float | None = None
    extracted_year: int | None = None
    extraction_confidence: float

    # Official-data comparison fields.
    official_value: float | None = None
    percentage_error: float | None = None
    official_source: str | None = None
    indicator_code: str | None = None
    source_url: str | None = None

    # Evidence, human-readable explanation, and the tiers that were executed.
    evidence: list[VerificationEvidenceItem] = []
    explanation: str
    tiers_run: list[str] = []

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "original_text": "India's GDP growth rate was 7.5% in 2024",
                "tier_used": "tier2",
                "verdict": "misleading",
                "confidence": 0.71,
                "extracted_metric": "GDP growth rate",
                "extracted_value": 7.5,
                "extracted_year": 2024,
                "extraction_confidence": 0.9,
                "official_value": 6.49,
                "percentage_error": 15.48,
                "official_source": "World Bank",
                "indicator_code": "NY.GDP.MKTP.KD.ZG",
                "source_url": "https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG?locations=IN",
                "evidence": [],
                "explanation": "Numeric check: claimed GDP growth rate = 7.5 (2024), official World Bank = 6.4900 (error: 15.48%) → misleading.",
                "tiers_run": ["tier1", "tier2"],
            },
        },
    }
|
|
|
|
| |
| |
| |
|
|
| def _compute_verdict(percentage_error: float | None) -> tuple[str, str]: |
| """ |
| Apply the verdict rule based on % error. |
| Returns (verdict, explanation_fragment). |
| |
| Rules (from project spec): |
| < 5% β accurate |
| < 20% β misleading |
| >= 20% β false |
| """ |
| if percentage_error is None: |
| return "unverifiable", "No official data found for this metric/year." |
| if percentage_error < 5.0: |
| return "accurate", f"Percentage error is {percentage_error:.2f}%, which is within the acceptable 5% threshold." |
| if percentage_error < 20.0: |
| return "misleading", f"Percentage error is {percentage_error:.2f}%, which exceeds 5% but is below 20% β classified as misleading." |
| return "false", f"Percentage error is {percentage_error:.2f}%, which exceeds 20% β classified as false." |
|
|
|
|
|
|
| |
|
|
# The ASGI application. The built-in docs route is disabled (docs_url=None)
# because this module registers its own GET /docs handler.
app = FastAPI(
    title="B-ware NLP Service",
    version="1.0.0",
    description="""
## B-ware Claim Extraction & Analysis API

This service is the **NLP backbone of the B-ware fact-checking platform**.
It takes raw text — a single sentence or a full paragraph — and extracts
structured, verifiable information from it.

---

### What This Service Does

| Extracts | Example input | Example output |
|---|---|---|
| **Metric** | "GDP growth rate stood at..." | `GDP growth rate` |
| **Value** | "...stood at 7.5 percent..." | `7.5` |
| **Year** | "...in 2024" | `2024` |
| **Confidence** | all three found, strong match | `0.9` |

---

### Supported Economic Metrics (10)

`GDP growth rate` · `inflation rate` · `unemployment rate` · `fiscal deficit` ·
`literacy rate` · `population` · `per capita income` · `poverty rate` ·
`foreign exchange reserves` · `current account deficit`

---

### Confidence Score Guide

| Score | Meaning |
|---|---|
| `0.9` | Strong metric match + value + year all found |
| `0.6 – 0.8` | Weak metric match OR one field missing |
| `0.3 – 0.5` | Only partial extraction possible |
| `0.0` | Nothing could be extracted |

---

### Quick Start

```bash
# Single claim
curl -X POST http://localhost:5001/extract \\
  -H "Content-Type: application/json" \\
  -d '{"text": "India GDP growth rate was 7.5% in 2024"}'

# Full paragraph
curl -X POST http://localhost:5001/analyze \\
  -H "Content-Type: application/json" \\
  -d '{"text": "India has been growing. GDP hit 7.5% in 2024. Inflation fell to 4.8%."}'
```
""",
    contact={
        "name": "B-ware Development",
        "url": "https://github.com/B-ware",
    },
    license_info={
        "name": "MIT",
    },
    openapi_tags=tags_metadata,
    docs_url=None,
)


# CORS: only the listed local and deployed origins may call the API from a
# browser; credentials, all methods and all headers are allowed for them.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",
        "http://localhost:5000",
        "http://localhost:5500",
        "https://b-ware-sand.vercel.app",
        "https://b-ware-front.vercel.app",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Rate limiting: expose the limiter on app.state and register slowapi's
# handler for RateLimitExceeded.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
|
|
|
@app.exception_handler(Exception)
async def generic_exception_handler(request, exc):
    """
    Catch-all handler: convert any unhandled exception into a JSON 500 response.

    The exception (with traceback) is written to the service log. The response
    body deliberately carries only a generic message: echoing ``str(exc)`` to
    the caller can leak internal details such as file paths, upstream URLs or
    key names.

    Args:
        request: The incoming request that triggered the exception.
        exc: The unhandled exception instance.

    Returns:
        JSONResponse with status 500 and ``error`` / ``detail`` keys.
    """
    logger.error(
        "Unhandled exception on %s %s",
        request.method,
        request.url.path,
        exc_info=exc,
    )
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "detail": "An unexpected error occurred while processing the request.",
        },
    )
|
|
| |
|
|
@app.get("/docs", include_in_schema=False)
async def custom_swagger_ui():
    """Return the customised Swagger UI page produced by get_swagger_html()."""
    swagger_page = get_swagger_html()
    return HTMLResponse(swagger_page)
|
|
|
|
|
|
|
|
@app.get(
    "/health",
    response_model=HealthResponse,
    tags=["Service Info"],
    summary="Health check",
    response_description="Service status and version info"
)
def health_check():
    """
    Check if the NLP service is running and all components are ready.
    Returns component-level status so the Node backend can make informed decisions.

    - `bart_model: loaded` — BART-MNLI is warm in memory (first /verify/deep call triggers load)
    - `*_key: configured` — the env var is set (non-empty); does not validate the key
    - `status: degraded` — at least one key is missing (Tier 2/3 may fail)
    """
    # Function-scope import, kept from the original.
    # NOTE(review): `cache_info()` implies `_load_pipeline` is wrapped by
    # functools.lru_cache/cache — confirm in verifier.tier2_nli.
    from verifier.tier2_nli import _load_pipeline

    def _key_status(env_var: str) -> str:
        # An unset or empty variable counts as missing.
        return "configured" if os.getenv(env_var) else "missing"

    # A non-empty cache means the pipeline loader has already run once.
    bart_status = "loaded" if _load_pipeline.cache_info().currsize > 0 else "not_loaded"

    key_states = {
        "gemini_key": _key_status("GEMINI_API_KEY"),
        "newsapi_key": _key_status("NEWS_API_KEY"),
        "factcheck_key": _key_status("GOOGLE_FACT_CHECK_API_KEY"),
    }

    # Only the API keys feed the overall status; the model state does not.
    keys_ok = all(state == "configured" for state in key_states.values())

    return {
        "status": "healthy" if keys_ok else "degraded",
        "service": "B-ware NLP Service",
        "version": "1.0.0",
        "bart_model": bart_status,
        **key_states,
    }
|
|
|
|
@app.post(
    "/extract",
    response_model=ExtractionResponse,
    tags=["Core Extraction"],
    summary="Extract from a single claim",
    response_description="Extracted metric, value, year and confidence score"
)
def extract_claim(request: ClaimRequest):
    """
    Extract the economic metric, numeric value, and year from a **single claim sentence**.

    The extraction pipeline runs three steps in sequence:
    1. **Metric detection** — matches against 10 supported economic indicators using
       two-tier regex (strong patterns → 0.9 confidence, weak patterns → 0.6 confidence)
    2. **Value extraction** — finds the numeric value, prioritising percentages over
       plain numbers. Handles Indian formats (1,72,000) and negative values.
    3. **Year extraction** — finds the most recent 4-digit year mentioned (1900–2099)

    The **confidence score** reflects how complete and certain the extraction was:
    - `0.9` — strong metric match, all three fields found
    - `0.6` — weak metric match or one field missing
    - `0.0` — metric not recognised
    """
    # Reject non-English input up front with a 422 instead of extracting from it.
    detected_language = detect_claim_language(request.text)
    if detected_language != "en":
        raise HTTPException(
            status_code=422,
            detail=f"Only English claims are supported. Detected language: '{detected_language}'.",
        )

    return extract_all(request.text)
|
|
@app.post(
    "/batch",
    response_model=BatchResponse,
    tags=["Core Extraction"],
    summary="Extract from multiple claims at once",
    response_description="List of extraction results, one per input claim"
)
def batch_extract(request: BatchRequest):
    """
    Run the extraction pipeline on a **list of up to 50 claims** in a single request.

    Each claim is processed independently using the same pipeline as `/extract`.
    If one claim fails (e.g. malformed input), it returns a zeroed-out result
    for that item rather than failing the entire batch.

    **Use this endpoint when:**
    - You have a pre-split list of claim sentences
    - You want to process a CSV or database of claims in bulk
    - The claims are already individual sentences (not raw paragraphs)

    For raw paragraphs, use `/analyze` instead — it handles sentence splitting automatically.
    """
    results = []

    for claim_text in request.claims:
        try:
            results.append(extract_all(claim_text))
        except Exception:
            # Best-effort by design: one bad claim must not fail the batch.
            # The failure is logged (with traceback) so it is not lost.
            logger.exception("batch_extract failed for claim: %.80s", claim_text)
            results.append({
                "original_text": claim_text,
                "metric": None,
                "value": None,
                "year": None,
                "value_type": None,
                "confidence": 0.0,
            })

    return {
        "results": results,
        "total": len(results),
    }
|
|
@app.post(
    "/analyze",
    response_model=ParagraphResponse,
    tags=["Paragraph Analysis"],
    summary="Analyze a full paragraph for verifiable claims",
    response_description="Sentence stats and per-claim extractions for the full paragraph"
)
def analyze_text(request: ClaimRequest):
    """
    Feed a **full paragraph** and get back extraction results for every sentence
    that looks like a verifiable economic claim.

    This endpoint does two things automatically:

    **Step 1 — Sentence splitting**
    The paragraph is split into individual sentences using punctuation-aware splitting
    that protects abbreviations (`Rs.`, `Dr.`, `No.`) from causing false splits.

    **Step 2 — Claim probability scoring**
    Each sentence is scored on a `0.0 – 1.0` scale based on heuristic signals:

    | Signal | Points |
    |---|---|
    | Has a number or percentage | +0.30 |
    | Mentions a year (1900–2099) | +0.20 |
    | Contains a known metric keyword | +0.25 |
    | Uses an assertion verb (grew, fell, stood at...) | +0.15 |
    | Names a subject (India, RBI, government...) | +0.05 |
    | Ideal sentence length (8–50 words) | +0.05 |

    Only sentences scoring **above 0.5** are passed to the extractor.
    Commentary, questions, and context sentences are automatically filtered out.
    """
    # Reject non-English input up front with a 422.
    detected_language = detect_claim_language(request.text)
    if detected_language != "en":
        raise HTTPException(
            status_code=422,
            detail=f"Only English claims are supported. Detected language: '{detected_language}'.",
        )

    sentences = split_into_sentences(request.text)
    sentence_results: list[SentenceAnalysis] = []

    for sentence in sentences:
        probability = score_claim_probability(sentence)
        # Strictly above 0.5 qualifies; everything else is skipped.
        if not probability > 0.5:
            continue
        sentence_results.append(
            SentenceAnalysis(
                sentence=sentence,
                claim_probability=round(probability, 2),
                extraction=ExtractionResponse(**extract_all(sentence)),
            )
        )

    # "High confidence" means extraction confidence of at least 0.8.
    high_confidence = sum(
        1 for item in sentence_results if item.extraction.confidence >= 0.8
    )

    return ParagraphResponse(
        total_sentences=len(sentences),
        verified_count=len(sentence_results),
        high_confidence_count=high_confidence,
        results=sentence_results,
    )
|
|
|
|
@app.get(
    "/metrics",
    response_model=MetricsListResponse,
    tags=["Service Info"],
    summary="List all supported economic metrics",
    response_description="Complete list of recognisable metrics and total count"
)
def list_metrics():
    """
    Returns the **complete list of economic metrics** this service can recognise and extract.

    These metric names are the exact strings returned in the `metric` field of
    `ExtractionResponse`. Use this list to:
    - Validate metric names before querying `official_data_cache` in your backend
    - Build dropdown filters in your frontend
    - Know when a claim falls outside the supported scope

    The 10 supported metrics map directly to **World Bank and IMF indicator codes**
    for data verification.
    """
    metric_names = get_all_metric_names()
    # `count` is derived from the list so the two can never disagree.
    return {"supported_metrics": metric_names, "count": len(metric_names)}
|
|
| |
| |
| |
|
|
@app.post(
    "/verify/quick",
    response_model=QuickVerificationResult,
    tags=["Verification"],
    summary="Quick numeric verification (Tier 1 only)",
    response_description="Verdict based on official World Bank data comparison"
)
async def verify_quick(request: ClaimRequest):
    """
    **Fastest verification path.** Uses Tier 1 only: extracts metric/value/year
    from the claim and compares against official World Bank data.

    Best for: **numeric economic claims** with a clear year.
    e.g. *"India's GDP growth rate was 7.5% in 2024"*

    **How it works:**
    1. Extract `metric`, `value`, `year` via the regex extraction pipeline.
    2. Map the metric to a World Bank indicator code.
    3. Fetch the official value for that indicator + year from World Bank API.
    4. Compute `% error = |claimed − official| / |official| × 100`.
    5. Apply verdict rule:
       - `< 5%` → **accurate**
       - `5% – 20%` → **misleading**
       - `>= 20%` → **false**
       - No official data found → **unverifiable**

    **Limitations of /verify/quick:**
    - Only works for numeric claims with a recognisable metric + year.
    - Uses World Bank data only (quarterly/annual, may lag by 1–2 years).
    - For qualitative claims or deeper analysis, use `POST /verify`.
    """
    # Step 1: extract metric / value / year from the claim text.
    extraction = extract_all(request.text)

    # Step 2: if any required field is missing, answer "unverifiable" right
    # away. This check runs BEFORE the Tier 1 call so the numeric check is
    # never invoked with None arguments.
    missing = [
        field for field in ("metric", "value", "year")
        if extraction[field] is None
    ]
    if missing:
        return QuickVerificationResult(
            original_text=request.text,
            tier_used="tier1",
            verdict="unverifiable",
            confidence=0.0,
            extraction=ExtractionResponse(**extraction),
            numeric_check=None,
            explanation=f"Could not extract the following fields: {', '.join(missing)}. "
                        f"This endpoint requires a numeric claim with a recognisable metric and year."
        )

    # Step 3: Tier 1 numeric check. Country falls back to "IND" when the
    # extraction has no country key or an empty one.
    t1 = await tier1_numeric_check(
        metric=extraction["metric"],
        claimed_value=extraction["value"],
        year=extraction["year"],
        country=extraction.get("country", "IND") or "IND",
    )

    numeric_check = NumericCheckResult(
        official_value=t1.official_value,
        claimed_value=t1.claimed_value,
        percentage_error=t1.percentage_error,
        source=t1.source,
        indicator_code=t1.indicator_code,
        source_url=t1.source_url,
        year=t1.year,
    )

    # Step 4: verdict from the percentage error.
    verdict, explanation_fragment = _compute_verdict(t1.percentage_error)

    # Step 5: human-readable explanation.
    has_official_value = t1.official_value is not None
    if has_official_value:
        explanation = (
            f"Claimed {t1.claimed_value} for '{extraction['metric']}' in {t1.year}. "
            f"Official World Bank value: {t1.official_value:.4f}. "
            f"{explanation_fragment} "
            f"Source: {t1.source_url}"
        )
    else:
        explanation = (
            f"Found metric '{extraction['metric']}', value {t1.claimed_value}, year {t1.year}, "
            f"but World Bank has no data for this indicator/year combination. "
            f"Try a different year or use /verify for deeper analysis."
        )

    # Step 6: final confidence. With a usable error, extraction confidence is
    # scaled by (1 - error/100), floored at 0. Without one (no official
    # value, or an official value but no computed error) it is halved; the
    # extra None guard avoids a TypeError on `None / 100.0`.
    if has_official_value and t1.percentage_error is not None:
        tier1_quality = max(0.0, 1.0 - (t1.percentage_error / 100.0))
        final_confidence = round(extraction["confidence"] * tier1_quality, 2)
    else:
        final_confidence = round(extraction["confidence"] * 0.5, 2)

    return QuickVerificationResult(
        original_text=request.text,
        tier_used="tier1",
        verdict=verdict,
        confidence=final_confidence,
        extraction=ExtractionResponse(**extraction),
        numeric_check=numeric_check,
        explanation=explanation,
    )
|
|
|
|
| |
| |
| |
|
|
# Upper bound, in seconds, on one routed verification run.
_VERIFY_TIMEOUT_SECONDS = 30.0


def _timeout_result(clean_text: str) -> FullVerificationResult:
    """Build the "unverifiable" response returned when verification times out."""
    return FullVerificationResult(
        original_text=clean_text,
        tier_used="tier1",
        verdict="unverifiable",
        confidence=0.0,
        extracted_metric=None,
        extracted_value=None,
        extracted_year=None,
        extraction_confidence=0.0,
        evidence=[],
        explanation=f"Verification timed out after {_VERIFY_TIMEOUT_SECONDS:.0f} seconds.",
        tiers_run=[],
    )


def _to_full_result(result: VerificationResult) -> FullVerificationResult:
    """Copy a router VerificationResult field by field into the API response model."""
    return FullVerificationResult(
        original_text=result.original_text,
        tier_used=result.tier_used,
        verdict=result.verdict,
        confidence=result.confidence,
        extracted_metric=result.extracted_metric,
        extracted_value=result.extracted_value,
        extracted_year=result.extracted_year,
        extraction_confidence=result.extraction_confidence,
        official_value=result.official_value,
        percentage_error=result.percentage_error,
        official_source=result.official_source,
        indicator_code=result.indicator_code,
        source_url=result.source_url,
        evidence=[
            VerificationEvidenceItem(
                source=e.source,
                snippet=e.snippet,
                url=e.url,
                evidence_type=e.evidence_type,
                nli_verdict=e.nli_verdict,
                nli_score=e.nli_score,
            )
            for e in result.evidence
        ],
        explanation=result.explanation,
        tiers_run=result.tiers_run,
    )


async def _run_verification(
    endpoint_name: str, raw_text: str, *, force_tier3: bool
) -> FullVerificationResult:
    """Preprocess the claim, run the routed pipeline under a timeout, and shape the response."""
    clean_text = preprocess_claim(raw_text)
    try:
        result: VerificationResult = await asyncio.wait_for(
            route_verification(clean_text, force_tier3=force_tier3),
            timeout=_VERIFY_TIMEOUT_SECONDS,
        )
    except asyncio.TimeoutError:
        # Log only the first 80 characters of the claim.
        logger.warning("%s timed out for text: %.80s", endpoint_name, clean_text)
        return _timeout_result(clean_text)
    return _to_full_result(result)


@app.post(
    "/verify",
    response_model=FullVerificationResult,
    tags=["Verification"],
    summary="Full multi-tier verification",
    response_description="Best available verdict from Tier 1 → 2 → 3 pipeline"
)
async def verify_full(request: ClaimRequest):
    """
    **Full RAV pipeline.** Runs as many tiers as needed to produce a confident verdict.

    Routing logic:
    1. **Tier 1 (always)** — numeric World Bank check.
       If clear result (error < 5% or ≥ 20%) with high extraction confidence → return immediately.
    2. **Tier 2** — fetch news + fact-check evidence, run NLI model over snippets.
       If NLI confidence ≥ 0.6 → return merged Tier 1 + Tier 2 verdict.
    3. **Tier 3** — Gemini 1.5 Flash LLM reasoning over all collected context.
       Always used as fallback if Tier 2 is uncertain or model unavailable.

    Returns the `tier_used` field so you know which layer produced the verdict.
    Use `POST /verify/deep` to force all three tiers regardless of early exit conditions.
    """
    return await _run_verification("verify_full", request.text, force_tier3=False)


@app.post(
    "/verify/deep",
    response_model=FullVerificationResult,
    tags=["Verification"],
    summary="Deep verification — forces all three tiers",
    response_description="Verdict from all three tiers: numeric + evidence + LLM reasoning"
)
@limiter.limit("10/minute")
async def verify_deep(request: Request, body: ClaimRequest):
    """
    **Deepest verification path.** Forces the pipeline through all three tiers
    regardless of how confident earlier tiers are.

    Use this when:
    - You need maximum confidence combined from all data sources
    - The claim is important enough to warrant LLM reasoning even if Tier 1/2 gave a clear answer
    - You want `tiers_run: ["tier1", "tier2", "tier3"]` guaranteed in the response

    **Slower** than `/verify` — expect ~3–8 seconds latency (network + LLM).
    Subject to Gemini free-tier rate limits (15 req/min). **Rate limited to 10 req/min per IP.**
    """
    # `request` is not read here; the limiter decorator needs it in the signature.
    return await _run_verification("verify_deep", body.text, force_tier3=True)
|
|
|
|
| |
# Script entry point: `python main.py` starts uvicorn directly.
if __name__ == "__main__":
    import uvicorn

    for startup_message in (
        "Starting B-ware NLP Service...",
        "API docs available at: http://localhost:5001/docs",
    ):
        logger.info(startup_message)

    # Listens on all interfaces, port 5001, with auto-reload enabled.
    uvicorn.run("main:app", host="0.0.0.0", port=5001, reload=True)
|
|