Spaces:
Running
Running
File size: 11,071 Bytes
e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 4f48a4e e6d7e29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | # ==========================================
# API WRAPPER FOR FLASK
# ==========================================
from project.database import get_total_evidence_count, load_all_evidence
def run_fact_check_api(claim):
    """
    API-friendly fact-check pipeline that returns structured data instead
    of printing.

    Pipeline (full mode, when all heavy dependencies are importable):
      1. Offline static knowledge base lookup (may short-circuit the rest).
      2. Wikidata entity search (fast, no API key).
      3. Vector-cache check against evidence stored by previous runs.
      4. Live fetches (RSS / GDELT / NewsAPI / Wikipedia), plus a
         DuckDuckGo fallback when evidence is scarce or weakly matched.
      5. FAISS similarity search, then de-duplication of the top hits.
      6. NLI (entailment / contradiction) scoring of each evidence item.
      7. Similarity-weighted verdict from the strongest single evidence.

    Parameters
    ----------
    claim : str
        The natural-language claim to fact-check.

    Returns
    -------
    dict
        Success: {"success": True, "claim", "verdict", "confidence",
        "evidence", "nli_results", "total_evidence"}.
        Failure: {"success": False, "error", "evidence", "nli_results"}.
        If heavy dependencies are missing (ImportError), a demo payload
        with the same top-level keys is returned instead.

    Note: This is a simplified version for demo. For full functionality,
    install all dependencies from requirements.txt
    """
    try:
        # Heavy dependencies are imported lazily so a missing install
        # degrades to the demo payload instead of crashing at import time.
        from model import (
            init_db, clear_db, embed_model, fetch_rss, fetch_gdelt, fetch_newsapi,
            fetch_wikipedia, fetch_duckduckgo, fetch_knowledge_base, fetch_wikidata,
            build_faiss, load_all_evidence, nli_model, FAISS_FILE
        )
        import faiss

        init_db()
        # clear_db() - Removed to allow accumulation of facts
        claim_emb = embed_model.encode([claim], normalize_embeddings=True)

        # 1. Static knowledge base (offline, always runs first)
        kb_count = fetch_knowledge_base(claim, claim_emb)

        # -- Quick KB short-circuit ---------------------------------------
        # If KB already found strong matches, build a temporary FAISS and
        # check the best similarity score. If it's high (>= 0.65) we have
        # enough reliable evidence -> skip the slow live fetches entirely.
        kb_short_circuit = False
        if kb_count >= 1:
            if build_faiss():
                _idx = faiss.read_index(FAISS_FILE)
                _D, _ = _idx.search(claim_emb, 1)
                if len(_D[0]) > 0 and _D[0][0] >= 0.65:
                    kb_short_circuit = True
                    print(f"[KnowledgeBase] Strong match (score={_D[0][0]:.2f}) -> skipping live fetches.")
        # -----------------------------------------------------------------

        # 2. Wikidata entity search (fast, no API key -> always runs)
        fetch_wikidata(claim, claim_emb)

        # -- Database Evidence Search (Vector Cache) ----------------------
        # Before doing slow live scraping, check if our database already has
        # highly relevant evidence from previous fact-checks of similar topics.
        local_evidence_found = False
        if not kb_short_circuit and build_faiss():
            _idx = faiss.read_index(FAISS_FILE)
            if _idx.ntotal > 0:
                _D, _ = _idx.search(claim_emb, 1)
                if len(_D[0]) > 0 and _D[0][0] >= 0.60:
                    local_evidence_found = True
                    print(f"[VectorCache] Strong local evidence found (score={_D[0][0]:.2f}) -> skipping live scrapes.")
        # -----------------------------------------------------------------

        # 3. Live fetches -- skipped when KB or local DB already has strong matches
        gdelt_count = 0
        newsapi_count = 0
        if not kb_short_circuit and not local_evidence_found:
            fetch_rss(claim_emb)
            gdelt_count = fetch_gdelt(claim, claim_emb)
            newsapi_count = fetch_newsapi(claim, claim_emb)
            fetch_wikipedia(claim)

        # Count evidence; fall back to DuckDuckGo when the news fetches
        # produced nothing, the pool is tiny, or the best match is weak.
        total_count = get_total_evidence_count()
        activate_fallback = False
        if (gdelt_count + newsapi_count) == 0 or total_count < 3:
            activate_fallback = True
        faiss_ready = build_faiss()
        if faiss_ready:
            index = faiss.read_index(FAISS_FILE)
            D, _ = index.search(claim_emb, 1)
            if len(D) > 0 and len(D[0]) > 0 and D[0][0] < 0.50:
                activate_fallback = True
        if activate_fallback:
            fetch_duckduckgo(claim, claim_emb)
            faiss_ready = build_faiss()

        if not faiss_ready:
            return {
                "success": False,
                "error": "No relevant evidence found.",
                "evidence": [],
                "nli_results": []
            }

        index = faiss.read_index(FAISS_FILE)
        # Search wider first (10 items), then de-duplicate
        top_k = min(10, index.ntotal)
        D, I = index.search(claim_emb, top_k)
        rows = load_all_evidence()

        # De-duplicate by text content and apply minimum similarity threshold
        seen_texts = set()
        unique_indices = []
        unique_scores = []
        for sim_score, row_idx in zip(D[0], I[0]):
            # BUGFIX: FAISS pads missing neighbours with -1; the original
            # `row_idx >= len(rows)` guard let -1 through, silently indexing
            # the LAST row as fake evidence. Reject both out-of-range sides.
            if row_idx < 0 or row_idx >= len(rows):
                continue
            txt = rows[row_idx][1][:100]  # key by first 100 chars
            if txt not in seen_texts and sim_score >= 0.50:
                seen_texts.add(txt)
                unique_indices.append(row_idx)
                unique_scores.append(sim_score)
            if len(unique_indices) >= 5:
                break

        evidence_list = []
        for i, idx in enumerate(unique_indices):
            # rows[idx] contains (id, text, source, embedding_json)
            evidence_list.append({
                "text": rows[idx][1],
                "source": rows[idx][2],
                "similarity": float(unique_scores[i])
            })

        # Hoisted out of the NLI loop: the original re-defined this function
        # (and re-imported `re`) on every iteration for no benefit.
        def get_core_claim(c):
            """Strip trailing prepositional qualifiers like 'in 2024', 'currently'
            that confuse literal NLI matching -- but NOT location qualifiers that
            are part of the claim's meaning (e.g. 'at sea level')."""
            import re
            stripped = re.sub(
                r'\s+(in\s+\d{4}|since\s+\w+|currently|right now|nowadays|as of \w+)$',
                '', c.strip(), flags=re.IGNORECASE
            )
            return stripped if stripped != c else c

        # Build NLI results (track similarity index for weighted voting)
        nli_results = []
        for i, idx in enumerate(unique_indices):
            evidence_text = rows[idx][1]
            sim_weight = float(unique_scores[i])  # FAISS cosine similarity
            try:
                # Run NLI with the raw claim -- this is always the primary result
                r1 = nli_model(evidence_text, text_pair=claim)
                label1 = r1[0].get("label", "neutral")
                score1 = float(r1[0].get("score", 0.0))
                # Only try the simplified core-claim if the raw result is neutral
                # (prevents stripping from flipping a correct entailment to contradiction)
                if label1 == "neutral":
                    core = get_core_claim(claim)
                    if core != claim:
                        r2 = nli_model(evidence_text, text_pair=core)
                        label2 = r2[0].get("label", "neutral")
                        score2 = float(r2[0].get("score", 0.0))
                        if label2 != "neutral" and score2 > score1:
                            label1, score1 = label2, score2
                nli_results.append({
                    "evidence": evidence_text[:200],
                    "label": label1,
                    "score": score1,
                    "similarity": sim_weight
                })
            except Exception as e:
                # Best-effort: one failed NLI call shouldn't kill the request.
                print(f"[WARNING] NLI error: {e}")

        # -- Similarity-Weighted Verdict ----------------------------------
        # Uses the strongest evidence to avoid high-quality sources being
        # outvoted by a higher quantity of lower-quality noisy sources.
        verdict = "Uncertain"
        confidence = 0.0
        if nli_results:
            best_entail = max(
                [r['score'] * r['similarity'] for r in nli_results if 'entail' in r['label'].lower()] + [0.0]
            )
            best_contra = max(
                [r['score'] * r['similarity'] for r in nli_results if 'contradict' in r['label'].lower()] + [0.0]
            )
            print(f"[Verdict] best entail={best_entail:.3f} contra={best_contra:.3f}")
            if best_entail > best_contra and best_entail >= 0.20:
                verdict = "True"
                confidence = best_entail
            elif best_contra > best_entail and best_contra >= 0.20:
                verdict = "False"
                confidence = best_contra
            else:
                verdict = "Mixture/Uncertain"
                confidence = max(best_entail, best_contra)

        return {
            "success": True,
            "claim": claim,
            "verdict": verdict,
            "confidence": round(confidence, 2),
            "evidence": evidence_list,
            "nli_results": nli_results,
            "total_evidence": len(evidence_list)
        }
    except ImportError as e:
        print(f"DEBUG: ImportError in api_wrapper: {e}")
        # Return demo data if dependencies are missing.
        # BUGFIX: include "verdict"/"confidence" so the demo payload has the
        # same top-level keys as the real success path (consumers reading
        # result["verdict"] used to KeyError in demo mode).
        return {
            "success": True,
            "claim": claim,
            "verdict": "Uncertain",
            "confidence": 0.0,
            "evidence": [
                {
                    "text": "This is demo evidence from RSS feed. Install dependencies from requirements.txt for real fact-checking.",
                    "source": "RSS",
                    "similarity": 0.85
                },
                {
                    "text": "This is demo evidence from GDELT. The full system searches multiple news sources and databases.",
                    "source": "GDELT",
                    "similarity": 0.78
                },
                {
                    "text": "This is demo evidence from Wikipedia. Install all dependencies to enable real-time fact verification.",
                    "source": "Wikipedia",
                    "similarity": 0.72
                }
            ],
            "nli_results": [
                {
                    "evidence": "Demo evidence showing entailment (supports the claim)",
                    "label": "entailment",
                    "score": 0.89
                },
                {
                    "evidence": "Demo evidence showing neutral stance",
                    "label": "neutral",
                    "score": 0.65
                },
                {
                    "evidence": "Demo evidence showing contradiction",
                    "label": "contradiction",
                    "score": 0.45
                }
            ],
            "total_evidence": 3
        }
    except Exception as e:
        print(f"DEBUG: General Exception in api_wrapper: {e}")
        import traceback
        traceback.print_exc()
        return {
            "success": False,
            "error": str(e),
            "evidence": [],
            "nli_results": []
        }
|