|
|
import io, re, json, datetime,os |
|
|
from typing import Dict, Any, List, Tuple, Optional |
|
|
|
|
|
from flask import Flask, request, jsonify, render_template_string, redirect, url_for |
|
|
from flask_cors import CORS |
|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
from PyPDF2 import PdfReader |
|
|
|
|
|
# --- Application setup ------------------------------------------------------
app = Flask(__name__)

# Cross-origin access is enabled for the JSON API routes only.
CORS(app, resources={r"/api/*": {"origins": "*"}})

# Upload size cap: 16 MiB.
app.config.update(MAX_CONTENT_LENGTH=16 * 1024 * 1024)

# --- Module-level constants -------------------------------------------------
# Calendar year, captured once when the module is imported.
THIS_YEAR = datetime.date.today().year

# Case-insensitive DOI matcher; group 1 holds the DOI itself.
DOI_RX = re.compile(r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", flags=re.IGNORECASE)

# Optional Semantic Scholar API key taken from the environment (None if unset).
S2_API_KEY = os.environ.get("SEMANTIC_SCHOLAR_API_KEY")
|
|
|
|
|
def _clean(s: Optional[str]) -> str: |
|
|
return (s or "").strip() |
|
|
|
|
|
# A year 1900-2099 that is not embedded in a longer digit run.  A compact
# YYYYMMDD date is also accepted; group 1 is always the four-digit year.
_YEAR_RX = re.compile(
    r"(?<!\d)((?:19|20)\d{2})"
    r"(?:(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01]))?"
    r"(?!\d)"
)


def year_from_any(x: str) -> Optional[int]:
    """Return the first plausible publication year found in *x*.

    Args:
        x: Free text, e.g. a date string or a page of extracted text.

    Returns:
        The first year between 1900 and 2099 that stands on its own (or that
        leads a compact ``YYYYMMDD`` date), or ``None`` when *x* is empty or
        contains no such year.
    """
    if not x:
        return None
    # Digit boundaries stop identifiers such as "120193456" or a DOI suffix
    # from being mistaken for a year.
    found = _YEAR_RX.search(x)
    return int(found.group(1)) if found else None
|
|
|
|
|
def fetch_url_metadata(url_or_doi: str):
    """Fetch a page (or resolve a DOI) and scrape bibliographic metadata.

    Args:
        url_or_doi: An http(s) URL, text containing a bare DOI (resolved via
            ``https://doi.org/``), or a scheme-less address, for which
            ``https://`` is assumed.

    Returns:
        A ``(meta, text_excerpt, warnings)`` tuple.  ``meta`` carries the keys
        ``title``, ``authors``, ``venue``, ``year`` and ``identifier`` (a dict
        with ``doi`` and ``url``).  ``text_excerpt`` is at most 4000
        characters of abstract text.  When the request fails the result is
        ``({}, "", [message])``.
    """
    warnings = []
    source = (url_or_doi or "").strip()
    supplied = DOI_RX.search(source)
    supplied_doi = None
    if source.lower().startswith(("http://", "https://")):
        url = source
    elif supplied:
        supplied_doi = supplied.group(1)
        url = f"https://doi.org/{supplied_doi}"
    else:
        # Scheme-less input such as "example.org/paper" would otherwise be
        # rejected by requests before any network access.
        url = f"https://{source}"

    # NOTE(review): the target is user-controlled and fetched server-side with
    # no host allow-list (SSRF risk) -- consider restricting reachable hosts.
    try:
        r = requests.get(url, timeout=20, headers={"User-Agent": "CRAAPBot"})
        r.raise_for_status()
    except Exception as e:
        # Deliberately broad: any failure on untrusted input becomes a warning
        # for the caller instead of a server error.
        return {}, "", [f"Failed to fetch URL/DOI: {e}"]

    content_type = r.headers.get("Content-Type", "")
    if content_type and "html" not in content_type.lower() and "xml" not in content_type.lower():
        warnings.append(
            f"Response does not look like HTML (Content-Type: {content_type}); "
            "scraped metadata may be unreliable."
        )

    html = r.text
    soup = BeautifulSoup(html, "html.parser")
    meta = {}

    def mget(*names):
        """Return the content of the first <meta> tag matching any of *names*."""
        for n in names:
            tag = soup.find("meta", attrs={"name": n}) or soup.find("meta", attrs={"property": n})
            if tag and tag.get("content"):
                return tag["content"]
        return None

    meta["title"] = _clean(mget("citation_title") or (soup.title.string if soup.title else ""))

    author_tags = soup.find_all("meta", attrs={"name": "citation_author"})
    if author_tags:
        candidates = [_clean(tag.get("content", "")) for tag in author_tags]
    else:
        candidates = [_clean(mget("author") or "")]
    meta["authors"] = [name for name in candidates if name]

    meta["venue"] = _clean(mget("citation_journal_title") or mget("og:site_name") or "")

    y = year_from_any(
        _clean(mget("citation_publication_date") or mget("date") or mget("article:published_time") or "")
    )
    # Last resort: the first year-like number anywhere in the raw page.
    meta["year"] = y if y else year_from_any(html)

    # DOI preference: explicit meta tag, then the DOI the caller supplied,
    # then the first DOI-looking string in the page (least reliable).
    doi = _clean(mget("citation_doi"))
    if not doi:
        if supplied_doi:
            doi = supplied_doi
        else:
            scraped = DOI_RX.search(html)
            # Drop sentence punctuation the pattern may have swallowed.
            doi = scraped.group(1).rstrip(".,;") if scraped else ""
    meta["identifier"] = {"doi": doi if doi else None, "url": url}

    abst = mget("citation_abstract")
    if not abst:
        absnode = soup.find(
            lambda tag: tag.name in ["section", "div", "p"]
            and tag.get_text(strip=True).lower().startswith("abstract")
        )
        if absnode:
            abst = absnode.get_text(" ", strip=True)
    text_excerpt = (abst or "")[:4000]
    return meta, text_excerpt, warnings
|
|
|
|
|
def extract_pdf_text_and_guess_meta(file_storage):
    """Extract text from an uploaded PDF and guess basic bibliographic fields.

    Args:
        file_storage: Object whose ``read()`` returns the PDF bytes.

    Returns:
        A ``(meta, text, warnings)`` tuple.  ``meta`` has the keys ``title``,
        ``authors``, ``venue`` (always empty here), ``year`` and
        ``identifier``; ``text`` is at most 20000 characters taken from the
        first ten pages.  On an empty or unparsable PDF the result is
        ``({}, "", [message])``.
    """
    warnings = []
    try:
        data = file_storage.read()
        reader = PdfReader(io.BytesIO(data))
        n = len(reader.pages)
        if n == 0:
            return {}, "", ["PDF appears empty."]

        body_pages = min(10, n)
        # Extract every page once; the two-page "head" is a prefix of the body.
        page_texts = [reader.pages[i].extract_text() or "" for i in range(body_pages)]
        head_txt = "\n".join(page_texts[:2])
        body_txt = "\n".join(page_texts)

        lines = [ln.strip() for ln in head_txt.splitlines() if ln.strip()]
        title = lines[0] if lines else ""

        # Heuristic: the author line is the first of the lines after the title
        # that has a capitalised word and a list separator.  The title line is
        # skipped so that a title containing "and" or a comma is not reported
        # as the author list.
        authors_line = ""
        for ln in lines[1:10]:
            if re.search(r"[A-Z][a-z]+(?:\s[A-Z]\.){0,3}", ln) and ("," in ln or " and " in ln.lower()):
                authors_line = ln
                break
        # Case-insensitive split to match the case-insensitive detection above.
        authors = (
            [a.strip() for a in re.split(r",|;| and ", authors_line, flags=re.IGNORECASE) if a.strip()]
            if authors_line
            else []
        )

        venue = ""
        y = year_from_any(head_txt)
        m = DOI_RX.search(head_txt) or DOI_RX.search(body_txt)
        doi = m.group(1) if m else None
        meta = {
            "title": _clean(title),
            "authors": authors,
            "venue": _clean(venue),
            "year": y,
            "identifier": {"doi": doi, "url": None},
        }
        if body_pages < 5:
            warnings.append(
                "Only a small portion of the PDF text was extracted; Accuracy/Purpose may be provisional."
            )
        return meta, body_txt[:20000], warnings
    except Exception as e:
        # Deliberately broad: any failure while reading or parsing the upload
        # is reported to the caller as a warning rather than raised.
        return {}, "", [f"Failed to parse PDF: {e}"]
|
|
|
|
|
def fetch_semantic_scholar(doi: str):
    """Look up a paper on the Semantic Scholar Graph API by its DOI.

    Returns an ``(enrichment, warnings)`` pair.  ``enrichment`` is
    ``{"s2": {...}}`` on success and ``{}`` when no DOI was given, the paper
    is unknown (HTTP 404), or the lookup failed; in the first and last case a
    warning message is included.
    """
    if not doi:
        return {}, ["No DOI provided"]

    endpoint = "https://api.semanticscholar.org/graph/v1/paper/DOI:" + requests.utils.quote(doi)
    wanted_fields = (
        "title", "year", "publicationDate", "journal", "url",
        "isOpenAccess", "openAccessPdf", "citationCount", "influentialCitationCount",
        "authors.name", "fieldsOfStudy", "publicationTypes",
    )
    request_headers = {"User-Agent": "CRAAPBot"}
    if S2_API_KEY:
        request_headers["x-api-key"] = S2_API_KEY

    try:
        resp = requests.get(
            endpoint,
            params={"fields": ",".join(wanted_fields)},
            headers=request_headers,
            timeout=12,
        )
        # An unknown DOI is not an error: report "nothing found" quietly.
        if resp.status_code == 404:
            return {}, []
        resp.raise_for_status()
        paper = resp.json()

        journal_info = paper.get("journal") or {}
        pdf_info = paper.get("openAccessPdf") or {}
        author_names = []
        for author in paper.get("authors") or []:
            name = author.get("name")
            if name:
                author_names.append(name)

        summary = {
            "title": paper.get("title"),
            "year": paper.get("year"),
            "publicationDate": paper.get("publicationDate"),
            "journal": journal_info.get("name"),
            "url": paper.get("url"),
            "isOpenAccess": paper.get("isOpenAccess"),
            "openAccessPdf": pdf_info.get("url"),
            "citationCount": paper.get("citationCount"),
            "influentialCitationCount": paper.get("influentialCitationCount"),
            "authors": author_names,
            "fieldsOfStudy": paper.get("fieldsOfStudy"),
            "publicationTypes": paper.get("publicationTypes"),
        }
        return {"s2": summary}, []
    except Exception as e:
        return {}, [f"Semantic Scholar lookup failed: {e}"]
|
|
|
|
|
def score_currency(year: Optional[int], this_year: Optional[int] = None):
    """Score how recent a publication is.

    Args:
        year: Publication year, or ``None``/0 when unknown.
        this_year: Reference year for the age calculation; defaults to the
            module-level ``THIS_YEAR``.

    Returns:
        A ``(score, evidence, checks)`` tuple: a score from 2 to 5, a
        one-sentence explanation, and a list of follow-up notes.
    """
    if not year:
        return 2, "Publication year unknown.", ["Could not find a clear date; treat with caution."]

    reference = THIS_YEAR if this_year is None else this_year
    # A year later than the reference year is clamped to an age of zero.
    age = max(0, reference - year)

    if age <= 2:
        return 5, f"Published in {year} (≤2 years old).", ["Recent for fast-moving fields."]
    if age <= 5:
        return 4, f"Published in {year} (~{age} years old).", []
    if age <= 10:
        return 3, f"Published in {year} (~{age} years old).", []
    return 2, f"Published in {year} (>10 years old).", ["Potentially outdated."]
|
|
|
|
|
|
|
|
|
|
|
def score_authority(meta: Dict[str, Any]):
    """Score the source's authority from its venue, DOI and author count.

    Args:
        meta: Metadata dict; the keys ``venue``, ``identifier`` and
            ``authors`` are read and may each be missing or ``None``.

    Returns:
        A ``(score, evidence)`` pair.  The score starts at 1 and gains one
        point each for a known venue, a DOI, and three or more authors.
    """
    score = 1
    notes = []

    venue = meta.get("venue")
    if venue:
        score += 1
        notes.append(f"Venue: {venue}.")

    # ``identifier`` may be present but None, so guard before the nested get.
    if (meta.get("identifier") or {}).get("doi"):
        score += 1
        notes.append("Has DOI.")

    authors = meta.get("authors")
    if authors:
        a_count = len(authors)
        if a_count >= 3:
            score += 1
        notes.append(f"Authors: {a_count}.")

    evidence = "; ".join(notes) if notes else "Insufficient venue/author info."
    return min(score, 5), evidence
|
|
|
|
|
def score_accuracy(text_excerpt: str):
    """Score methodological rigour from cue words found in the text.

    Args:
        text_excerpt: Abstract or body text; may be empty or ``None``.

    Returns:
        A ``(score, evidence)`` pair with a score from 2 to 5, based on how
        many of eight cue phrases occur (case-insensitively) in the text.
    """
    # Check for missing text first so that None never reaches ``.lower()``.
    if not text_excerpt:
        return 2, "No body text available; cannot inspect methods."

    cues = (
        "methods", "materials", "results", "limitations",
        "confidence interval", "validation", "dataset", "sample size",
    )
    lowered = text_excerpt.lower()  # lower-case once, not once per cue
    keys_present = sum(1 for cue in cues if cue in lowered)

    if keys_present >= 5:
        return 5, "Detailed methodological cues detected (methods/results/validation/etc.)."
    if keys_present >= 3:
        return 4, "Some methodological cues present."
    if keys_present >= 1:
        return 3, "Limited methodological signals."
    return 2, "Minimal methodological detail detected (likely a commentary/overview)."
|
|
|
|
|
def score_purpose(text_excerpt: str):
    """Score the apparent purpose of a text from disclosure and promo wording.

    Returns a ``(score, evidence)`` pair: 2 when promotional terms occur,
    3 when conflicts are mentioned without any funding wording, otherwise 4.
    """
    haystack = text_excerpt.lower()

    # Promotional wording outranks every other signal.
    for promo_term in ("sponsored", "advertisement", "marketing"):
        if promo_term in haystack:
            return 2, "Potential promotional language detected."

    mentions_funding = any(term in haystack for term in ("funding", "grant"))
    mentions_conflicts = any(
        term in haystack for term in ("conflict of interest", "competing interest")
    )

    if mentions_funding:
        return 4, "Academic tone with disclosures/funding statements."
    if mentions_conflicts:
        return 3, "Conflicts noted, funding unclear."
    return 4, "Academic/educational purpose inferred."
|
|
|
|
|
def score_relevance(assignment_context: str, meta: Dict[str, Any], text_excerpt: str):
    """Score topical overlap between the assignment context and the source.

    Distinct words of four or more letters from the context are counted when
    they occur in the lower-cased title plus text.  Returns a
    ``(score, evidence)`` pair; with no context the score is a fixed 4.
    """
    if not assignment_context:
        return 4, "General relevance assumed (no assignment context provided)."

    searchable = " ".join((meta.get("title", ""), text_excerpt)).lower()
    keywords = set(re.findall(r"[a-zA-Z]{4,}", assignment_context.lower()))
    overlap = len([word for word in keywords if word in searchable])

    # (minimum overlap, score, evidence), checked from strongest to weakest.
    ladder = (
        (6, 5, "Strong topical overlap with assignment context."),
        (3, 4, "Good topical overlap."),
        (1, 3, "Partial topical overlap."),
    )
    for minimum, score, evidence in ladder:
        if overlap >= minimum:
            return score, evidence
    return 2, "Low topical overlap; may be tangential."
|
|
|
|
|
def aggregate_scores(meta: Dict[str, Any], text: str, assignment_context: str, provisional: bool):
    """Run the five CRAAP scorers and combine them into one result dict.

    When *provisional* is true the Accuracy score is capped at 3 and the
    Purpose score at 4.  The result holds ``metadata``, the per-criterion
    ``craap`` entries, and ``overall`` with the rounded mean and a verdict.
    """
    cur_score, cur_evidence, cur_checks = score_currency(meta.get("year"))
    auth_score, auth_evidence = score_authority(meta)
    acc_score, acc_evidence = score_accuracy(text)
    pur_score, pur_evidence = score_purpose(text)
    rel_score, rel_evidence = score_relevance(assignment_context, meta, text)

    if provisional:
        if acc_score > 3:
            acc_score = 3
        if pur_score > 4:
            pur_score = 4

    craap = {
        "Currency": {"score": cur_score, "evidence": cur_evidence, "checks": cur_checks},
        "Relevance": {"score": rel_score, "evidence": rel_evidence},
        "Authority": {"score": auth_score, "evidence": auth_evidence},
        "Accuracy": {"score": acc_score, "evidence": acc_evidence},
        "Purpose": {"score": pur_score, "evidence": pur_evidence},
    }

    total = 0
    for criterion in craap.values():
        total += criterion["score"]
    avg = round(total / 5, 2)

    if avg >= 4.0:
        verdict = "use"
    elif avg >= 2.5:
        verdict = "use with caution"
    else:
        verdict = "avoid"

    return {
        "metadata": meta,
        "craap": craap,
        "overall": {"average": avg, "verdict": verdict},
    }
|
|
|
|
|
|
|
|
# Jinja2 template for the single-page UI: the submission form plus, when a
# ``result`` is supplied, the evaluation summary.  Mis-encoded characters
# (separators, dashes, icons) were restored as UTF-8; the pill icons are
# best-effort replacements for glyphs that could not be recovered exactly.
INDEX_HTML = """
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <title>CRAAP Bot · Flask</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <style>
    :root{
      --bg:#f8fafc;
      --card:#ffffff;
      --ink:#0f172a;
      --muted:#64748b;
      --line:#e2e8f0;
      --brand:#111827;
      --accent:#2563eb;
      --warn-bg:#fff7ed;
      --warn-line:#fed7aa;
      --code-bg:#0b1020;
      --code-ink:#d7e7ff;
      --ring:#93c5fd;
      --shadow:0 1px 2px rgba(0,0,0,.05), 0 10px 16px rgba(2,6,23,.04);
    }
    @media (prefers-color-scheme: dark){
      :root{
        --bg:#0b1220;
        --card:#0f172a;
        --ink:#e5e7eb;
        --muted:#94a3b8;
        --line:#1f2a44;
        --brand:#e5e7eb;
        --accent:#60a5fa;
        --warn-bg:#2b1f12;
        --warn-line:#9a5a25;
        --code-bg:#030712;
        --code-ink:#d7e7ff;
        --ring:#2563eb;
        --shadow:0 1px 2px rgba(0,0,0,.4), 0 12px 20px rgba(0,0,0,.35);
      }
    }

    *{box-sizing:border-box}
    html,body{height:100%}
    body{
      margin:0;
      background:var(--bg);
      color:var(--ink);
      font:16px/1.55 system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, Apple Color Emoji, Segoe UI Emoji, Noto Color Emoji, sans-serif;
    }

    .wrap{max-width:980px;margin:2.2rem auto;padding:0 1rem}
    header{
      padding:1.25rem 1rem 1rem;
      border-radius:16px;
      background:linear-gradient(135deg, rgba(37,99,235,.10), rgba(2,6,23,.03));
      border:1px solid var(--line);
      box-shadow:var(--shadow);
    }
    header h1{margin:0 0 .35rem;font-weight:800;letter-spacing:.2px}
    header p{margin:.25rem 0 0;color:var(--muted)}

    .tag{
      display:inline-flex;align-items:center;gap:.4rem;
      padding:.2rem .6rem;margin-top:.5rem;margin-right:.5rem;
      border:1px solid var(--line);border-radius:999px;color:var(--muted);font-size:.85rem
    }

    .card{
      background:var(--card);border:1px solid var(--line);border-radius:16px;
      padding:1.1rem 1.2rem;margin:1rem 0;box-shadow:var(--shadow)
    }

    label{display:block;font-weight:650;margin:.65rem 0 .35rem}
    input[type="text"], input[type="file"]{
      width:100%;padding:.7rem .8rem;border:1px solid var(--line);border-radius:12px;background:transparent;color:var(--ink);
      outline:none;transition:border .15s, box-shadow .15s
    }
    input[type="text"]:focus, input[type="file"]:focus{
      border-color:var(--accent); box-shadow:0 0 0 3px color-mix(in srgb, var(--ring) 35%, transparent);
    }

    .btn{
      display:inline-block; background:var(--brand); color:#fff; text-decoration:none;
      border:0; padding:.6rem .9rem; border-radius:10px; cursor:pointer;
      transition:transform .06s ease, opacity .15s ease;
      margin:.25rem .35rem .25rem 0; font-weight:600; font-size:.95rem
    }
    .btn:hover{opacity:.92; transform:translateY(-1px)}
    .btn:focus{outline:3px solid color-mix(in srgb, var(--ring) 45%, transparent); outline-offset:2px}
    .btn--ghost{
      background:transparent;color:var(--ink);border:1px solid var(--line)
    }

    .muted{color:var(--muted)}
    .warn{padding:.7rem .9rem;background:var(--warn-bg);border:1px solid var(--warn-line);border-radius:12px;margin:.8rem 0}

    ul{padding-left:1.2rem;margin:.6rem 0}
    li{margin:.25rem 0}

    pre{
      background:var(--code-bg);color:var(--code-ink);
      padding:1rem;border-radius:12px;overflow:auto;border:1px solid #0b1220;
    }

    details summary{cursor:pointer; list-style:none}
    details summary::marker, details summary::-webkit-details-marker{display:none}
    details summary{display:flex; align-items:center; gap:.5rem; font-weight:700}
    details[open] summary{opacity:.85}

    .grid{
      display:grid; gap:1rem;
      grid-template-columns:1fr;
    }
    @media (min-width:860px){
      .grid{grid-template-columns:1fr 1fr}
    }

    .meta{display:flex; flex-wrap:wrap; gap:.4rem .6rem; align-items:center}
    .pill{
      display:inline-flex; align-items:center; gap:.4rem;
      border:1px solid var(--line); border-radius:999px; padding:.15rem .55rem; color:var(--muted); font-size:.85rem
    }
  </style>
</head>

<body>
  <div class="wrap">
    <header>
      <h1>CRAAP Bot</h1>
      <p class="muted">URL/DOI or PDF → quick quality check for scholarly sources</p>
      <span class="tag">By: NADYA W</span>
    </header>

    <div class="card">
      <form method="POST" action="{{ url_for('analyze') }}" enctype="multipart/form-data">
        <label for="paper_source">URL or DOI</label>
        <input id="paper_source" type="text" name="paper_source" placeholder="https://doi.org/10.xxxx/..."/>

        <label for="pdf">Or upload PDF</label>
        <input id="pdf" type="file" name="pdf" accept="application/pdf"/>

        <label for="assignment_context">Assignment context (optional)</label>
        <input id="assignment_context" type="text" name="assignment_context" placeholder="e.g., AI for zoonotic disease 2023–2025"/>

        <div style="margin-top:.9rem">
          <button class="btn" type="submit">Analyze</button>
          <a class="btn btn--ghost" href="{{ url_for('index') }}">Reset</a>
        </div>
        <p class="muted" style="margin:.6rem 0 0">Tip: DOI or full PDF gives best results. Partial PDFs limit Accuracy/Purpose.</p>
      </form>
    </div>

    {% if result %}
      {% if warnings %}
        <div class="warn">⚠️ {{ warnings|join(' · ') }}</div>
      {% endif %}

      <div class="card">
        <h2 style="margin-top:0">CRAAP Evaluation Summary</h2>

        <p style="margin:.25rem 0 0"><strong>{{ result.metadata.title or '[unknown title]' }}</strong></p>
        <p class="muted" style="margin:.25rem 0 .75rem">
          {{ (result.metadata.authors or [])|join(', ') }} · {{ result.metadata.venue or 'unknown venue' }}{% if result.metadata.year %} · {{ result.metadata.year }}{% endif %}
        </p>

        {% set s2 = result.enrichment.s2 if result.enrichment else None %}
        {% set doi = result.metadata.identifier.doi if result.metadata and result.metadata.identifier else None %}
        {% set src_url = result.metadata.identifier.url if result.metadata and result.metadata.identifier else None %}

        <p>
          {% if doi %}
            <a class="btn" href="https://doi.org/{{ doi }}" target="_blank" rel="noopener">Open DOI</a>
          {% elif src_url %}
            <a class="btn" href="{{ src_url }}" target="_blank" rel="noopener">Open Source</a>
          {% endif %}

          {% if s2 and s2.url %}
            <a class="btn" href="{{ s2.url }}" target="_blank" rel="noopener">Semantic Scholar</a>
          {% endif %}

          {% if s2 and s2.openAccessPdf %}
            <a class="btn" href="{{ s2.openAccessPdf }}" target="_blank" rel="noopener">Open Access PDF</a>
          {% endif %}

          <a class="btn btn--ghost" href="https://scholar.google.com/scholar?q={{ (result.metadata.title or doi or '')|urlencode }}" target="_blank" rel="noopener">Google Scholar</a>
        </p>

        {% if s2 %}
          <div class="meta" style="margin:.25rem 0 .75rem">
            {% if s2.journal %}<span class="pill">📚 {{ s2.journal }}</span>{% endif %}
            {% if s2.publicationDate %}<span class="pill">📅 {{ s2.publicationDate }}</span>{% endif %}
            <span class="pill">📈 Citations: {{ s2.citationCount if s2.citationCount is not none else "?" }}</span>
            {% if s2.influentialCitationCount is not none %}<span class="pill">⭐ Influential: {{ s2.influentialCitationCount }}</span>{% endif %}
            {% if s2.isOpenAccess %}<span class="pill">🟢 Open Access</span>{% endif %}
            {% if s2.publicationTypes %}<span class="pill">🧪 {{ s2.publicationTypes|join(', ') }}</span>{% endif %}
          </div>
        {% endif %}

        <div class="grid">
          <div class="card" style="margin:0">
            <h3 style="margin-top:0">Scores</h3>
            <ul>
              <li><strong>Currency</strong>: {{ result.craap.Currency.score }}/5 — {{ result.craap.Currency.evidence }}</li>
              <li><strong>Relevance</strong>: {{ result.craap.Relevance.score }}/5 — {{ result.craap.Relevance.evidence }}</li>
              <li><strong>Authority</strong>: {{ result.craap.Authority.score }}/5 — {{ result.craap.Authority.evidence }}</li>
              <li><strong>Accuracy</strong>: {{ result.craap.Accuracy.score }}/5 — {{ result.craap.Accuracy.evidence }}</li>
              <li><strong>Purpose</strong>: {{ result.craap.Purpose.score }}/5 — {{ result.craap.Purpose.evidence }}</li>
            </ul>
            <p><strong>Overall:</strong> {{ result.overall.average }} — <em>{{ result.overall.verdict }}</em></p>
          </div>

          <div class="card" style="margin:0">
            <h3 style="margin-top:0">What to verify next</h3>
            <ol>
              <li>Confirm publication date &amp; peer-review at the DOI/URL.</li>
              <li>Skim methods/results for sample size, validation, limitations.</li>
              <li>Check author affiliations and profiles (Semantic Scholar/ORCID).</li>
              <li>Look for funding/conflict-of-interest statements.</li>
              <li>Search for newer papers (last 1–2 years) that cite or challenge it.</li>
            </ol>
          </div>
        </div>
      </div>

      <div class="card">
        <details>
          <summary>View raw JSON</summary>
          <pre>{{ result | tojson(indent=2) }}</pre>
        </details>
      </div>
    {% endif %}
  </div>
</body>
</html>
"""
|
|
|
|
|
@app.route("/", methods=["GET"])
def index():
    """Render the submission form with no result and no warnings."""
    blank_context = {"result": None, "warnings": None}
    return render_template_string(INDEX_HTML, **blank_context)
|
|
|
|
|
@app.route("/analyze", methods=["POST"])
def analyze():
    """Handle the HTML form: evaluate a URL/DOI or an uploaded PDF.

    A non-empty ``paper_source`` field takes precedence over an uploaded
    ``pdf`` file.  With neither present the client is redirected back to the
    form; otherwise the page is re-rendered with the result and warnings.
    """
    paper_source = _clean(request.form.get("paper_source", ""))
    assignment_context = _clean(request.form.get("assignment_context", ""))
    provisional = False
    warnings: List[str] = []
    meta, text = {}, ""

    upload = request.files.get("pdf")
    if paper_source:
        meta, text, w = fetch_url_metadata(paper_source)
        warnings.extend(w)
    elif upload is not None and upload.filename:
        meta, text, w = extract_pdf_text_and_guess_meta(upload)
        warnings.extend(w)
        # PDF metadata is guessed heuristically, so always cap the scores.
        provisional = True
    else:
        return redirect(url_for("index"))

    # Any warning collected so far also makes the evaluation provisional.
    result = aggregate_scores(meta, text, assignment_context, provisional or bool(warnings))

    doi = (meta.get("identifier") or {}).get("doi")
    enrichment, ewarns = fetch_semantic_scholar(doi)
    result["enrichment"] = enrichment
    warnings.extend(ewarns)

    if not text:
        warnings.append(
            "Full text not available — Accuracy/Purpose are provisional. "
            "Provide a DOI/URL or full PDF for deeper evaluation."
        )
    return render_template_string(INDEX_HTML, result=result, warnings=warnings)
|
|
|
|
|
@app.route("/api/analyze", methods=["POST"])
def api_analyze():
    """JSON API: evaluate a URL or DOI.

    Expects a JSON object with ``paper_source`` (required) and
    ``assignment_context`` (optional), both strings.  Responds with
    ``{"result": ..., "warnings": [...]}``, or with HTTP 400 and an ``error``
    message when ``paper_source`` is missing or the body is not usable.
    """
    # silent=True yields None instead of an HTTP error for a missing,
    # malformed, or non-JSON body, so every bad request gets the same 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_source = data.get("paper_source")
    raw_context = data.get("assignment_context")
    # Non-string values are treated as absent rather than crashing on .strip().
    paper_source = _clean(raw_source) if isinstance(raw_source, str) else ""
    assignment_context = _clean(raw_context) if isinstance(raw_context, str) else ""

    if not paper_source:
        return jsonify({"error":"Provide paper_source (URL/DOI) or use /analyze form for PDF upload"}), 400

    meta, text, warnings = fetch_url_metadata(paper_source)
    # Any fetch warning makes the evaluation provisional.
    result = aggregate_scores(meta, text, assignment_context, bool(warnings))

    doi = (meta.get("identifier") or {}).get("doi")
    enrichment, ewarns = fetch_semantic_scholar(doi)
    result["enrichment"] = enrichment
    warnings.extend(ewarns)
    return jsonify({"result": result, "warnings": warnings})
|
|
|
|
|
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server run
    # arbitrary code, so it is opt-in via FLASK_DEBUG and, when enabled, the
    # server listens on the loopback interface only.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    host = "127.0.0.1" if debug else "0.0.0.0"
    app.run(host=host, port=8000, debug=debug)
|
|
|