Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import io, re, json, datetime
|
| 2 |
from typing import Dict, Any, List, Tuple, Optional
|
| 3 |
|
| 4 |
from flask import Flask, request, jsonify, render_template_string, redirect, url_for
|
|
@@ -11,10 +11,10 @@ app = Flask(__name__)
|
|
| 11 |
CORS(app, resources={r"/api/*": {"origins": "*"}})
|
| 12 |
|
| 13 |
app.config["MAX_CONTENT_LENGTH"] = 16 * 1024 * 1024 # 16 MB upload cap
|
|
|
|
| 14 |
THIS_YEAR = datetime.date.today().year
|
| 15 |
DOI_RX = re.compile(r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.I)
|
| 16 |
|
| 17 |
-
# ----------------- Helpers
|
| 18 |
def _clean(s: Optional[str]) -> str:
|
| 19 |
return (s or "").strip()
|
| 20 |
|
|
@@ -27,8 +27,7 @@ def year_from_any(x: str) -> Optional[int]:
|
|
| 27 |
return y
|
| 28 |
return None
|
| 29 |
|
| 30 |
-
def fetch_url_metadata(url_or_doi: str)
|
| 31 |
-
"""Return (metadata, fulltext_excerpt, warnings) for a URL or DOI."""
|
| 32 |
warnings = []
|
| 33 |
url = url_or_doi
|
| 34 |
m = DOI_RX.search(url_or_doi)
|
|
@@ -68,8 +67,7 @@ def fetch_url_metadata(url_or_doi: str) -> Tuple[Dict[str, Any], str, List[str]]
|
|
| 68 |
text_excerpt = (abst or "")[:4000]
|
| 69 |
return meta, text_excerpt, warnings
|
| 70 |
|
| 71 |
-
def extract_pdf_text_and_guess_meta(file_storage)
|
| 72 |
-
"""Return (metadata, body_text, warnings)."""
|
| 73 |
warnings = []
|
| 74 |
try:
|
| 75 |
data = file_storage.read()
|
|
@@ -111,48 +109,7 @@ def extract_pdf_text_and_guess_meta(file_storage) -> Tuple[Dict[str, Any], str,
|
|
| 111 |
except Exception as e:
|
| 112 |
return {}, "", [f"Failed to parse PDF: {e}"]
|
| 113 |
|
| 114 |
-
|
| 115 |
-
SEM_SCH_FIELDS = (
|
| 116 |
-
"title,year,venue,url,isOpenAccess,openAccessPdf,"
|
| 117 |
-
"citationCount,referenceCount,publicationTypes,externalIds,"
|
| 118 |
-
"authors.name,authors.hIndex"
|
| 119 |
-
)
|
| 120 |
-
|
| 121 |
-
def fetch_semantic_scholar(doi: Optional[str]) -> Tuple[Dict[str, Any], List[str]]:
|
| 122 |
-
if not doi:
|
| 123 |
-
return {}, ["Semantic Scholar: DOI missing; lookup skipped."]
|
| 124 |
-
url = f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}"
|
| 125 |
-
try:
|
| 126 |
-
r = requests.get(url, params={"fields": SEM_SCH_FIELDS}, timeout=15)
|
| 127 |
-
if r.status_code == 404:
|
| 128 |
-
return {}, [f"Semantic Scholar: no record for DOI {doi}."]
|
| 129 |
-
r.raise_for_status()
|
| 130 |
-
return r.json(), []
|
| 131 |
-
except Exception as e:
|
| 132 |
-
return {}, [f"Semantic Scholar error: {e}"]
|
| 133 |
-
|
| 134 |
-
def authority_boost_with_semantic(meta: Dict[str,Any], sem: Dict[str,Any]) -> Tuple[int, str]:
|
| 135 |
-
if not sem:
|
| 136 |
-
return 0, ""
|
| 137 |
-
bonus = 0
|
| 138 |
-
notes = []
|
| 139 |
-
cit = sem.get("citationCount")
|
| 140 |
-
if isinstance(cit, int):
|
| 141 |
-
if cit >= 50: bonus += 2
|
| 142 |
-
elif cit >= 10: bonus += 1
|
| 143 |
-
notes.append(f"S2 citations: {cit}.")
|
| 144 |
-
# Author h-index
|
| 145 |
-
try:
|
| 146 |
-
auths = sem.get("authors") or []
|
| 147 |
-
max_h = max([a.get("hIndex", 0) or 0 for a in auths] or [0])
|
| 148 |
-
if max_h >= 30: bonus += 1
|
| 149 |
-
notes.append(f"Top author h-index: {max_h}.")
|
| 150 |
-
except Exception:
|
| 151 |
-
pass
|
| 152 |
-
return min(bonus, 2), "; ".join(notes)
|
| 153 |
-
|
| 154 |
-
# ----------------- Scoring
|
| 155 |
-
def score_currency(year: Optional[int]) -> Tuple[int, str, List[str]]:
|
| 156 |
if not year:
|
| 157 |
return 2, "Publication year unknown.", ["Could not find a clear date; treat with caution."]
|
| 158 |
age = max(0, THIS_YEAR - year)
|
|
@@ -161,7 +118,7 @@ def score_currency(year: Optional[int]) -> Tuple[int, str, List[str]]:
|
|
| 161 |
if age <= 10: return 3, f"Published in {year} (~{age} years old).", []
|
| 162 |
return 2, f"Published in {year} (>10 years old).", ["Potentially outdated."]
|
| 163 |
|
| 164 |
-
def score_authority(meta: Dict[str,Any]
|
| 165 |
score = 1
|
| 166 |
notes = []
|
| 167 |
if meta.get("venue"):
|
|
@@ -172,13 +129,9 @@ def score_authority(meta: Dict[str,Any], sem: Optional[Dict[str,Any]] = None) ->
|
|
| 172 |
a_count = len(meta["authors"])
|
| 173 |
if a_count >= 3: score += 1
|
| 174 |
notes.append(f"Authors: {a_count}.")
|
| 175 |
-
if sem:
|
| 176 |
-
b, bnotes = authority_boost_with_semantic(meta, sem)
|
| 177 |
-
score += b
|
| 178 |
-
if bnotes: notes.append(bnotes)
|
| 179 |
return min(score,5), "; ".join(notes) if notes else "Insufficient venue/author info."
|
| 180 |
|
| 181 |
-
def score_accuracy(text_excerpt: str)
|
| 182 |
keys_present = sum(1 for k in ["methods","materials","results","limitations","confidence interval","validation","dataset","sample size"] if k in text_excerpt.lower())
|
| 183 |
if not text_excerpt:
|
| 184 |
return 2, "No body text available; cannot inspect methods."
|
|
@@ -187,7 +140,7 @@ def score_accuracy(text_excerpt: str) -> Tuple[int, str]:
|
|
| 187 |
if keys_present >= 1: return 3, "Limited methodological signals."
|
| 188 |
return 2, "Minimal methodological detail detected (likely a commentary/overview)."
|
| 189 |
|
| 190 |
-
def score_purpose(text_excerpt: str)
|
| 191 |
lower = text_excerpt.lower()
|
| 192 |
bias_hits = any(w in lower for w in ["sponsored", "advertisement", "marketing"])
|
| 193 |
conflicts = "conflict of interest" in lower or "competing interest" in lower
|
|
@@ -200,7 +153,7 @@ def score_purpose(text_excerpt: str) -> Tuple[int, str]:
|
|
| 200 |
return 4, "Academic tone with disclosures/funding statements."
|
| 201 |
return 4, "Academic/educational purpose inferred."
|
| 202 |
|
| 203 |
-
def score_relevance(assignment_context: str, meta: Dict[str,Any], text_excerpt: str)
|
| 204 |
if not assignment_context:
|
| 205 |
return 4, "General relevance assumed (no assignment context provided)."
|
| 206 |
ctx = assignment_context.lower()
|
|
@@ -211,9 +164,9 @@ def score_relevance(assignment_context: str, meta: Dict[str,Any], text_excerpt:
|
|
| 211 |
if hits >= 1: return 3, "Partial topical overlap."
|
| 212 |
return 2, "Low topical overlap; may be tangential."
|
| 213 |
|
| 214 |
-
def
|
| 215 |
currency_score, currency_evd, currency_checks = score_currency(meta.get("year"))
|
| 216 |
-
authority_score, authority_evd = score_authority(meta
|
| 217 |
accuracy_score, accuracy_evd = score_accuracy(text)
|
| 218 |
purpose_score, purpose_evd = score_purpose(text)
|
| 219 |
relevance_score, relevance_evd = score_relevance(assignment_context, meta, text)
|
|
@@ -229,25 +182,10 @@ def aggregate_scores_with_sem(meta: Dict[str,Any], text: str, assignment_context
|
|
| 229 |
}
|
| 230 |
avg = round(sum(v["score"] for v in craap.values())/5, 2)
|
| 231 |
verdict = "use" if avg >= 4.0 else ("use with caution" if avg >= 2.5 else "avoid")
|
| 232 |
-
return {
|
| 233 |
-
"metadata": meta,
|
| 234 |
-
"craap": craap,
|
| 235 |
-
"overall": {"average": avg, "verdict": verdict},
|
| 236 |
-
"external": {
|
| 237 |
-
"semantic_scholar": {
|
| 238 |
-
"url": sem.get("url"),
|
| 239 |
-
"venue": sem.get("venue"),
|
| 240 |
-
"year": sem.get("year"),
|
| 241 |
-
"citationCount": sem.get("citationCount"),
|
| 242 |
-
"referenceCount": sem.get("referenceCount"),
|
| 243 |
-
"openAccessPdf": (sem.get("openAccessPdf") or {}).get("url") if sem else None
|
| 244 |
-
}
|
| 245 |
-
}
|
| 246 |
-
}
|
| 247 |
|
| 248 |
-
# ----------------- UI
|
| 249 |
INDEX_HTML = """
|
| 250 |
-
<!doctype html><html><head><meta charset="utf-8"/><title>CRAAP Bot</title>
|
| 251 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 252 |
<style>
|
| 253 |
body{font:16px system-ui,Segoe UI,Roboto,sans-serif;max-width:880px;margin:2rem auto;padding:0 1rem}
|
|
@@ -255,17 +193,18 @@ form,.card{border:1px solid #e5e7eb;border-radius:12px;padding:1rem;margin:1rem
|
|
| 255 |
label{display:block;font-weight:600;margin:.5rem 0 .25rem}
|
| 256 |
input[type="text"],textarea{width:100%;padding:.6rem .7rem;border:1px solid #d1d5db;border-radius:8px}
|
| 257 |
input[type="file"]{margin:.25rem 0 .75rem}button{background:#111827;color:#fff;border:0;padding:.6rem 1rem;border-radius:8px;cursor:pointer}
|
| 258 |
-
.muted{color:#6b7280}.warn{padding:.6rem .8rem;background:#fff7ed;border:1px solid #fed7aa;border-radius:8px;margin:.5rem 0}
|
| 259 |
.tag{display:inline-block;padding:.1rem .5rem;border-radius:999px;border:1px solid #d1d5db;margin-right:.4rem}
|
| 260 |
</style></head><body>
|
| 261 |
-
<header><h1>CRAAP Bot</h1><span class="tag">By:
|
| 262 |
<div class="card"><form method="POST" action="{{ url_for('analyze') }}" enctype="multipart/form-data">
|
| 263 |
<label>URL or DOI</label><input type="text" name="paper_source" placeholder="https://doi.org/10.xxxx/..."/>
|
| 264 |
<label>Or upload PDF</label><input type="file" name="pdf" accept="application/pdf"/>
|
| 265 |
-
<label>Assignment context (optional)</label><input type="text" name="assignment_context" placeholder="e.g.,
|
| 266 |
<button type="submit">Analyze</button></form>
|
| 267 |
<p class="muted">Tip: DOI or full PDF gives best results. Partial PDFs limit Accuracy/Purpose.</p></div>
|
| 268 |
{% if result %}{% if warnings %}<div class="warn">⚠️ {{ warnings|join(' · ') }}</div>{% endif %}
|
|
|
|
| 269 |
<div class="card"><h2>CRAAP Evaluation Summary</h2>
|
| 270 |
<p><strong>{{ result.metadata.title or '[unknown title]' }}</strong></p>
|
| 271 |
<p class="muted">{{ (result.metadata.authors or [])|join(', ') }} · {{ result.metadata.venue or 'unknown venue' }}{% if result.metadata.year %} · {{ result.metadata.year }}{% endif %}</p>
|
|
@@ -275,32 +214,55 @@ input[type="file"]{margin:.25rem 0 .75rem}button{background:#111827;color:#fff;b
|
|
| 275 |
<li><strong>Authority</strong>: {{ result.craap.Authority.score }}/5 — {{ result.craap.Authority.evidence }}</li>
|
| 276 |
<li><strong>Accuracy</strong>: {{ result.craap.Accuracy.score }}/5 — {{ result.craap.Accuracy.evidence }}</li>
|
| 277 |
<li><strong>Purpose</strong>: {{ result.craap.Purpose.score }}/5 — {{ result.craap.Purpose.evidence }}</li>
|
| 278 |
-
</ul><p><strong>Overall:</strong> {{ result.overall.average }} — <em>{{ result.overall.verdict }}</em></p>
|
| 279 |
-
|
| 280 |
-
<div class="card"><h3>Research signals</h3>
|
| 281 |
-
<ul>
|
| 282 |
-
<li><strong>Semantic Scholar citations:</strong>
|
| 283 |
-
{% if result.external.semantic_scholar.citationCount is not none %}
|
| 284 |
-
{{ result.external.semantic_scholar.citationCount }}
|
| 285 |
-
{% else %} n/a {% endif %}
|
| 286 |
-
{% if result.external.semantic_scholar.url %} · <a href="{{ result.external.semantic_scholar.url }}" target="_blank">S2 record</a>{% endif %}
|
| 287 |
-
{% if result.external.semantic_scholar.openAccessPdf %} · <a href="{{ result.external.semantic_scholar.openAccessPdf }}" target="_blank">OA PDF</a>{% endif %}
|
| 288 |
-
</li>
|
| 289 |
-
<li><strong>Venue/Year (S2):</strong>
|
| 290 |
-
{{ result.external.semantic_scholar.venue or 'n/a' }}{% if result.external.semantic_scholar.year %} · {{ result.external.semantic_scholar.year }}{% endif %}
|
| 291 |
-
</li>
|
| 292 |
-
</ul></div>
|
| 293 |
-
|
| 294 |
-
<div class="card"><h3>What to verify next</h3>
|
| 295 |
<ol>
|
| 296 |
<li>Confirm publication date & peer-review at the DOI/URL.</li>
|
| 297 |
-
<li>
|
| 298 |
-
<li>
|
| 299 |
-
<li>
|
| 300 |
-
<li>
|
| 301 |
-
</ol>
|
| 302 |
-
{% endif %}
|
| 303 |
-
</body></html>
|
| 304 |
"""
|
| 305 |
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io, re, json, datetime
|
| 2 |
from typing import Dict, Any, List, Tuple, Optional
|
| 3 |
|
| 4 |
from flask import Flask, request, jsonify, render_template_string, redirect, url_for
|
|
|
|
| 11 |
# Allow cross-origin requests, but only on the JSON API routes (/api/*).
CORS(app, resources={r"/api/*": {"origins": "*"}})

app.config["MAX_CONTENT_LENGTH"] = 16 * 1024 * 1024  # 16 MB upload cap

# Current calendar year; used by the Currency score to compute article age.
THIS_YEAR = datetime.date.today().year
# Matches a DOI (e.g. "10.1234/abc-123") anywhere in a string, case-insensitive.
DOI_RX = re.compile(r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.I)
|
| 17 |
|
|
|
|
| 18 |
def _clean(s: Optional[str]) -> str:
|
| 19 |
return (s or "").strip()
|
| 20 |
|
|
|
|
| 27 |
return y
|
| 28 |
return None
|
| 29 |
|
| 30 |
+
def fetch_url_metadata(url_or_doi: str):
|
|
|
|
| 31 |
warnings = []
|
| 32 |
url = url_or_doi
|
| 33 |
m = DOI_RX.search(url_or_doi)
|
|
|
|
| 67 |
text_excerpt = (abst or "")[:4000]
|
| 68 |
return meta, text_excerpt, warnings
|
| 69 |
|
| 70 |
+
def extract_pdf_text_and_guess_meta(file_storage):
|
|
|
|
| 71 |
warnings = []
|
| 72 |
try:
|
| 73 |
data = file_storage.read()
|
|
|
|
| 109 |
except Exception as e:
|
| 110 |
return {}, "", [f"Failed to parse PDF: {e}"]
|
| 111 |
|
| 112 |
+
def score_currency(year: Optional[int]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
if not year:
|
| 114 |
return 2, "Publication year unknown.", ["Could not find a clear date; treat with caution."]
|
| 115 |
age = max(0, THIS_YEAR - year)
|
|
|
|
| 118 |
if age <= 10: return 3, f"Published in {year} (~{age} years old).", []
|
| 119 |
return 2, f"Published in {year} (>10 years old).", ["Potentially outdated."]
|
| 120 |
|
| 121 |
+
def score_authority(meta: Dict[str,Any]):
|
| 122 |
score = 1
|
| 123 |
notes = []
|
| 124 |
if meta.get("venue"):
|
|
|
|
| 129 |
a_count = len(meta["authors"])
|
| 130 |
if a_count >= 3: score += 1
|
| 131 |
notes.append(f"Authors: {a_count}.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
return min(score,5), "; ".join(notes) if notes else "Insufficient venue/author info."
|
| 133 |
|
| 134 |
+
def score_accuracy(text_excerpt: str):
|
| 135 |
keys_present = sum(1 for k in ["methods","materials","results","limitations","confidence interval","validation","dataset","sample size"] if k in text_excerpt.lower())
|
| 136 |
if not text_excerpt:
|
| 137 |
return 2, "No body text available; cannot inspect methods."
|
|
|
|
| 140 |
if keys_present >= 1: return 3, "Limited methodological signals."
|
| 141 |
return 2, "Minimal methodological detail detected (likely a commentary/overview)."
|
| 142 |
|
| 143 |
+
def score_purpose(text_excerpt: str):
|
| 144 |
lower = text_excerpt.lower()
|
| 145 |
bias_hits = any(w in lower for w in ["sponsored", "advertisement", "marketing"])
|
| 146 |
conflicts = "conflict of interest" in lower or "competing interest" in lower
|
|
|
|
| 153 |
return 4, "Academic tone with disclosures/funding statements."
|
| 154 |
return 4, "Academic/educational purpose inferred."
|
| 155 |
|
| 156 |
+
def score_relevance(assignment_context: str, meta: Dict[str,Any], text_excerpt: str):
|
| 157 |
if not assignment_context:
|
| 158 |
return 4, "General relevance assumed (no assignment context provided)."
|
| 159 |
ctx = assignment_context.lower()
|
|
|
|
| 164 |
if hits >= 1: return 3, "Partial topical overlap."
|
| 165 |
return 2, "Low topical overlap; may be tangential."
|
| 166 |
|
| 167 |
+
def aggregate_scores(meta: Dict[str,Any], text: str, assignment_context: str, provisional: bool):
|
| 168 |
currency_score, currency_evd, currency_checks = score_currency(meta.get("year"))
|
| 169 |
+
authority_score, authority_evd = score_authority(meta)
|
| 170 |
accuracy_score, accuracy_evd = score_accuracy(text)
|
| 171 |
purpose_score, purpose_evd = score_purpose(text)
|
| 172 |
relevance_score, relevance_evd = score_relevance(assignment_context, meta, text)
|
|
|
|
| 182 |
}
|
| 183 |
avg = round(sum(v["score"] for v in craap.values())/5, 2)
|
| 184 |
verdict = "use" if avg >= 4.0 else ("use with caution" if avg >= 2.5 else "avoid")
|
| 185 |
+
return {"metadata": meta, "craap": craap, "overall": {"average": avg, "verdict": verdict}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
|
|
|
| 187 |
INDEX_HTML = """
|
| 188 |
+
<!doctype html><html><head><meta charset="utf-8"/><title>CRAAP Bot (Flask)</title>
|
| 189 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 190 |
<style>
|
| 191 |
body{font:16px system-ui,Segoe UI,Roboto,sans-serif;max-width:880px;margin:2rem auto;padding:0 1rem}
|
|
|
|
| 193 |
label{display:block;font-weight:600;margin:.5rem 0 .25rem}
|
| 194 |
input[type="text"],textarea{width:100%;padding:.6rem .7rem;border:1px solid #d1d5db;border-radius:8px}
|
| 195 |
input[type="file"]{margin:.25rem 0 .75rem}button{background:#111827;color:#fff;border:0;padding:.6rem 1rem;border-radius:8px;cursor:pointer}
|
| 196 |
+
pre{background:#0b1020;color:#d7e7ff;padding:1rem;border-radius:12px;overflow:auto}.muted{color:#6b7280}.warn{padding:.6rem .8rem;background:#fff7ed;border:1px solid #fed7aa;border-radius:8px;margin:.5rem 0}
|
| 197 |
.tag{display:inline-block;padding:.1rem .5rem;border-radius:999px;border:1px solid #d1d5db;margin-right:.4rem}
|
| 198 |
</style></head><body>
|
| 199 |
+
<header><h1>CRAAP Bot</h1><span class="tag">By: NADYA W</span></header>
|
| 200 |
<div class="card"><form method="POST" action="{{ url_for('analyze') }}" enctype="multipart/form-data">
|
| 201 |
<label>URL or DOI</label><input type="text" name="paper_source" placeholder="https://doi.org/10.xxxx/..."/>
|
| 202 |
<label>Or upload PDF</label><input type="file" name="pdf" accept="application/pdf"/>
|
| 203 |
+
<label>Assignment context (optional)</label><input type="text" name="assignment_context" placeholder="e.g., AI for zoonotic disease 2023-2025"/>
|
| 204 |
<button type="submit">Analyze</button></form>
|
| 205 |
<p class="muted">Tip: DOI or full PDF gives best results. Partial PDFs limit Accuracy/Purpose.</p></div>
|
| 206 |
{% if result %}{% if warnings %}<div class="warn">⚠️ {{ warnings|join(' · ') }}</div>{% endif %}
|
| 207 |
+
<div class="card"><h2>JSON</h2><pre>{{ result | tojson(indent=2) }}</pre></div>
|
| 208 |
<div class="card"><h2>CRAAP Evaluation Summary</h2>
|
| 209 |
<p><strong>{{ result.metadata.title or '[unknown title]' }}</strong></p>
|
| 210 |
<p class="muted">{{ (result.metadata.authors or [])|join(', ') }} · {{ result.metadata.venue or 'unknown venue' }}{% if result.metadata.year %} · {{ result.metadata.year }}{% endif %}</p>
|
|
|
|
| 214 |
<li><strong>Authority</strong>: {{ result.craap.Authority.score }}/5 — {{ result.craap.Authority.evidence }}</li>
|
| 215 |
<li><strong>Accuracy</strong>: {{ result.craap.Accuracy.score }}/5 — {{ result.craap.Accuracy.evidence }}</li>
|
| 216 |
<li><strong>Purpose</strong>: {{ result.craap.Purpose.score }}/5 — {{ result.craap.Purpose.evidence }}</li>
|
| 217 |
+
</ul><p><strong>Overall:</strong> {{ result.overall.average }} — <em>{{ result.overall.verdict }}</em></p>
|
| 218 |
+
<h3>What to verify next</h3>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
<ol>
|
| 220 |
<li>Confirm publication date & peer-review at the DOI/URL.</li>
|
| 221 |
+
<li>Skim methods/results for sample size, validation, limitations.</li>
|
| 222 |
+
<li>Check author affiliations and profiles (Semantic Scholar/ORCID).</li>
|
| 223 |
+
<li>Look for funding/conflict-of-interest statements.</li>
|
| 224 |
+
<li>Search for newer papers (last 1–2 years) that cite or challenge it.</li>
|
| 225 |
+
</ol>
|
| 226 |
+
</div>{% endif %}</body></html>
|
|
|
|
| 227 |
"""
|
| 228 |
|
| 229 |
+
@app.route("/", methods=["GET"])
def index():
    """Render the empty analysis form (no result or warnings yet)."""
    return render_template_string(INDEX_HTML, warnings=None, result=None)
|
| 232 |
+
|
| 233 |
+
@app.route("/analyze", methods=["POST"])
def analyze():
    """Handle the HTML form: evaluate a URL/DOI or an uploaded PDF.

    Redirects back to the form when neither input is supplied; otherwise
    renders the CRAAP evaluation together with any collected warnings.
    """
    paper_source = _clean(request.form.get("paper_source", ""))
    assignment_context = _clean(request.form.get("assignment_context", ""))

    warnings: List[str] = []
    meta: Dict[str, Any] = {}
    text = ""
    provisional = False

    pdf_file = request.files.get("pdf")
    if paper_source:
        # A URL/DOI was supplied — prefer it over any uploaded file.
        meta, text, fetch_warnings = fetch_url_metadata(paper_source)
        warnings.extend(fetch_warnings)
    elif pdf_file is not None and pdf_file.filename:
        meta, text, parse_warnings = extract_pdf_text_and_guess_meta(pdf_file)
        warnings.extend(parse_warnings)
        # PDF-derived metadata is guessed, so the result is provisional.
        provisional = True
    else:
        # Nothing to analyze — send the user back to the form.
        return redirect(url_for("index"))

    result = aggregate_scores(meta, text, assignment_context, provisional or bool(warnings))
    if not text:
        warnings.append("Full text not available — Accuracy/Purpose are provisional. Provide a DOI/URL or full PDF for deeper evaluation.")
    return render_template_string(INDEX_HTML, result=result, warnings=warnings)
|
| 252 |
+
|
| 253 |
+
@app.route("/api/analyze", methods=["POST"])
def api_analyze():
    """JSON API: evaluate a URL/DOI source.

    Expects a JSON body like {"paper_source": "...", "assignment_context": "..."}.
    Returns {"result": ..., "warnings": [...]} on success, or a 400 error
    when no paper_source is provided (PDF upload is only supported via the
    /analyze HTML form).
    """
    # get_json(silent=True) tolerates a missing or malformed JSON body by
    # returning None instead of raising UnsupportedMediaType/BadRequest,
    # which matches the tolerance the original `or {}` guard intended.
    data = request.get_json(silent=True) or {}
    paper_source = _clean(data.get("paper_source", ""))
    assignment_context = _clean(data.get("assignment_context", ""))
    if not paper_source:
        return jsonify({"error": "Provide paper_source (URL/DOI) or use /analyze form for PDF upload"}), 400
    meta, text, warnings = fetch_url_metadata(paper_source)
    # provisional was always False here, so the flag reduces to bool(warnings).
    result = aggregate_scores(meta, text, assignment_context, bool(warnings))
    return jsonify({"result": result, "warnings": warnings})
|
| 266 |
+
|
| 267 |
+
# Dev entry point: serve on all interfaces, port 8000.
# NOTE(review): debug=True enables the interactive Werkzeug debugger and
# auto-reloader — combined with host="0.0.0.0" this must not be used in
# production; run behind a real WSGI server instead.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000, debug=True)
|