Abu-Sameer-66 committed on
Commit
91b930f
·
1 Parent(s): 95102c9

feat: section-aware text routing — each module gets relevant paper sections

Browse files

- Replace flat 8000 char truncation with _smart_text()
- Statistics module gets Results + Methods sections
- Citations module gets References + Introduction
- LLM detector gets full paper spread evenly
- Retraction checker gets References section
- Fallback to _truncate() when no headers found
- Version bump to 1.5.0

Files changed (1) hide show
  1. src/scipeerai/api/routes.py +1130 -238
src/scipeerai/api/routes.py CHANGED
@@ -1,4 +1,737 @@
1
- # src/scipeerai/api/routes.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  import tempfile
4
  from fastapi import APIRouter, HTTPException, UploadFile, File
@@ -20,19 +753,135 @@ from src.scipeerai.modules.llm_detector import LLMDetector
20
 
21
  router = APIRouter(prefix="/api/v1", tags=["Analysis"])
22
 
23
- # ── Smart text truncation — handles long papers ───────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def _truncate(text: str, limit: int = 8000) -> str:
25
- """Smart truncation — keeps abstract + methods sections."""
 
 
 
 
26
  if len(text) <= limit:
27
  return text
28
- lower = text.lower()
29
  methods_idx = lower.find('method')
30
- if methods_idx > 0 and methods_idx < len(text) - 1000:
31
  start = text[:3000]
32
  middle = text[methods_idx:methods_idx + 4000]
33
  return start + " [...] " + middle
34
  return text[:limit]
35
 
 
36
  # ── Engine initialization ─────────────────────────────────────────────────────
37
  _stat_engine = StatAuditEngine()
38
  _figure_engine = FigureForensicsEngine()
@@ -43,11 +892,12 @@ _novelty_engine = NoveltyScorer()
43
  _grim_engine = GrimTest()
44
  _sprite_engine = SpriteTest()
45
  _granularity_engine = GranularityAnalyzer()
46
- _pcurve_engine = PCurveAnalyzer()
47
- _effect_size_engine = EffectSizeValidator()
48
- _retraction_engine = RetractionChecker()
49
- _cartel_engine = CitationCartelDetector()
50
- _llm_engine = LLMDetector()
 
51
 
52
  # ── Request / Response Models ─────────────────────────────────────────────────
53
 
@@ -246,6 +1096,90 @@ class PCurveResponse(BaseModel):
246
  flags: list[PCurveFlagResponse]
247
  flags_count: int
248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  # ── Endpoints ─────────────────────────────────────────────────────────────────
250
 
251
  @router.get("/status")
@@ -262,35 +1196,44 @@ def system_status():
262
  "sprite_test": True,
263
  "granularity": True,
264
  "pcurve": True,
 
 
 
 
265
  },
266
- "version": "1.4.0",
 
267
  }
268
 
 
269
  @router.post("/analyze/statistics", response_model=StatAuditResponse)
270
  def analyze_statistics(request: TextAnalysisRequest):
271
  """Analyze paper for statistical integrity issues."""
272
  try:
273
- result = _stat_engine.analyze(_truncate(request.text))
 
 
274
  return StatAuditResponse(
275
- risk_level=result.risk_level,
276
- risk_score=result.risk_score,
277
- summary=result.summary,
278
- flags=[
279
  FlagResponse(
280
- flag_type=f.flag_type,
281
- severity=f.severity,
282
- description=f.description,
283
- evidence=f.evidence,
284
- suggestion=f.suggestion,
285
  ) for f in result.flags
286
  ],
287
- p_values_found=result.p_values_found,
288
- sample_sizes_found=result.sample_sizes_found,
289
- flags_count=len(result.flags),
290
  )
291
  except Exception as e:
292
  raise HTTPException(status_code=500, detail=str(e))
293
 
 
294
  @router.post("/analyze/figures", response_model=FigureForensicsResponse)
295
  async def analyze_figures(file: UploadFile = File(...)):
296
  """Upload PDF and analyze figures for forensic anomalies."""
@@ -304,10 +1247,12 @@ async def analyze_figures(file: UploadFile = File(...)):
304
  tmp_path = tmp.name
305
  result = _figure_engine.analyze(tmp_path)
306
  return FigureForensicsResponse(
307
- figures_found=result.figures_found,
308
- flags=[ForensicFlagResponse(figures_involved=f.figures_involved)
309
- for f in result.flags],
310
- duplicate_pairs=result.duplicate_pairs,
 
 
311
  )
312
  except HTTPException:
313
  raise
@@ -317,252 +1262,256 @@ async def analyze_figures(file: UploadFile = File(...)):
317
  if tmp_path and os.path.exists(tmp_path):
318
  os.unlink(tmp_path)
319
 
 
320
  @router.post("/analyze/methodology", response_model=MethodologyResponse)
321
  def analyze_methodology(request: MethodologyRequest):
322
  """Analyze paper for methodology logic issues."""
323
  try:
324
  result = _method_engine.analyze(
325
- _truncate(request.text), request.abstract
 
326
  )
327
  return MethodologyResponse(
328
- flags=[
329
  MethodologyFlagResponse(
330
- claim=f.claim,
331
- issue=f.issue,
332
- suggestion=f.suggestion,
333
  ) for f in result.flags
334
  ],
335
- claims_found=result.claims_found,
336
- methods_found=result.methods_found,
337
- llm_assessment=result.llm_assessment,
338
- llm_available=result.llm_available,
339
  )
340
  except Exception as e:
341
  raise HTTPException(status_code=500, detail=str(e))
342
 
 
343
  @router.post("/analyze/citations", response_model=CitationResponse)
344
  def analyze_citations(request: CitationRequest):
345
  """Analyze citations for integrity issues."""
346
  try:
347
  result = _citation_engine.analyze(
348
- _truncate(request.text), request.author_name
 
349
  )
350
  return CitationResponse(
351
- total_citations=result.total_citations,
352
- self_citations=result.self_citations,
353
- self_citation_ratio=result.self_citation_ratio,
354
- unsupported_claims=result.unsupported_claims,
355
- flags=[
356
  CitationFlagResponse(
357
- flag_type=f.flag_type,
358
- severity=f.severity,
359
- description=f.description,
360
- evidence=f.evidence,
361
- suggestion=getattr(f, 'suggestion', ''),
362
  ) for f in result.flags
363
  ],
364
- risk_level=result.risk_level,
365
- risk_score=result.risk_score,
366
- summary=result.summary,
367
- flags_count=len(result.flags),
368
  )
369
  except Exception as e:
370
  raise HTTPException(status_code=500, detail=str(e))
371
 
 
372
  @router.post("/analyze/reproducibility", response_model=ReproducibilityResponse)
373
  def analyze_reproducibility(request: ReproducibilityRequest):
374
  """Scan paper for reproducibility indicators."""
375
  try:
376
- result = _repro_engine.analyze(_truncate(request.text))
 
 
377
  return ReproducibilityResponse(
378
- has_code_link=result.has_code_link,
379
- has_data_link=result.has_data_link,
380
- has_software_versions=result.has_software_versions,
381
- has_preregistration=result.has_preregistration,
382
- has_ethics_statement=result.has_ethics_statement,
383
- reproducibility_score=result.reproducibility_score,
384
- risk_level=result.risk_level,
385
- summary=result.summary,
386
- flags=[
387
  ReproducibilityFlagResponse(
388
- flag_type=f.flag_type,
389
- severity=f.severity,
390
- description=f.description,
391
- evidence=f.evidence,
392
- suggestion=getattr(f, 'suggestion', ''),
393
  ) for f in result.flags
394
  ],
395
- flags_count=len(result.flags),
396
  )
397
  except Exception as e:
398
  raise HTTPException(status_code=500, detail=str(e))
399
 
 
400
  @router.post("/analyze/novelty", response_model=NoveltyResponse)
401
  def analyze_novelty(request: NoveltyRequest):
402
  """Estimate paper novelty against existing literature."""
403
  try:
404
  result = _novelty_engine.analyze(
405
- _truncate(request.text, 4000), request.title
 
406
  )
407
  raw_flags = getattr(result, 'flags', []) or []
408
  return NoveltyResponse(
409
- novelty_score=result.novelty_score,
410
- novelty_level=result.novelty_level,
411
- risk_level=result.risk_level,
412
- risk_score=getattr(result, 'risk_score', result.novelty_score),
413
- summary=result.summary,
414
- flags=[
415
  NoveltyFlagResponse(
416
- flag_type=f.flag_type,
417
- severity=f.severity,
418
- description=f.description,
419
- evidence=f.evidence,
420
- suggestion=getattr(f, 'suggestion', ''),
421
  ) for f in raw_flags
422
  ],
423
- related_works_found=[
424
  RelatedWorkResponse(
425
- title=w.title,
426
- year=w.year,
427
- authors=w.authors,
428
- similarity_signal=w.similarity_signal,
429
  ) for w in result.related_works_found
430
  ],
431
- key_terms_extracted=result.key_terms_extracted,
432
- literature_accessible=result.literature_accessible,
433
- flags_count=len(raw_flags),
434
  )
435
  except Exception as e:
436
  raise HTTPException(status_code=500, detail=str(e))
437
 
 
438
  @router.post("/analyze/grim", response_model=GrimResponse)
439
  def analyze_grim(request: GrimRequest):
440
  """GRIM Test — detect mathematically impossible means."""
441
  try:
442
- result = _grim_engine.analyze(_truncate(request.text))
 
 
443
  return GrimResponse(
444
- impossible_means=result.impossible_means,
445
- possible_means=result.possible_means,
446
- grim_score=result.grim_score,
447
- risk_level=result.risk_level,
448
- summary=result.summary,
449
- flags=[
450
  GrimFlagResponse(
451
- flag_type=f.flag_type,
452
- severity=f.severity,
453
- description=f.description,
454
- evidence=f.evidence,
455
- suggestion=f.suggestion,
456
  ) for f in result.flags
457
  ],
458
- flags_count=result.flags_count,
459
  )
460
  except Exception as e:
461
  raise HTTPException(status_code=500, detail=str(e))
462
 
 
463
  @router.post("/analyze/sprite", response_model=SpriteResponse)
464
  def analyze_sprite(request: SpriteRequest):
465
  """SPRITE Test — detect impossible distributions."""
466
  try:
467
- result = _sprite_engine.analyze(_truncate(request.text))
 
 
468
  return SpriteResponse(
469
- impossible_combinations=result.impossible_combinations,
470
- possible_combinations=result.possible_combinations,
471
- sprite_score=result.sprite_score,
472
- risk_level=result.risk_level,
473
- summary=result.summary,
474
- flags=[
475
  SpriteFlagResponse(
476
- flag_type=f.flag_type,
477
- severity=f.severity,
478
- description=f.description,
479
- evidence=f.evidence,
480
- suggestion=f.suggestion,
481
  ) for f in result.flags
482
  ],
483
- flags_count=result.flags_count,
484
  )
485
  except Exception as e:
486
  raise HTTPException(status_code=500, detail=str(e))
487
 
 
488
  @router.post("/analyze/granularity", response_model=GranularityResponse)
489
  def analyze_granularity(request: GranularityRequest):
490
  """Granularity Analyzer — Benford Law + digit preference."""
491
  try:
492
- result = _granularity_engine.analyze(_truncate(request.text))
 
 
493
  return GranularityResponse(
494
- digit_preference_score=result.digit_preference_score,
495
- benford_score=result.benford_score,
496
- round_number_ratio=result.round_number_ratio,
497
- granularity_score=result.granularity_score,
498
- risk_level=result.risk_level,
499
- summary=result.summary,
500
- flags=[
501
  GranularityFlagResponse(
502
- flag_type=f.flag_type,
503
- severity=f.severity,
504
- description=f.description,
505
- evidence=f.evidence,
506
- suggestion=f.suggestion,
507
  ) for f in result.flags
508
  ],
509
- flags_count=result.flags_count,
510
  )
511
  except Exception as e:
512
  raise HTTPException(status_code=500, detail=str(e))
513
 
 
514
  @router.post("/analyze/pcurve", response_model=PCurveResponse)
515
  def analyze_pcurve(request: PCurveRequest):
516
  """P-Curve Analyzer — publication bias detector."""
517
  try:
518
- result = _pcurve_engine.analyze(_truncate(request.text))
 
 
519
  return PCurveResponse(
520
- p_values_found=result.p_values_found,
521
- significant_p=result.significant_p,
522
- right_skew_ratio=result.right_skew_ratio,
523
- clustering_score=result.clustering_score,
524
- pcurve_score=result.pcurve_score,
525
- risk_level=result.risk_level,
526
- summary=result.summary,
527
- flags=[
528
  PCurveFlagResponse(
529
- flag_type=f.flag_type,
530
- severity=f.severity,
531
- description=f.description,
532
- evidence=f.evidence,
533
- suggestion=f.suggestion,
534
  ) for f in result.flags
535
  ],
536
- flags_count=result.flags_count,
537
  )
538
  except Exception as e:
539
  raise HTTPException(status_code=500, detail=str(e))
540
 
541
- class EffectSizeRequest(BaseModel):
542
- text: str = Field(..., min_length=50)
543
-
544
- class EffectSizeFlagResponse(BaseModel):
545
- flag_type: str
546
- severity: str
547
- description: str
548
- evidence: str
549
- suggestion: str
550
-
551
- class EffectSizeResponse(BaseModel):
552
- effect_sizes_found: list
553
- power_estimates: list
554
- inflated_effects: list
555
- underpowered: list
556
- effect_score: float
557
- risk_level: str
558
- summary: str
559
- flags: list[EffectSizeFlagResponse]
560
- flags_count: int
561
 
562
  @router.post('/analyze/effect_size', response_model=EffectSizeResponse)
563
  def analyze_effect_size(request: EffectSizeRequest):
 
564
  try:
565
- result = _effect_size_engine.analyze(_truncate(request.text))
 
 
566
  return EffectSizeResponse(
567
  effect_sizes_found = result.effect_sizes_found,
568
  power_estimates = result.power_estimates,
@@ -578,8 +1527,7 @@ def analyze_effect_size(request: EffectSizeRequest):
578
  description = f.description,
579
  evidence = f.evidence,
580
  suggestion = f.suggestion,
581
- )
582
- for f in result.flags
583
  ],
584
  flags_count = result.flags_count,
585
  )
@@ -587,30 +1535,13 @@ def analyze_effect_size(request: EffectSizeRequest):
587
  raise HTTPException(status_code=500, detail=str(e))
588
 
589
 
590
- class RetractionRequest(BaseModel):
591
- text: str = Field(..., min_length=50)
592
-
593
- class RetractionFlagResponse(BaseModel):
594
- flag_type: str
595
- severity: str
596
- description: str
597
- evidence: str
598
- suggestion: str
599
-
600
- class RetractionResponse(BaseModel):
601
- dois_found: list
602
- retracted_found: list
603
- checked_count: int
604
- retraction_score: float
605
- risk_level: str
606
- summary: str
607
- flags: list[RetractionFlagResponse]
608
- flags_count: int
609
-
610
  @router.post('/analyze/retraction', response_model=RetractionResponse)
611
  def analyze_retraction(request: RetractionRequest):
 
612
  try:
613
- result = _retraction_engine.analyze(_truncate(request.text))
 
 
614
  return RetractionResponse(
615
  dois_found = result.dois_found,
616
  retracted_found = result.retracted_found,
@@ -625,8 +1556,7 @@ def analyze_retraction(request: RetractionRequest):
625
  description = f.description,
626
  evidence = f.evidence,
627
  suggestion = f.suggestion,
628
- )
629
- for f in result.flags
630
  ],
631
  flags_count = result.flags_count,
632
  )
@@ -634,31 +1564,13 @@ def analyze_retraction(request: RetractionRequest):
634
  raise HTTPException(status_code=500, detail=str(e))
635
 
636
 
637
- class CartelRequest(BaseModel):
638
- text: str = Field(..., min_length=50)
639
-
640
- class CartelFlagResponse(BaseModel):
641
- flag_type: str
642
- severity: str
643
- description: str
644
- evidence: str
645
- suggestion: str
646
-
647
- class CartelResponse(BaseModel):
648
- authors_found: list
649
- citation_network: dict
650
- cartel_score: float
651
- self_citation_ratio: float
652
- network_diversity: float
653
- risk_level: str
654
- summary: str
655
- flags: list[CartelFlagResponse]
656
- flags_count: int
657
-
658
  @router.post('/analyze/cartel', response_model=CartelResponse)
659
  def analyze_cartel(request: CartelRequest):
 
660
  try:
661
- result = _cartel_engine.analyze(_truncate(request.text))
 
 
662
  return CartelResponse(
663
  authors_found = result.authors_found,
664
  citation_network = result.citation_network,
@@ -674,8 +1586,7 @@ def analyze_cartel(request: CartelRequest):
674
  description = f.description,
675
  evidence = f.evidence,
676
  suggestion = f.suggestion,
677
- )
678
- for f in result.flags
679
  ],
680
  flags_count = result.flags_count,
681
  )
@@ -683,31 +1594,13 @@ def analyze_cartel(request: CartelRequest):
683
  raise HTTPException(status_code=500, detail=str(e))
684
 
685
 
686
- class LLMRequest(BaseModel):
687
- text: str = Field(..., min_length=50)
688
-
689
- class LLMFlagResponse(BaseModel):
690
- flag_type: str
691
- severity: str
692
- description: str
693
- evidence: str
694
- suggestion: str
695
-
696
- class LLMResponse(BaseModel):
697
- burstiness_score: float
698
- vocabulary_diversity: float
699
- sentence_uniformity: float
700
- llm_phrase_count: int
701
- llm_score: float
702
- risk_level: str
703
- summary: str
704
- flags: list[LLMFlagResponse]
705
- flags_count: int
706
-
707
  @router.post('/analyze/llm', response_model=LLMResponse)
708
  def analyze_llm(request: LLMRequest):
 
709
  try:
710
- result = _llm_engine.analyze(_truncate(request.text))
 
 
711
  return LLMResponse(
712
  burstiness_score = result.burstiness_score,
713
  vocabulary_diversity = result.vocabulary_diversity,
@@ -723,10 +1616,9 @@ def analyze_llm(request: LLMRequest):
723
  description = f.description,
724
  evidence = f.evidence,
725
  suggestion = f.suggestion,
726
- )
727
- for f in result.flags
728
  ],
729
  flags_count = result.flags_count,
730
  )
731
  except Exception as e:
732
- raise HTTPException(status_code=500, detail=str(e))
 
1
+ # # src/scipeerai/api/routes.py
2
+ # import os
3
+ # import tempfile
4
+ # from fastapi import APIRouter, HTTPException, UploadFile, File
5
+ # from pydantic import BaseModel, Field
6
+ # from src.scipeerai.modules.reproducibility_scanner import ReproducibilityScanner
7
+ # from src.scipeerai.modules.stat_audit import StatAuditEngine
8
+ # from src.scipeerai.modules.figure_forensics import FigureForensicsEngine
9
+ # from src.scipeerai.modules.methodology_checker import MethodologyChecker
10
+ # from src.scipeerai.modules.citation_analyzer import CitationAnalyzer
11
+ # from src.scipeerai.modules.novelty_scorer import NoveltyScorer
12
+ # from src.scipeerai.modules.grim_test import GrimTest
13
+ # from src.scipeerai.modules.sprite_test import SpriteTest
14
+ # from src.scipeerai.modules.granularity_analyzer import GranularityAnalyzer
15
+ # from src.scipeerai.modules.pcurve_analyzer import PCurveAnalyzer
16
+ # from src.scipeerai.modules.effect_size_validator import EffectSizeValidator
17
+ # from src.scipeerai.modules.retraction_checker import RetractionChecker
18
+ # from src.scipeerai.modules.citation_cartel import CitationCartelDetector
19
+ # from src.scipeerai.modules.llm_detector import LLMDetector
20
+
21
+ # router = APIRouter(prefix="/api/v1", tags=["Analysis"])
22
+
23
+ # # ── Smart text truncation — handles long papers ───────────────────────────────
24
+ # def _truncate(text: str, limit: int = 8000) -> str:
25
+ # """Smart truncation — keeps abstract + methods sections."""
26
+ # if len(text) <= limit:
27
+ # return text
28
+ # lower = text.lower()
29
+ # methods_idx = lower.find('method')
30
+ # if methods_idx > 0 and methods_idx < len(text) - 1000:
31
+ # start = text[:3000]
32
+ # middle = text[methods_idx:methods_idx + 4000]
33
+ # return start + " [...] " + middle
34
+ # return text[:limit]
35
+
36
+ # # ── Engine initialization ─────────────────────────────────────────────────────
37
+ # _stat_engine = StatAuditEngine()
38
+ # _figure_engine = FigureForensicsEngine()
39
+ # _method_engine = MethodologyChecker()
40
+ # _citation_engine = CitationAnalyzer()
41
+ # _repro_engine = ReproducibilityScanner()
42
+ # _novelty_engine = NoveltyScorer()
43
+ # _grim_engine = GrimTest()
44
+ # _sprite_engine = SpriteTest()
45
+ # _granularity_engine = GranularityAnalyzer()
46
+ # _pcurve_engine = PCurveAnalyzer()
47
+ # _effect_size_engine = EffectSizeValidator()
48
+ # _retraction_engine = RetractionChecker()
49
+ # _cartel_engine = CitationCartelDetector()
50
+ # _llm_engine = LLMDetector()
51
+
52
+ # # ── Request / Response Models ─────────────────────────────────────────────────
53
+
54
+ # class TextAnalysisRequest(BaseModel):
55
+ # text: str = Field(..., min_length=50, description="Paper text to analyze")
56
+
57
+ # class FlagResponse(BaseModel):
58
+ # flag_type: str
59
+ # severity: str
60
+ # description: str
61
+ # evidence: str
62
+ # suggestion: str = ""
63
+
64
+ # class ForensicFlagResponse(BaseModel):
65
+ # figures_involved: list
66
+
67
+ # class StatAuditResponse(BaseModel):
68
+ # risk_level: str
69
+ # risk_score: float
70
+ # summary: str
71
+ # flags: list[FlagResponse]
72
+ # p_values_found: list[float]
73
+ # sample_sizes_found: list[int]
74
+ # flags_count: int
75
+
76
+ # class FigureForensicsResponse(BaseModel):
77
+ # figures_found: int
78
+ # flags: list[ForensicFlagResponse]
79
+ # duplicate_pairs: list
80
+
81
+ # class MethodologyRequest(BaseModel):
82
+ # text: str = Field(..., min_length=50)
83
+ # abstract: str = Field("")
84
+
85
+ # class MethodologyFlagResponse(BaseModel):
86
+ # claim: str
87
+ # issue: str
88
+ # suggestion: str
89
+
90
+ # class MethodologyResponse(BaseModel):
91
+ # flags: list[MethodologyFlagResponse]
92
+ # claims_found: list[str]
93
+ # methods_found: list[str]
94
+ # llm_assessment: str
95
+ # llm_available: bool
96
+
97
+ # class CitationRequest(BaseModel):
98
+ # text: str = Field(..., min_length=50)
99
+ # author_name: str = Field("")
100
+
101
+ # class CitationFlagResponse(BaseModel):
102
+ # flag_type: str
103
+ # severity: str
104
+ # description: str
105
+ # evidence: str
106
+ # suggestion: str = ""
107
+
108
+ # class CitationResponse(BaseModel):
109
+ # total_citations: int
110
+ # self_citations: int
111
+ # self_citation_ratio: float
112
+ # unsupported_claims: int
113
+ # flags: list[CitationFlagResponse]
114
+ # risk_level: str
115
+ # risk_score: float
116
+ # summary: str
117
+ # flags_count: int
118
+
119
+ # class ReproducibilityRequest(BaseModel):
120
+ # text: str = Field(..., min_length=50)
121
+
122
+ # class ReproducibilityFlagResponse(BaseModel):
123
+ # flag_type: str
124
+ # severity: str
125
+ # description: str
126
+ # evidence: str
127
+ # suggestion: str = ""
128
+
129
+ # class ReproducibilityResponse(BaseModel):
130
+ # has_code_link: bool
131
+ # has_data_link: bool
132
+ # has_software_versions: bool
133
+ # has_preregistration: bool
134
+ # has_ethics_statement: bool
135
+ # reproducibility_score: float
136
+ # risk_level: str
137
+ # summary: str
138
+ # flags: list[ReproducibilityFlagResponse]
139
+ # flags_count: int
140
+
141
+ # class NoveltyRequest(BaseModel):
142
+ # text: str = Field(..., min_length=50)
143
+ # title: str = Field("")
144
+
145
+ # class NoveltyFlagResponse(BaseModel):
146
+ # flag_type: str
147
+ # severity: str
148
+ # description: str
149
+ # evidence: str
150
+ # suggestion: str = ""
151
+
152
+ # class RelatedWorkResponse(BaseModel):
153
+ # title: str
154
+ # year: int
155
+ # authors: list
156
+ # similarity_signal: str
157
+
158
+ # class NoveltyResponse(BaseModel):
159
+ # novelty_score: float
160
+ # novelty_level: str
161
+ # risk_level: str
162
+ # risk_score: float
163
+ # summary: str
164
+ # flags: list[NoveltyFlagResponse]
165
+ # related_works_found: list[RelatedWorkResponse]
166
+ # key_terms_extracted: list[str]
167
+ # literature_accessible: bool
168
+ # flags_count: int
169
+
170
+ # class GrimRequest(BaseModel):
171
+ # text: str = Field(..., min_length=50)
172
+
173
+ # class GrimFlagResponse(BaseModel):
174
+ # flag_type: str
175
+ # severity: str
176
+ # description: str
177
+ # evidence: str
178
+ # suggestion: str
179
+
180
+ # class GrimResponse(BaseModel):
181
+ # impossible_means: list
182
+ # possible_means: list
183
+ # grim_score: float
184
+ # risk_level: str
185
+ # summary: str
186
+ # flags: list[GrimFlagResponse]
187
+ # flags_count: int
188
+
189
+ # class SpriteRequest(BaseModel):
190
+ # text: str = Field(..., min_length=50)
191
+
192
+ # class SpriteFlagResponse(BaseModel):
193
+ # flag_type: str
194
+ # severity: str
195
+ # description: str
196
+ # evidence: str
197
+ # suggestion: str
198
+
199
+ # class SpriteResponse(BaseModel):
200
+ # impossible_combinations: list
201
+ # possible_combinations: list
202
+ # sprite_score: float
203
+ # risk_level: str
204
+ # summary: str
205
+ # flags: list[SpriteFlagResponse]
206
+ # flags_count: int
207
+
208
+ # class GranularityRequest(BaseModel):
209
+ # text: str = Field(..., min_length=50)
210
+
211
+ # class GranularityFlagResponse(BaseModel):
212
+ # flag_type: str
213
+ # severity: str
214
+ # description: str
215
+ # evidence: str
216
+ # suggestion: str
217
+
218
+ # class GranularityResponse(BaseModel):
219
+ # digit_preference_score: float
220
+ # benford_score: float
221
+ # round_number_ratio: float
222
+ # granularity_score: float
223
+ # risk_level: str
224
+ # summary: str
225
+ # flags: list[GranularityFlagResponse]
226
+ # flags_count: int
227
+
228
+ # class PCurveRequest(BaseModel):
229
+ # text: str = Field(..., min_length=50)
230
+
231
+ # class PCurveFlagResponse(BaseModel):
232
+ # flag_type: str
233
+ # severity: str
234
+ # description: str
235
+ # evidence: str
236
+ # suggestion: str
237
+
238
+ # class PCurveResponse(BaseModel):
239
+ # p_values_found: list
240
+ # significant_p: list
241
+ # right_skew_ratio: float
242
+ # clustering_score: float
243
+ # pcurve_score: float
244
+ # risk_level: str
245
+ # summary: str
246
+ # flags: list[PCurveFlagResponse]
247
+ # flags_count: int
248
+
249
+ # # ── Endpoints ─────────────────────────────────────────────────────────────────
250
+
251
+ # @router.get("/status")
252
+ # def system_status():
253
+ # return {
254
+ # "modules_ready": {
255
+ # "stat_audit": True,
256
+ # "figure_forensics": True,
257
+ # "methodology_checker": True,
258
+ # "citation_analyzer": True,
259
+ # "reproducibility": True,
260
+ # "novelty_scorer": True,
261
+ # "grim_test": True,
262
+ # "sprite_test": True,
263
+ # "granularity": True,
264
+ # "pcurve": True,
265
+ # },
266
+ # "version": "1.4.0",
267
+ # }
268
+
269
+ # @router.post("/analyze/statistics", response_model=StatAuditResponse)
270
+ # def analyze_statistics(request: TextAnalysisRequest):
271
+ # """Analyze paper for statistical integrity issues."""
272
+ # try:
273
+ # result = _stat_engine.analyze(_truncate(request.text))
274
+ # return StatAuditResponse(
275
+ # risk_level=result.risk_level,
276
+ # risk_score=result.risk_score,
277
+ # summary=result.summary,
278
+ # flags=[
279
+ # FlagResponse(
280
+ # flag_type=f.flag_type,
281
+ # severity=f.severity,
282
+ # description=f.description,
283
+ # evidence=f.evidence,
284
+ # suggestion=f.suggestion,
285
+ # ) for f in result.flags
286
+ # ],
287
+ # p_values_found=result.p_values_found,
288
+ # sample_sizes_found=result.sample_sizes_found,
289
+ # flags_count=len(result.flags),
290
+ # )
291
+ # except Exception as e:
292
+ # raise HTTPException(status_code=500, detail=str(e))
293
+
294
+ # @router.post("/analyze/figures", response_model=FigureForensicsResponse)
295
+ # async def analyze_figures(file: UploadFile = File(...)):
296
+ # """Upload PDF and analyze figures for forensic anomalies."""
297
+ # if not file.filename.endswith(".pdf"):
298
+ # raise HTTPException(status_code=400, detail="Only PDF files accepted.")
299
+ # tmp_path = None
300
+ # try:
301
+ # contents = await file.read()
302
+ # with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
303
+ # tmp.write(contents)
304
+ # tmp_path = tmp.name
305
+ # result = _figure_engine.analyze(tmp_path)
306
+ # return FigureForensicsResponse(
307
+ # figures_found=result.figures_found,
308
+ # flags=[ForensicFlagResponse(figures_involved=f.figures_involved)
309
+ # for f in result.flags],
310
+ # duplicate_pairs=result.duplicate_pairs,
311
+ # )
312
+ # except HTTPException:
313
+ # raise
314
+ # except Exception as e:
315
+ # raise HTTPException(status_code=500, detail=str(e))
316
+ # finally:
317
+ # if tmp_path and os.path.exists(tmp_path):
318
+ # os.unlink(tmp_path)
319
+
320
+ # @router.post("/analyze/methodology", response_model=MethodologyResponse)
321
+ # def analyze_methodology(request: MethodologyRequest):
322
+ # """Analyze paper for methodology logic issues."""
323
+ # try:
324
+ # result = _method_engine.analyze(
325
+ # _truncate(request.text), request.abstract
326
+ # )
327
+ # return MethodologyResponse(
328
+ # flags=[
329
+ # MethodologyFlagResponse(
330
+ # claim=f.claim,
331
+ # issue=f.issue,
332
+ # suggestion=f.suggestion,
333
+ # ) for f in result.flags
334
+ # ],
335
+ # claims_found=result.claims_found,
336
+ # methods_found=result.methods_found,
337
+ # llm_assessment=result.llm_assessment,
338
+ # llm_available=result.llm_available,
339
+ # )
340
+ # except Exception as e:
341
+ # raise HTTPException(status_code=500, detail=str(e))
342
+
343
+ # @router.post("/analyze/citations", response_model=CitationResponse)
344
+ # def analyze_citations(request: CitationRequest):
345
+ # """Analyze citations for integrity issues."""
346
+ # try:
347
+ # result = _citation_engine.analyze(
348
+ # _truncate(request.text), request.author_name
349
+ # )
350
+ # return CitationResponse(
351
+ # total_citations=result.total_citations,
352
+ # self_citations=result.self_citations,
353
+ # self_citation_ratio=result.self_citation_ratio,
354
+ # unsupported_claims=result.unsupported_claims,
355
+ # flags=[
356
+ # CitationFlagResponse(
357
+ # flag_type=f.flag_type,
358
+ # severity=f.severity,
359
+ # description=f.description,
360
+ # evidence=f.evidence,
361
+ # suggestion=getattr(f, 'suggestion', ''),
362
+ # ) for f in result.flags
363
+ # ],
364
+ # risk_level=result.risk_level,
365
+ # risk_score=result.risk_score,
366
+ # summary=result.summary,
367
+ # flags_count=len(result.flags),
368
+ # )
369
+ # except Exception as e:
370
+ # raise HTTPException(status_code=500, detail=str(e))
371
+
372
+ # @router.post("/analyze/reproducibility", response_model=ReproducibilityResponse)
373
+ # def analyze_reproducibility(request: ReproducibilityRequest):
374
+ # """Scan paper for reproducibility indicators."""
375
+ # try:
376
+ # result = _repro_engine.analyze(_truncate(request.text))
377
+ # return ReproducibilityResponse(
378
+ # has_code_link=result.has_code_link,
379
+ # has_data_link=result.has_data_link,
380
+ # has_software_versions=result.has_software_versions,
381
+ # has_preregistration=result.has_preregistration,
382
+ # has_ethics_statement=result.has_ethics_statement,
383
+ # reproducibility_score=result.reproducibility_score,
384
+ # risk_level=result.risk_level,
385
+ # summary=result.summary,
386
+ # flags=[
387
+ # ReproducibilityFlagResponse(
388
+ # flag_type=f.flag_type,
389
+ # severity=f.severity,
390
+ # description=f.description,
391
+ # evidence=f.evidence,
392
+ # suggestion=getattr(f, 'suggestion', ''),
393
+ # ) for f in result.flags
394
+ # ],
395
+ # flags_count=len(result.flags),
396
+ # )
397
+ # except Exception as e:
398
+ # raise HTTPException(status_code=500, detail=str(e))
399
+
400
+ # @router.post("/analyze/novelty", response_model=NoveltyResponse)
401
+ # def analyze_novelty(request: NoveltyRequest):
402
+ # """Estimate paper novelty against existing literature."""
403
+ # try:
404
+ # result = _novelty_engine.analyze(
405
+ # _truncate(request.text, 4000), request.title
406
+ # )
407
+ # raw_flags = getattr(result, 'flags', []) or []
408
+ # return NoveltyResponse(
409
+ # novelty_score=result.novelty_score,
410
+ # novelty_level=result.novelty_level,
411
+ # risk_level=result.risk_level,
412
+ # risk_score=getattr(result, 'risk_score', result.novelty_score),
413
+ # summary=result.summary,
414
+ # flags=[
415
+ # NoveltyFlagResponse(
416
+ # flag_type=f.flag_type,
417
+ # severity=f.severity,
418
+ # description=f.description,
419
+ # evidence=f.evidence,
420
+ # suggestion=getattr(f, 'suggestion', ''),
421
+ # ) for f in raw_flags
422
+ # ],
423
+ # related_works_found=[
424
+ # RelatedWorkResponse(
425
+ # title=w.title,
426
+ # year=w.year,
427
+ # authors=w.authors,
428
+ # similarity_signal=w.similarity_signal,
429
+ # ) for w in result.related_works_found
430
+ # ],
431
+ # key_terms_extracted=result.key_terms_extracted,
432
+ # literature_accessible=result.literature_accessible,
433
+ # flags_count=len(raw_flags),
434
+ # )
435
+ # except Exception as e:
436
+ # raise HTTPException(status_code=500, detail=str(e))
437
+
438
+ # @router.post("/analyze/grim", response_model=GrimResponse)
439
+ # def analyze_grim(request: GrimRequest):
440
+ # """GRIM Test — detect mathematically impossible means."""
441
+ # try:
442
+ # result = _grim_engine.analyze(_truncate(request.text))
443
+ # return GrimResponse(
444
+ # impossible_means=result.impossible_means,
445
+ # possible_means=result.possible_means,
446
+ # grim_score=result.grim_score,
447
+ # risk_level=result.risk_level,
448
+ # summary=result.summary,
449
+ # flags=[
450
+ # GrimFlagResponse(
451
+ # flag_type=f.flag_type,
452
+ # severity=f.severity,
453
+ # description=f.description,
454
+ # evidence=f.evidence,
455
+ # suggestion=f.suggestion,
456
+ # ) for f in result.flags
457
+ # ],
458
+ # flags_count=result.flags_count,
459
+ # )
460
+ # except Exception as e:
461
+ # raise HTTPException(status_code=500, detail=str(e))
462
+
463
+ # @router.post("/analyze/sprite", response_model=SpriteResponse)
464
+ # def analyze_sprite(request: SpriteRequest):
465
+ # """SPRITE Test — detect impossible distributions."""
466
+ # try:
467
+ # result = _sprite_engine.analyze(_truncate(request.text))
468
+ # return SpriteResponse(
469
+ # impossible_combinations=result.impossible_combinations,
470
+ # possible_combinations=result.possible_combinations,
471
+ # sprite_score=result.sprite_score,
472
+ # risk_level=result.risk_level,
473
+ # summary=result.summary,
474
+ # flags=[
475
+ # SpriteFlagResponse(
476
+ # flag_type=f.flag_type,
477
+ # severity=f.severity,
478
+ # description=f.description,
479
+ # evidence=f.evidence,
480
+ # suggestion=f.suggestion,
481
+ # ) for f in result.flags
482
+ # ],
483
+ # flags_count=result.flags_count,
484
+ # )
485
+ # except Exception as e:
486
+ # raise HTTPException(status_code=500, detail=str(e))
487
+
488
+ # @router.post("/analyze/granularity", response_model=GranularityResponse)
489
+ # def analyze_granularity(request: GranularityRequest):
490
+ # """Granularity Analyzer — Benford Law + digit preference."""
491
+ # try:
492
+ # result = _granularity_engine.analyze(_truncate(request.text))
493
+ # return GranularityResponse(
494
+ # digit_preference_score=result.digit_preference_score,
495
+ # benford_score=result.benford_score,
496
+ # round_number_ratio=result.round_number_ratio,
497
+ # granularity_score=result.granularity_score,
498
+ # risk_level=result.risk_level,
499
+ # summary=result.summary,
500
+ # flags=[
501
+ # GranularityFlagResponse(
502
+ # flag_type=f.flag_type,
503
+ # severity=f.severity,
504
+ # description=f.description,
505
+ # evidence=f.evidence,
506
+ # suggestion=f.suggestion,
507
+ # ) for f in result.flags
508
+ # ],
509
+ # flags_count=result.flags_count,
510
+ # )
511
+ # except Exception as e:
512
+ # raise HTTPException(status_code=500, detail=str(e))
513
+
514
+ # @router.post("/analyze/pcurve", response_model=PCurveResponse)
515
+ # def analyze_pcurve(request: PCurveRequest):
516
+ # """P-Curve Analyzer — publication bias detector."""
517
+ # try:
518
+ # result = _pcurve_engine.analyze(_truncate(request.text))
519
+ # return PCurveResponse(
520
+ # p_values_found=result.p_values_found,
521
+ # significant_p=result.significant_p,
522
+ # right_skew_ratio=result.right_skew_ratio,
523
+ # clustering_score=result.clustering_score,
524
+ # pcurve_score=result.pcurve_score,
525
+ # risk_level=result.risk_level,
526
+ # summary=result.summary,
527
+ # flags=[
528
+ # PCurveFlagResponse(
529
+ # flag_type=f.flag_type,
530
+ # severity=f.severity,
531
+ # description=f.description,
532
+ # evidence=f.evidence,
533
+ # suggestion=f.suggestion,
534
+ # ) for f in result.flags
535
+ # ],
536
+ # flags_count=result.flags_count,
537
+ # )
538
+ # except Exception as e:
539
+ # raise HTTPException(status_code=500, detail=str(e))
540
+
541
+ # class EffectSizeRequest(BaseModel):
542
+ # text: str = Field(..., min_length=50)
543
+
544
+ # class EffectSizeFlagResponse(BaseModel):
545
+ # flag_type: str
546
+ # severity: str
547
+ # description: str
548
+ # evidence: str
549
+ # suggestion: str
550
+
551
+ # class EffectSizeResponse(BaseModel):
552
+ # effect_sizes_found: list
553
+ # power_estimates: list
554
+ # inflated_effects: list
555
+ # underpowered: list
556
+ # effect_score: float
557
+ # risk_level: str
558
+ # summary: str
559
+ # flags: list[EffectSizeFlagResponse]
560
+ # flags_count: int
561
+
562
+ # @router.post('/analyze/effect_size', response_model=EffectSizeResponse)
563
+ # def analyze_effect_size(request: EffectSizeRequest):
564
+ # try:
565
+ # result = _effect_size_engine.analyze(_truncate(request.text))
566
+ # return EffectSizeResponse(
567
+ # effect_sizes_found = result.effect_sizes_found,
568
+ # power_estimates = result.power_estimates,
569
+ # inflated_effects = result.inflated_effects,
570
+ # underpowered = result.underpowered,
571
+ # effect_score = result.effect_score,
572
+ # risk_level = result.risk_level,
573
+ # summary = result.summary,
574
+ # flags = [
575
+ # EffectSizeFlagResponse(
576
+ # flag_type = f.flag_type,
577
+ # severity = f.severity,
578
+ # description = f.description,
579
+ # evidence = f.evidence,
580
+ # suggestion = f.suggestion,
581
+ # )
582
+ # for f in result.flags
583
+ # ],
584
+ # flags_count = result.flags_count,
585
+ # )
586
+ # except Exception as e:
587
+ # raise HTTPException(status_code=500, detail=str(e))
588
+
589
+
590
+ # class RetractionRequest(BaseModel):
591
+ # text: str = Field(..., min_length=50)
592
+
593
+ # class RetractionFlagResponse(BaseModel):
594
+ # flag_type: str
595
+ # severity: str
596
+ # description: str
597
+ # evidence: str
598
+ # suggestion: str
599
+
600
+ # class RetractionResponse(BaseModel):
601
+ # dois_found: list
602
+ # retracted_found: list
603
+ # checked_count: int
604
+ # retraction_score: float
605
+ # risk_level: str
606
+ # summary: str
607
+ # flags: list[RetractionFlagResponse]
608
+ # flags_count: int
609
+
610
+ # @router.post('/analyze/retraction', response_model=RetractionResponse)
611
+ # def analyze_retraction(request: RetractionRequest):
612
+ # try:
613
+ # result = _retraction_engine.analyze(_truncate(request.text))
614
+ # return RetractionResponse(
615
+ # dois_found = result.dois_found,
616
+ # retracted_found = result.retracted_found,
617
+ # checked_count = result.checked_count,
618
+ # retraction_score = result.retraction_score,
619
+ # risk_level = result.risk_level,
620
+ # summary = result.summary,
621
+ # flags = [
622
+ # RetractionFlagResponse(
623
+ # flag_type = f.flag_type,
624
+ # severity = f.severity,
625
+ # description = f.description,
626
+ # evidence = f.evidence,
627
+ # suggestion = f.suggestion,
628
+ # )
629
+ # for f in result.flags
630
+ # ],
631
+ # flags_count = result.flags_count,
632
+ # )
633
+ # except Exception as e:
634
+ # raise HTTPException(status_code=500, detail=str(e))
635
+
636
+
637
+ # class CartelRequest(BaseModel):
638
+ # text: str = Field(..., min_length=50)
639
+
640
+ # class CartelFlagResponse(BaseModel):
641
+ # flag_type: str
642
+ # severity: str
643
+ # description: str
644
+ # evidence: str
645
+ # suggestion: str
646
+
647
+ # class CartelResponse(BaseModel):
648
+ # authors_found: list
649
+ # citation_network: dict
650
+ # cartel_score: float
651
+ # self_citation_ratio: float
652
+ # network_diversity: float
653
+ # risk_level: str
654
+ # summary: str
655
+ # flags: list[CartelFlagResponse]
656
+ # flags_count: int
657
+
658
+ # @router.post('/analyze/cartel', response_model=CartelResponse)
659
+ # def analyze_cartel(request: CartelRequest):
660
+ # try:
661
+ # result = _cartel_engine.analyze(_truncate(request.text))
662
+ # return CartelResponse(
663
+ # authors_found = result.authors_found,
664
+ # citation_network = result.citation_network,
665
+ # cartel_score = result.cartel_score,
666
+ # self_citation_ratio = result.self_citation_ratio,
667
+ # network_diversity = result.network_diversity,
668
+ # risk_level = result.risk_level,
669
+ # summary = result.summary,
670
+ # flags = [
671
+ # CartelFlagResponse(
672
+ # flag_type = f.flag_type,
673
+ # severity = f.severity,
674
+ # description = f.description,
675
+ # evidence = f.evidence,
676
+ # suggestion = f.suggestion,
677
+ # )
678
+ # for f in result.flags
679
+ # ],
680
+ # flags_count = result.flags_count,
681
+ # )
682
+ # except Exception as e:
683
+ # raise HTTPException(status_code=500, detail=str(e))
684
+
685
+
686
+ # class LLMRequest(BaseModel):
687
+ # text: str = Field(..., min_length=50)
688
+
689
+ # class LLMFlagResponse(BaseModel):
690
+ # flag_type: str
691
+ # severity: str
692
+ # description: str
693
+ # evidence: str
694
+ # suggestion: str
695
+
696
+ # class LLMResponse(BaseModel):
697
+ # burstiness_score: float
698
+ # vocabulary_diversity: float
699
+ # sentence_uniformity: float
700
+ # llm_phrase_count: int
701
+ # llm_score: float
702
+ # risk_level: str
703
+ # summary: str
704
+ # flags: list[LLMFlagResponse]
705
+ # flags_count: int
706
+
707
+ # @router.post('/analyze/llm', response_model=LLMResponse)
708
+ # def analyze_llm(request: LLMRequest):
709
+ # try:
710
+ # result = _llm_engine.analyze(_truncate(request.text))
711
+ # return LLMResponse(
712
+ # burstiness_score = result.burstiness_score,
713
+ # vocabulary_diversity = result.vocabulary_diversity,
714
+ # sentence_uniformity = result.sentence_uniformity,
715
+ # llm_phrase_count = result.llm_phrase_count,
716
+ # llm_score = result.llm_score,
717
+ # risk_level = result.risk_level,
718
+ # summary = result.summary,
719
+ # flags = [
720
+ # LLMFlagResponse(
721
+ # flag_type = f.flag_type,
722
+ # severity = f.severity,
723
+ # description = f.description,
724
+ # evidence = f.evidence,
725
+ # suggestion = f.suggestion,
726
+ # )
727
+ # for f in result.flags
728
+ # ],
729
+ # flags_count = result.flags_count,
730
+ # )
731
+ # except Exception as e:
732
+ # raise HTTPException(status_code=500, detail=str(e))
733
+
734
+
735
  import os
736
  import tempfile
737
  from fastapi import APIRouter, HTTPException, UploadFile, File
 
753
 
754
  router = APIRouter(prefix="/api/v1", tags=["Analysis"])
755
 
756
+ # ── Section-aware text extraction (replaces flat truncation) ───────────────
757
+
758
+ _SECTION_MARKERS = [
759
+ "abstract", "introduction", "background", "related work",
760
+ "methods", "methodology", "materials and methods",
761
+ "experimental", "experiments", "procedures",
762
+ "results", "findings", "data analysis",
763
+ "discussion", "conclusion", "conclusions",
764
+ "references", "bibliography", "acknowledgments",
765
+ "supplementary", "appendix",
766
+ ]
767
+
768
+ _MODULE_SECTIONS = {
769
+ "statistics": ["abstract", "results", "findings",
770
+ "data analysis", "methods", "methodology"],
771
+ "methodology": ["abstract", "introduction", "methods",
772
+ "methodology", "materials and methods",
773
+ "experimental", "conclusion", "conclusions"],
774
+ "citations": ["introduction", "background",
775
+ "related work", "references", "bibliography"],
776
+ "reproducibility": ["methods", "methodology",
777
+ "materials and methods", "experimental",
778
+ "procedures", "acknowledgments"],
779
+ "novelty": ["abstract", "introduction",
780
+ "background", "related work"],
781
+ "grim": ["results", "findings", "methods",
782
+ "methodology", "data analysis"],
783
+ "sprite": ["results", "findings", "methods",
784
+ "methodology", "data analysis"],
785
+ "granularity": ["results", "findings",
786
+ "methods", "data analysis"],
787
+ "pcurve": ["abstract", "results",
788
+ "findings", "data analysis"],
789
+ "effect_size": ["results", "findings",
790
+ "methods", "discussion"],
791
+ "retraction": ["references", "bibliography", "introduction"],
792
+ "cartel": ["references", "bibliography",
793
+ "introduction", "acknowledgments"],
794
+ "llm": ["abstract", "introduction", "methods",
795
+ "results", "discussion"],
796
+ }
797
+
798
+
799
def _extract_sections(text: str) -> dict:
    """
    Split plain academic text into named sections.

    A line is treated as a heading when, after stripping, it is at most
    60 characters long and *starts* with one of the names in
    _SECTION_MARKERS (case-insensitive), optionally preceded by markdown
    markup or numbering such as "2.", "1.3", "IV." or "A)". Requiring the
    name at the start of the line keeps ordinary short lines that merely
    mention a word like "results" from being mistaken for headings.

    Only the first heading found for each name is used. Each section runs
    from the start of its heading line to the start of the next heading
    (or to the end of the text) and is returned stripped.

    Returns a dict of section_name -> section_text; empty when no
    heading is found.
    """
    import re  # function-scope import keeps this block self-contained

    max_heading_len = 60

    # Longest names first so "conclusions" is preferred over "conclusion".
    names = sorted(_SECTION_MARKERS, key=len, reverse=True)
    heading_re = re.compile(
        r"[\s#*]*"                                  # markdown / emphasis markup
        r"(?:(?:\d{1,2}(?:\.\d{1,2})*[.):]?"        # 1  1.  1.2  3)
        r"|[ivxlcdm]+[.):]"                         # iv.  (roman numerals)
        r"|[a-z][.):])\s*)?"                        # a.  b)
        r"(" + "|".join(re.escape(name) for name in names) + r")\b"
    )

    # Offsets are measured on the original text (never on a lowered copy,
    # whose length can differ for some Unicode characters).
    first_seen = {}
    offset = 0
    for line in text.splitlines(keepends=True):
        stripped = line.strip()
        if stripped and len(stripped) <= max_heading_len:
            hit = heading_re.match(stripped.lower())
            if hit:
                first_seen.setdefault(
                    hit.group(1), offset + line.index(stripped)
                )
        offset += len(line)

    ordered = sorted(first_seen.items(), key=lambda item: item[1])
    ends = [start for _, start in ordered[1:]] + [len(text)]
    return {
        name: text[start:end].strip()
        for (name, start), end in zip(ordered, ends)
    }
840
+
841
+
842
def _smart_text(text: str, module: str,
                per_section_limit: int = 2500,
                total_limit: int = 12000) -> str:
    """
    Route paper text to the sections a given analysis module reads.

    The sections wanted by each module are listed in _MODULE_SECTIONS
    (for example, "statistics" reads the abstract, results and methods
    headings). Every wanted section that _extract_sections found is cut
    to `per_section_limit` characters; the pieces are joined with blank
    lines in the module's listed order and the result is cut to
    `total_limit` characters.

    Falls back to _truncate(text) when the module is unknown, when no
    section headers are found, or when none of the wanted sections
    is present.
    """
    wanted = _MODULE_SECTIONS.get(module, [])
    if wanted:
        sections = _extract_sections(text)
        parts = [
            sections[key][:per_section_limit]
            for key in wanted
            if key in sections
        ]
        if parts:
            return "\n\n".join(parts)[:total_limit]

    return _truncate(text)
866
+
867
+
868
  def _truncate(text: str, limit: int = 8000) -> str:
869
+ """
870
+ Fallback flat truncation.
871
+ Used when paper has no recognisable section headers.
872
+ Tries to keep Abstract + Methods at minimum.
873
+ """
874
  if len(text) <= limit:
875
  return text
876
+ lower = text.lower()
877
  methods_idx = lower.find('method')
878
+ if 0 < methods_idx < len(text) - 1000:
879
  start = text[:3000]
880
  middle = text[methods_idx:methods_idx + 4000]
881
  return start + " [...] " + middle
882
  return text[:limit]
883
 
884
+
885
  # ── Engine initialization ─────────────────────────────────────────────────────
886
  _stat_engine = StatAuditEngine()
887
  _figure_engine = FigureForensicsEngine()
 
892
  _grim_engine = GrimTest()
893
  _sprite_engine = SpriteTest()
894
  _granularity_engine = GranularityAnalyzer()
895
+ _pcurve_engine = PCurveAnalyzer()
896
+ _effect_size_engine = EffectSizeValidator()
897
+ _retraction_engine = RetractionChecker()
898
+ _cartel_engine = CitationCartelDetector()
899
+ _llm_engine = LLMDetector()
900
+
901
 
902
  # ── Request / Response Models ─────────────────────────────────────────────────
903
 
 
1096
  flags: list[PCurveFlagResponse]
1097
  flags_count: int
1098
 
1099
class EffectSizeRequest(BaseModel):
    # Request body of the effect_size endpoint (analyze_effect_size).
    # `text` is required and must contain at least 50 characters.
    text: str = Field(..., min_length=50)
1101
+
1102
class EffectSizeFlagResponse(BaseModel):
    # One entry of EffectSizeResponse.flags; all five fields are required
    # strings copied from the engine's flag objects.
    flag_type: str
    severity: str
    description: str
    evidence: str
    suggestion: str
1108
+
1109
class EffectSizeResponse(BaseModel):
    # Response schema of the effect_size endpoint (analyze_effect_size).
    # The four list fields are untyped lists passed through from the
    # engine result.
    effect_sizes_found: list
    power_estimates: list
    inflated_effects: list
    underpowered: list
    effect_score: float
    risk_level: str
    summary: str
    flags: list[EffectSizeFlagResponse]
    flags_count: int
1119
+
1120
class RetractionRequest(BaseModel):
    # Request body of the retraction endpoint (analyze_retraction).
    # `text` is required and must contain at least 50 characters.
    text: str = Field(..., min_length=50)
1122
+
1123
class RetractionFlagResponse(BaseModel):
    # One entry of RetractionResponse.flags; all five fields are required
    # strings copied from the engine's flag objects.
    flag_type: str
    severity: str
    description: str
    evidence: str
    suggestion: str
1129
+
1130
class RetractionResponse(BaseModel):
    # Response schema of the retraction endpoint (analyze_retraction).
    # `dois_found` and `retracted_found` are untyped lists passed through
    # from the engine result.
    dois_found: list
    retracted_found: list
    checked_count: int
    retraction_score: float
    risk_level: str
    summary: str
    flags: list[RetractionFlagResponse]
    flags_count: int
1139
+
1140
class CartelRequest(BaseModel):
    # Request body of the cartel endpoint (analyze_cartel).
    # `text` is required and must contain at least 50 characters.
    text: str = Field(..., min_length=50)
1142
+
1143
class CartelFlagResponse(BaseModel):
    # One entry of CartelResponse.flags; all five fields are required
    # strings copied from the engine's flag objects.
    flag_type: str
    severity: str
    description: str
    evidence: str
    suggestion: str
1149
+
1150
class CartelResponse(BaseModel):
    # Response schema of the cartel endpoint (analyze_cartel).
    # `authors_found` (list) and `citation_network` (dict) are untyped
    # containers passed through from the engine result.
    authors_found: list
    citation_network: dict
    cartel_score: float
    self_citation_ratio: float
    network_diversity: float
    risk_level: str
    summary: str
    flags: list[CartelFlagResponse]
    flags_count: int
1160
+
1161
class LLMRequest(BaseModel):
    # Request body of the llm endpoint (analyze_llm).
    # `text` is required and must contain at least 50 characters.
    text: str = Field(..., min_length=50)
1163
+
1164
class LLMFlagResponse(BaseModel):
    # One entry of LLMResponse.flags; all five fields are required
    # strings copied from the engine's flag objects.
    flag_type: str
    severity: str
    description: str
    evidence: str
    suggestion: str
1170
+
1171
class LLMResponse(BaseModel):
    # Response schema of the llm endpoint (analyze_llm); scalar scores
    # plus the list of flags, all copied from the engine result.
    burstiness_score: float
    vocabulary_diversity: float
    sentence_uniformity: float
    llm_phrase_count: int
    llm_score: float
    risk_level: str
    summary: str
    flags: list[LLMFlagResponse]
    flags_count: int
1181
+
1182
+
1183
  # ── Endpoints ─────────────────────────────────────────────────────────────────
1184
 
1185
  @router.get("/status")
 
1196
  "sprite_test": True,
1197
  "granularity": True,
1198
  "pcurve": True,
1199
+ "effect_size": True,
1200
+ "retraction": True,
1201
+ "citation_cartel": True,
1202
+ "llm_detector": True,
1203
  },
1204
+ "version": "1.5.0",
1205
+ "text_extraction": "section-aware",
1206
  }
1207
 
1208
+
1209
  @router.post("/analyze/statistics", response_model=StatAuditResponse)
1210
  def analyze_statistics(request: TextAnalysisRequest):
1211
  """Analyze paper for statistical integrity issues."""
1212
  try:
1213
+ result = _stat_engine.analyze(
1214
+ _smart_text(request.text, "statistics")
1215
+ )
1216
  return StatAuditResponse(
1217
+ risk_level = result.risk_level,
1218
+ risk_score = result.risk_score,
1219
+ summary = result.summary,
1220
+ flags = [
1221
  FlagResponse(
1222
+ flag_type = f.flag_type,
1223
+ severity = f.severity,
1224
+ description = f.description,
1225
+ evidence = f.evidence,
1226
+ suggestion = f.suggestion,
1227
  ) for f in result.flags
1228
  ],
1229
+ p_values_found = result.p_values_found,
1230
+ sample_sizes_found = result.sample_sizes_found,
1231
+ flags_count = len(result.flags),
1232
  )
1233
  except Exception as e:
1234
  raise HTTPException(status_code=500, detail=str(e))
1235
 
1236
+
1237
  @router.post("/analyze/figures", response_model=FigureForensicsResponse)
1238
  async def analyze_figures(file: UploadFile = File(...)):
1239
  """Upload PDF and analyze figures for forensic anomalies."""
 
1247
  tmp_path = tmp.name
1248
  result = _figure_engine.analyze(tmp_path)
1249
  return FigureForensicsResponse(
1250
+ figures_found = result.figures_found,
1251
+ flags = [
1252
+ ForensicFlagResponse(figures_involved=f.figures_involved)
1253
+ for f in result.flags
1254
+ ],
1255
+ duplicate_pairs = result.duplicate_pairs,
1256
  )
1257
  except HTTPException:
1258
  raise
 
1262
  if tmp_path and os.path.exists(tmp_path):
1263
  os.unlink(tmp_path)
1264
 
1265
+
1266
  @router.post("/analyze/methodology", response_model=MethodologyResponse)
1267
  def analyze_methodology(request: MethodologyRequest):
1268
  """Analyze paper for methodology logic issues."""
1269
  try:
1270
  result = _method_engine.analyze(
1271
+ _smart_text(request.text, "methodology"),
1272
+ request.abstract,
1273
  )
1274
  return MethodologyResponse(
1275
+ flags = [
1276
  MethodologyFlagResponse(
1277
+ claim = f.claim,
1278
+ issue = f.issue,
1279
+ suggestion = f.suggestion,
1280
  ) for f in result.flags
1281
  ],
1282
+ claims_found = result.claims_found,
1283
+ methods_found = result.methods_found,
1284
+ llm_assessment = result.llm_assessment,
1285
+ llm_available = result.llm_available,
1286
  )
1287
  except Exception as e:
1288
  raise HTTPException(status_code=500, detail=str(e))
1289
 
1290
+
1291
  @router.post("/analyze/citations", response_model=CitationResponse)
1292
  def analyze_citations(request: CitationRequest):
1293
  """Analyze citations for integrity issues."""
1294
  try:
1295
  result = _citation_engine.analyze(
1296
+ _smart_text(request.text, "citations"),
1297
+ request.author_name,
1298
  )
1299
  return CitationResponse(
1300
+ total_citations = result.total_citations,
1301
+ self_citations = result.self_citations,
1302
+ self_citation_ratio = result.self_citation_ratio,
1303
+ unsupported_claims = result.unsupported_claims,
1304
+ flags = [
1305
  CitationFlagResponse(
1306
+ flag_type = f.flag_type,
1307
+ severity = f.severity,
1308
+ description = f.description,
1309
+ evidence = f.evidence,
1310
+ suggestion = getattr(f, 'suggestion', ''),
1311
  ) for f in result.flags
1312
  ],
1313
+ risk_level = result.risk_level,
1314
+ risk_score = result.risk_score,
1315
+ summary = result.summary,
1316
+ flags_count = len(result.flags),
1317
  )
1318
  except Exception as e:
1319
  raise HTTPException(status_code=500, detail=str(e))
1320
 
1321
+
1322
  @router.post("/analyze/reproducibility", response_model=ReproducibilityResponse)
1323
  def analyze_reproducibility(request: ReproducibilityRequest):
1324
  """Scan paper for reproducibility indicators."""
1325
  try:
1326
+ result = _repro_engine.analyze(
1327
+ _smart_text(request.text, "reproducibility")
1328
+ )
1329
  return ReproducibilityResponse(
1330
+ has_code_link = result.has_code_link,
1331
+ has_data_link = result.has_data_link,
1332
+ has_software_versions = result.has_software_versions,
1333
+ has_preregistration = result.has_preregistration,
1334
+ has_ethics_statement = result.has_ethics_statement,
1335
+ reproducibility_score = result.reproducibility_score,
1336
+ risk_level = result.risk_level,
1337
+ summary = result.summary,
1338
+ flags = [
1339
  ReproducibilityFlagResponse(
1340
+ flag_type = f.flag_type,
1341
+ severity = f.severity,
1342
+ description = f.description,
1343
+ evidence = f.evidence,
1344
+ suggestion = getattr(f, 'suggestion', ''),
1345
  ) for f in result.flags
1346
  ],
1347
+ flags_count = len(result.flags),
1348
  )
1349
  except Exception as e:
1350
  raise HTTPException(status_code=500, detail=str(e))
1351
 
1352
+
1353
  @router.post("/analyze/novelty", response_model=NoveltyResponse)
1354
  def analyze_novelty(request: NoveltyRequest):
1355
  """Estimate paper novelty against existing literature."""
1356
  try:
1357
  result = _novelty_engine.analyze(
1358
+ _smart_text(request.text, "novelty", per_section_limit=2000),
1359
+ request.title,
1360
  )
1361
  raw_flags = getattr(result, 'flags', []) or []
1362
  return NoveltyResponse(
1363
+ novelty_score = result.novelty_score,
1364
+ novelty_level = result.novelty_level,
1365
+ risk_level = result.risk_level,
1366
+ risk_score = getattr(result, 'risk_score', result.novelty_score),
1367
+ summary = result.summary,
1368
+ flags = [
1369
  NoveltyFlagResponse(
1370
+ flag_type = f.flag_type,
1371
+ severity = f.severity,
1372
+ description = f.description,
1373
+ evidence = f.evidence,
1374
+ suggestion = getattr(f, 'suggestion', ''),
1375
  ) for f in raw_flags
1376
  ],
1377
+ related_works_found = [
1378
  RelatedWorkResponse(
1379
+ title = w.title,
1380
+ year = w.year,
1381
+ authors = w.authors,
1382
+ similarity_signal = w.similarity_signal,
1383
  ) for w in result.related_works_found
1384
  ],
1385
+ key_terms_extracted = result.key_terms_extracted,
1386
+ literature_accessible = result.literature_accessible,
1387
+ flags_count = len(raw_flags),
1388
  )
1389
  except Exception as e:
1390
  raise HTTPException(status_code=500, detail=str(e))
1391
 
1392
+
1393
  @router.post("/analyze/grim", response_model=GrimResponse)
1394
  def analyze_grim(request: GrimRequest):
1395
  """GRIM Test — detect mathematically impossible means."""
1396
  try:
1397
+ result = _grim_engine.analyze(
1398
+ _smart_text(request.text, "grim")
1399
+ )
1400
  return GrimResponse(
1401
+ impossible_means = result.impossible_means,
1402
+ possible_means = result.possible_means,
1403
+ grim_score = result.grim_score,
1404
+ risk_level = result.risk_level,
1405
+ summary = result.summary,
1406
+ flags = [
1407
  GrimFlagResponse(
1408
+ flag_type = f.flag_type,
1409
+ severity = f.severity,
1410
+ description = f.description,
1411
+ evidence = f.evidence,
1412
+ suggestion = f.suggestion,
1413
  ) for f in result.flags
1414
  ],
1415
+ flags_count = result.flags_count,
1416
  )
1417
  except Exception as e:
1418
  raise HTTPException(status_code=500, detail=str(e))
1419
 
1420
+
1421
  @router.post("/analyze/sprite", response_model=SpriteResponse)
1422
  def analyze_sprite(request: SpriteRequest):
1423
  """SPRITE Test — detect impossible distributions."""
1424
  try:
1425
+ result = _sprite_engine.analyze(
1426
+ _smart_text(request.text, "sprite")
1427
+ )
1428
  return SpriteResponse(
1429
+ impossible_combinations = result.impossible_combinations,
1430
+ possible_combinations = result.possible_combinations,
1431
+ sprite_score = result.sprite_score,
1432
+ risk_level = result.risk_level,
1433
+ summary = result.summary,
1434
+ flags = [
1435
  SpriteFlagResponse(
1436
+ flag_type = f.flag_type,
1437
+ severity = f.severity,
1438
+ description = f.description,
1439
+ evidence = f.evidence,
1440
+ suggestion = f.suggestion,
1441
  ) for f in result.flags
1442
  ],
1443
+ flags_count = result.flags_count,
1444
  )
1445
  except Exception as e:
1446
  raise HTTPException(status_code=500, detail=str(e))
1447
 
1448
+
1449
  @router.post("/analyze/granularity", response_model=GranularityResponse)
1450
  def analyze_granularity(request: GranularityRequest):
1451
  """Granularity Analyzer — Benford Law + digit preference."""
1452
  try:
1453
+ result = _granularity_engine.analyze(
1454
+ _smart_text(request.text, "granularity")
1455
+ )
1456
  return GranularityResponse(
1457
+ digit_preference_score = result.digit_preference_score,
1458
+ benford_score = result.benford_score,
1459
+ round_number_ratio = result.round_number_ratio,
1460
+ granularity_score = result.granularity_score,
1461
+ risk_level = result.risk_level,
1462
+ summary = result.summary,
1463
+ flags = [
1464
  GranularityFlagResponse(
1465
+ flag_type = f.flag_type,
1466
+ severity = f.severity,
1467
+ description = f.description,
1468
+ evidence = f.evidence,
1469
+ suggestion = f.suggestion,
1470
  ) for f in result.flags
1471
  ],
1472
+ flags_count = result.flags_count,
1473
  )
1474
  except Exception as e:
1475
  raise HTTPException(status_code=500, detail=str(e))
1476
 
1477
+
1478
  @router.post("/analyze/pcurve", response_model=PCurveResponse)
1479
  def analyze_pcurve(request: PCurveRequest):
1480
  """P-Curve Analyzer — publication bias detector."""
1481
  try:
1482
+ result = _pcurve_engine.analyze(
1483
+ _smart_text(request.text, "pcurve")
1484
+ )
1485
  return PCurveResponse(
1486
+ p_values_found = result.p_values_found,
1487
+ significant_p = result.significant_p,
1488
+ right_skew_ratio = result.right_skew_ratio,
1489
+ clustering_score = result.clustering_score,
1490
+ pcurve_score = result.pcurve_score,
1491
+ risk_level = result.risk_level,
1492
+ summary = result.summary,
1493
+ flags = [
1494
  PCurveFlagResponse(
1495
+ flag_type = f.flag_type,
1496
+ severity = f.severity,
1497
+ description = f.description,
1498
+ evidence = f.evidence,
1499
+ suggestion = f.suggestion,
1500
  ) for f in result.flags
1501
  ],
1502
+ flags_count = result.flags_count,
1503
  )
1504
  except Exception as e:
1505
  raise HTTPException(status_code=500, detail=str(e))
1506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1507
 
1508
  @router.post('/analyze/effect_size', response_model=EffectSizeResponse)
1509
  def analyze_effect_size(request: EffectSizeRequest):
1510
+ """Effect Size Validator — Cohen d, power analysis."""
1511
  try:
1512
+ result = _effect_size_engine.analyze(
1513
+ _smart_text(request.text, "effect_size")
1514
+ )
1515
  return EffectSizeResponse(
1516
  effect_sizes_found = result.effect_sizes_found,
1517
  power_estimates = result.power_estimates,
 
1527
  description = f.description,
1528
  evidence = f.evidence,
1529
  suggestion = f.suggestion,
1530
+ ) for f in result.flags
 
1531
  ],
1532
  flags_count = result.flags_count,
1533
  )
 
1535
  raise HTTPException(status_code=500, detail=str(e))
1536
 
1537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
  @router.post('/analyze/retraction', response_model=RetractionResponse)
1539
  def analyze_retraction(request: RetractionRequest):
1540
+ """Retraction Checker — live CrossRef API."""
1541
  try:
1542
+ result = _retraction_engine.analyze(
1543
+ _smart_text(request.text, "retraction")
1544
+ )
1545
  return RetractionResponse(
1546
  dois_found = result.dois_found,
1547
  retracted_found = result.retracted_found,
 
1556
  description = f.description,
1557
  evidence = f.evidence,
1558
  suggestion = f.suggestion,
1559
+ ) for f in result.flags
 
1560
  ],
1561
  flags_count = result.flags_count,
1562
  )
 
1564
  raise HTTPException(status_code=500, detail=str(e))
1565
 
1566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1567
  @router.post('/analyze/cartel', response_model=CartelResponse)
1568
  def analyze_cartel(request: CartelRequest):
1569
+ """Citation Cartel Detector — graph-based ring detection."""
1570
  try:
1571
+ result = _cartel_engine.analyze(
1572
+ _smart_text(request.text, "cartel")
1573
+ )
1574
  return CartelResponse(
1575
  authors_found = result.authors_found,
1576
  citation_network = result.citation_network,
 
1586
  description = f.description,
1587
  evidence = f.evidence,
1588
  suggestion = f.suggestion,
1589
+ ) for f in result.flags
 
1590
  ],
1591
  flags_count = result.flags_count,
1592
  )
 
1594
  raise HTTPException(status_code=500, detail=str(e))
1595
 
1596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1597
  @router.post('/analyze/llm', response_model=LLMResponse)
1598
  def analyze_llm(request: LLMRequest):
1599
+ """LLM-Generated Paper Detector — burstiness + TTR."""
1600
  try:
1601
+ result = _llm_engine.analyze(
1602
+ _smart_text(request.text, "llm")
1603
+ )
1604
  return LLMResponse(
1605
  burstiness_score = result.burstiness_score,
1606
  vocabulary_diversity = result.vocabulary_diversity,
 
1616
  description = f.description,
1617
  evidence = f.evidence,
1618
  suggestion = f.suggestion,
1619
+ ) for f in result.flags
 
1620
  ],
1621
  flags_count = result.flags_count,
1622
  )
1623
  except Exception as e:
1624
+ raise HTTPException(status_code=500, detail=str(e))