Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from datetime import datetime, timezone
|
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fastapi import FastAPI, HTTPException, status, Header, Depends, File, UploadFile
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 11 |
from pydantic import BaseModel, EmailStr
|
| 12 |
from passlib.context import CryptContext
|
| 13 |
import jwt
|
|
@@ -46,6 +47,13 @@ except Exception:
|
|
| 46 |
gector_predict = None
|
| 47 |
load_verb_dict = None
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# ------------------ ENV & DB SETUP ------------------
|
| 50 |
load_dotenv()
|
| 51 |
|
|
@@ -690,6 +698,380 @@ def corpus_plagiarism_combined(text: str):
|
|
| 690 |
return {"plagiarism_percent": plagiarism_percent, "matches": matches, "summary": summary}
|
| 691 |
|
| 692 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
# ------------------ ENDPOINTS ------------------
|
| 694 |
|
| 695 |
@app.post("/api/signup")
|
|
@@ -919,6 +1301,133 @@ def api_ai_check_file(file: UploadFile = File(...), user=Depends(get_current_use
|
|
| 919 |
return api_ai_check.__wrapped__(TextRequest(text=text), user)
|
| 920 |
|
| 921 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
# ------------------ HISTORY ------------------
|
| 923 |
@app.get("/api/history")
|
| 924 |
def api_history(user=Depends(get_current_user)):
|
|
@@ -945,4 +1454,4 @@ def api_history(user=Depends(get_current_user)):
|
|
| 945 |
|
| 946 |
@app.get("/")
|
| 947 |
def read_root():
|
| 948 |
-
return {"status": "Backend is running with GECToR + 16GB RAM!"}
|
|
|
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fastapi import FastAPI, HTTPException, status, Header, Depends, File, UploadFile
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from fastapi.responses import StreamingResponse
|
| 12 |
from pydantic import BaseModel, EmailStr
|
| 13 |
from passlib.context import CryptContext
|
| 14 |
import jwt
|
|
|
|
| 47 |
gector_predict = None
|
| 48 |
load_verb_dict = None
|
| 49 |
|
| 50 |
+
# PDF report libs
|
| 51 |
+
from reportlab.lib.pagesizes import A4
|
| 52 |
+
from reportlab.pdfgen import canvas
|
| 53 |
+
from reportlab.lib.units import mm
|
| 54 |
+
from reportlab.lib.utils import ImageReader
|
| 55 |
+
from reportlab.lib import colors
|
| 56 |
+
|
| 57 |
# ------------------ ENV & DB SETUP ------------------
|
| 58 |
load_dotenv()
|
| 59 |
|
|
|
|
| 698 |
return {"plagiarism_percent": plagiarism_percent, "matches": matches, "summary": summary}
|
| 699 |
|
| 700 |
|
| 701 |
+
# ------------------ PDF HELPERS (COMMON STYLE) ------------------
|
| 702 |
+
|
| 703 |
+
# Paths for report assets: the logo is expected to live next to this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
PDF_LOGO_PATH = os.path.join(BASE_DIR, "logo.png")  # ensure logo.png is here

# Shared branding/typography settings used by every PDF report generator below.
BRAND_NAME = "TrueWrite Scan"
BRAND_FONT_SIZE = 18  # header brand-name font size (points)
TITLE_FONT_SIZE = 18  # report title font size (points)
LOGO_SIZE_MM = 15  # logo height in mm
|
| 710 |
+
|
| 711 |
+
|
| 712 |
+
def _wrap_text(text: str, max_chars: int):
|
| 713 |
+
"""
|
| 714 |
+
Simple word-wrap: yields lines with at most max_chars characters.
|
| 715 |
+
"""
|
| 716 |
+
words = text.split()
|
| 717 |
+
line = []
|
| 718 |
+
length = 0
|
| 719 |
+
for w in words:
|
| 720 |
+
if length + len(w) + (1 if line else 0) > max_chars:
|
| 721 |
+
yield " ".join(line)
|
| 722 |
+
line = [w]
|
| 723 |
+
length = len(w)
|
| 724 |
+
else:
|
| 725 |
+
line.append(w)
|
| 726 |
+
length += len(w) + (1 if line else 0)
|
| 727 |
+
if line:
|
| 728 |
+
yield " ".join(line)
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
def _format_checked_on():
|
| 732 |
+
# e.g. "Checked On: Dec 08, 2025"
|
| 733 |
+
local_now = datetime.now(timezone.utc).astimezone()
|
| 734 |
+
return "Checked On: " + local_now.strftime("%b %d, %Y")
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
def _get_logo_reader():
    """Load the brand logo as a reportlab ImageReader, or None if unreadable."""
    try:
        reader = ImageReader(PDF_LOGO_PATH)
    except Exception as exc:
        # Missing/corrupt logo must not break report generation; the caller
        # simply skips drawing the image when None is returned.
        print(f"[PDF] Failed to load logo at {PDF_LOGO_PATH}: {exc}")
        return None
    return reader
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
def _draw_header_footer(c: canvas.Canvas, page_num: int):
    """
    Draw the common header + footer shared by all report pages.

    Header: logo (when loadable), brand name vertically centred on the logo,
    a right-aligned "Checked On: ..." date, and a thin rule underneath.
    Footer: a thin rule and a centred page number.

    Args:
        c: the canvas to draw on (font/fill state is mutated).
        page_num: 1-based page number printed in the footer.

    Returns:
        (x_margin, content_start_y, page_width, page_height) in points;
        content_start_y is where page content should begin.
    """
    width, height = A4
    x_margin = 20 * mm
    top_y = height - 22 * mm
    footer_y = 15 * mm
    brand_spacing = 5 * mm
    # BUGFIX: LOGO_SIZE_MM is specified in millimetres but was previously used
    # directly as points, rendering the logo at roughly a third of the intended
    # size. Convert to points like every other mm-based dimension here.
    logo_size = LOGO_SIZE_MM * mm

    logo = _get_logo_reader()

    # --- HEADER ---
    if logo is not None:
        c.drawImage(
            logo,
            x_margin,
            top_y - logo_size,
            width=logo_size,
            height=logo_size,
            mask="auto",  # honour transparency in the PNG
        )

    # Brand name, vertically centred against the logo.
    c.setFont("Helvetica-Bold", BRAND_FONT_SIZE)
    text_y = top_y - logo_size / 2 - 1 * mm
    c.drawString(x_margin + logo_size + brand_spacing, text_y, BRAND_NAME)

    # Header right: "Checked On: ..." right-aligned to the margin.
    checked_date = _format_checked_on()
    c.setFont("Helvetica", 10)
    text_width = c.stringWidth(checked_date, "Helvetica", 10)
    c.drawString(width - x_margin - text_width, text_y, checked_date)

    # Thin rule under the header.
    c.setLineWidth(0.5)
    c.setStrokeColor(colors.black)
    c.line(
        x_margin,
        top_y - logo_size - 5 * mm,
        width - x_margin,
        top_y - logo_size - 5 * mm,
    )

    # --- FOOTER ---
    c.setLineWidth(0.35)
    c.line(x_margin, footer_y + 8 * mm, width - x_margin, footer_y + 8 * mm)

    # Centred page number.
    c.setFont("Helvetica", 9)
    page_label = f"Page {page_num}"
    label_width = c.stringWidth(page_label, "Helvetica", 9)
    c.drawString((width - label_width) / 2, footer_y + 2 * mm, page_label)

    content_start_y = top_y - logo_size - 18 * mm
    return x_margin, content_start_y, width, height
|
| 804 |
+
|
| 805 |
+
|
| 806 |
+
# ------------------ PDF GENERATORS ------------------
|
| 807 |
+
|
| 808 |
+
def generate_plagiarism_pdf(user: dict, text: str, result: dict) -> StreamingResponse:
    """
    Generate a plagiarism PDF in TrueWrite Scan style.

    Page 1 shows the overall similarity percentage, estimated unique content,
    an optional summary, and the (truncated) analysed text; a separate page
    lists up to 10 matched sources.

    Args:
        user: requester info (name/email) — currently unused in the layout;
            kept for interface symmetry with the other generators.
        text: the analysed text, truncated to 8000 chars in the report.
        result: plagiarism engine output; keys read here: "plagiarism_percent",
            "summary", "matches" (list of dicts with "title" and
            "score"/"tfidf_score").

    Returns:
        StreamingResponse with the PDF as an attachment.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)

    report_title = "Plagiarism Scan Report"
    plagiarism_percent = float(result.get("plagiarism_percent", 0.0))
    unique_percent = max(0.0, round(100.0 - plagiarism_percent, 2))

    # ---------- PAGE 1 ----------
    page_num = 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    # Title, horizontally centred.
    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
    c.drawString((width - title_width) / 2, y, report_title)
    y -= 18 * mm

    # Overall similarity as a big number; red at >= 1% similarity, else green.
    c.setFont("Helvetica-Bold", 22)
    c.setFillColor(colors.red if plagiarism_percent >= 1.0 else colors.green)
    c.drawString(x_margin, y, f"{plagiarism_percent:.0f}%")

    c.setFont("Helvetica", 11)
    c.setFillColor(colors.black)  # reset fill colour for all subsequent text
    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Overall Similarity")
    y -= 12 * mm

    # Additional info
    c.setFont("Helvetica", 10)
    c.drawString(x_margin, y, f"Estimated Unique Content: {unique_percent:.0f}%")
    y -= 6 * mm
    summary = result.get("summary", "")
    if summary:
        for line in _wrap_text("Summary: " + summary, 110):
            c.drawString(x_margin, y, line)
            y -= 5 * mm
    else:
        y -= 5 * mm
    y -= 10 * mm

    # Body text: original text (truncated to keep the report bounded).
    c.setFont("Helvetica", 10)
    truncated = text.strip()
    if len(truncated) > 8000:
        truncated = truncated[:8000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(truncated, 110):
        # Start a new page before drawing once we reach the bottom margin.
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    # ---------- NEXT PAGE: MATCHED SOURCES ----------
    c.showPage()
    page_num += 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)
    c.setFont("Helvetica-Bold", 12)
    c.drawString(x_margin, y, "Matched Sources")
    y -= 10 * mm
    c.setFont("Helvetica", 10)

    matches = result.get("matches", []) or []
    if not matches:
        c.drawString(x_margin, y, "No specific sources recorded. Content appears mostly unique.")
    else:
        # Cap at 10 sources to keep the section compact.
        for idx, m in enumerate(matches[:10], start=1):
            title = m.get("title", "Source")
            # "score" preferred; "tfidf_score" as legacy fallback.
            score = m.get("score", m.get("tfidf_score", 0.0) or 0.0)
            line = f"{idx}. {title} — {score:.2f}% match"
            for part in _wrap_text(line, 110):
                c.drawString(x_margin, y, part)
                y -= 5 * mm
                # Page-break check runs after drawing so the NEXT part lands
                # on a fresh page when space runs out.
                if y < 40 * mm:
                    c.showPage()
                    page_num += 1
                    x_margin, y, width, height = _draw_header_footer(c, page_num)
                    c.setFont("Helvetica", 10)

    c.save()
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=plagiarism-report.pdf"},
    )
|
| 900 |
+
|
| 901 |
+
|
| 902 |
+
def generate_ai_pdf(user: dict, text: str, result: dict) -> StreamingResponse:
    """
    Generate an AI content analysis PDF in TrueWrite Scan style.

    Shows the estimated AI probability as a big number (red at >= 50%, else
    green), human probability, word count, average sentence length, an
    optional summary, and the (truncated) analysed text.

    Args:
        user: requester info — currently unused in the layout; kept for
            interface symmetry with the other generators.
        text: the analysed text, truncated to 8000 chars in the report.
        result: AI-check output; keys read here: "ai_percent",
            "human_percent", "word_count", "avg_sentence_length", "summary".

    Returns:
        StreamingResponse with the PDF as an attachment.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)

    report_title = "AI Content Analysis Report"
    ai_percent = float(result.get("ai_percent", 0.0))
    human_percent = float(result.get("human_percent", 100.0))
    word_count = int(result.get("word_count", 0))
    avg_len = float(result.get("avg_sentence_length", 0.0))

    # PAGE 1
    page_num = 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    # Title, horizontally centred.
    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
    c.drawString((width - title_width) / 2, y, report_title)
    y -= 18 * mm

    # Big AI probability: red when AI-likely (>= 50%), green otherwise.
    c.setFont("Helvetica-Bold", 22)
    if ai_percent >= 50:
        c.setFillColor(colors.red)
    else:
        c.setFillColor(colors.green)
    c.drawString(x_margin, y, f"{ai_percent:.0f}%")
    c.setFont("Helvetica", 11)
    c.setFillColor(colors.black)  # reset fill colour for all subsequent text
    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Estimated AI Probability")
    y -= 12 * mm

    # Extra stats
    c.setFont("Helvetica", 10)
    c.drawString(x_margin, y, f"Estimated Human Probability: {human_percent:.0f}%")
    y -= 6 * mm
    c.drawString(x_margin, y, f"Word Count: {word_count}")
    y -= 6 * mm
    c.drawString(x_margin, y, f"Average Sentence Length: {avg_len:.2f} words")
    y -= 6 * mm

    summary = result.get("summary", "")
    if summary:
        for line in _wrap_text("Summary: " + summary, 110):
            c.drawString(x_margin, y, line)
            y -= 5 * mm
        y -= 5 * mm
    else:
        y -= 10 * mm

    # Body text (truncated to keep the report bounded).
    c.setFont("Helvetica", 10)
    truncated = text.strip()
    if len(truncated) > 8000:
        truncated = truncated[:8000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(truncated, 110):
        # Start a new page before drawing once we reach the bottom margin.
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    c.save()
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=truewrite-ai-report.pdf"},
    )
|
| 978 |
+
|
| 979 |
+
|
| 980 |
+
def generate_grammar_pdf(user: dict, original_text: str, corrected_text: str, result: dict) -> StreamingResponse:
    """
    Generate a grammar correction PDF in TrueWrite Scan style.

    Page 1 shows the number of corrections (blue if any, green if none),
    words analysed, an optional summary, and the original text; a separate
    page shows the corrected text. Both texts are truncated to 4000 chars.

    Args:
        user: requester info — currently unused in the layout; kept for
            interface symmetry with the other generators.
        original_text: the submitted text.
        corrected_text: the grammar-corrected text.
        result: grammar-check output; keys read here: "corrections",
            "original_words", "summary".

    Returns:
        StreamingResponse with the PDF as an attachment.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)

    report_title = "Grammar Correction Report"
    corrections = int(result.get("corrections", 0))
    original_words = int(result.get("original_words", 0))
    summary = result.get("summary", "")

    # PAGE 1: Metrics + Original
    page_num = 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    # Title, horizontally centred.
    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
    c.drawString((width - title_width) / 2, y, report_title)
    y -= 18 * mm

    # Big metric: corrections (blue when edits were made, green when clean).
    c.setFont("Helvetica-Bold", 22)
    c.setFillColor(colors.blue if corrections > 0 else colors.green)
    c.drawString(x_margin, y, f"{corrections}")
    c.setFont("Helvetica", 11)
    c.setFillColor(colors.black)  # reset fill colour for all subsequent text
    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Corrections Applied")
    y -= 12 * mm

    c.setFont("Helvetica", 10)
    c.drawString(x_margin, y, f"Words Analysed: {original_words}")
    y -= 6 * mm

    if summary:
        for line in _wrap_text("Summary: " + summary, 110):
            c.drawString(x_margin, y, line)
            y -= 5 * mm
        y -= 5 * mm
    else:
        y -= 10 * mm

    # Original text section.
    c.setFont("Helvetica-Bold", 11)
    c.drawString(x_margin, y, "Original Text")
    y -= 7 * mm
    c.setFont("Helvetica", 10)

    orig = original_text.strip()
    if len(orig) > 4000:
        orig = orig[:4000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(orig, 110):
        # Start a new page before drawing once we reach the bottom margin.
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    # PAGE 2: Corrected text
    c.showPage()
    page_num += 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    c.setFont("Helvetica-Bold", 11)
    c.drawString(x_margin, y, "Corrected Text")
    y -= 7 * mm
    c.setFont("Helvetica", 10)

    corr = corrected_text.strip()
    if len(corr) > 4000:
        corr = corr[:4000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(corr, 110):
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    c.save()
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=truewrite-grammar-report.pdf"},
    )
|
| 1073 |
+
|
| 1074 |
+
|
| 1075 |
# ------------------ ENDPOINTS ------------------
|
| 1076 |
|
| 1077 |
@app.post("/api/signup")
|
|
|
|
| 1301 |
return api_ai_check.__wrapped__(TextRequest(text=text), user)
|
| 1302 |
|
| 1303 |
|
| 1304 |
+
# ------------------ PDF REPORT ENDPOINTS ------------------
|
| 1305 |
+
|
| 1306 |
+
@app.post("/api/plagiarism-report")
def api_plagiarism_report(req: TextRequest, user=Depends(get_current_user)):
    """
    Generate a PDF plagiarism report in the TrueWrite Scan style.

    Runs the combined plagiarism engine (falling back to the demo engine on
    failure), records the scan in history, and streams the PDF back.
    """
    text = (req.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    # Prefer the combined engine; any failure degrades to the demo fallback.
    try:
        scan_result = corpus_plagiarism_combined(text)
    except Exception as e:
        print("[Plagiarism-Report] Combined engine failed, falling back:", e)
        scan_result = demo_plagiarism_fallback(text)

    save_history(user["id"], "plagiarism_report", text, scan_result.get("summary", ""))

    requester = {"name": user.get("name"), "email": user.get("email")}
    return generate_plagiarism_pdf(requester, text, scan_result)
|
| 1328 |
+
|
| 1329 |
+
|
| 1330 |
+
@app.post("/api/ai-report")
def api_ai_report(req: TextRequest, user=Depends(get_current_user)):
    """
    Generate a PDF AI analysis report in the TrueWrite Scan style.

    Runs the detector model in word-sized chunks when available, averaging
    per-chunk AI probabilities; falls back to the heuristic scorer on any
    failure. Records the scan in history and streams the PDF back.
    """
    text = (req.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    result = None
    if model is not None and tokenizer is not None:
        try:
            # Clamp the tokenizer's max length: some tokenizers report a
            # huge sentinel value for model_max_length.
            max_len = getattr(tokenizer, "model_max_length", 512)
            if max_len is None or max_len > 1024:
                max_len = 512

            # NOTE(review): chunks are sized in WORDS, not tokens;
            # truncation=True below guards against token overflow per chunk.
            words = text.split()
            chunk_size = min(400, max_len - 10)
            chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
            probs = []
            for chunk in chunks:
                inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=max_len)
                inputs = {k: v.to(device) for k, v in inputs.items()}
                with torch.no_grad():
                    outputs = model(**inputs)
                logits = outputs.logits
                p = torch.softmax(logits, dim=1).cpu().numpy()[0]
                # p[1] is assumed to be the "AI-generated" class probability —
                # TODO confirm against the model's label mapping.
                ai_prob = float(p[1]) if p.shape[0] > 1 else float(p[0])
                probs.append(ai_prob)
            # Overall score = mean of per-chunk probabilities.
            avg_ai_prob = float(np.mean(probs)) if probs else 0.0
            ai_percent = round(avg_ai_prob * 100, 2)
            human_percent = round(100 - ai_percent, 2)
            words_count = len(words)
            # Rough sentence split on terminal punctuation for the stats line.
            sentences = [s.strip() for s in re.split(r"[.!?]+", text) if s.strip()]
            avg_sentence_len = round(words_count / (len(sentences) or 1), 2)
            summary = f"Model: {AI_DETECTOR_MODEL}; AI probability: {ai_percent}%"
            result = {
                "ai_percent": ai_percent,
                "human_percent": human_percent,
                "word_count": words_count,
                "avg_sentence_length": avg_sentence_len,
                "summary": summary,
            }
        except Exception as e:
            # Best-effort: inference failure falls through to the heuristic.
            print("[AI-report] model inference failed:", e)

    if result is None:
        # Heuristic fallback when the model is unavailable or failed.
        ai_percent, human_percent, wc, avg_len, uniq = heuristic_ai_score(text)
        summary = f"HEURISTIC fallback — AI probability: {ai_percent}%"
        result = {
            "ai_percent": ai_percent,
            "human_percent": human_percent,
            "word_count": wc,
            "avg_sentence_length": avg_len,
            "unique_ratio": round(uniq, 3),
            "summary": summary,
        }

    save_history(user["id"], "ai_report", text, result.get("summary", ""))

    user_info = {
        "name": user.get("name"),
        "email": user.get("email"),
    }
    return generate_ai_pdf(user_info, text, result)
|
| 1395 |
+
|
| 1396 |
+
|
| 1397 |
+
@app.post("/api/grammar-report")
def api_grammar_report(req: TextRequest, user=Depends(get_current_user)):
    """
    Generate a PDF grammar correction report in the TrueWrite Scan style.

    Picks the best available correction engine (GECToR > LanguageTool >
    heuristic), records the scan in history, and streams the PDF back.
    """
    text = (req.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    # Engine selection mirrors the grammar-check endpoint.
    if GEC_MODEL is not None:
        corrected, n_edits, n_words = gector_correct(text)
        summary = f"GECToR neural GEC: {n_edits} edits; words analysed: {n_words}"
    elif lt_tool is not None:
        corrected, n_edits, n_words = grammar_with_languagetool(text)
        summary = f"LanguageTool corrections: {n_edits}; words analysed: {n_words}"
    else:
        corrected, n_edits, n_words = simple_grammar_correct(text)
        summary = f"HEURISTIC corrections: {n_edits}; words analysed: {n_words}"

    report_stats = {
        "original_words": n_words,
        "corrections": n_edits,
        "summary": summary,
    }

    save_history(user["id"], "grammar_report", text, summary)

    requester = {"name": user.get("name"), "email": user.get("email")}
    return generate_grammar_pdf(requester, text, corrected, report_stats)
|
| 1429 |
+
|
| 1430 |
+
|
| 1431 |
# ------------------ HISTORY ------------------
|
| 1432 |
@app.get("/api/history")
|
| 1433 |
def api_history(user=Depends(get_current_user)):
|
|
|
|
| 1454 |
|
| 1455 |
@app.get("/")
def read_root():
    """Health-check endpoint confirming the backend is up."""
    payload = {"status": "Backend is running with GECToR + 16GB RAM + PDF reports!"}
    return payload
|