Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from datetime import datetime, timezone
|
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fastapi import FastAPI, HTTPException, status, Header, Depends, File, UploadFile
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 11 |
from pydantic import BaseModel, EmailStr
|
| 12 |
from passlib.context import CryptContext
|
| 13 |
import jwt
|
|
@@ -46,6 +47,13 @@ except Exception:
|
|
| 46 |
gector_predict = None
|
| 47 |
load_verb_dict = None
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# ------------------ ENV & DB SETUP ------------------
|
| 50 |
load_dotenv()
|
| 51 |
|
|
@@ -690,6 +698,380 @@ def corpus_plagiarism_combined(text: str):
|
|
| 690 |
return {"plagiarism_percent": plagiarism_percent, "matches": matches, "summary": summary}
|
| 691 |
|
| 692 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
# ------------------ ENDPOINTS ------------------
|
| 694 |
|
| 695 |
@app.post("/api/signup")
|
|
@@ -919,6 +1301,133 @@ def api_ai_check_file(file: UploadFile = File(...), user=Depends(get_current_use
|
|
| 919 |
return api_ai_check.__wrapped__(TextRequest(text=text), user)
|
| 920 |
|
| 921 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
# ------------------ HISTORY ------------------
|
| 923 |
@app.get("/api/history")
|
| 924 |
def api_history(user=Depends(get_current_user)):
|
|
@@ -945,4 +1454,4 @@ def api_history(user=Depends(get_current_user)):
|
|
| 945 |
|
| 946 |
@app.get("/")
|
| 947 |
def read_root():
|
| 948 |
-
return {"status": "Backend is running with GECToR + 16GB RAM!"}
|
|
|
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fastapi import FastAPI, HTTPException, status, Header, Depends, File, UploadFile
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from fastapi.responses import StreamingResponse
|
| 12 |
from pydantic import BaseModel, EmailStr
|
| 13 |
from passlib.context import CryptContext
|
| 14 |
import jwt
|
|
|
|
| 47 |
gector_predict = None
|
| 48 |
load_verb_dict = None
|
| 49 |
|
| 50 |
+
# PDF report libs
|
| 51 |
+
from reportlab.lib.pagesizes import A4
|
| 52 |
+
from reportlab.pdfgen import canvas
|
| 53 |
+
from reportlab.lib.units import mm
|
| 54 |
+
from reportlab.lib.utils import ImageReader
|
| 55 |
+
from reportlab.lib import colors
|
| 56 |
+
|
| 57 |
# ------------------ ENV & DB SETUP ------------------
|
| 58 |
load_dotenv()
|
| 59 |
|
|
|
|
| 698 |
return {"plagiarism_percent": plagiarism_percent, "matches": matches, "summary": summary}
|
| 699 |
|
| 700 |
|
| 701 |
+
# ------------------ PDF HELPERS (COMMON STYLE) ------------------
|
| 702 |
+
|
| 703 |
+
# Paths for report assets: the logo is expected to live next to this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
PDF_LOGO_PATH = os.path.join(BASE_DIR, "logo.png")  # ensure logo.png is here

# Shared branding/typography settings used by every PDF report generator below.
BRAND_NAME = "TrueWrite Scan"
BRAND_FONT_SIZE = 18  # header brand-name font size (points)
TITLE_FONT_SIZE = 18  # report title font size (points)
LOGO_SIZE_MM = 15  # logo height in mm
|
| 710 |
+
|
| 711 |
+
|
| 712 |
+
def _wrap_text(text: str, max_chars: int):
|
| 713 |
+
"""
|
| 714 |
+
Simple word-wrap: yields lines with at most max_chars characters.
|
| 715 |
+
"""
|
| 716 |
+
words = text.split()
|
| 717 |
+
line = []
|
| 718 |
+
length = 0
|
| 719 |
+
for w in words:
|
| 720 |
+
if length + len(w) + (1 if line else 0) > max_chars:
|
| 721 |
+
yield " ".join(line)
|
| 722 |
+
line = [w]
|
| 723 |
+
length = len(w)
|
| 724 |
+
else:
|
| 725 |
+
line.append(w)
|
| 726 |
+
length += len(w) + (1 if line else 0)
|
| 727 |
+
if line:
|
| 728 |
+
yield " ".join(line)
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
def _format_checked_on():
|
| 732 |
+
# e.g. "Checked On: Dec 08, 2025"
|
| 733 |
+
local_now = datetime.now(timezone.utc).astimezone()
|
| 734 |
+
return "Checked On: " + local_now.strftime("%b %d, %Y")
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
def _get_logo_reader():
    """Load the brand logo as a reportlab ImageReader, or None if unreadable."""
    try:
        reader = ImageReader(PDF_LOGO_PATH)
    except Exception as exc:
        # Missing/corrupt logo must not break report generation; the caller
        # simply skips drawing the image when None is returned.
        print(f"[PDF] Failed to load logo at {PDF_LOGO_PATH}: {exc}")
        return None
    return reader
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
def _draw_header_footer(c: canvas.Canvas, page_num: int):
    """
    Draw the common header + footer shared by all report pages.

    Header: logo (when loadable), brand name vertically centred on the logo,
    a right-aligned "Checked On: ..." date, and a thin rule underneath.
    Footer: a thin rule and a centred page number.

    Args:
        c: the canvas to draw on (font/fill state is mutated).
        page_num: 1-based page number printed in the footer.

    Returns:
        (x_margin, content_start_y, page_width, page_height) in points;
        content_start_y is where page content should begin.
    """
    width, height = A4
    x_margin = 20 * mm
    top_y = height - 22 * mm
    footer_y = 15 * mm
    brand_spacing = 5 * mm
    # BUGFIX: LOGO_SIZE_MM is specified in millimetres but was previously used
    # directly as points, rendering the logo at roughly a third of the intended
    # size. Convert to points like every other mm-based dimension here.
    logo_size = LOGO_SIZE_MM * mm

    logo = _get_logo_reader()

    # --- HEADER ---
    if logo is not None:
        c.drawImage(
            logo,
            x_margin,
            top_y - logo_size,
            width=logo_size,
            height=logo_size,
            mask="auto",  # honour transparency in the PNG
        )

    # Brand name, vertically centred against the logo.
    c.setFont("Helvetica-Bold", BRAND_FONT_SIZE)
    text_y = top_y - logo_size / 2 - 1 * mm
    c.drawString(x_margin + logo_size + brand_spacing, text_y, BRAND_NAME)

    # Header right: "Checked On: ..." right-aligned to the margin.
    checked_date = _format_checked_on()
    c.setFont("Helvetica", 10)
    text_width = c.stringWidth(checked_date, "Helvetica", 10)
    c.drawString(width - x_margin - text_width, text_y, checked_date)

    # Thin rule under the header.
    c.setLineWidth(0.5)
    c.setStrokeColor(colors.black)
    c.line(
        x_margin,
        top_y - logo_size - 5 * mm,
        width - x_margin,
        top_y - logo_size - 5 * mm,
    )

    # --- FOOTER ---
    c.setLineWidth(0.35)
    c.line(x_margin, footer_y + 8 * mm, width - x_margin, footer_y + 8 * mm)

    # Centred page number.
    c.setFont("Helvetica", 9)
    page_label = f"Page {page_num}"
    label_width = c.stringWidth(page_label, "Helvetica", 9)
    c.drawString((width - label_width) / 2, footer_y + 2 * mm, page_label)

    content_start_y = top_y - logo_size - 18 * mm
    return x_margin, content_start_y, width, height
|
| 804 |
+
|
| 805 |
+
|
| 806 |
+
# ------------------ PDF GENERATORS ------------------
|
| 807 |
+
|
| 808 |
+
def generate_plagiarism_pdf(user: dict, text: str, result: dict) -> StreamingResponse:
    """
    Generate a plagiarism PDF in TrueWrite Scan style.

    Page 1 shows the overall similarity percentage, estimated unique content,
    an optional summary, and the (truncated) analysed text; a separate page
    lists up to 10 matched sources.

    Args:
        user: requester info (name/email) — currently unused in the layout;
            kept for interface symmetry with the other generators.
        text: the analysed text, truncated to 8000 chars in the report.
        result: plagiarism engine output; keys read here: "plagiarism_percent",
            "summary", "matches" (list of dicts with "title" and
            "score"/"tfidf_score").

    Returns:
        StreamingResponse with the PDF as an attachment.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)

    report_title = "Plagiarism Scan Report"
    plagiarism_percent = float(result.get("plagiarism_percent", 0.0))
    unique_percent = max(0.0, round(100.0 - plagiarism_percent, 2))

    # ---------- PAGE 1 ----------
    page_num = 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    # Title, horizontally centred.
    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
    c.drawString((width - title_width) / 2, y, report_title)
    y -= 18 * mm

    # Overall similarity as a big number; red at >= 1% similarity, else green.
    c.setFont("Helvetica-Bold", 22)
    c.setFillColor(colors.red if plagiarism_percent >= 1.0 else colors.green)
    c.drawString(x_margin, y, f"{plagiarism_percent:.0f}%")

    c.setFont("Helvetica", 11)
    c.setFillColor(colors.black)  # reset fill colour for all subsequent text
    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Overall Similarity")
    y -= 12 * mm

    # Additional info
    c.setFont("Helvetica", 10)
    c.drawString(x_margin, y, f"Estimated Unique Content: {unique_percent:.0f}%")
    y -= 6 * mm
    summary = result.get("summary", "")
    if summary:
        for line in _wrap_text("Summary: " + summary, 110):
            c.drawString(x_margin, y, line)
            y -= 5 * mm
    else:
        y -= 5 * mm
    y -= 10 * mm

    # Body text: original text (truncated to keep the report bounded).
    c.setFont("Helvetica", 10)
    truncated = text.strip()
    if len(truncated) > 8000:
        truncated = truncated[:8000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(truncated, 110):
        # Start a new page before drawing once we reach the bottom margin.
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    # ---------- NEXT PAGE: MATCHED SOURCES ----------
    c.showPage()
    page_num += 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)
    c.setFont("Helvetica-Bold", 12)
    c.drawString(x_margin, y, "Matched Sources")
    y -= 10 * mm
    c.setFont("Helvetica", 10)

    matches = result.get("matches", []) or []
    if not matches:
        c.drawString(x_margin, y, "No specific sources recorded. Content appears mostly unique.")
    else:
        # Cap at 10 sources to keep the section compact.
        for idx, m in enumerate(matches[:10], start=1):
            title = m.get("title", "Source")
            # "score" preferred; "tfidf_score" as legacy fallback.
            score = m.get("score", m.get("tfidf_score", 0.0) or 0.0)
            line = f"{idx}. {title} — {score:.2f}% match"
            for part in _wrap_text(line, 110):
                c.drawString(x_margin, y, part)
                y -= 5 * mm
                # Page-break check runs after drawing so the NEXT part lands
                # on a fresh page when space runs out.
                if y < 40 * mm:
                    c.showPage()
                    page_num += 1
                    x_margin, y, width, height = _draw_header_footer(c, page_num)
                    c.setFont("Helvetica", 10)

    c.save()
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=plagiarism-report.pdf"},
    )
|
| 900 |
+
|
| 901 |
+
|
| 902 |
+
def generate_ai_pdf(user: dict, text: str, result: dict) -> StreamingResponse:
    """
    Generate an AI content analysis PDF in TrueWrite Scan style.

    Shows the estimated AI probability as a big number (red at >= 50%, else
    green), human probability, word count, average sentence length, an
    optional summary, and the (truncated) analysed text.

    Args:
        user: requester info — currently unused in the layout; kept for
            interface symmetry with the other generators.
        text: the analysed text, truncated to 8000 chars in the report.
        result: AI-check output; keys read here: "ai_percent",
            "human_percent", "word_count", "avg_sentence_length", "summary".

    Returns:
        StreamingResponse with the PDF as an attachment.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)

    report_title = "AI Content Analysis Report"
    ai_percent = float(result.get("ai_percent", 0.0))
    human_percent = float(result.get("human_percent", 100.0))
    word_count = int(result.get("word_count", 0))
    avg_len = float(result.get("avg_sentence_length", 0.0))

    # PAGE 1
    page_num = 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    # Title, horizontally centred.
    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
    c.drawString((width - title_width) / 2, y, report_title)
    y -= 18 * mm

    # Big AI probability: red when AI-likely (>= 50%), green otherwise.
    c.setFont("Helvetica-Bold", 22)
    if ai_percent >= 50:
        c.setFillColor(colors.red)
    else:
        c.setFillColor(colors.green)
    c.drawString(x_margin, y, f"{ai_percent:.0f}%")
    c.setFont("Helvetica", 11)
    c.setFillColor(colors.black)  # reset fill colour for all subsequent text
    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Estimated AI Probability")
    y -= 12 * mm

    # Extra stats
    c.setFont("Helvetica", 10)
    c.drawString(x_margin, y, f"Estimated Human Probability: {human_percent:.0f}%")
    y -= 6 * mm
    c.drawString(x_margin, y, f"Word Count: {word_count}")
    y -= 6 * mm
    c.drawString(x_margin, y, f"Average Sentence Length: {avg_len:.2f} words")
    y -= 6 * mm

    summary = result.get("summary", "")
    if summary:
        for line in _wrap_text("Summary: " + summary, 110):
            c.drawString(x_margin, y, line)
            y -= 5 * mm
        y -= 5 * mm
    else:
        y -= 10 * mm

    # Body text (truncated to keep the report bounded).
    c.setFont("Helvetica", 10)
    truncated = text.strip()
    if len(truncated) > 8000:
        truncated = truncated[:8000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(truncated, 110):
        # Start a new page before drawing once we reach the bottom margin.
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    c.save()
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=truewrite-ai-report.pdf"},
    )
|
| 978 |
+
|
| 979 |
+
|
| 980 |
+
def generate_grammar_pdf(user: dict, original_text: str, corrected_text: str, result: dict) -> StreamingResponse:
    """
    Generate a grammar correction PDF in TrueWrite Scan style.

    Page 1 shows the number of corrections (blue if any, green if none),
    words analysed, an optional summary, and the original text; a separate
    page shows the corrected text. Both texts are truncated to 4000 chars.

    Args:
        user: requester info — currently unused in the layout; kept for
            interface symmetry with the other generators.
        original_text: the submitted text.
        corrected_text: the grammar-corrected text.
        result: grammar-check output; keys read here: "corrections",
            "original_words", "summary".

    Returns:
        StreamingResponse with the PDF as an attachment.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)

    report_title = "Grammar Correction Report"
    corrections = int(result.get("corrections", 0))
    original_words = int(result.get("original_words", 0))
    summary = result.get("summary", "")

    # PAGE 1: Metrics + Original
    page_num = 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    # Title, horizontally centred.
    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
    c.drawString((width - title_width) / 2, y, report_title)
    y -= 18 * mm

    # Big metric: corrections (blue when edits were made, green when clean).
    c.setFont("Helvetica-Bold", 22)
    c.setFillColor(colors.blue if corrections > 0 else colors.green)
    c.drawString(x_margin, y, f"{corrections}")
    c.setFont("Helvetica", 11)
    c.setFillColor(colors.black)  # reset fill colour for all subsequent text
    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Corrections Applied")
    y -= 12 * mm

    c.setFont("Helvetica", 10)
    c.drawString(x_margin, y, f"Words Analysed: {original_words}")
    y -= 6 * mm

    if summary:
        for line in _wrap_text("Summary: " + summary, 110):
            c.drawString(x_margin, y, line)
            y -= 5 * mm
        y -= 5 * mm
    else:
        y -= 10 * mm

    # Original text section.
    c.setFont("Helvetica-Bold", 11)
    c.drawString(x_margin, y, "Original Text")
    y -= 7 * mm
    c.setFont("Helvetica", 10)

    orig = original_text.strip()
    if len(orig) > 4000:
        orig = orig[:4000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(orig, 110):
        # Start a new page before drawing once we reach the bottom margin.
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    # PAGE 2: Corrected text
    c.showPage()
    page_num += 1
    x_margin, y, width, height = _draw_header_footer(c, page_num)

    c.setFont("Helvetica-Bold", 11)
    c.drawString(x_margin, y, "Corrected Text")
    y -= 7 * mm
    c.setFont("Helvetica", 10)

    corr = corrected_text.strip()
    if len(corr) > 4000:
        corr = corr[:4000] + "\n...\n[Content truncated for report]"

    for line in _wrap_text(corr, 110):
        if y < 40 * mm:
            c.showPage()
            page_num += 1
            x_margin, y, width, height = _draw_header_footer(c, page_num)
            c.setFont("Helvetica", 10)  # header drawing changes the font
        c.drawString(x_margin, y, line)
        y -= 5 * mm

    c.save()
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=truewrite-grammar-report.pdf"},
    )
|
| 1073 |
+
|
| 1074 |
+
|
| 1075 |
# ------------------ ENDPOINTS ------------------
|
| 1076 |
|
| 1077 |
@app.post("/api/signup")
|
|
|
|
| 1301 |
return api_ai_check.__wrapped__(TextRequest(text=text), user)
|
| 1302 |
|
| 1303 |
|
| 1304 |
+
# ------------------ PDF REPORT ENDPOINTS ------------------
|
| 1305 |
+
|
| 1306 |
+
@app.post("/api/plagiarism-report")
def api_plagiarism_report(req: TextRequest, user=Depends(get_current_user)):
    """
    Generate a PDF plagiarism report in the TrueWrite Scan style.

    Runs the combined plagiarism engine (falling back to the demo engine on
    failure), records the scan in history, and streams the PDF back.
    """
    text = (req.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    # Prefer the combined engine; any failure degrades to the demo fallback.
    try:
        scan_result = corpus_plagiarism_combined(text)
    except Exception as e:
        print("[Plagiarism-Report] Combined engine failed, falling back:", e)
        scan_result = demo_plagiarism_fallback(text)

    save_history(user["id"], "plagiarism_report", text, scan_result.get("summary", ""))

    requester = {"name": user.get("name"), "email": user.get("email")}
    return generate_plagiarism_pdf(requester, text, scan_result)
|
| 1328 |
+
|
| 1329 |
+
|
| 1330 |
+
@app.post("/api/ai-report")
def api_ai_report(req: TextRequest, user=Depends(get_current_user)):
    """
    Generate a PDF AI analysis report in the TrueWrite Scan style.

    Runs the detector model in word-sized chunks when available, averaging
    per-chunk AI probabilities; falls back to the heuristic scorer on any
    failure. Records the scan in history and streams the PDF back.
    """
    text = (req.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    result = None
    if model is not None and tokenizer is not None:
        try:
            # Clamp the tokenizer's max length: some tokenizers report a
            # huge sentinel value for model_max_length.
            max_len = getattr(tokenizer, "model_max_length", 512)
            if max_len is None or max_len > 1024:
                max_len = 512

            # NOTE(review): chunks are sized in WORDS, not tokens;
            # truncation=True below guards against token overflow per chunk.
            words = text.split()
            chunk_size = min(400, max_len - 10)
            chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
            probs = []
            for chunk in chunks:
                inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=max_len)
                inputs = {k: v.to(device) for k, v in inputs.items()}
                with torch.no_grad():
                    outputs = model(**inputs)
                logits = outputs.logits
                p = torch.softmax(logits, dim=1).cpu().numpy()[0]
                # p[1] is assumed to be the "AI-generated" class probability —
                # TODO confirm against the model's label mapping.
                ai_prob = float(p[1]) if p.shape[0] > 1 else float(p[0])
                probs.append(ai_prob)
            # Overall score = mean of per-chunk probabilities.
            avg_ai_prob = float(np.mean(probs)) if probs else 0.0
            ai_percent = round(avg_ai_prob * 100, 2)
            human_percent = round(100 - ai_percent, 2)
            words_count = len(words)
            # Rough sentence split on terminal punctuation for the stats line.
            sentences = [s.strip() for s in re.split(r"[.!?]+", text) if s.strip()]
            avg_sentence_len = round(words_count / (len(sentences) or 1), 2)
            summary = f"Model: {AI_DETECTOR_MODEL}; AI probability: {ai_percent}%"
            result = {
                "ai_percent": ai_percent,
                "human_percent": human_percent,
                "word_count": words_count,
                "avg_sentence_length": avg_sentence_len,
                "summary": summary,
            }
        except Exception as e:
            # Best-effort: inference failure falls through to the heuristic.
            print("[AI-report] model inference failed:", e)

    if result is None:
        # Heuristic fallback when the model is unavailable or failed.
        ai_percent, human_percent, wc, avg_len, uniq = heuristic_ai_score(text)
        summary = f"HEURISTIC fallback — AI probability: {ai_percent}%"
        result = {
            "ai_percent": ai_percent,
            "human_percent": human_percent,
            "word_count": wc,
            "avg_sentence_length": avg_len,
            "unique_ratio": round(uniq, 3),
            "summary": summary,
        }

    save_history(user["id"], "ai_report", text, result.get("summary", ""))

    user_info = {
        "name": user.get("name"),
        "email": user.get("email"),
    }
    return generate_ai_pdf(user_info, text, result)
|
| 1395 |
+
|
| 1396 |
+
|
| 1397 |
+
@app.post("/api/grammar-report")
def api_grammar_report(req: TextRequest, user=Depends(get_current_user)):
    """
    Generate a PDF grammar correction report in the TrueWrite Scan style.

    Picks the best available correction engine (GECToR > LanguageTool >
    heuristic), records the scan in history, and streams the PDF back.
    """
    text = (req.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    # Engine selection mirrors the grammar-check endpoint.
    if GEC_MODEL is not None:
        corrected, n_edits, n_words = gector_correct(text)
        summary = f"GECToR neural GEC: {n_edits} edits; words analysed: {n_words}"
    elif lt_tool is not None:
        corrected, n_edits, n_words = grammar_with_languagetool(text)
        summary = f"LanguageTool corrections: {n_edits}; words analysed: {n_words}"
    else:
        corrected, n_edits, n_words = simple_grammar_correct(text)
        summary = f"HEURISTIC corrections: {n_edits}; words analysed: {n_words}"

    report_stats = {
        "original_words": n_words,
        "corrections": n_edits,
        "summary": summary,
    }

    save_history(user["id"], "grammar_report", text, summary)

    requester = {"name": user.get("name"), "email": user.get("email")}
    return generate_grammar_pdf(requester, text, corrected, report_stats)
|
| 1429 |
+
|
| 1430 |
+
|
| 1431 |
# ------------------ HISTORY ------------------
|
| 1432 |
@app.get("/api/history")
|
| 1433 |
def api_history(user=Depends(get_current_user)):
|
|
|
|
| 1454 |
|
| 1455 |
@app.get("/")
def read_root():
    """Health-check endpoint confirming the backend is up."""
    payload = {"status": "Backend is running with GECToR + 16GB RAM + PDF reports!"}
    return payload
|