Spaces:

GopalKrushnaMahapatra
/

TrueWrite-Scan-Backend

Sleeping

App Files Files Community

GopalKrushnaMahapatra committed on Dec 10, 2025

Commit

c9dc165

verified ·

1 Parent(s): f3f2bd2

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -510

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ from datetime import datetime, timezone
 from dotenv import load_dotenv
 from fastapi import FastAPI, HTTPException, status, Header, Depends, File, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, EmailStr
 from passlib.context import CryptContext
 import jwt
@@ -47,13 +46,6 @@ except Exception:
     gector_predict = None
     load_verb_dict = None
-# PDF report libs
-from reportlab.lib.pagesizes import A4
-from reportlab.pdfgen import canvas
-from reportlab.lib.units import mm
-from reportlab.lib.utils import ImageReader
-from reportlab.lib import colors
 # ------------------ ENV & DB SETUP ------------------
 load_dotenv()
@@ -698,380 +690,6 @@ def corpus_plagiarism_combined(text: str):
     return {"plagiarism_percent": plagiarism_percent, "matches": matches, "summary": summary}
-# ------------------ PDF HELPERS (COMMON STYLE) ------------------
-# Directory containing this module; report assets are resolved relative to it.
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-PDF_LOGO_PATH = os.path.join(BASE_DIR, "logo.png")  # ensure logo.png is here
-# Brand text drawn beside the logo in every report header.
-BRAND_NAME = "TrueWrite Scan"
-# Font sizes handed to canvas.setFont for the brand name and the report title.
-BRAND_FONT_SIZE = 18
-TITLE_FONT_SIZE = 18
-# Edge length of the header logo; the name and trailing comment give the unit as mm.
-LOGO_SIZE_MM = 15  # logo height in mm
-def _wrap_text(text: str, max_chars: int):
-    """
-    Simple word-wrap: yields lines with at most max_chars characters.
-    Words are split on any whitespace, so newlines and repeated spaces in
-    `text` collapse into single spaces. A single word longer than max_chars
-    is never broken; it is yielded on a line of its own.
-    Args:
-        text: text to wrap; empty or whitespace-only text yields nothing.
-        max_chars: maximum line length in characters.
-    Yields:
-        str: each wrapped line, never an empty string.
-    """
-    line = []
-    length = 0  # length of " ".join(line)
-    for w in text.split():
-        # A separating space is only needed when the line already has a word.
-        sep = 1 if line else 0
-        # Only flush a non-empty line, so an overlong first word does not
-        # produce a blank line ahead of it.
-        if line and length + sep + len(w) > max_chars:
-            yield " ".join(line)
-            line = [w]
-            length = len(w)
-        else:
-            line.append(w)
-            length += sep + len(w)
-    if line:
-        yield " ".join(line)
-def _format_checked_on():
-    """Return the header date label in local time, e.g. "Checked On: Dec 08, 2025"."""
-    date_text = datetime.now(timezone.utc).astimezone().strftime("%b %d, %Y")
-    return f"Checked On: {date_text}"
-def _get_logo_reader():
-    """
-    Return an ImageReader for PDF_LOGO_PATH.
-    If constructing it raises, the error is printed and None is returned instead.
-    """
-    try:
-        reader = ImageReader(PDF_LOGO_PATH)
-    except Exception as err:
-        print(f"[PDF] Failed to load logo at {PDF_LOGO_PATH}: {err}")
-        reader = None
-    return reader
-def _draw_header_footer(c: canvas.Canvas, page_num: int):
-    """
-    Draws the common header + footer for all reports.
-    Header: logo (skipped when it cannot be loaded), brand name, right-aligned
-    "Checked On" date and a thin rule underneath. Footer: a thin rule and a
-    centered "Page N" label.
-    Args:
-        c: canvas to draw on; its font, line width and stroke color are changed.
-        page_num: number shown in the footer label.
-    Returns (x_margin, content_start_y, page_width, page_height)
-    """
-    width, height = A4
-    x_margin = 20 * mm
-    top_y = height - 22 * mm
-    footer_y = 15 * mm
-    brand_spacing = 5 * mm
-    # LOGO_SIZE_MM is given in millimetres while canvas coordinates are points,
-    # so convert it before using it for the image size and the header offsets.
-    logo_size = LOGO_SIZE_MM * mm
-    header_rule_y = top_y - logo_size - 5 * mm
-    footer_rule_y = footer_y + 8 * mm
-    logo = _get_logo_reader()
-    # --- HEADER ---
-    if logo is not None:
-        c.drawImage(
-            logo,
-            x_margin,
-            top_y - logo_size,
-            width=logo_size,
-            height=logo_size,
-            mask="auto",
-        )
-    # Brand name, placed to the right of the logo slot and roughly centered on it
-    c.setFont("Helvetica-Bold", BRAND_FONT_SIZE)
-    text_y = top_y - logo_size / 2 - 1 * mm
-    c.drawString(x_margin + logo_size + brand_spacing, text_y, BRAND_NAME)
-    # Header right: Checked On: ... (right-aligned against the margin)
-    checked_date = _format_checked_on()
-    c.setFont("Helvetica", 10)
-    text_width = c.stringWidth(checked_date, "Helvetica", 10)
-    c.drawString(width - x_margin - text_width, text_y, checked_date)
-    # Thin line under header
-    c.setLineWidth(0.5)
-    c.setStrokeColor(colors.black)
-    c.line(x_margin, header_rule_y, width - x_margin, header_rule_y)
-    # --- FOOTER ---
-    # Thin footer line
-    c.setLineWidth(0.35)
-    c.line(x_margin, footer_rule_y, width - x_margin, footer_rule_y)
-    # Page number centered
-    c.setFont("Helvetica", 9)
-    page_label = f"Page {page_num}"
-    label_width = c.stringWidth(page_label, "Helvetica", 9)
-    c.drawString((width - label_width) / 2, footer_y + 2 * mm, page_label)
-    # Body content starts 18 mm below the bottom edge of the logo slot.
-    content_start_y = top_y - logo_size - 18 * mm
-    return x_margin, content_start_y, width, height
-# ------------------ PDF GENERATORS ------------------
-def generate_plagiarism_pdf(user: dict, text: str, result: dict) -> StreamingResponse:
-    """
-    Generate plagiarism PDF in TrueWrite Scan style.
-    Layout: the first page carries the title, the overall similarity figure,
-    the estimated unique percentage, the summary and the submitted text (cut
-    to 8000 characters, continuing onto further pages as needed); a final
-    page lists up to 10 matched sources.
-    Args:
-        user: accepted but not read by this function.
-        text: the submitted text to print in the report body.
-        result: dict read via .get() for "plagiarism_percent", "summary"
-            and "matches".
-    Returns:
-        StreamingResponse over the in-memory PDF, sent as an attachment
-        named "plagiarism-report.pdf".
-    """
-    buf = io.BytesIO()
-    c = canvas.Canvas(buf, pagesize=A4)
-    report_title = "Plagiarism Scan Report"
-    plagiarism_percent = float(result.get("plagiarism_percent", 0.0))
-    # Complement of the plagiarism figure, clamped so it never goes below 0.
-    unique_percent = max(0.0, round(100.0 - plagiarism_percent, 2))
-    # ---------- PAGE 1 ----------
-    page_num = 1
-    x_margin, y, width, height = _draw_header_footer(c, page_num)
-    # Title (horizontally centered)
-    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
-    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
-    c.drawString((width - title_width) / 2, y, report_title)
-    y -= 18 * mm
-    # Overall similarity big number: red from 1% upwards, green below that
-    c.setFont("Helvetica-Bold", 22)
-    c.setFillColor(colors.red if plagiarism_percent >= 1.0 else colors.green)
-    c.drawString(x_margin, y, f"{plagiarism_percent:.0f}%")
-    c.setFont("Helvetica", 11)
-    c.setFillColor(colors.black)
-    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Overall Similarity")
-    y -= 12 * mm
-    # Additional info
-    c.setFont("Helvetica", 10)
-    c.drawString(x_margin, y, f"Estimated Unique Content: {unique_percent:.0f}%")
-    y -= 6 * mm
-    summary = result.get("summary", "")
-    # The summary lines are drawn without a page-break check.
-    if summary:
-        for line in _wrap_text("Summary: " + summary, 110):
-            c.drawString(x_margin, y, line)
-            y -= 5 * mm
-    else:
-        y -= 5 * mm
-    y -= 10 * mm
-    # Body text: original text (truncated)
-    c.setFont("Helvetica", 10)
-    truncated = text.strip()
-    # _wrap_text splits on whitespace, so the newlines in the truncation marker
-    # end up rendered as ordinary spaces.
-    if len(truncated) > 8000:
-        truncated = truncated[:8000] + "\n...\n[Content truncated for report]"
-    for line in _wrap_text(truncated, 110):
-        # Start a new page (with header/footer) before running into the footer area.
-        if y < 40 * mm:
-            c.showPage()
-            page_num += 1
-            x_margin, y, width, height = _draw_header_footer(c, page_num)
-            # _draw_header_footer changes the font, so restore the body font.
-            c.setFont("Helvetica", 10)
-        c.drawString(x_margin, y, line)
-        y -= 5 * mm
-    # ---------- NEXT PAGE: MATCHED SOURCES ----------
-    c.showPage()
-    page_num += 1
-    x_margin, y, width, height = _draw_header_footer(c, page_num)
-    c.setFont("Helvetica-Bold", 12)
-    c.drawString(x_margin, y, "Matched Sources")
-    y -= 10 * mm
-    c.setFont("Helvetica", 10)
-    matches = result.get("matches", []) or []
-    if not matches:
-        c.drawString(x_margin, y, "No specific sources recorded. Content appears mostly unique.")
-    else:
-        # Only the first 10 matches are listed.
-        for idx, m in enumerate(matches[:10], start=1):
-            title = m.get("title", "Source")
-            # Uses "score" when the key is present, otherwise "tfidf_score" (or 0.0).
-            score = m.get("score", m.get("tfidf_score", 0.0) or 0.0)
-            line = f"{idx}. {title} — {score:.2f}% match"
-            for part in _wrap_text(line, 110):
-                c.drawString(x_margin, y, part)
-                y -= 5 * mm
-                # Here the page-break check runs after drawing each line.
-                if y < 40 * mm:
-                    c.showPage()
-                    page_num += 1
-                    x_margin, y, width, height = _draw_header_footer(c, page_num)
-                    c.setFont("Helvetica", 10)
-    c.save()
-    # Rewind so the response streams the PDF from its first byte.
-    buf.seek(0)
-    return StreamingResponse(
-        buf,
-        media_type="application/pdf",
-        headers={"Content-Disposition": "attachment; filename=plagiarism-report.pdf"},
-    )
-def generate_ai_pdf(user: dict, text: str, result: dict) -> StreamingResponse:
-    """
-    AI content analysis PDF.
-    result from ai-check logic.
-    The report shows the title, the AI probability figure, the human
-    probability, word count, average sentence length and summary, followed by
-    the submitted text (cut to 8000 characters, paginated as needed).
-    Args:
-        user: accepted but not read by this function.
-        text: the submitted text to print in the report body.
-        result: dict read via .get() for "ai_percent", "human_percent",
-            "word_count", "avg_sentence_length" and "summary".
-    Returns:
-        StreamingResponse over the in-memory PDF, sent as an attachment
-        named "truewrite-ai-report.pdf".
-    """
-    buf = io.BytesIO()
-    c = canvas.Canvas(buf, pagesize=A4)
-    report_title = "AI Content Analysis Report"
-    # Missing keys fall back to 0% AI / 100% human / zero counts.
-    ai_percent = float(result.get("ai_percent", 0.0))
-    human_percent = float(result.get("human_percent", 100.0))
-    word_count = int(result.get("word_count", 0))
-    avg_len = float(result.get("avg_sentence_length", 0.0))
-    # PAGE 1
-    page_num = 1
-    x_margin, y, width, height = _draw_header_footer(c, page_num)
-    # Title (horizontally centered)
-    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
-    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
-    c.drawString((width - title_width) / 2, y, report_title)
-    y -= 18 * mm
-    # Big AI probability: red from 50% upwards, green below that
-    c.setFont("Helvetica-Bold", 22)
-    if ai_percent >= 50:
-        c.setFillColor(colors.red)
-    else:
-        c.setFillColor(colors.green)
-    c.drawString(x_margin, y, f"{ai_percent:.0f}%")
-    c.setFont("Helvetica", 11)
-    c.setFillColor(colors.black)
-    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Estimated AI Probability")
-    y -= 12 * mm
-    # Extra stats
-    c.setFont("Helvetica", 10)
-    c.drawString(x_margin, y, f"Estimated Human Probability: {human_percent:.0f}%")
-    y -= 6 * mm
-    c.drawString(x_margin, y, f"Word Count: {word_count}")
-    y -= 6 * mm
-    c.drawString(x_margin, y, f"Average Sentence Length: {avg_len:.2f} words")
-    y -= 6 * mm
-    summary = result.get("summary", "")
-    # The summary lines are drawn without a page-break check.
-    if summary:
-        for line in _wrap_text("Summary: " + summary, 110):
-            c.drawString(x_margin, y, line)
-            y -= 5 * mm
-        y -= 5 * mm
-    else:
-        y -= 10 * mm
-    # Body text
-    c.setFont("Helvetica", 10)
-    truncated = text.strip()
-    # _wrap_text splits on whitespace, so the newlines in the truncation marker
-    # end up rendered as ordinary spaces.
-    if len(truncated) > 8000:
-        truncated = truncated[:8000] + "\n...\n[Content truncated for report]"
-    for line in _wrap_text(truncated, 110):
-        # Start a new page (with header/footer) before running into the footer area.
-        if y < 40 * mm:
-            c.showPage()
-            page_num += 1
-            x_margin, y, width, height = _draw_header_footer(c, page_num)
-            # _draw_header_footer changes the font, so restore the body font.
-            c.setFont("Helvetica", 10)
-        c.drawString(x_margin, y, line)
-        y -= 5 * mm
-    c.save()
-    # Rewind so the response streams the PDF from its first byte.
-    buf.seek(0)
-    return StreamingResponse(
-        buf,
-        media_type="application/pdf",
-        headers={"Content-Disposition": "attachment; filename=truewrite-ai-report.pdf"},
-    )
-def generate_grammar_pdf(user: dict, original_text: str, corrected_text: str, result: dict) -> StreamingResponse:
-    """
-    Grammar correction PDF.
-    result from grammar-check logic.
-    The first section shows the title, the number of corrections, the number
-    of words analysed, the summary and the original text; the corrected text
-    always starts on a fresh page. Each text is cut to 4000 characters.
-    Args:
-        user: accepted but not read by this function.
-        original_text: text as submitted.
-        corrected_text: text after correction.
-        result: dict read via .get() for "corrections", "original_words"
-            and "summary".
-    Returns:
-        StreamingResponse over the in-memory PDF, sent as an attachment
-        named "truewrite-grammar-report.pdf".
-    """
-    buf = io.BytesIO()
-    c = canvas.Canvas(buf, pagesize=A4)
-    report_title = "Grammar Correction Report"
-    corrections = int(result.get("corrections", 0))
-    original_words = int(result.get("original_words", 0))
-    summary = result.get("summary", "")
-    # PAGE 1: Metrics + Original
-    page_num = 1
-    x_margin, y, width, height = _draw_header_footer(c, page_num)
-    # Title (horizontally centered)
-    c.setFont("Helvetica-Bold", TITLE_FONT_SIZE)
-    title_width = c.stringWidth(report_title, "Helvetica-Bold", TITLE_FONT_SIZE)
-    c.drawString((width - title_width) / 2, y, report_title)
-    y -= 18 * mm
-    # Big metric: corrections (blue when any were applied, green when none)
-    c.setFont("Helvetica-Bold", 22)
-    c.setFillColor(colors.blue if corrections > 0 else colors.green)
-    c.drawString(x_margin, y, f"{corrections}")
-    c.setFont("Helvetica", 11)
-    c.setFillColor(colors.black)
-    c.drawString(x_margin + 22 * mm, y + 2 * mm, "Corrections Applied")
-    y -= 12 * mm
-    c.setFont("Helvetica", 10)
-    c.drawString(x_margin, y, f"Words Analysed: {original_words}")
-    y -= 6 * mm
-    # The summary lines are drawn without a page-break check.
-    if summary:
-        for line in _wrap_text("Summary: " + summary, 110):
-            c.drawString(x_margin, y, line)
-            y -= 5 * mm
-        y -= 5 * mm
-    else:
-        y -= 10 * mm
-    # Original text
-    c.setFont("Helvetica-Bold", 11)
-    c.drawString(x_margin, y, "Original Text")
-    y -= 7 * mm
-    c.setFont("Helvetica", 10)
-    orig = original_text.strip()
-    # _wrap_text splits on whitespace, so the newlines in the truncation marker
-    # end up rendered as ordinary spaces.
-    if len(orig) > 4000:
-        orig = orig[:4000] + "\n...\n[Content truncated for report]"
-    for line in _wrap_text(orig, 110):
-        # Start a new page (with header/footer) before running into the footer area.
-        if y < 40 * mm:
-            c.showPage()
-            page_num += 1
-            x_margin, y, width, height = _draw_header_footer(c, page_num)
-            # _draw_header_footer changes the font, so restore the body font.
-            c.setFont("Helvetica", 10)
-        c.drawString(x_margin, y, line)
-        y -= 5 * mm
-    # PAGE 2: Corrected text
-    c.showPage()
-    page_num += 1
-    x_margin, y, width, height = _draw_header_footer(c, page_num)
-    c.setFont("Helvetica-Bold", 11)
-    c.drawString(x_margin, y, "Corrected Text")
-    y -= 7 * mm
-    c.setFont("Helvetica", 10)
-    corr = corrected_text.strip()
-    if len(corr) > 4000:
-        corr = corr[:4000] + "\n...\n[Content truncated for report]"
-    for line in _wrap_text(corr, 110):
-        if y < 40 * mm:
-            c.showPage()
-            page_num += 1
-            x_margin, y, width, height = _draw_header_footer(c, page_num)
-            c.setFont("Helvetica", 10)
-        c.drawString(x_margin, y, line)
-        y -= 5 * mm
-    c.save()
-    # Rewind so the response streams the PDF from its first byte.
-    buf.seek(0)
-    return StreamingResponse(
-        buf,
-        media_type="application/pdf",
-        headers={"Content-Disposition": "attachment; filename=truewrite-grammar-report.pdf"},
-    )
 # ------------------ ENDPOINTS ------------------
 @app.post("/api/signup")
@@ -1301,133 +919,6 @@ def api_ai_check_file(file: UploadFile = File(...), user=Depends(get_current_use
     return api_ai_check.__wrapped__(TextRequest(text=text), user)
-# ------------------ PDF REPORT ENDPOINTS ------------------
-@app.post("/api/plagiarism-report")
-def api_plagiarism_report(req: TextRequest, user=Depends(get_current_user)):
-    """
-    Return a PDF plagiarism report (TrueWrite Scan style) for the posted text.
-    Responds with HTTP 400 when the text is empty after stripping. The scan
-    uses corpus_plagiarism_combined and switches to demo_plagiarism_fallback
-    if that raises. The request is recorded through save_history before the
-    PDF is generated.
-    """
-    submitted = (req.text or "").strip()
-    if not submitted:
-        raise HTTPException(status_code=400, detail="Text is required")
-    try:
-        scan = corpus_plagiarism_combined(submitted)
-    except Exception as exc:
-        print("[Plagiarism-Report] Combined engine failed, falling back:", exc)
-        scan = demo_plagiarism_fallback(submitted)
-    history_summary = scan.get("summary", "")
-    save_history(user["id"], "plagiarism_report", submitted, history_summary)
-    # Only the name and email are forwarded to the PDF generator.
-    requester = {field: user.get(field) for field in ("name", "email")}
-    return generate_plagiarism_pdf(requester, submitted, scan)
-@app.post("/api/ai-report")
-def api_ai_report(req: TextRequest, user=Depends(get_current_user)):
-    """
-    Generate a PDF AI analysis report in the TrueWrite Scan style.
-    Responds with HTTP 400 when the text is empty after stripping. When both
-    `model` and `tokenizer` are available the text is scored in word chunks
-    and the per-chunk AI probabilities are averaged; if they are unavailable
-    or inference raises, heuristic_ai_score is used instead. The request is
-    recorded through save_history before the PDF is generated.
-    """
-    text = (req.text or "").strip()
-    if not text:
-        raise HTTPException(status_code=400, detail="Text is required")
-    # Stays None until the model path succeeds; None triggers the heuristic below.
-    result = None
-    if model is not None and tokenizer is not None:
-        try:
-            # Fall back to 512 when the tokenizer reports no limit or one above 1024.
-            max_len = getattr(tokenizer, "model_max_length", 512)
-            if max_len is None or max_len > 1024:
-                max_len = 512
-            words = text.split()
-            # chunk_size counts whitespace-separated words, while max_len is passed
-            # to the tokenizer as max_length; truncation=True cuts any chunk that
-            # tokenizes to more than max_len.
-            chunk_size = min(400, max_len - 10)
-            chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
-            probs = []
-            for chunk in chunks:
-                inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=max_len)
-                inputs = {k: v.to(device) for k, v in inputs.items()}
-                with torch.no_grad():
-                    outputs = model(**inputs)
-                    logits = outputs.logits
-                    p = torch.softmax(logits, dim=1).cpu().numpy()[0]
-                    # Index 1 is used as the AI probability when there is more than one
-                    # output, otherwise index 0.
-                    # NOTE(review): assumes label index 1 is the AI class — confirm
-                    # against the model's label mapping.
-                    ai_prob = float(p[1]) if p.shape[0] > 1 else float(p[0])
-                    probs.append(ai_prob)
-            # Unweighted mean over chunks, regardless of chunk length.
-            avg_ai_prob = float(np.mean(probs)) if probs else 0.0
-            ai_percent = round(avg_ai_prob * 100, 2)
-            human_percent = round(100 - ai_percent, 2)
-            words_count = len(words)
-            # Sentences are the non-empty pieces between runs of ., ! or ?
-            sentences = [s.strip() for s in re.split(r"[.!?]+", text) if s.strip()]
-            # `or 1` avoids dividing by zero when no sentence is found.
-            avg_sentence_len = round(words_count / (len(sentences) or 1), 2)
-            summary = f"Model: {AI_DETECTOR_MODEL}; AI probability: {ai_percent}%"
-            result = {
-                "ai_percent": ai_percent,
-                "human_percent": human_percent,
-                "word_count": words_count,
-                "avg_sentence_length": avg_sentence_len,
-                "summary": summary,
-            }
-        except Exception as e:
-            # Best effort: log and leave result as None so the heuristic runs.
-            print("[AI-report] model inference failed:", e)
-    if result is None:
-        ai_percent, human_percent, wc, avg_len, uniq = heuristic_ai_score(text)
-        summary = f"HEURISTIC fallback — AI probability: {ai_percent}%"
-        # Only this fallback result carries the extra "unique_ratio" key.
-        result = {
-            "ai_percent": ai_percent,
-            "human_percent": human_percent,
-            "word_count": wc,
-            "avg_sentence_length": avg_len,
-            "unique_ratio": round(uniq, 3),
-            "summary": summary,
-        }
-    save_history(user["id"], "ai_report", text, result.get("summary", ""))
-    # Only the name and email are forwarded to the PDF generator.
-    user_info = {
-        "name": user.get("name"),
-        "email": user.get("email"),
-    }
-    return generate_ai_pdf(user_info, text, result)
-@app.post("/api/grammar-report")
-def api_grammar_report(req: TextRequest, user=Depends(get_current_user)):
-    """
-    Return a PDF grammar correction report (TrueWrite Scan style) for the posted text.
-    Responds with HTTP 400 when the text is empty after stripping. The
-    corrector is chosen in order of preference: gector_correct when GEC_MODEL
-    is set, grammar_with_languagetool when lt_tool is set, otherwise
-    simple_grammar_correct. The request is recorded through save_history
-    before the PDF is generated.
-    """
-    content = (req.text or "").strip()
-    if not content:
-        raise HTTPException(status_code=400, detail="Text is required")
-    if GEC_MODEL is not None:
-        fixed_text, edit_count, word_total = gector_correct(content)
-        note = f"GECToR neural GEC: {edit_count} edits; words analysed: {word_total}"
-    elif lt_tool is not None:
-        fixed_text, edit_count, word_total = grammar_with_languagetool(content)
-        note = f"LanguageTool corrections: {edit_count}; words analysed: {word_total}"
-    else:
-        fixed_text, edit_count, word_total = simple_grammar_correct(content)
-        note = f"HEURISTIC corrections: {edit_count}; words analysed: {word_total}"
-    save_history(user["id"], "grammar_report", content, note)
-    metrics = {"original_words": word_total, "corrections": edit_count, "summary": note}
-    # Only the name and email are forwarded to the PDF generator.
-    requester = {field: user.get(field) for field in ("name", "email")}
-    return generate_grammar_pdf(requester, content, fixed_text, metrics)
 # ------------------ HISTORY ------------------
 @app.get("/api/history")
 def api_history(user=Depends(get_current_user)):
@@ -1454,4 +945,4 @@ def api_history(user=Depends(get_current_user)):
 @app.get("/")
 def read_root():
-    return {"status": "Backend is running with GECToR + 16GB RAM + PDF reports!"}

 from dotenv import load_dotenv
 from fastapi import FastAPI, HTTPException, status, Header, Depends, File, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, EmailStr
 from passlib.context import CryptContext
 import jwt
     gector_predict = None
     load_verb_dict = None
 # ------------------ ENV & DB SETUP ------------------
 load_dotenv()
     return {"plagiarism_percent": plagiarism_percent, "matches": matches, "summary": summary}
 # ------------------ ENDPOINTS ------------------
 @app.post("/api/signup")
     return api_ai_check.__wrapped__(TextRequest(text=text), user)
 # ------------------ HISTORY ------------------
 @app.get("/api/history")
 def api_history(user=Depends(get_current_user)):
 @app.get("/")
 def read_root():
+    return {"status": "Backend is running with GECToR + 16GB RAM!"}