Spaces:

httpsAkayush
/

docker_api

Sleeping

File size: 4,452 Bytes

3be0a64
db33294
3be0a64
 
 
 
 
db33294
3be0a64
 
b5017bd
3be0a64
 
db33294
 
 
3be0a64
db33294
 
 
 
3be0a64
 
 
 
 
 
 
 
 
 
 
 
 
 
db33294
3be0a64
 
db33294
3be0a64
 
 
 
 
 
 
 
 
 
 
 
 
 
db33294
3be0a64
 
db33294
3be0a64
 
 
b5017bd
3be0a64
fdf5004
3be0a64
 
db33294
3be0a64
 
 
 
 
 
 
 
 
db33294
3be0a64
 
db33294
3be0a64
 
db33294
3be0a64
f2d04f3
3be0a64
 
 
 
 
 
 
 
 
f2d04f3
3be0a64
 
 
f2d04f3
3be0a64
 
 
f2d04f3
3be0a64
f2d04f3
3be0a64
f2d04f3
3be0a64
 
 
 
 
f2d04f3
3be0a64
 
 
 
 
f2d04f3
3be0a64
 
 
 
 
 
 
 
 
 
 
f2d04f3
3be0a64
 
 
f2d04f3
3be0a64
f2d04f3
3be0a64
f2d04f3
3be0a64
 
 
 
f2d04f3
3be0a64
 
f2d04f3
3be0a64
f2d04f3
3be0a64
 
f2d04f3
3be0a64
 
f2d04f3
3be0a64
f2d04f3
3be0a64

from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
import pdfplumber
import pickle
import io
import os
from datetime import datetime

# ================= APP =================
# Application object; the route decorators further down register on it.
app = FastAPI(title="Jimma University Plagiarism API")

# CORS is fully open: every origin, HTTP method and request header is allowed.
# NOTE(review): presumably chosen so any frontend can call the API during
# development — confirm, and narrow allow_origins before a public deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ================= CONFIG =================
# Passed to SentenceTransformer() at import time.
MODEL_PATH = "plagiarism_model"
# Pickle file opened at import time; must contain "embeddings" and "df_ref".
EMBEDDINGS_FILE = "reference_embeddings.pkl"
# Read from the environment. When the variable is unset the literal
# placeholder is used instead, so Gemini calls will fail at request time
# rather than at startup.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_KEY_HERE")

# Compared against cosine similarity multiplied by 100; a top score below
# this value is reported as "low_risk" by /check_plagiarism.
SIMILARITY_THRESHOLD = 30.0

# ================= LOAD SBERT MODEL =================
# Loaded once at import time and shared by every request handler.
model = SentenceTransformer(MODEL_PATH)
print("✅ Model loaded:", MODEL_PATH)

# ================= LOAD REFERENCE DATA =================
# SECURITY: pickle.load executes arbitrary code embedded in the file. Only
# ship an EMBEDDINGS_FILE that was produced by a trusted pipeline.
with open(EMBEDDINGS_FILE, "rb") as f:
    data = pickle.load(f)

# Embeddings the query is compared against with util.cos_sim().
ref_embeddings = data["embeddings"]
# Metadata table indexed positionally via .iloc; handlers read "title" and
# "student_name" from its rows.
# NOTE(review): presumably a pandas DataFrame whose row order matches
# ref_embeddings — confirm against the script that builds the pickle.
df_ref = data["df_ref"]

print("✅ Reference dataset loaded")

# ================= GEMINI =================
# Configure the SDK once with the key from CONFIG, then create the model
# handle that /check_plagiarism uses to generate its written report.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel("gemini-2.5-flash")

# ================= REQUEST MODEL =================
# JSON body accepted by POST /check_plagiarism.
class PlagiarismRequest(BaseModel):
    # Text to check; the handler strips it, rejects fewer than 100 characters
    # and truncates to 8000 characters.
    text: str
    # The three fields below are only interpolated into the Gemini prompt.
    title: str = "Unknown"
    student_name: str = "Unknown"
    year: str = "2026"

# ================= HEALTH CHECK =================
@app.get("/")
def home():
    """Liveness probe: reply with a fixed status message."""
    status_payload = dict(message="Plagiarism API Running 🚀")
    return status_payload

# ================= TEXT CHECK API =================
@app.post("/check_plagiarism")
async def check_plagiarism(req: PlagiarismRequest):
    """Score submitted text against the reference set and report the best match.

    The text is stripped, rejected when shorter than 100 characters and capped
    at 8000 characters. It is then embedded with the SBERT model and compared
    to ``ref_embeddings`` by cosine similarity (multiplied by 100). The
    highest-scoring row of ``df_ref`` is treated as the most similar source.

    Args:
        req: Request body. ``text`` is the content to check; ``title``,
            ``student_name`` and ``year`` are only placed into the Gemini
            prompt.

    Returns:
        A ``"low_risk"`` payload when the top score is below
        ``SIMILARITY_THRESHOLD``. Otherwise a ``"suspicious"`` payload with a
        rating, the matched source, a Gemini-written report (or a
        ``"Gemini error: ..."`` string if that call failed) and a timestamp.

    Raises:
        HTTPException: 400 when the stripped text is shorter than 100
            characters.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    text = req.text.strip()

    if len(text) < 100:
        raise HTTPException(400, "Text too short")

    # Slicing is a no-op for shorter input, so no length guard is needed.
    text = text[:8000]

    # ================= SBERT =================
    # encode() is synchronous; running it in the worker threadpool keeps this
    # coroutine from stalling the event loop for every other request.
    query_embedding = await run_in_threadpool(
        model.encode,
        text,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )

    scores = util.cos_sim(query_embedding, ref_embeddings)[0]
    scores = (scores * 100).cpu().numpy()

    top_idx = int(scores.argmax())
    top_score = float(scores[top_idx])
    similarity_percent = round(top_score, 2)

    row = df_ref.iloc[top_idx]
    source_title = str(row.get("title", "N/A"))

    # ================= LOW RISK =================
    if top_score < SIMILARITY_THRESHOLD:
        return {
            "status": "low_risk",
            "similarity_percent": similarity_percent,
            "rating": 1,
            "most_similar_source": source_title,
            "message": "No significant plagiarism detected"
        }

    # ================= GEMINI REPORT =================
    prompt = f"""
You are an academic plagiarism expert.

Title: {req.title}
Student: {req.student_name}
Year: {req.year}

Similarity: {top_score:.2f}%

Source: {source_title}

Give:
1. Similarity explanation
2. Risk level
3. Recommendation
"""

    try:
        # Blocking network call: offloaded for the same reason as encode().
        response = await run_in_threadpool(gemini_model.generate_content, prompt)
        report = response.text
    except Exception as e:
        # Deliberately best-effort: a failed report must not fail the check,
        # so the error text is returned in place of the report.
        report = f"Gemini error: {e}"

    # ================= RESPONSE =================
    high_similarity = top_score > 70
    return {
        "status": "suspicious",
        "similarity_percent": similarity_percent,
        "rating": 4 if high_similarity else 3,
        "stars": "★★★★☆" if high_similarity else "★★★☆☆",
        "most_similar_source": source_title,
        "source_student": str(row.get("student_name", "N/A")),
        "gemini_report": report,
        "timestamp": datetime.now().isoformat()
    }

# ================= PDF UPLOAD API (OPTIONAL) =================
def _extract_pdf_text(content):
    """Return the text of all pages in the PDF bytes, each page followed by a newline."""
    with pdfplumber.open(io.BytesIO(content)) as pdf:
        # extract_text() is called once per page; pages yielding no text are
        # skipped. The join is consumed while the PDF is still open.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)


@app.post("/check_pdf")
async def check_pdf(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF and report its closest reference match.

    Args:
        file: Uploaded PDF document.

    Returns:
        ``{"error": "PDF too short"}`` when fewer than 100 characters of text
        were extracted. Otherwise a dict with ``status``,
        ``similarity_percent`` (cosine similarity multiplied by 100, rounded
        to 2 places) and ``best_match`` (title of the top ``df_ref`` row).

    Raises:
        HTTPException: 400 when the upload cannot be parsed as a PDF.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    content = await file.read()

    try:
        # PDF parsing is synchronous; run it off the event loop.
        text = await run_in_threadpool(_extract_pdf_text, content)
    except Exception as exc:
        # The parser's exception types are not pinned down here (they appear
        # to vary by library version), so any parse failure is reported as a
        # client error instead of surfacing as an unhandled 500.
        raise HTTPException(400, "Invalid or unreadable PDF") from exc

    # Kept as a 200 response with an "error" key for existing clients.
    # NOTE(review): /check_plagiarism raises HTTP 400 for short text and also
    # caps input at 8000 characters — consider aligning the two endpoints.
    if len(text) < 100:
        return {"error": "PDF too short"}

    # encode() is synchronous; offload it so other requests keep being served.
    query_embedding = await run_in_threadpool(
        model.encode,
        text,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )

    scores = util.cos_sim(query_embedding, ref_embeddings)[0]
    scores = (scores * 100).cpu().numpy()

    top_idx = int(scores.argmax())
    top_score = float(scores[top_idx])

    row = df_ref.iloc[top_idx]

    return {
        "status": "done",
        "similarity_percent": round(top_score, 2),
        "best_match": str(row.get("title", "N/A"))
    }