Spaces:

krrishsinha
/

Lawlyticsback

Sleeping

App Files Files Community

krrishsinha committed on Nov 7, 2025

Commit

eb305fe

verified ·

1 Parent(s): 038b34c

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -95

app.py CHANGED Viewed

@@ -1,160 +1,193 @@
 from pydantic import BaseModel
-from fastapi import FastAPI, HTTPException, UploadFile, File
-from fastapi.middleware.cors import CORSMiddleware
 from pydantic import Field
 import torch
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
 import torch.nn.functional as F
-import fitz
-# -----------------------------------------
-# GLOBAL PDF CACHE
-# -----------------------------------------
-pdf_cache = {"text": None}
-# -----------------------------------------
-# HUGGINGFACE MODEL PATHS
-# -----------------------------------------
-SUMMARY_MODEL = "krrishsinha/legal_summariser"
-QNA_MODEL = "krrishsinha/nlpques-ans"
-CLAUSE_MODEL = "krrishsinha/clausedetectionfinal"
-# -----------------------------------------
-# PDF READER
-# -----------------------------------------
-def pdfopen(filepath: str) -> str:
     doc = fitz.open(filepath)
     text = ""
     for page in doc:
-        text += page.get_text()
     doc.close()
     return text.strip()
-# -----------------------------------------
-# SUMMARIZER PIPELINE
-# -----------------------------------------
 def summarizer():
-    return pipeline("summarization", model=SUMMARY_MODEL)
-# -----------------------------------------
-# QNA PIPELINE
-# -----------------------------------------
 def anq():
-    return pipeline("question-answering", model=QNA_MODEL)
-# -----------------------------------------
-# CLAUSE DETECTION
-# -----------------------------------------
 def clause(sen):
-    tokenizer = AutoTokenizer.from_pretrained(CLAUSE_MODEL)
-    model = AutoModelForSequenceClassification.from_pretrained(CLAUSE_MODEL)
-    config = AutoConfig.from_pretrained(CLAUSE_MODEL)
     inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
         logits = outputs.logits
         pred_id = int(torch.argmax(logits, dim=1).item())
     predicted_label = config.id2label.get(pred_id, f"LABEL_{pred_id}")
     return predicted_label
-# -----------------------------------------
-# FASTAPI APP
-# -----------------------------------------
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 @app.get("/")
 def welcome():
-    return {"welcome": "Lawlytics AI Corporate Legal Intelligence"}
-# -----------------------------------------
-# PDF UPLOAD
-# -----------------------------------------
 @app.post("/upload")
-async def uploading(file: UploadFile = File(...)):
     try:
         file_path = f"./{file.filename}"
         with open(file_path, "wb") as f:
-            f.write(await file.read())
         t = pdfopen(file_path)
         if not t:
-            raise HTTPException(status_code=400, detail="No text found in PDF")
         pdf_cache["text"] = t
-        return {"message": "PDF processed successfully", "characters_extracted": len(t)}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-# -----------------------------------------
-# SUMMARISATION
-# -----------------------------------------
 @app.post("/summarise")
 def summary():
     txt = pdf_cache["text"]
     if not txt:
-        raise HTTPException(status_code=400, detail="Upload PDF first")
-    summarise_fn = summarizer()
-    output = summarise_fn(txt, max_length=100, min_length=30, do_sample=False)
-    return {"summary": output}
-# -----------------------------------------
-# QUESTION ANSWERING
-# -----------------------------------------
-class QnaRequest(BaseModel):
-    question: str
-    context: str = None
 @app.post("/qna")
-def quesans(payload: QnaRequest):
-    if not pdf_cache["text"] and not payload.context:
-        raise HTTPException(status_code=400, detail="Upload PDF first")
-    context = payload.context or pdf_cache["text"]
-    qna_fn = anq()
-    result = qna_fn(question=payload.question, context=context)
-    return {"answer": result["answer"]}
-# -----------------------------------------
-# CLAUSE DETECTION
-# -----------------------------------------
-class ClauseRequest(BaseModel):
-    text: str = None
-@app.post("/clausedetection")
-def clausing(payload: ClauseRequest):
-    text = payload.text or pdf_cache["text"]
-    if not text:
-        raise HTTPException(status_code=400, detail="Provide text or upload PDF first")
-    detected = clause(text)
-    return {"detected_clause": detected}

 from pydantic import BaseModel
+from fastapi import FastAPI, HTTPException
+from typing import Annotated, Literal
 from pydantic import Field
+from fastapi.responses import JSONResponse
+import numpy as np
+from transformers import pipeline
 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import AutoConfig
 import torch.nn.functional as F
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import UploadFile, File
+import fitz
+# Model identifiers for summarisation, question answering and clause detection.
+# NOTE(review): all three names are rebound further down in this file
+# (`def summary`, `class qna`, `def clause`), so once the module has finished
+# loading they no longer refer to these strings.
+summary = "krrishsinha/legal_summariser"
+qna = "krrishsinha/nlpques-ans"
+clause = "krrishsinha/clausedetectionfinal"
+# Holds the text extracted by the /upload handler; the other handlers read it.
+pdf_cache = {"text": None}
+def pdfopen(filepath: str) -> str:
+    """Extract the plain text of every page of a document.
+
+    Args:
+        filepath: Path of the file to open with ``fitz``.
+
+    Returns:
+        The text of all pages concatenated in page order, with leading and
+        trailing whitespace stripped.
+    """
     doc = fitz.open(filepath)
+    try:
+        # Join once instead of growing a string page by page.
+        text = "".join(page.get_text() for page in doc)
+    finally:
+        # Release the document even when a page fails to extract.
+        doc.close()
     return text.strip()
 def summarizer():
+    """Build the summarisation pipeline.
+
+    Returns:
+        A ``summarization`` pipeline loaded from the
+        ``krrishsinha/legal_summariser`` model.
+    """
+    # The id is spelled out locally on purpose: the module-level name
+    # `summary` is rebound to the /summarise handler later in this file, so
+    # reading it here at call time would pass a function to `pipeline`.
+    model_id = "krrishsinha/legal_summariser"
+    return pipeline("summarization", model=model_id)
 def anq():
+    """Build the question-answering pipeline.
+
+    Returns:
+        A ``question-answering`` pipeline loaded from the
+        ``krrishsinha/nlpques-ans`` model.
+    """
+    # The id is spelled out locally on purpose: the module-level name `qna`
+    # is rebound to the request-body class later in this file, so reading it
+    # here at call time would pass a class to `pipeline`.
+    model_id = "krrishsinha/nlpques-ans"
+    return pipeline("question-answering", model=model_id)
 def clause(sen):
+    """Classify a piece of text into a clause label.
+
+    Args:
+        sen: The text to classify. It is tokenized with truncation enabled.
+
+    Returns:
+        The ``id2label`` entry of the model config for the highest-scoring
+        class, or ``"LABEL_<id>"`` when the config has no entry for that id.
+    """
+    # Inside this function the global name `clause` is this function itself,
+    # so the model id has to be spelled out rather than read from that name.
+    model_id = "krrishsinha/clausedetectionfinal"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForSequenceClassification.from_pretrained(model_id)
+    config = AutoConfig.from_pretrained(model_id)
     inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
         logits = outputs.logits
         pred_id = int(torch.argmax(logits, dim=1).item())
     predicted_label = config.id2label.get(pred_id, f"LABEL_{pred_id}")
     return predicted_label
+# Request-body model with one required string field, `pdf`.
+class summariser(BaseModel):
+    pdf: str = Field(..., description="here goes your pdf")
+# Request body for POST /qna.
+class qna(BaseModel):
+    # The question is always required.
+    question: str = Field(..., description="here goes your question regarding the document you want to ask")
+    # Defaults to an empty string so clients that rely on a previously
+    # uploaded PDF do not have to send a placeholder context.
+    context: str = Field("", description="context on whicht the question should be asked")
+# Request body for POST /clausedetection.
+class clausedetection(BaseModel):
+    # Defaults to an empty string so the handler's fallback to the cached PDF
+    # text can be reached by simply omitting the field.
+    text: str = Field("", description="here goes your text for detecting its clause")
 app = FastAPI()
+# CORS is opened to every origin, method and header.
+# NOTE(review): wildcard origins combined with allow_credentials=True should
+# be confirmed against the FastAPI CORS guidance before tightening or keeping.
+_cors_settings = {
+    "allow_origins": ["*"],
+    "allow_credentials": True,
+    "allow_methods": ["*"],
+    "allow_headers": ["*"],
+}
+app.add_middleware(CORSMiddleware, **_cors_settings)
 @app.get("/")
 def welcome():
+    # Landing payload: a one-entry mapping that names the service.
+    banner = {"welcome to Lawlytics": "AI Corporate Legal Document Intelligence"}
+    return banner
 @app.post("/upload")
+async def uploading(file: UploadFile = File(...)):
+    # Reads the uploaded file, extracts its text with `pdfopen` and stores the
+    # result in `pdf_cache["text"]`.
+    # Responds 400 when no text could be extracted and 500 when reading or
+    # parsing the upload fails.
+    import os
+    import tempfile
+    tmp_path = None
+    try:
+        content = await file.read()
+        # Write to a server-chosen temporary path: the client-supplied
+        # filename is untrusted and must not decide where bytes land on disk.
+        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+            tmp_path = tmp.name
+            tmp.write(content)
+        t = pdfopen(tmp_path)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    finally:
+        # The file is only needed for extraction; do not leave uploads behind.
+        if tmp_path is not None and os.path.exists(tmp_path):
+            os.remove(tmp_path)
+    # Checked outside the try block so the 400 reaches the client instead of
+    # being caught by the generic handler and re-raised as a 500.
+    if not t:
+        raise HTTPException(status_code=400, detail="No text found in PDF. Maybe it's scanned?")
+    pdf_cache["text"] = t
+    return {"message": "PDF uploaded & text extracted successfully", "characters_extracted": len(t)}
 @app.post("/summarise")
 def summary():
+    # Summarises the cached PDF text; responds 400 when nothing is cached.
+    cached_text = pdf_cache["text"]
+    if cached_text:
+        generation_args = {"max_length": 100, "min_length": 30, "do_sample": False}
+        result = summarizer()(cached_text, **generation_args)
+        return {"summary": result}
+    raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")
 @app.post("/qna")
+def quesans(py: qna):
+    # Answers `py.question` against `py.context` when one is supplied,
+    # otherwise against the cached PDF text (same precedence as
+    # /clausedetection). Responds 400 when neither is available.
+    context = py.context or pdf_cache["text"]
+    if not context:
+        raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")
+    answerer = anq()
+    result = answerer(question=py.question, context=context)
+    return {"answer": result["answer"]}
+@app.post("/clausedetection")
+def clausing(l : clausedetection):
+    # Classifies the request text, falling back to the cached PDF text when
+    # the request text is empty; responds 400 when both are empty.
+    candidate = l.text if l.text else pdf_cache["text"]
+    if candidate:
+        return {"detected clause": clause(sen=candidate)}
+    raise HTTPException(status_code=400, detail="Provide text or upload PDF first.")