from pydantic import BaseModel
from fastapi import FastAPI, HTTPException
from typing import Annotated, Literal
from pydantic import Field
from fastapi.responses import JSONResponse
import numpy as np
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import AutoConfig
import torch.nn.functional as F
from fastapi.middleware.cors import CORSMiddleware
from fastapi import UploadFile, File
import fitz
from transformers import AutoModelForSequenceClassification, AutoModelForQuestionAnswering
# Module-level holder for the text of the most recently uploaded PDF.
# "text" stays None until the /upload endpoint stores extracted text in it.
pdf_cache = {"text": None}
def pdfopen(filepath : str) -> str:
    """Extract the plain text of every page of a document.

    Opens ``filepath`` with PyMuPDF (``fitz``), concatenates the text of all
    pages in page order, and returns the result with leading and trailing
    whitespace removed.

    Args:
        filepath: Path of the document to open.

    Returns:
        The concatenated, stripped page text. This is an empty string when
        the pages yield no text.
    """
    # The context manager closes the document even when text extraction
    # raises part-way through, so the file handle is never leaked.
    with fitz.open(filepath) as doc:
        # join() avoids rebuilding the accumulated string once per page.
        text = "".join(page.get_text() for page in doc)
    return text.strip()
def clean_short(ans):
    """Return at most the first three whitespace-separated words of ``ans``.

    The kept words are re-joined with single spaces, so runs of whitespace
    and leading/trailing whitespace in the input do not survive.
    """
    # Splitting beyond the third separator is unnecessary: everything after
    # the third word is discarded by the slice anyway.
    leading_words = ans.split(maxsplit=3)[:3]
    return " ".join(leading_words)
# Lazily created summarization pipeline, shared by every call to summarizer().
_SUMMARIZER_PIPELINE = None


def summarizer():
    """Return the summarization pipeline, building it on first use.

    The pipeline for the "krrishsinha/legal_summariser" model is constructed
    once and then reused, so callers no longer pay the model-loading cost on
    every invocation.

    Returns:
        The ``transformers`` summarization pipeline object. The same object
        is returned on every call.
    """
    global _SUMMARIZER_PIPELINE
    if _SUMMARIZER_PIPELINE is None:
        summaryp = "krrishsinha/legal_summariser"
        _SUMMARIZER_PIPELINE = pipeline("summarization", model=summaryp)
    return _SUMMARIZER_PIPELINE
# Lazily created (tokenizer, model) pair, shared by every call to anq().
_QA_TOKENIZER_AND_MODEL = None


def anq():
    """Return the question-answering tokenizer and model, loading them once.

    Both objects are loaded from "google/flan-t5-large" on the first call
    and reused afterwards instead of being reloaded for every request.

    Returns:
        A ``(tokenizer, model)`` tuple. The same objects are returned on
        every call.
    """
    global _QA_TOKENIZER_AND_MODEL
    if _QA_TOKENIZER_AND_MODEL is None:
        model_name = "google/flan-t5-large"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        _QA_TOKENIZER_AND_MODEL = (tokenizer, model)
    return _QA_TOKENIZER_AND_MODEL
# Lazily created (tokenizer, model, config) triple, shared by every call to
# clause().
_CLAUSE_COMPONENTS = None


def clause(sen):
    """Classify ``sen`` and return the predicted clause label.

    The tokenizer, sequence-classification model and config for
    "krrishsinha/clausedetectionfinal" are loaded on the first call and
    reused afterwards instead of being reloaded for every request.

    Args:
        sen: Text to classify; it is tokenized with truncation and padding
            enabled.

    Returns:
        The label that the config's ``id2label`` mapping gives for the
        highest-scoring class, or ``"LABEL_<id>"`` when the mapping has no
        entry for that class id.
    """
    global _CLAUSE_COMPONENTS
    if _CLAUSE_COMPONENTS is None:
        clausep = "krrishsinha/clausedetectionfinal"
        _CLAUSE_COMPONENTS = (
            AutoTokenizer.from_pretrained(clausep),
            AutoModelForSequenceClassification.from_pretrained(clausep),
            AutoConfig.from_pretrained(clausep),
        )
    tokenizer, model, config = _CLAUSE_COMPONENTS

    inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits
    # The arg-max over the raw logits selects the predicted class; the
    # softmax probabilities were never used, so they are no longer computed.
    pred_id = int(torch.argmax(logits, dim=1).item())
    return config.id2label.get(pred_id, f"LABEL_{pred_id}")
class summariser(BaseModel):
    # Request schema with one required string field, ``pdf``.
    pdf: str = Field(..., description="here goes your pdf")
class qna(BaseModel):
    # Request schema with one required string field, ``question``.
    question: str = Field(
        ...,
        description="here goes your question regarding the document you want to ask",
    )
class clausedetection(BaseModel):
    # Request schema with one required string field, ``text``.
    text: str = Field(
        ...,
        description="here goes your text for detecting its clause",
    )
# The FastAPI application object that the route decorators below attach to.
app = FastAPI()
# CORS: every origin, method and header is accepted, with credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# advised against in FastAPI's CORS documentation, which asks for explicit
# origins when credentials are allowed - confirm this is intended.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
def welcome():
    # Root route: returns a fixed one-entry greeting payload.
    greeting = {"welcome to Lawlytics": "AI Corporate Legal Document Intelligence"}
    return greeting
@app.post("/upload")
async def uploading(file : UploadFile = File(...)):
    # Accept an uploaded document, extract its text with pdfopen() and store
    # the text in pdf_cache["text"] for the other endpoints.
    #
    # Responses:
    #   200 - text extracted; reports the number of characters.
    #   400 - the document produced no text.
    #   500 - any other failure; the error text is returned as the detail.
    import os
    import tempfile

    try:
        content = await file.read()

        # The client-supplied filename is untrusted: using it as the write
        # path allowed "../" traversal and overwriting files in the working
        # directory. Only its extension is kept (so the document type can
        # still be recognised from the name); the path itself is chosen by
        # the server.
        extension = os.path.splitext(file.filename or "")[1] or ".pdf"
        with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as tmp:
            tmp.write(content)
            tmp_path = tmp.name

        try:
            t = pdfopen(tmp_path)
        finally:
            # The upload is only needed for extraction; do not leave it on disk.
            os.remove(tmp_path)

        if not t:
            raise HTTPException(status_code=400, detail="No text found in PDF. Maybe it's scanned?")

        pdf_cache["text"] = t
        return {"message": "PDF uploaded & text extracted successfully", "characters_extracted": len(t)}
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged rather
        # than letting the generic handler below rewrap them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/summarise")
def summary():
    # Summarise the cached PDF text; responds 400 when no text is cached.
    cached_text = pdf_cache["text"]
    if not cached_text:
        raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")

    generation_options = {
        "max_length": 100,
        "min_length": 30,
        "do_sample": False,
    }
    summarise = summarizer()
    result = summarise(cached_text, **generation_options)
    # The pipeline output is returned as-is under the "summary" key.
    return {"summary": result}
@app.post("/qna")
def quesans(py: qna):
    # Answer a question about the cached PDF text; responds 400 when no text
    # is cached.
    document_text = pdf_cache["text"]
    if not document_text:
        raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")

    tokenizer, model = anq()

    # Prompt layout: instruction, question, context, then the answer cue.
    instruction = "Provide the answer in only 1 to 3 words.\n"
    question_line = f"Question: {py.question}\n"
    context_line = f"Context: {document_text}\n"
    prompt = instruction + question_line + context_line + "Answer:"

    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(**encoded, max_length=20, num_beams=5, early_stopping=True)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # Keep at most the first three words of the decoded answer.
    return {"answer": clean_short(decoded)}
@app.post("/clausedetection")
def clausing(l : clausedetection):
    # Classify the request text, falling back to the cached PDF text when the
    # request text is empty; responds 400 when neither is available.
    candidate = l.text if l.text else pdf_cache["text"]
    if candidate:
        label = clause(sen=candidate)
        return {"detected clause": label}
    raise HTTPException(status_code=400, detail="Provide text or upload PDF first.")
|