# NOTE: the lines "Spaces: / Sleeping / Sleeping" were a HuggingFace Spaces
# status banner captured by the page scrape — not part of the source file.
| from pydantic import BaseModel | |
| from fastapi import FastAPI, HTTPException | |
| from typing import Annotated, Literal | |
| from pydantic import Field | |
| from fastapi.responses import JSONResponse | |
| import numpy as np | |
| from transformers import pipeline | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from transformers import AutoConfig | |
| import torch.nn.functional as F | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi import UploadFile, File | |
| import fitz | |
| from transformers import AutoModelForSequenceClassification, AutoModelForQuestionAnswering | |
# Module-level cache holding the text of the most recently uploaded PDF.
# NOTE(review): this is single-slot global state — concurrent users overwrite
# each other's document; confirm that is acceptable for the deployment.
pdf_cache = {"text": None}
def pdfopen(filepath: str) -> str:
    """Extract the plain text of every page of a PDF.

    Args:
        filepath: Path to a PDF file readable by PyMuPDF (``fitz``).

    Returns:
        The concatenated text of all pages, stripped of surrounding
        whitespace. Empty string for image-only/scanned PDFs.
    """
    doc = fitz.open(filepath)
    try:
        # "".join(...) builds the text in one pass instead of the original
        # quadratic `text = text + page.get_text()` concatenation loop.
        return "".join(page.get_text() for page in doc).strip()
    finally:
        # Close even when extraction raises, so the file handle is not leaked
        # (the original skipped close() on any exception).
        doc.close()
def clean_short(ans):
    """Trim an answer down to at most its first three whitespace-separated words."""
    first_three = ans.split()[:3]
    return " ".join(first_three)
def summarizer():
    """Return the legal-summarisation pipeline, loading it at most once.

    The original re-created the HuggingFace pipeline (model download +
    weight load) on every request; the instance is now memoised on the
    function object so repeated calls are cheap.

    Returns:
        A ``transformers`` summarization pipeline for
        ``krrishsinha/legal_summariser``.
    """
    if getattr(summarizer, "_pipe", None) is None:
        summarizer._pipe = pipeline("summarization", model="krrishsinha/legal_summariser")
    return summarizer._pipe
def anq():
    """Return ``(tokenizer, model)`` for FLAN-T5 question answering.

    The original reloaded ``google/flan-t5-large`` (several GB) on every
    call; the pair is now loaded once and memoised on the function object.

    Returns:
        Tuple of (AutoTokenizer, AutoModelForSeq2SeqLM).
    """
    cached = getattr(anq, "_cached", None)
    if cached is None:
        model_name = "google/flan-t5-large"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        cached = (tokenizer, model)
        anq._cached = cached
    return cached
def clause(sen):
    """Classify *sen* into a clause category with the fine-tuned classifier.

    Args:
        sen: Clause text (str, or list of str for a batch — the tokenizer
            pads/truncates either way).

    Returns:
        The human-readable predicted label from the model config, or
        ``"LABEL_<id>"`` when the config has no mapping for the id.
    """
    # Load tokenizer/model/config once and memoise — the original re-read
    # all three from the hub on every single prediction.
    cached = getattr(clause, "_cached", None)
    if cached is None:
        repo = "krrishsinha/clausedetectionfinal"
        cached = (
            AutoTokenizer.from_pretrained(repo),
            AutoModelForSequenceClassification.from_pretrained(repo),
            AutoConfig.from_pretrained(repo),
        )
        clause._cached = cached
    tokenizer, model, config = cached
    inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    # (Dropped the original's softmax over logits — the probabilities were
    # computed and then never used.)
    pred_id = int(torch.argmax(logits, dim=1).item())
    return config.id2label.get(pred_id, f"LABEL_{pred_id}")
class summariser(BaseModel):
    """Request body for the summarisation endpoint."""
    # NOTE(review): this field is never read — summary() pulls text from
    # pdf_cache instead; confirm whether the body is still needed.
    pdf : Annotated[str, Field(..., description = "here goes your pdf")]
class qna(BaseModel):
    """Request body for the question-answering endpoint."""
    # The question asked against the cached PDF text.
    question : Annotated[str, Field(..., description = "here goes your question regarding the document you want to ask")]
class clausedetection(BaseModel):
    """Request body for the clause-detection endpoint."""
    # Text to classify; handlers fall back to the cached PDF text when falsy.
    text : Annotated[str, Field(..., description = "here goes your text for detecting its clause")]
# Application instance plus permissive CORS so a browser front-end can call it.
app = FastAPI()
# NOTE(review): wildcard origins combined with allow_credentials=True is
# disallowed by the CORS spec and over-permissive for production — restrict
# allow_origins to the actual front-end domain(s).
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def welcome():
    """Greeting payload for the API root.

    NOTE(review): the route decorator (presumably ``@app.get("/")``) is
    missing in this copy of the file — confirm against the repository.
    """
    greeting = {"welcome to Lawlytics": "AI Corporate Legal Document Intelligence"}
    return greeting
async def uploading(file: UploadFile = File(...)):
    """Receive a PDF upload, extract its text, and cache it for later calls.

    Args:
        file: The uploaded PDF.

    Returns:
        Dict with a success message and the number of characters extracted.

    Raises:
        HTTPException: 400 when the PDF yields no text (e.g. scanned),
            500 for any other failure.
    """
    import os
    import tempfile
    try:
        content = await file.read()
        # Write to a temp file instead of ./{file.filename}: the client-chosen
        # filename allowed path traversal and the file was never cleaned up.
        fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(content)
            t = pdfopen(tmp_path)
        finally:
            os.remove(tmp_path)
        if not t:
            raise HTTPException(status_code=400, detail="No text found in PDF. Maybe it's scanned?")
        pdf_cache["text"] = t
        return {"message": "PDF uploaded & text extracted successfully", "characters_extracted": len(t)}
    except HTTPException:
        # The original's broad handler swallowed the 400 above and re-raised
        # it as a 500; let deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
def summary():
    """Summarise the most recently uploaded PDF's cached text.

    Returns:
        Dict with the summarisation pipeline's output under ``"summary"``.

    Raises:
        HTTPException: 400 when no PDF text has been uploaded yet.
    """
    document_text = pdf_cache["text"]
    if not document_text:
        raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")
    summarize = summarizer()
    result = summarize(document_text, max_length=100, min_length=30, do_sample=False)
    return {"summary": result}
def quesans(py: qna):
    """Answer a short question about the cached PDF text using FLAN-T5.

    Args:
        py: Request body carrying the question.

    Returns:
        Dict with the (at most three-word) answer under ``"answer"``.

    Raises:
        HTTPException: 400 when no PDF text has been uploaded yet.
    """
    txt2 = pdf_cache["text"]
    if not txt2:
        raise HTTPException(
            status_code=400,
            detail="No PDF text found. Upload PDF first."
        )
    tokenizer, model = anq()
    # FLAN-T5 prompt
    prompt = (
        f"Provide the answer in only 1 to 3 words.\n"
        f"Question: {py.question}\n"
        f"Context: {txt2}\n"
        f"Answer:"
    )
    # Truncate to the encoder's window: a full legal document easily exceeds
    # the model's maximum input length, which the original did not guard.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    outputs = model.generate(
        **inputs,
        max_length=20,
        num_beams=5,
        early_stopping=True
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Final small cleanup: enforce the 1-to-3-word answer format.
    answer = clean_short(answer)
    return {"answer": answer}
def clausing(l: clausedetection):
    """Detect the clause category of the supplied text.

    Falls back to the cached PDF text when the request's text is empty.

    Raises:
        HTTPException: 400 when neither text nor a cached PDF is available.
    """
    candidate = l.text or pdf_cache["text"]
    if not candidate:
        raise HTTPException(status_code=400, detail="Provide text or upload PDF first.")
    label = clause(sen=candidate)
    return {"detected clause": label}