krrishsinha commited on
Commit
038b34c
·
1 Parent(s): f64cf0e

backend added

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. app.py +160 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the Lawlytics FastAPI backend.
# Port 7860 is the port Hugging Face Spaces expects the app to listen on.
FROM python:3.10-slim

WORKDIR /code

# Copy and install requirements first so Docker layer caching skips the
# (slow) pip install when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code.
COPY . .

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library
import os
import tempfile
from functools import lru_cache

# Third-party
import fitz  # PyMuPDF
import torch
import torch.nn.functional as F
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
# -----------------------------------------
# GLOBAL PDF CACHE
# -----------------------------------------
# Text of the most recently uploaded PDF, reused by /summarise, /qna and
# /clausedetection so clients don't have to re-send the document.
# NOTE(review): a single module-level slot is shared by ALL clients — two
# concurrent users will overwrite each other's document. Confirm this is
# acceptable for the deployment (single-user Space) before shipping.
pdf_cache = {"text": None}

# -----------------------------------------
# HUGGINGFACE MODEL PATHS
# -----------------------------------------
SUMMARY_MODEL = "krrishsinha/legal_summariser"  # summarization pipeline
QNA_MODEL = "krrishsinha/nlpques-ans"  # extractive question-answering pipeline
CLAUSE_MODEL = "krrishsinha/clausedetectionfinal"  # sequence-classification clause detector
# -----------------------------------------
# PDF READER
# -----------------------------------------
def pdfopen(filepath: str) -> str:
    """Extract all text from a PDF file.

    Args:
        filepath: Path to a PDF readable by PyMuPDF.

    Returns:
        The concatenated text of every page, stripped of leading/trailing
        whitespace. Empty string when the PDF has no extractable text.
    """
    doc = fitz.open(filepath)
    try:
        # "".join preserves the original page-by-page concatenation while
        # avoiding quadratic string building on large documents.
        text = "".join(page.get_text() for page in doc)
    finally:
        # Always release the document handle — the original leaked it when
        # get_text() raised mid-loop.
        doc.close()
    return text.strip()
33
+
34
+
35
+ # -----------------------------------------
36
+ # SUMMARIZER PIPELINE
37
+ # -----------------------------------------
38
+ def summarizer():
39
+ return pipeline("summarization", model=SUMMARY_MODEL)
40
+
41
+
42
+ # -----------------------------------------
43
+ # QNA PIPELINE
44
+ # -----------------------------------------
45
+ def anq():
46
+ return pipeline("question-answering", model=QNA_MODEL)
47
+
48
+
49
+ # -----------------------------------------
50
+ # CLAUSE DETECTION
51
+ # -----------------------------------------
52
+ def clause(sen):
53
+
54
+ tokenizer = AutoTokenizer.from_pretrained(CLAUSE_MODEL)
55
+ model = AutoModelForSequenceClassification.from_pretrained(CLAUSE_MODEL)
56
+ config = AutoConfig.from_pretrained(CLAUSE_MODEL)
57
+
58
+ inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
59
+
60
+ with torch.no_grad():
61
+ outputs = model(**inputs)
62
+ logits = outputs.logits
63
+ pred_id = int(torch.argmax(logits, dim=1).item())
64
+
65
+ predicted_label = config.id2label.get(pred_id, f"LABEL_{pred_id}")
66
+ return predicted_label
67
+
68
+
69
+ # -----------------------------------------
70
+ # FASTAPI APP
71
+ # -----------------------------------------
72
+ app = FastAPI()
73
+
74
+ app.add_middleware(
75
+ CORSMiddleware,
76
+ allow_origins=["*"],
77
+ allow_credentials=True,
78
+ allow_methods=["*"],
79
+ allow_headers=["*"],
80
+ )
81
+
82
+
83
+ @app.get("/")
84
+ def welcome():
85
+ return {"welcome": "Lawlytics AI Corporate Legal Intelligence"}
86
+
87
+
88
+ # -----------------------------------------
89
+ # PDF UPLOAD
90
+ # -----------------------------------------
91
+ @app.post("/upload")
92
+ async def uploading(file: UploadFile = File(...)):
93
+ try:
94
+ file_path = f"./{file.filename}"
95
+ with open(file_path, "wb") as f:
96
+ f.write(await file.read())
97
+
98
+ t = pdfopen(file_path)
99
+
100
+ if not t:
101
+ raise HTTPException(status_code=400, detail="No text found in PDF")
102
+
103
+ pdf_cache["text"] = t
104
+ return {"message": "PDF processed successfully", "characters_extracted": len(t)}
105
+
106
+ except Exception as e:
107
+ raise HTTPException(status_code=500, detail=str(e))
108
+
109
+
110
+ # -----------------------------------------
111
+ # SUMMARISATION
112
+ # -----------------------------------------
113
+ @app.post("/summarise")
114
+ def summary():
115
+ txt = pdf_cache["text"]
116
+ if not txt:
117
+ raise HTTPException(status_code=400, detail="Upload PDF first")
118
+
119
+ summarise_fn = summarizer()
120
+ output = summarise_fn(txt, max_length=100, min_length=30, do_sample=False)
121
+
122
+ return {"summary": output}
123
+
124
+
125
+ # -----------------------------------------
126
+ # QUESTION ANSWERING
127
+ # -----------------------------------------
128
+ class QnaRequest(BaseModel):
129
+ question: str
130
+ context: str = None
131
+
132
+
133
+ @app.post("/qna")
134
+ def quesans(payload: QnaRequest):
135
+ if not pdf_cache["text"] and not payload.context:
136
+ raise HTTPException(status_code=400, detail="Upload PDF first")
137
+
138
+ context = payload.context or pdf_cache["text"]
139
+
140
+ qna_fn = anq()
141
+ result = qna_fn(question=payload.question, context=context)
142
+
143
+ return {"answer": result["answer"]}
144
+
145
+
146
+ # -----------------------------------------
147
+ # CLAUSE DETECTION
148
+ # -----------------------------------------
149
+ class ClauseRequest(BaseModel):
150
+ text: str = None
151
+
152
+
153
+ @app.post("/clausedetection")
154
+ def clausing(payload: ClauseRequest):
155
+ text = payload.text or pdf_cache["text"]
156
+ if not text:
157
+ raise HTTPException(status_code=400, detail="Provide text or upload PDF first")
158
+
159
+ detected = clause(text)
160
+ return {"detected_clause": detected}
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
# Web framework + ASGI server
fastapi
uvicorn
# Required by FastAPI for UploadFile/form parsing
python-multipart
# PDF text extraction (imported as `fitz`)
pymupdf
# Model inference stack
transformers
torch
huggingface_hub
safetensors
numpy