krrishsinha committed on
Commit
eb305fe
·
verified ·
1 Parent(s): 038b34c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -95
app.py CHANGED
@@ -1,160 +1,193 @@
1
  from pydantic import BaseModel
2
- from fastapi import FastAPI, HTTPException, UploadFile, File
3
- from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import Field
 
 
 
5
  import torch
6
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
 
7
  import torch.nn.functional as F
8
- import fitz
9
-
10
- # -----------------------------------------
11
- # GLOBAL PDF CACHE
12
- # -----------------------------------------
13
- pdf_cache = {"text": None}
14
 
15
- # -----------------------------------------
16
- # HUGGINGFACE MODEL PATHS
17
- # -----------------------------------------
18
- SUMMARY_MODEL = "krrishsinha/legal_summariser"
19
- QNA_MODEL = "krrishsinha/nlpques-ans"
20
- CLAUSE_MODEL = "krrishsinha/clausedetectionfinal"
21
 
 
22
 
23
- # -----------------------------------------
24
- # PDF READER
25
- # -----------------------------------------
26
- def pdfopen(filepath: str) -> str:
27
  doc = fitz.open(filepath)
 
28
  text = ""
 
29
  for page in doc:
30
- text += page.get_text()
 
 
31
  doc.close()
 
32
  return text.strip()
33
 
34
 
35
- # -----------------------------------------
36
- # SUMMARIZER PIPELINE
37
- # -----------------------------------------
38
  def summarizer():
39
- return pipeline("summarization", model=SUMMARY_MODEL)
40
-
 
 
 
 
41
 
42
- # -----------------------------------------
43
- # QNA PIPELINE
44
- # -----------------------------------------
45
  def anq():
46
- return pipeline("question-answering", model=QNA_MODEL)
47
-
 
 
 
 
48
 
49
- # -----------------------------------------
50
- # CLAUSE DETECTION
51
- # -----------------------------------------
52
  def clause(sen):
53
 
54
- tokenizer = AutoTokenizer.from_pretrained(CLAUSE_MODEL)
55
- model = AutoModelForSequenceClassification.from_pretrained(CLAUSE_MODEL)
56
- config = AutoConfig.from_pretrained(CLAUSE_MODEL)
57
-
 
 
58
  inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
59
-
60
  with torch.no_grad():
61
  outputs = model(**inputs)
62
  logits = outputs.logits
 
63
  pred_id = int(torch.argmax(logits, dim=1).item())
64
-
65
  predicted_label = config.id2label.get(pred_id, f"LABEL_{pred_id}")
66
  return predicted_label
 
 
 
 
 
 
 
 
67
 
 
 
 
 
 
68
 
69
- # -----------------------------------------
70
- # FASTAPI APP
71
- # -----------------------------------------
 
 
 
 
 
72
  app = FastAPI()
73
 
74
  app.add_middleware(
75
  CORSMiddleware,
76
- allow_origins=["*"],
77
  allow_credentials=True,
78
  allow_methods=["*"],
79
  allow_headers=["*"],
80
  )
81
 
82
-
83
  @app.get("/")
 
84
  def welcome():
85
- return {"welcome": "Lawlytics AI Corporate Legal Intelligence"}
 
86
 
87
 
88
- # -----------------------------------------
89
- # PDF UPLOAD
90
- # -----------------------------------------
91
  @app.post("/upload")
92
- async def uploading(file: UploadFile = File(...)):
 
 
93
  try:
 
94
  file_path = f"./{file.filename}"
 
95
  with open(file_path, "wb") as f:
96
- f.write(await file.read())
97
-
 
98
  t = pdfopen(file_path)
99
-
100
  if not t:
101
- raise HTTPException(status_code=400, detail="No text found in PDF")
102
-
103
  pdf_cache["text"] = t
104
- return {"message": "PDF processed successfully", "characters_extracted": len(t)}
105
 
 
 
106
  except Exception as e:
 
107
  raise HTTPException(status_code=500, detail=str(e))
 
108
 
109
-
110
- # -----------------------------------------
111
- # SUMMARISATION
112
- # -----------------------------------------
113
  @app.post("/summarise")
 
114
  def summary():
 
115
  txt = pdf_cache["text"]
 
116
  if not txt:
117
- raise HTTPException(status_code=400, detail="Upload PDF first")
118
-
119
- summarise_fn = summarizer()
120
- output = summarise_fn(txt, max_length=100, min_length=30, do_sample=False)
121
-
122
- return {"summary": output}
123
-
124
-
125
- # -----------------------------------------
126
- # QUESTION ANSWERING
127
- # -----------------------------------------
128
- class QnaRequest(BaseModel):
129
- question: str
130
- context: str = None
131
-
132
 
133
  @app.post("/qna")
134
- def quesans(payload: QnaRequest):
135
- if not pdf_cache["text"] and not payload.context:
136
- raise HTTPException(status_code=400, detail="Upload PDF first")
137
-
138
- context = payload.context or pdf_cache["text"]
139
-
140
- qna_fn = anq()
141
- result = qna_fn(question=payload.question, context=context)
142
 
143
- return {"answer": result["answer"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
 
 
 
 
 
 
145
 
146
- # -----------------------------------------
147
- # CLAUSE DETECTION
148
- # -----------------------------------------
149
- class ClauseRequest(BaseModel):
150
- text: str = None
151
 
152
 
153
- @app.post("/clausedetection")
154
- def clausing(payload: ClauseRequest):
155
- text = payload.text or pdf_cache["text"]
156
- if not text:
157
- raise HTTPException(status_code=400, detail="Provide text or upload PDF first")
158
 
159
- detected = clause(text)
160
- return {"detected_clause": detected}
 
1
  from pydantic import BaseModel
2
+ from fastapi import FastAPI, HTTPException
3
+ from typing import Annotated, Literal
4
  from pydantic import Field
5
+ from fastapi.responses import JSONResponse
6
+ import numpy as np
7
+ from transformers import pipeline
8
  import torch
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+ from transformers import AutoConfig
11
  import torch.nn.functional as F
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from fastapi import UploadFile, File
14
+ import fitz
 
 
 
15
 
16
# Hugging Face Hub model ids for the three NLP tasks.
# NOTE(review): all three names are later shadowed — `def summary()` (the
# /summarise endpoint), `class qna(BaseModel)`, and `def clause(sen)` rebind
# these globals, so by request time they no longer hold the id strings.
# Consider renaming to SUMMARY_MODEL / QNA_MODEL / CLAUSE_MODEL.
summary = "krrishsinha/legal_summariser"
qna = "krrishsinha/nlpques-ans"
clause = "krrishsinha/clausedetectionfinal"

# Process-wide cache for the text of the most recently uploaded PDF;
# the "text" value stays None until the first successful /upload.
pdf_cache = {"text": None}
21
 
22
def pdfopen(filepath: str) -> str:
    """Extract all text from the PDF at *filepath*.

    Opens the document with PyMuPDF (fitz), concatenates the plain text of
    every page, and returns it stripped of surrounding whitespace. Returns
    "" for a PDF with no extractable text (e.g. a scanned image).
    """
    doc = fitz.open(filepath)
    try:
        # Single join instead of repeated string += (quadratic in the worst
        # case on non-CPython runtimes).
        return "".join(page.get_text() for page in doc).strip()
    finally:
        # Always release the document handle — the original leaked it when
        # get_text() raised part-way through.
        doc.close()
35
 
36
 
 
 
 
37
def summarizer():
    """Build and return the summarization pipeline.

    The model id is inlined here because the module-level `summary` global is
    rebound by the `/summarise` endpoint's `def summary()` before any request
    arrives, so `model=summary` would pass a function object to `pipeline`.
    (Also drops the dead local `summarypath`.)
    """
    # NOTE(review): this reloads the model on every call — consider caching
    # the pipeline at module level or via functools.lru_cache.
    return pipeline("summarization", model="krrishsinha/legal_summariser")
44
 
 
 
 
45
def anq():
    """Build and return the question-answering pipeline.

    The model id is inlined here because the module-level `qna` global is
    rebound by `class qna(BaseModel)` below, so `model=qna` would pass the
    Pydantic class to `pipeline` at request time. (Also drops the dead local
    `qnapath`.)
    """
    # NOTE(review): reloaded on every request — consider caching the pipeline.
    return pipeline("question-answering", model="krrishsinha/nlpques-ans")
52
 
 
 
 
53
def clause(sen):
    """Classify *sen* with the clause-detection model and return its label.

    Returns the human-readable label from the model config's id2label map,
    falling back to "LABEL_<id>" when the predicted id is missing from it.
    """
    # The id is inlined: this `def` rebinds the module-level `clause` string,
    # so referring to the global here would pass this very function object to
    # from_pretrained(). (Dead local `clausepath` and unused `probs`/softmax
    # computation removed.)
    model_id = "krrishsinha/clausedetectionfinal"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
    config = AutoConfig.from_pretrained(model_id)

    # NOTE(review): loading tokenizer/model/config on every call is expensive
    # — consider loading once at import time.
    inputs = tokenizer(sen, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    pred_id = int(torch.argmax(logits, dim=1).item())
    return config.id2label.get(pred_id, f"LABEL_{pred_id}")
71
+
72
+
73
+
74
+
75
+
76
class summariser(BaseModel):
    """Request body carrying raw PDF text to summarise.

    NOTE(review): no visible endpoint uses this model — /summarise reads the
    cached PDF text instead; confirm whether this class can be removed.
    """

    pdf: Annotated[str, Field(..., description="here goes your pdf")]
79
 
80
+
81
class qna(BaseModel):
    """Request body for the /qna endpoint.

    NOTE(review): this class rebinds the module-level `qna` model-id string,
    which breaks `anq()`'s `pipeline(..., model=qna)` lookup — consider
    renaming the class (e.g. QnaRequest) together with its endpoint usage.
    """

    question: Annotated[str, Field(..., description="here goes your question regarding the document you want to ask")]
    # Typo fixed in the user-facing schema description ("whicht" -> "which").
    context: Annotated[str, Field(..., description="context on which the question should be asked")]
85
 
86
+
87
class clausedetection(BaseModel):
    """Request body for the /clausedetection endpoint.

    `text` is optional: the endpoint falls back to the cached PDF text via
    `l.text or pdf_cache["text"]`; declaring the field required (as before)
    made that fallback unreachable. Making it optional is backward compatible
    for existing clients that always send `text`.
    """

    text: Annotated[str | None, Field(description="here goes your text for detecting its clause")] = None
90
+
91
+
92
+
93
+
94
# Application instance and CORS configuration.
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # rejected by browsers for credentialed requests — confirm whether
    # credentials are actually needed, or pin explicit origins instead.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
103
 
 
104
@app.get("/")
def welcome():
    """Root route; returns a static greeting payload (doubles as a health check)."""
    greeting = {"welcome to Lawlytics": "AI Corporate Legal Document Intelligence"}
    return greeting
109
 
110
 
 
 
 
111
@app.post("/upload")
async def uploading(file: UploadFile = File(...)):
    """Accept a PDF upload, extract its text, and store it in pdf_cache.

    Returns a success message with the extracted character count.
    Raises 400 when the PDF yields no text, 500 for unexpected failures.
    """
    import os

    try:
        # Strip any client-supplied directory components so a crafted
        # filename like "../../etc/x" cannot escape the working directory.
        file_path = f"./{os.path.basename(file.filename)}"
        with open(file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        t = pdfopen(file_path)
        if not t:
            raise HTTPException(status_code=400, detail="No text found in PDF. Maybe it's scanned?")

        pdf_cache["text"] = t
        return {"message": "PDF uploaded & text extracted successfully", "characters_extracted": len(t)}
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) propagate unchanged —
        # the original's broad `except Exception` re-wrapped them as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
135
+
136
 
 
 
 
 
137
@app.post("/summarise")
def summarise_document():
    """Summarise the cached PDF text via the summarization pipeline.

    Renamed from `summary` so this endpoint no longer shadows the
    module-level `summary` model-id string that `summarizer()` reads.
    The HTTP route path is unchanged, so API clients are unaffected.
    Raises 400 when no PDF has been uploaded yet.
    """
    txt = pdf_cache["text"]
    if not txt:
        raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")

    summarise_fn = summarizer()
    # NOTE(review): very long PDFs may exceed the model's input limit —
    # consider chunking the text before summarising.
    output = summarise_fn(txt, max_length=100, min_length=30, do_sample=False)
    return {"summary": output}
155
+
 
 
 
156
 
157
@app.post("/qna")
def quesans(py: qna):
    """Answer a question against the request's context or the cached PDF text.

    The original ignored the request's `context` field entirely; it is now
    honoured, with the cached PDF text as fallback (matching the check).
    Raises 400 when neither a context nor uploaded PDF text is available.
    """
    context = py.context or pdf_cache["text"]
    if not context:
        raise HTTPException(status_code=400, detail="No PDF text found. Upload PDF first.")

    qna_fn = anq()
    result = qna_fn(question=py.question, context=context)
    return {"answer": result["answer"]}
172
+
173
+
174
@app.post("/clausedetection")
def clausing(l: clausedetection):
    """Detect the clause type of the supplied text (or of the cached PDF text)."""
    candidate = l.text or pdf_cache["text"]
    if not candidate:
        raise HTTPException(status_code=400, detail="Provide text or upload PDF first.")
    return {"detected clause": clause(sen=candidate)}
186
+
187
+
188
 
189
 
190
+
191
+
192
+
 
 
193