Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,51 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
| 2 |
from inference import get_policy_decision
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
def predict(policy_pdf, query, reasoning_style):
|
| 5 |
-
# 1. Extract and chunk the PDF file
|
| 6 |
if policy_pdf is None:
|
| 7 |
return "Please upload a document!", None, {}
|
| 8 |
chunks = extract_and_chunk(policy_pdf.name)
|
|
@@ -13,16 +56,6 @@ def predict(policy_pdf, query, reasoning_style):
|
|
| 13 |
amount = output.get("amount", None)
|
| 14 |
return f"{emoji} <b>{decision}</b>", amount, output["justification"]
|
| 15 |
|
| 16 |
-
def extract_and_chunk(pdf_path):
|
| 17 |
-
# Insert your PDF extraction + chunking code here, return the list of chunks
|
| 18 |
-
import fitz
|
| 19 |
-
doc = fitz.open(pdf_path)
|
| 20 |
-
pages = [page.get_text() for page in doc]
|
| 21 |
-
text = "\n".join(pages)
|
| 22 |
-
# Use your smart_chunk_policy or chunk_text_with_headers here
|
| 23 |
-
from your_chunking_module import smart_chunk_policy # import from your code!
|
| 24 |
-
return smart_chunk_policy(text)
|
| 25 |
-
|
| 26 |
with gr.Blocks(theme=gr.themes.Default()) as demo:
|
| 27 |
gr.Markdown("# 🏥 Policy Decision System (LLM-powered)")
|
| 28 |
gr.Markdown("Upload your insurance policy document and enter your claim query:")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import fitz
|
| 3 |
+
|
| 4 |
from inference import get_policy_decision
|
| 5 |
|
| 6 |
+
def smart_chunk_policy(text, max_words=500, stride=250):
    """Split policy text into overlapping word chunks tagged with section headers.

    Lines matching a known section keyword ("exclusion", "coverage", ...) are
    treated as headers; every chunk emitted afterwards is prefixed with the
    headers seen so far, so downstream retrieval keeps the section context.

    Args:
        text: Full policy document text.
        max_words: Flush a chunk once this many words have accumulated.
        stride: Number of trailing words carried over into the next chunk for
            overlap; 0 disables overlap.

    Returns:
        list[str]: Chunk strings in document order. Empty input yields [].
    """
    import re

    KEY_SECTIONS = [
        "exclusion", "waiting period", "specified disease", "specified procedure",
        "pre-existing", "joint replacement", "coverage", "benefit", "day care",
    ]
    SECTION_REGEX = re.compile("|".join(rf"({k})" for k in KEY_SECTIONS), re.IGNORECASE)

    chunks = []
    words = []
    headers = []

    def _flush():
        # Emit the buffered words as one chunk, prefixed with all headers
        # collected so far so each chunk carries its section context.
        prefix = " ".join(headers) + "\n" if headers else ""
        chunks.append(prefix + " ".join(words))

    for line in text.splitlines():
        if SECTION_REGEX.search(line):
            if words:
                # BUGFIX: this flush previously appended the bare words with
                # no header prefix, unlike the other two flush sites below —
                # chunks ending at a section boundary lost their context.
                _flush()
                words = []
            headers.append(line.strip())
            continue
        for word in line.split():
            words.append(word)
            if len(words) >= max_words:
                _flush()
                # Keep the last `stride` words so consecutive chunks overlap.
                words = words[-stride:] if stride else []
    if words:
        _flush()
    return chunks
|
| 40 |
+
|
| 41 |
+
def extract_and_chunk(pdf_path):
    """Extract the full text of a PDF and split it into policy chunks.

    Args:
        pdf_path: Filesystem path to the PDF document.

    Returns:
        list[str]: Section-aware text chunks produced by smart_chunk_policy.
    """
    # Context manager closes the PyMuPDF document even if extraction raises
    # (the original left the file handle open — a resource leak).
    with fitz.open(pdf_path) as doc:
        text = "\n".join(page.get_text() for page in doc)
    return smart_chunk_policy(text)
|
| 46 |
+
|
| 47 |
def predict(policy_pdf, query, reasoning_style):
|
| 48 |
+
# 1. Extract and chunk the PDF file
|
| 49 |
if policy_pdf is None:
|
| 50 |
return "Please upload a document!", None, {}
|
| 51 |
chunks = extract_and_chunk(policy_pdf.name)
|
|
|
|
| 56 |
amount = output.get("amount", None)
|
| 57 |
return f"{emoji} <b>{decision}</b>", amount, output["justification"]
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
with gr.Blocks(theme=gr.themes.Default()) as demo:
|
| 60 |
gr.Markdown("# 🏥 Policy Decision System (LLM-powered)")
|
| 61 |
gr.Markdown("Upload your insurance policy document and enter your claim query:")
|