Kaiyeee committed on
Commit
31f7371
·
verified ·
1 Parent(s): c813a0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -11
app.py CHANGED
@@ -1,8 +1,51 @@
1
  import gradio as gr
 
 
2
  from inference import get_policy_decision
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  def predict(policy_pdf, query, reasoning_style):
5
- # 1. Extract and chunk the PDF file (call your chunking code here)
6
  if policy_pdf is None:
7
  return "Please upload a document!", None, {}
8
  chunks = extract_and_chunk(policy_pdf.name)
@@ -13,16 +56,6 @@ def predict(policy_pdf, query, reasoning_style):
13
  amount = output.get("amount", None)
14
  return f"{emoji} <b>{decision}</b>", amount, output["justification"]
15
 
16
- def extract_and_chunk(pdf_path):
17
- # Insert your PDF extraction + chunking code here, return the list of chunks
18
- import fitz
19
- doc = fitz.open(pdf_path)
20
- pages = [page.get_text() for page in doc]
21
- text = "\n".join(pages)
22
- # Use your smart_chunk_policy or chunk_text_with_headers here
23
- from your_chunking_module import smart_chunk_policy # import from your code!
24
- return smart_chunk_policy(text)
25
-
26
  with gr.Blocks(theme=gr.themes.Default()) as demo:
27
  gr.Markdown("# 🏥 Policy Decision System (LLM-powered)")
28
  gr.Markdown("Upload your insurance policy document and enter your claim query:")
 
1
  import gradio as gr
2
+ import fitz
3
+
4
  from inference import get_policy_decision
5
 
6
def smart_chunk_policy(text, max_words=500, stride=250):
    """Split policy text into overlapping word-count chunks, keyed on headers.

    A line containing any known section keyword is treated as a header: it
    flushes the words buffered so far into a plain chunk, then is remembered.
    Every later size-triggered chunk (and the final flush) is prefixed with
    all headers seen so far, joined by spaces and terminated with a newline.

    Args:
        text: Raw policy document text.
        max_words: Word count at which a chunk is emitted.
        stride: Number of trailing words carried over into the next chunk
            for overlap; 0/None disables the overlap.

    Returns:
        List of chunk strings.
    """
    import re

    key_sections = (
        "exclusion", "waiting period", "specified disease", "specified procedure",
        "pre-existing", "joint replacement", "coverage", "benefit", "day care",
    )
    header_pat = re.compile("|".join(rf"({k})" for k in key_sections), re.IGNORECASE)

    def build_chunk(buf, hdrs):
        # Size-triggered and final chunks carry the accumulated header context.
        prefix = (" ".join(hdrs) + "\n") if hdrs else ""
        return prefix + " ".join(buf)

    chunks, buf, hdrs = [], [], []
    for line in text.splitlines():
        if header_pat.search(line):
            # Header line: flush the buffer WITHOUT a header prefix (matches
            # the original behavior), then record the header for later chunks.
            if buf:
                chunks.append(" ".join(buf))
                buf = []
            hdrs.append(line.strip())
            continue
        for token in line.split():
            buf.append(token)
            if len(buf) >= max_words:
                chunks.append(build_chunk(buf, hdrs))
                # Keep the last `stride` words so consecutive chunks overlap.
                buf = buf[-stride:] if stride else []
    if buf:
        chunks.append(build_chunk(buf, hdrs))
    return chunks
40
+
41
def extract_and_chunk(pdf_path):
    """Extract all text from a PDF via PyMuPDF and return policy chunks.

    Args:
        pdf_path: Filesystem path to the uploaded PDF.

    Returns:
        List of text chunks produced by ``smart_chunk_policy``.
    """
    document = fitz.open(pdf_path)
    # Concatenate per-page text with newlines so header lines stay line-separated.
    full_text = "\n".join(page.get_text() for page in document)
    return smart_chunk_policy(full_text)
46
+
47
  def predict(policy_pdf, query, reasoning_style):
48
+ # 1. Extract and chunk the PDF file
49
  if policy_pdf is None:
50
  return "Please upload a document!", None, {}
51
  chunks = extract_and_chunk(policy_pdf.name)
 
56
  amount = output.get("amount", None)
57
  return f"{emoji} <b>{decision}</b>", amount, output["justification"]
58
 
 
 
 
 
 
 
 
 
 
 
59
  with gr.Blocks(theme=gr.themes.Default()) as demo:
60
  gr.Markdown("# 🏥 Policy Decision System (LLM-powered)")
61
  gr.Markdown("Upload your insurance policy document and enter your claim query:")