Kaiyeee committed on
Commit
31f7371
·
verified ·
1 Parent(s): c813a0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -11
app.py CHANGED
@@ -1,8 +1,51 @@
1
  import gradio as gr
 
 
2
  from inference import get_policy_decision
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  def predict(policy_pdf, query, reasoning_style):
5
- # 1. Extract and chunk the PDF file (call your chunking code here)
6
  if policy_pdf is None:
7
  return "Please upload a document!", None, {}
8
  chunks = extract_and_chunk(policy_pdf.name)
@@ -13,16 +56,6 @@ def predict(policy_pdf, query, reasoning_style):
13
  amount = output.get("amount", None)
14
  return f"{emoji} <b>{decision}</b>", amount, output["justification"]
15
 
16
- def extract_and_chunk(pdf_path):
17
- # Insert your PDF extraction + chunking code here, return the list of chunks
18
- import fitz
19
- doc = fitz.open(pdf_path)
20
- pages = [page.get_text() for page in doc]
21
- text = "\n".join(pages)
22
- # Use your smart_chunk_policy or chunk_text_with_headers here
23
- from your_chunking_module import smart_chunk_policy # import from your code!
24
- return smart_chunk_policy(text)
25
-
26
  with gr.Blocks(theme=gr.themes.Default()) as demo:
27
  gr.Markdown("# 🏥 Policy Decision System (LLM-powered)")
28
  gr.Markdown("Upload your insurance policy document and enter your claim query:")
 
1
  import gradio as gr
2
+ import fitz
3
+
4
  from inference import get_policy_decision
5
 
6
def smart_chunk_policy(text, max_words=500, stride=250):
    """Split policy text into overlapping word-count chunks, keyed on headers.

    A line containing any known section keyword is treated as a header: it
    flushes the words buffered so far into a plain chunk, then is remembered.
    Every later size-triggered chunk (and the final flush) is prefixed with
    all headers seen so far, joined by spaces and terminated with a newline.

    Args:
        text: Raw policy document text.
        max_words: Word count at which a chunk is emitted.
        stride: Number of trailing words carried over into the next chunk
            for overlap; 0/None disables the overlap.

    Returns:
        List of chunk strings.
    """
    import re

    key_sections = (
        "exclusion", "waiting period", "specified disease", "specified procedure",
        "pre-existing", "joint replacement", "coverage", "benefit", "day care",
    )
    header_pat = re.compile("|".join(rf"({k})" for k in key_sections), re.IGNORECASE)

    def build_chunk(buf, hdrs):
        # Size-triggered and final chunks carry the accumulated header context.
        prefix = (" ".join(hdrs) + "\n") if hdrs else ""
        return prefix + " ".join(buf)

    chunks, buf, hdrs = [], [], []
    for line in text.splitlines():
        if header_pat.search(line):
            # Header line: flush the buffer WITHOUT a header prefix (matches
            # the original behavior), then record the header for later chunks.
            if buf:
                chunks.append(" ".join(buf))
                buf = []
            hdrs.append(line.strip())
            continue
        for token in line.split():
            buf.append(token)
            if len(buf) >= max_words:
                chunks.append(build_chunk(buf, hdrs))
                # Keep the last `stride` words so consecutive chunks overlap.
                buf = buf[-stride:] if stride else []
    if buf:
        chunks.append(build_chunk(buf, hdrs))
    return chunks
40
+
41
def extract_and_chunk(pdf_path):
    """Extract all text from a PDF via PyMuPDF and return policy chunks.

    Args:
        pdf_path: Filesystem path to the uploaded PDF.

    Returns:
        List of text chunks produced by ``smart_chunk_policy``.
    """
    document = fitz.open(pdf_path)
    # Concatenate per-page text with newlines so header lines stay line-separated.
    full_text = "\n".join(page.get_text() for page in document)
    return smart_chunk_policy(full_text)
46
+
47
  def predict(policy_pdf, query, reasoning_style):
48
+ # 1. Extract and chunk the PDF file
49
  if policy_pdf is None:
50
  return "Please upload a document!", None, {}
51
  chunks = extract_and_chunk(policy_pdf.name)
 
56
  amount = output.get("amount", None)
57
  return f"{emoji} <b>{decision}</b>", amount, output["justification"]
58
 
 
 
 
 
 
 
 
 
 
 
59
  with gr.Blocks(theme=gr.themes.Default()) as demo:
60
  gr.Markdown("# 🏥 Policy Decision System (LLM-powered)")
61
  gr.Markdown("Upload your insurance policy document and enter your claim query:")