mehakkhan committed on
Commit
d177c4e
·
verified ·
1 Parent(s): cbd6691

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import os
3
+ import json
4
+ import gradio as gr
5
+ from groq import Groq
6
+
7
+ # πŸ” Initialize Groq Client
8
+ client = Groq(api_key=os.getenv("GROQ_API_KEY"))
9
+
10
def extract_form_fields(pdf_bytes):
    """Collect the fillable form fields (widgets) of a PDF.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A dict mapping each widget's field name to its current value.
        Falsy values (``None``, ``""``, ``False``, ...) are normalized to
        ``""``. When several widgets share a name, the last one wins.
    """
    form_fields = {}
    # Open the document as a context manager so the underlying handle is
    # released even if iterating a page raises.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            # `or ()` keeps the original guard against a page that yields
            # no widget iterable at all.
            for widget in page.widgets() or ():
                value = widget.field_value
                form_fields[widget.field_name] = value if value else ""
    return form_fields
21
+
22
def get_pdf_text(pdf_bytes):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A single string with the pages' text in page order, separated by
        ``"\\n"``.
    """
    # Context manager closes the document once the text has been extracted.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "\n".join(page.get_text() for page in doc)
25
+
26
def get_field_details(form_fields, pdf_text, max_text_chars=20000):
    """Ask the LLM to describe each form field, using the PDF text as context.

    Args:
        form_fields: Mapping of field name -> current value; only the names
            are sent to the model.
        pdf_text: Text extracted from the PDF, embedded in the prompt.
        max_text_chars: Upper bound on how many characters of ``pdf_text``
            are sent. This is a rough heuristic budget (the model id
            suggests an 8192-token window); pass ``None`` to disable the cap.

    Returns:
        The raw text content of the model's reply (expected, but not
        validated, to be a JSON object), or ``"{}"`` when there are no
        fields to describe.
    """
    # Nothing to explain: skip the API round-trip and return an empty JSON
    # object so the caller still receives valid JSON.
    if not form_fields:
        return "{}"

    # Keep the prompt bounded so long documents do not overflow the model's
    # context window and fail the whole request.
    if max_text_chars is not None and len(pdf_text) > max_text_chars:
        pdf_text = pdf_text[:max_text_chars]

    field_names = json.dumps(list(form_fields), indent=2)
    prompt = f"""
    You are an expert at analyzing and auto-filling PDF form fields.
    Here is the extracted PDF text:
    {pdf_text}

    Based on this, explain the meaning or expected value of each of the following fields in JSON format:

    {field_names}

    Return your output in the following JSON format:
    {{ "field_name_1": "description", "field_name_2": "description", ... }}
    """
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content
45
+
46
def _read_pdf_bytes(pdf_file):
    """Return the raw bytes of an upload given as bytes, a path, or a file-like."""
    if isinstance(pdf_file, (bytes, bytearray)):
        return bytes(pdf_file)
    if isinstance(pdf_file, (str, os.PathLike)):
        # A filesystem path (what gr.File passes in "filepath" mode).
        with open(pdf_file, "rb") as handle:
            return handle.read()
    # Temp-file wrappers expose the on-disk location via `.name`; reading
    # from the path in binary mode avoids depending on how (or whether) the
    # wrapper itself was opened.
    path = getattr(pdf_file, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        with open(path, "rb") as handle:
            return handle.read()
    return pdf_file.read()


def analyze_form(pdf_file):
    """Run the full analysis pipeline for one uploaded PDF.

    Args:
        pdf_file: The uploaded file as delivered by the UI: a path string,
            raw bytes, or a file-like / temp-file object. Falsy means
            nothing was uploaded.

    Returns:
        A 3-tuple ``(status, fields_json, explanation)``. On success
        ``fields_json`` is the extracted fields pretty-printed as JSON and
        ``explanation`` is the LLM reply; on failure both are ``""`` and
        ``status`` carries the error message.
    """
    if not pdf_file:
        return "❌ No file provided.", "", ""

    try:
        pdf_bytes = _read_pdf_bytes(pdf_file)
        fields = extract_form_fields(pdf_bytes)
        pdf_text = get_pdf_text(pdf_bytes)
        explanation = get_field_details(fields, pdf_text)

        return "✅ Analysis complete.", json.dumps(fields, indent=2), explanation
    except Exception as e:
        # UI boundary: surface any failure as a status message instead of
        # letting the exception escape into the Gradio callback.
        return f"❌ Error: {e}", "", ""
59
+
60
# 🎨 Gradio Interface
# Layout: a heading and description, an upload + button row, then three
# outputs that mirror analyze_form's (status, fields_json, explanation) tuple.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Form Field Analyzer")
    gr.Markdown("Upload a **tax or registration PDF form**. This tool extracts fillable fields and describes what each might represent.")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        analyze_btn = gr.Button("Analyze Form")

    status_output = gr.Textbox(label="Status")
    fields_output = gr.Code(label="📋 Extracted Fields (JSON)", language="json")
    desc_output = gr.Code(label="💡 Field Descriptions (LLM Response)", language="json")

    # Output order must match the tuple returned by analyze_form.
    analyze_btn.click(fn=analyze_form, inputs=[file_input], outputs=[status_output, fields_output, desc_output])

# Launch when the file is executed, as in the original script.
demo.launch()