PDF_Upload

Sleeping

App Files Files Community

Seth0330 committed on May 30, 2025

Commit

b3696a8

verified ·

1 Parent(s): 784a877

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -7

app.py CHANGED Viewed

@@ -195,8 +195,6 @@ def get_extraction_prompt(model_choice, txt):
         f"{txt}"
     )
 def extract_invoice_info(model_choice, text):
     prompt = get_extraction_prompt(model_choice, text)
     raw = query_llm(model_choice, prompt)
@@ -254,17 +252,48 @@ with tab1:
 with tab2:
     st.title("Invoice Extractor")
-    mdl = st.selectbox("Model", list(MODELS.keys()))
     inv_pdf = st.file_uploader("Invoice PDF", type="pdf")
     if st.button("Extract") and inv_pdf:
         txt = read_pdf(io.BytesIO(inv_pdf.getvalue()))
-        info = extract_invoice_info(mdl, txt)
-        if info:
             st.success("Extraction Complete")
             st.subheader("Invoice Metadata")
-            st.table([{k.replace("_", " ").title(): v for k, v in info["invoice_header"].items()}])
             st.subheader("Line Items")
-            st.table(info["line_items"])
     if "last_api" in st.session_state:
         with st.expander("Debug"):

         f"{txt}"
     )
 def extract_invoice_info(model_choice, text):
     prompt = get_extraction_prompt(model_choice, text)
     raw = query_llm(model_choice, prompt)
 with tab2:
     st.title("Invoice Extractor")
+    mdl = st.selectbox("Model", list(MODELS.keys()), key="extract_model")
     inv_pdf = st.file_uploader("Invoice PDF", type="pdf")
+    extracted_info = None
     if st.button("Extract") and inv_pdf:
         txt = read_pdf(io.BytesIO(inv_pdf.getvalue()))
+        extracted_info = extract_invoice_info(mdl, txt)
+        if extracted_info:
             st.success("Extraction Complete")
             st.subheader("Invoice Metadata")
+            st.table([{k.replace("_", " ").title(): v for k, v in extracted_info["invoice_header"].items()}])
             st.subheader("Line Items")
+            st.table(extracted_info["line_items"])
+            st.session_state["last_extracted_info"] = extracted_info  # store in session
+    # If info was extracted just now or earlier in this session, show further controls
+    extracted_info = extracted_info or st.session_state.get("last_extracted_info", None)
+    if extracted_info:
+        st.markdown("---")
+        st.subheader("📝 Fine-tune Extracted Data with Your Own Prompt")
+        user_prompt = st.text_area(
+            "Enter your prompt for further processing or transformation (the extracted JSON will be available as context).",
+            height=120,
+            key="custom_prompt"
+        )
+        model_2 = st.selectbox("Model for Fine-Tuning Prompt", list(MODELS.keys()), key="refine_model")
+        if st.button("Run Custom Prompt"):
+            # Compose the prompt for the LLM, including the JSON and user's instruction
+            refine_input = (
+                "Here is an extracted invoice in JSON format:\n"
+                f"{json.dumps(extracted_info, indent=2)}\n"
+                "Follow this instruction and return the result as a JSON object only (no explanation):\n"
+                f"{user_prompt}"
+            )
+            result = query_llm(model_2, refine_input)
+            refined_json = clean_json_response(result)
+            st.subheader("Fine-Tuned Output")
+            if refined_json:
+                st.json(refined_json)
+            else:
+                st.error("Could not parse a valid JSON output from the model.")
+        st.caption("The prompt is run on the above-extracted fields as JSON. Try instructions like: 'Add a new field for net_amount (amount minus tax) to each line item', or 'Summarize the total quantity ordered', etc.")
     if "last_api" in st.session_state:
         with st.expander("Debug"):