Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -195,8 +195,6 @@ def get_extraction_prompt(model_choice, txt):
|
|
| 195 |
f"{txt}"
|
| 196 |
)
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
| 200 |
def extract_invoice_info(model_choice, text):
|
| 201 |
prompt = get_extraction_prompt(model_choice, text)
|
| 202 |
raw = query_llm(model_choice, prompt)
|
|
@@ -254,17 +252,48 @@ with tab1:
|
|
| 254 |
|
| 255 |
with tab2:
|
| 256 |
st.title("Invoice Extractor")
|
| 257 |
-
mdl = st.selectbox("Model", list(MODELS.keys()))
|
| 258 |
inv_pdf = st.file_uploader("Invoice PDF", type="pdf")
|
|
|
|
|
|
|
| 259 |
if st.button("Extract") and inv_pdf:
|
| 260 |
txt = read_pdf(io.BytesIO(inv_pdf.getvalue()))
|
| 261 |
-
|
| 262 |
-
if
|
| 263 |
st.success("Extraction Complete")
|
| 264 |
st.subheader("Invoice Metadata")
|
| 265 |
-
st.table([{k.replace("_", " ").title(): v for k, v in
|
| 266 |
st.subheader("Line Items")
|
| 267 |
-
st.table(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
if "last_api" in st.session_state:
|
| 270 |
with st.expander("Debug"):
|
|
|
|
| 195 |
f"{txt}"
|
| 196 |
)
|
| 197 |
|
|
|
|
|
|
|
| 198 |
def extract_invoice_info(model_choice, text):
|
| 199 |
prompt = get_extraction_prompt(model_choice, text)
|
| 200 |
raw = query_llm(model_choice, prompt)
|
|
|
|
| 252 |
|
| 253 |
with tab2:
    # Invoice tab: upload a PDF, extract structured data with the chosen
    # model, then optionally post-process the extracted JSON with a
    # free-form user instruction.
    st.title("Invoice Extractor")
    mdl = st.selectbox("Model", list(MODELS.keys()), key="extract_model")
    inv_pdf = st.file_uploader("Invoice PDF", type="pdf")
    extracted_info = None

    if st.button("Extract") and inv_pdf:
        txt = read_pdf(io.BytesIO(inv_pdf.getvalue()))
        extracted_info = extract_invoice_info(mdl, txt)
        if extracted_info:
            st.success("Extraction Complete")
            # Persist the result: st.button is only True on the rerun caused
            # by its own click, so without this the data is lost as soon as
            # any other widget triggers a rerun.
            st.session_state["last_extracted_info"] = extracted_info

    # Fall back to the result stored earlier in this session so the tables
    # and the refinement controls survive reruns (e.g. "Run Custom Prompt").
    extracted_info = extracted_info or st.session_state.get("last_extracted_info")
    if extracted_info:
        # NOTE(review): assumes extract_invoice_info returns a dict with
        # "invoice_header" (dict) and "line_items" (list) - confirm against
        # its implementation. Missing keys are tolerated instead of raising.
        header = extracted_info.get("invoice_header") or {}
        line_items = extracted_info.get("line_items") or []

        st.subheader("Invoice Metadata")
        if header:
            st.table([{k.replace("_", " ").title(): v for k, v in header.items()}])
        else:
            st.warning("No invoice header was returned by the model.")

        st.subheader("Line Items")
        if line_items:
            st.table(line_items)
        else:
            st.warning("No line items were returned by the model.")

        st.markdown("---")
        st.subheader("📝 Fine-tune Extracted Data with Your Own Prompt")
        user_prompt = st.text_area(
            "Enter your prompt for further processing or transformation (the extracted JSON will be available as context).",
            height=120,
            key="custom_prompt",
        )
        model_2 = st.selectbox(
            "Model for Fine-Tuning Prompt",
            list(MODELS.keys()),
            key="refine_model",
        )

        if st.button("Run Custom Prompt"):
            if not (user_prompt or "").strip():
                # Avoid a model call that has no instruction to follow.
                st.warning("Please enter a prompt before running.")
            else:
                # Compose the prompt for the LLM: extracted JSON as context,
                # followed by the user's instruction.
                refine_input = (
                    "Here is an extracted invoice in JSON format:\n"
                    f"{json.dumps(extracted_info, indent=2)}\n"
                    "Follow this instruction and return the result as a JSON object only (no explanation):\n"
                    f"{user_prompt}"
                )
                result = query_llm(model_2, refine_input)
                refined_json = clean_json_response(result)
                st.subheader("Fine-Tuned Output")
                if refined_json:
                    st.json(refined_json)
                else:
                    st.error("Could not parse a valid JSON output from the model.")

        st.caption("The prompt is run on the above-extracted fields as JSON. Try instructions like: 'Add a new field for net_amount (amount minus tax) to each line item', or 'Summarize the total quantity ordered', etc.")
|
| 297 |
|
| 298 |
if "last_api" in st.session_state:
|
| 299 |
with st.expander("Debug"):
|