Spaces:
Running
Running
Create src/app.py
Browse files- src/app.py +1375 -0
src/app.py
ADDED
|
@@ -0,0 +1,1375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =========================
|
| 2 |
+
# Invoice Extractor (Qwen3-VL via RunPod vLLM) - Batch Mode with Tax Validation
|
| 3 |
+
# =========================
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# Environment hardening (HF Spaces, /.cache issue)
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# Make sure HOME points somewhere usable before Streamlit is imported.
# NOTE(review): the condition only tests for an empty or "/" HOME; it does not
# test whether an existing HOME is writable, although the message says so.
# (os.environ.get with a "" default never returns None, so the None entry in
# the tuple is inert.)
_home = os.environ.get("HOME", "")
if _home in ("", "/", None):
    # Prefer the current working directory; fall back to /tmp when it is not
    # writable by this process.
    repo_dir = os.getcwd()
    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
    os.environ["HOME"] = safe_home
    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")

# Pre-create $HOME/.streamlit. Failure is reported on stdout but is not fatal.
streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
try:
    streamlit_dir.mkdir(parents=True, exist_ok=True)
    print(f"[startup] ensured {streamlit_dir}")
except Exception as e:
    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
|
| 23 |
+
|
| 24 |
+
# -----------------------------
|
| 25 |
+
# Imports
|
| 26 |
+
# -----------------------------
|
| 27 |
+
import json
|
| 28 |
+
from io import BytesIO
|
| 29 |
+
import hashlib
|
| 30 |
+
from typing import Dict, Any
|
| 31 |
+
from datetime import datetime
|
| 32 |
+
|
| 33 |
+
import streamlit as st
|
| 34 |
+
import pandas as pd
|
| 35 |
+
from PIL import Image
|
| 36 |
+
|
| 37 |
+
# Optional: pdf2image is only needed for PDFs
|
| 38 |
+
try:
|
| 39 |
+
from pdf2image import convert_from_bytes
|
| 40 |
+
except Exception:
|
| 41 |
+
convert_from_bytes = None
|
| 42 |
+
|
| 43 |
+
# -----------------------------
|
| 44 |
+
# RunPod vLLM Configuration (from environment variables)
|
| 45 |
+
# -----------------------------
|
| 46 |
+
import requests
|
| 47 |
+
import base64
|
| 48 |
+
import re
|
| 49 |
+
|
| 50 |
+
# Endpoint base URL and API key come from the environment; each defaults to ""
# when the variable is unset.
POD_URL = os.getenv("POD_URL", "")
VLLM_API_KEY = os.getenv("VLLM_API_KEY", "")
# Model identifier placed in the "model" field of each chat-completions request.
MODEL_NAME = "qwen3-vl-8b-nu-merged"

# Validate secrets are set: without both values the app shows an error and
# halts the script run instead of continuing to the UI.
if not POD_URL or not VLLM_API_KEY:
    st.error("⚠️ API credentials not configured. Please set POD_URL and VLLM_API_KEY in Space settings.")
    st.stop()
|
| 58 |
+
# -----------------------------
|
| 59 |
+
# Page config & CSS
|
| 60 |
+
# -----------------------------
|
| 61 |
+
# Wide layout plus the page title shown at the top of the app.
st.set_page_config(page_title="Invoice Extractor (Qwen3-VL) - Batch Mode", layout="wide")
st.title("Invoice Extraction")

# Inject custom CSS: background colours, container padding, tab spacing and a
# minimum height for the second column. unsafe_allow_html is required for the
# raw <style> tag to be emitted.
st.markdown(
    """
<style>
.stApp { background-color: #ECECEC !important; }
div.block-container { padding-top: 3rem; padding-bottom: 1rem; }
[data-testid="stSidebar"] { background-color: #F7F7F7 !important; }
div[data-testid="stTabs"] > div > div { padding-bottom: 6px !important; }
/* Keep right column steady on first render post-extraction */
[data-testid="column"]:nth-of-type(2) { min-height: 780px; }
</style>
""",
    unsafe_allow_html=True
)

# Fixed sizes to prevent reflow wobble
# NOTE(review): presumably pixel values consumed by the image preview and the
# data editor further down the file — confirm at the call sites.
FIXED_IMG_WIDTH = 640
DATA_EDITOR_HEIGHT = 380
|
| 81 |
+
|
| 82 |
+
# -----------------------------
|
| 83 |
+
# Helpers
|
| 84 |
+
# -----------------------------
|
| 85 |
+
def ensure_state(k: str, default):
    """Seed ``st.session_state[k]`` with ``default`` unless the key already exists.

    Intended to run once per key so that widgets can bind to it through
    ``key=...`` rather than ``value=...``. An existing value is never
    overwritten.
    """
    if k in st.session_state:
        return
    st.session_state[k] = default
|
| 89 |
+
|
| 90 |
+
def clean_float(x) -> float:
    """Coerce a loosely formatted numeric value to ``float``.

    Args:
        x: ``None``, a number, or anything whose ``str()`` looks like an
            amount (``"1,234.56"``, ``"$ -12.50"``, ``" 18 % "``).

    Returns:
        The parsed value, or ``0.0`` when ``x`` is ``None``, NaN, empty, or
        cannot be parsed once everything except digits, ``.`` and ``-`` has
        been removed.
    """
    import math  # function-scope: math is not imported at file level

    if x is None:
        return 0.0
    if isinstance(x, (int, float)):
        value = float(x)
        # NaN (e.g. an empty numeric cell) would otherwise leak into every
        # later sum and comparison; treat it like any other missing value.
        return 0.0 if math.isnan(value) else value

    # A single pass is enough: dropping everything except digits, "." and "-"
    # also removes thousands separators, whitespace and currency symbols.
    s = re.sub(r"[^\d.\-]", "", str(x))
    try:
        return float(s)
    except ValueError:
        # "", ".", "-", "1.2.3", "12-3", ...
        return 0.0
|
| 107 |
+
|
| 108 |
+
def normalize_date(date_str) -> str:
    """Normalize a date to ``dd-MMM-yyyy`` (e.g. ``01-Jan-2025``).

    Parsing is delegated to ``parse_date_to_object`` so that both helpers
    accept exactly the same set of input formats (ISO, EU, US, dotted,
    month-name and compact ``YYYYMMDD`` forms) from a single list instead of
    two copies that can drift apart.

    Args:
        date_str: The date text to normalize; non-string values are handled
            the same way ``parse_date_to_object`` handles them.

    Returns:
        The formatted date, or ``""`` when the input is empty or matches none
        of the supported formats.
    """
    parsed = parse_date_to_object(date_str)
    if parsed is None:
        return ""
    return parsed.strftime("%d-%b-%Y")
|
| 157 |
+
|
| 158 |
+
def parse_date_to_object(date_str):
    """Turn a date string into a ``datetime.date`` for the ``date_input`` widget.

    The candidate formats are tried in order and the first one that parses
    wins, so an ambiguous value such as ``03-04-2025`` is read day-first.

    Returns:
        A ``datetime.date``, or ``None`` when the input is empty, blank, or
        matches none of the known formats.
    """
    text = date_str.strip() if isinstance(date_str, str) else date_str
    if not text:
        return None

    known_formats = (
        "%Y-%m-%d",   # 2025-01-15 (ISO)
        "%d-%m-%Y",   # 15-01-2025 (EU)
        "%m-%d-%Y",   # 01-15-2025 (US)
        "%Y/%m/%d",   # 2025/01/15
        "%d/%m/%Y",   # 15/01/2025
        "%m/%d/%Y",   # 01/15/2025
        "%d.%m.%Y",   # 15.01.2025
        "%Y.%m.%d",   # 2025.01.15
        "%d %B %Y",   # 15 January 2025
        "%d %b %Y",   # 15 Jan 2025
        "%B %d, %Y",  # January 15, 2025
        "%b %d, %Y",  # Jan 15, 2025
        "%d-%b-%Y",   # 15-Jan-2025
        "%d-%B-%Y",   # 15-January-2025
        "%Y%m%d",     # 20250115
    )

    for pattern in known_formats:
        try:
            moment = datetime.strptime(str(text), pattern)
        except (ValueError, TypeError):
            continue
        return moment.date()

    return None
|
| 199 |
+
|
| 200 |
+
# -----------------------------
|
| 201 |
+
# HF login flow (REMOVED - No longer needed for vLLM API)
|
| 202 |
+
# -----------------------------
|
| 203 |
+
# Authentication is now handled via POD_URL and VLLM_API_KEY instead
|
| 204 |
+
|
| 205 |
+
# -----------------------------
|
| 206 |
+
# Model config
|
| 207 |
+
# -----------------------------
|
| 208 |
+
# OLD DONUT CODE (COMMENTED OUT - Now using vLLM API)
|
| 209 |
+
# -----------------------------
|
| 210 |
+
# HF_MODEL_ID = "Bhuvi13/model-V7"
|
| 211 |
+
# TASK_PROMPT = "<s_cord-v2>"
|
| 212 |
+
#
|
| 213 |
+
# @st.cache_resource(show_spinner=False)
|
| 214 |
+
# def load_model_and_processor(hf_model_id: str, task_prompt: str):
|
| 215 |
+
# ...
|
| 216 |
+
|
| 217 |
+
# -----------------------------
|
| 218 |
+
# vLLM Inference Function (RunPod API)
|
| 219 |
+
# -----------------------------
|
| 220 |
+
def run_inference_vllm(image: Image.Image):
    """Send one invoice image to the RunPod vLLM endpoint and return its reply.

    The image is downscaled so neither side exceeds 2048 px, encoded as a
    base64 PNG data URL and posted to ``{POD_URL}/v1/chat/completions``
    together with the extraction prompt.

    Args:
        image: The invoice page as a PIL image.

    Returns:
        The text of the first completion choice (expected to be the JSON
        document described in the prompt), or ``None`` when the request
        fails, the server answers with a non-200 status, or the response
        cannot be decoded. Failures are reported in the Streamlit UI rather
        than raised.
    """

    # Extraction prompt (JSON format)
    EXTRACTION_PROMPT = """Please carefully examine this invoice image and extract all the information into the following structured JSON format. Pay close attention to details and ensure accuracy in number formatting and text extraction.

Extract the data into this exact JSON structure (do not add or remove keys):

{
"header": {
"invoice_no": "Invoice number or reference ID",
"invoice_date": "Date the invoice was issued (maintain original format)",
"due_date": "Payment due date if specified",
"sender_name": "Name of the company/person issuing the invoice",
"sender_addr": "Complete address of the sender/issuer",
"rcpt_name": "Name of the recipient/customer",
"rcpt_addr": "Address of the recipient/customer",
"bank_iban": "International Bank Account Number",
"bank_name": "Name of the bank",
"bank_acc_no": "Bank account number",
"bank_routing": "Bank routing number",
"bank_swift": "SWIFT/BIC code",
"bank_acc_name": "Account holder name",
"bank_branch": "Bank branch information"
},
"items": [
{
"descriptions": "Detailed description of the item/service",
"SKU": "Stock Keeping Unit or item code",
"quantity": "Quantity of items",
"unit_price": "Price per unit",
"amount": "Total amount for this line item",
"tax": "Tax amount for this item",
"Line_total": "Total amount including tax for this line"
}
],
"summary": {
"subtotal": "Subtotal amount before tax",
"tax_rate": "Tax rate percentage or description",
"tax_amount": "Total tax amount",
"total_amount": "Final total amount to be paid",
"currency": "Currency code (USD, EUR, etc.)"
}
}

STRICT POLICY RULES (apply exactly, do not deviate):
1) Number formatting & types
- Preserve the original number formatting from the invoice (commas, decimal places, currency symbols in text fields if shown).
- In this JSON, output all values as strings. If a field is not present or cannot be determined with high confidence, output "" (empty string). Do not use null, 0, or placeholders.
2) Currency selection (multi-currency invoices)
- If multiple currencies are shown, ALWAYS choose the recipient/customer currency for all monetary fields in items and summary.
- Do NOT perform FX conversion. Select the column/figures that are explicitly in the recipient’s currency.
- For "summary.currency", prefer the printed 3-letter code (e.g., USD, EUR, INR). If only an unambiguous symbol is present, map it (₹→INR, €→EUR, $→USD when clearly USD). If ambiguous, leave "".
3) Tax handling (no rounding of rates; don’t recompute given totals)
- Do NOT round tax percentages. Use the original precision for any calculations; keep the printed formatting for "summary.tax_rate".
- If a TOTAL tax amount is explicitly printed on the invoice (e.g., “Tax”, “VAT”, “IGST”, “Total Tax”), TREAT IT AS AUTHORITATIVE. Do NOT recompute a new total.
a) If per-line tax amounts are printed, copy them directly.
b) If per-line tax amounts are not printed, allocate the printed TOTAL tax proportionally across line items by each line’s net amount (quantity * unit_price − discount). Use precise arithmetic; ensure the sum of allocated per-line taxes equals the printed TOTAL tax (adjust the last cent minimally if required).
- If NO total tax amount is printed but a tax rate is printed, compute per-line tax as: tax = (quantity * unit_price − discount) × (exact, unrounded tax rate). Then set "summary.tax_amount" = sum of per-line taxes.
- "items[].amount" is the pre-tax line amount AFTER discount. "items[].Line_total" = amount + tax.
4) Discounts
- If discounts are present (per-line or overall), compute tax on the discounted base: (quantity * unit_price − discount). Never compute tax on the undiscounted amount.
5) Due date calculation from payment terms
- Preserve the invoice’s original date format for both "invoice_date" and "due_date".
- If explicit due date is printed, use it as "due_date".
- If payment terms specify Net X (e.g., Net 30), set due_date = invoice_date + X days (same format as invoice_date).
- If terms say “upon receipt”, “upon publication”, or equivalent, due_date = invoice_date.
- If both a printed due date and terms exist and they conflict, prefer the printed due date.
6) Items array
- Include every visible line item. Preserve multi-line descriptions using literal "\\n" where line breaks exist.
- If SKU is not shown, set "SKU": "".
- Ensure "quantity", "unit_price", "amount", "tax", and "Line_total" are consistent with the rules above.
7) Summary invariants (when values are available on the invoice)
- "summary.subtotal" = sum of items[].amount.
- "summary.tax_amount" = sum of items[].tax (if you allocated or computed it). If the invoice prints a total tax amount, use that exact value and make per-line taxes sum to it.
- "summary.total_amount" = subtotal + tax_amount.
- If any of these values are not printed and cannot be derived reliably from the printed numbers, leave them as "".
8) Text extraction fidelity
- Extract text exactly as printed (names, addresses, bank fields, references). Keep special characters and spacing (normalize only obvious OCR artifacts).
- If a bank field is absent (IBAN/SWIFT/routing/etc.), set it to "".

Output constraints:
- Return ONLY the JSON object described above (no explanations, no code fences, no trailing commas).
- Keep all values as strings.
- Do not add extra keys or sections beyond the given schema."""

    try:
        # Resize image if too large (max dimension 2048px to avoid payload size issues)
        max_dimension = 2048
        width, height = image.size
        if width > max_dimension or height > max_dimension:
            # Scale by the tighter of the two ratios so the aspect ratio is kept.
            ratio = min(max_dimension / width, max_dimension / height)
            new_size = (int(width * ratio), int(height * ratio))
            image = image.resize(new_size, Image.Resampling.LANCZOS)
            st.info(f"Image resized from {width}x{height} to {new_size[0]}x{new_size[1]} to reduce payload size")

        # Convert image to base64
        buffer = BytesIO()
        image.save(buffer, format="PNG", optimize=True)
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        # Check payload size (measured on the base64 text, not the raw PNG)
        payload_size_mb = len(image_base64) / (1024 * 1024)
        if payload_size_mb > 10:
            st.warning(f"Warning: Large image payload ({payload_size_mb:.2f} MB). This might cause issues.")

        data_url = f"data:image/png;base64,{image_base64}"

        # Build payload
        payload = {
            "model": MODEL_NAME,
            "messages": [
                {"role": "system", "content": EXTRACTION_PROMPT},
                {"role": "user", "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                    {"type": "text", "text": "Extract invoice data."}
                ]}
            ],
            "temperature": 0,
            "max_tokens": 1536
        }

        headers = {
            "Authorization": f"Bearer {VLLM_API_KEY}",
            "Content-Type": "application/json"
        }

        # A POD_URL configured with a trailing slash must not yield "//v1/...".
        endpoint = POD_URL.rstrip("/") + "/v1/chat/completions"

        # Call API
        st.info(f"Sending request to API (payload size: {payload_size_mb:.2f} MB)...")
        response = requests.post(
            endpoint,
            headers=headers,
            json=payload,
            timeout=90
        )

        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]

        # Show detailed error for debugging
        st.error(f"❌ API Error {response.status_code}")
        try:
            error_detail = response.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML gateway page): show it verbatim.
            st.code(response.text)
        else:
            st.json(error_detail)
        return None

    except Exception as e:
        # UI boundary: network errors, unexpected response shapes and image
        # encoding failures are all surfaced to the user, never raised.
        st.error(f"Error calling vLLM: {str(e)}")
        return None
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
# -----------------------------
|
| 375 |
+
# JSON Parser for vLLM Output
|
| 376 |
+
# -----------------------------
|
| 377 |
+
def parse_vllm_json(raw_json_text):
    """Convert the model's JSON reply into the flat dict used by the UI.

    Args:
        raw_json_text: The completion text, expected to hold one JSON object
            with ``header``, ``items`` and ``summary`` sections.

    Returns:
        A dict with the invoice header fields (dates normalized through
        ``normalize_date``), nested ``Sender`` / ``Recipient`` /
        ``Bank Details`` dicts, numeric ``Subtotal`` / ``Tax Percentage`` /
        ``Total Tax`` / ``Total Amount``, ``Currency`` and an
        ``Itemized Data`` list. Returns ``None`` (after showing the error in
        the UI) when the text cannot be parsed.

    Amounts go through ``clean_float``, so one unparseable value such as
    ``"-"`` becomes ``0.0`` instead of discarding the whole invoice.
    """
    try:
        try:
            data = json.loads(raw_json_text)
        except json.JSONDecodeError:
            # The model may wrap the object in code fences or commentary
            # despite the prompt; retry on the outermost {...} span.
            start = raw_json_text.find("{")
            end = raw_json_text.rfind("}")
            if start == -1 or end <= start:
                raise
            data = json.loads(raw_json_text[start:end + 1])

        if not isinstance(data, dict):
            raise ValueError("expected a JSON object at the top level")

        # "or" also covers sections the model emitted as null.
        header = data.get("header") or {}
        summary = data.get("summary") or {}
        items = data.get("items") or []

        sender_name = header.get("sender_name", "")
        sender_addr = header.get("sender_addr", "")
        rcpt_name = header.get("rcpt_name", "")
        rcpt_addr = header.get("rcpt_addr", "")

        result = {
            "Invoice Number": header.get("invoice_no", ""),
            "Invoice Date": normalize_date(header.get("invoice_date", "")),
            "Due Date": normalize_date(header.get("due_date", "")),
            "Sender Name": sender_name,
            "Sender Address": sender_addr,
            "Sender": {
                "Name": sender_name,
                "Address": sender_addr,
            },
            "Recipient Name": rcpt_name,
            "Recipient Address": rcpt_addr,
            "Recipient": {
                "Name": rcpt_name,
                "Address": rcpt_addr,
            },
            "Bank Details": {
                "bank_iban": header.get("bank_iban", ""),
                "bank_name": header.get("bank_name", ""),
                "bank_account_number": header.get("bank_acc_no", ""),
                "bank_routing": header.get("bank_routing", ""),
                "bank_swift": header.get("bank_swift", ""),
                "bank_account_holder": header.get("bank_acc_name", ""),
                "bank_branch": header.get("bank_branch", ""),
            },
            "Subtotal": clean_float(summary.get("subtotal", "0")),
            "Tax Percentage": clean_float(summary.get("tax_rate", "0")),
            "Total Tax": clean_float(summary.get("tax_amount", "0")),
            "Total Amount": clean_float(summary.get("total_amount", "0")),
            "Currency": summary.get("currency", ""),
            "Itemized Data": [],
        }

        for item in items:
            if not isinstance(item, dict):
                # A stray string/number in the list cannot be a line item.
                continue

            # Store raw tax value to distinguish empty ("") from explicit "0" or "0.00"
            raw_tax = item.get("tax", "")

            result["Itemized Data"].append({
                "Description": item.get("descriptions", ""),
                "SKU": item.get("SKU", ""),
                "Quantity": clean_float(item.get("quantity", "0")),
                "Unit Price": clean_float(item.get("unit_price", "0")),
                "Amount": clean_float(item.get("amount", "0")),
                "Tax": clean_float(raw_tax),
                "Tax_Raw": raw_tax,  # Keep original to distinguish empty vs 0.00
                "Line Total": clean_float(item.get("Line_total", "0")),
            })

        return result

    except Exception as e:
        # UI boundary: any malformed reply is reported and mapped to None.
        st.error(f"JSON parse error: {str(e)}")
        return None
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
# -----------------------------
|
| 447 |
+
# Tax Validation Function
|
| 448 |
+
# -----------------------------
|
| 449 |
+
def _is_explicit_zero_tax(raw_tax_value) -> bool:
    """Tell whether a raw per-line tax was supplied and evaluates to zero."""
    if not isinstance(raw_tax_value, str):
        # A numeric 0 counts as explicit; None or any other value does not.
        return raw_tax_value == 0

    cleaned = raw_tax_value.strip()
    if cleaned == "":
        # Empty means "not provided": the caller should compute the tax.
        return False

    numeric = re.sub(r"[^\d.\-]", "", cleaned)
    if not any(ch.isdigit() for ch in numeric):
        # Non-numeric text such as "N/A" or "-" is read as a zero amount.
        return True
    try:
        return float(numeric) == 0.0
    except ValueError:
        # Digits are present but malformed ("1.2.3"): not a trustworthy zero.
        return False


def validate_and_calculate_taxes(structured_data):
    """Reconcile the invoice's tax rate and tax amount, then fill per-line taxes.

    The dict is updated in place and also returned.

    Steps:
      1. Skip (``tax_validated = False`` plus a ``tax_skip_reason``) when the
         subtotal is not positive or not below the total, when a rate is
         given but the tax amount is 0, or when neither is given.
      2. Choose the authoritative rate: whichever of "printed rate" and
         "rate implied by the printed tax amount" reproduces the printed
         total more closely. Ties go to the tax amount.
      3. Recompute ``Tax`` and ``Line Total`` for every line, except lines
         whose amount is 0 and lines whose raw tax was explicitly zero
         (tax-exempt), which keep a tax of 0.
      4. Overwrite ``Tax Percentage``, ``Total Tax`` and ``Total Amount`` with
         the recomputed values and record the originals under
         ``original_tax_rate`` / ``original_tax_amount``.

    Args:
        structured_data: Dict holding ``Subtotal``, ``Total Amount``,
            ``Tax Percentage``, ``Total Tax`` and ``Itemized Data`` (a list
            of dicts with ``Amount``, ``Tax`` and ``Tax_Raw``); missing keys
            default to ``0.0`` / an empty list.

    Returns:
        The same ``structured_data`` object.
    """
    subtotal = structured_data.get("Subtotal", 0.0)
    total_amount = structured_data.get("Total Amount", 0.0)
    model_tax_rate = structured_data.get("Tax Percentage", 0.0)
    model_tax_amount = structured_data.get("Total Tax", 0.0)
    items = structured_data.get("Itemized Data", [])

    def _skip(reason):
        structured_data["tax_validated"] = False
        structured_data["tax_skip_reason"] = reason
        return structured_data

    # SKIP VALIDATION if: No tax detected (subtotal >= total) OR subtotal is invalid
    if subtotal >= total_amount or subtotal <= 0:
        return _skip("No tax detected")

    # SKIP if tax_rate exists but tax_amount is 0 (incomplete data)
    if model_tax_rate > 0 and model_tax_amount == 0.0:
        return _skip("Tax rate exists but tax amount is 0")

    # No tax information available
    if not (model_tax_rate > 0 or model_tax_amount > 0):
        return _skip("No tax rate or amount provided")

    # SOURCE A: how far off is the total predicted by the printed rate?
    error_from_rate = float("inf")
    if model_tax_rate > 0:
        expected_tax_from_rate = subtotal * (model_tax_rate / 100)
        error_from_rate = abs(subtotal + expected_tax_from_rate - total_amount)

    # SOURCE B: how far off is the total predicted by the printed tax amount?
    error_from_amount = float("inf")
    calculated_rate_from_amount = None
    if model_tax_amount > 0:
        calculated_rate_from_amount = (model_tax_amount / subtotal) * 100
        error_from_amount = abs(subtotal + model_tax_amount - total_amount)

    # PICK WINNER. The rate only wins on a strictly smaller error, which also
    # guarantees calculated_rate_from_amount is set whenever it is used.
    if error_from_rate < error_from_amount:
        authoritative_rate = round(model_tax_rate, 4)
        authority_source = "tax_rate"
    else:
        authoritative_rate = round(calculated_rate_from_amount, 4)
        authority_source = "tax_amount"

    # APPLY to line items - BUT respect explicit 0.00 values
    calculated_total_tax = 0.0
    for item in items:
        amount = item.get("Amount", 0.0)

        # If item amount is 0, keep tax at 0
        if amount == 0.0:
            item["Tax"] = 0.0
            item["Line Total"] = 0.0
            continue

        # Explicit "0" / "0.00" means tax-exempt; an empty raw value means the
        # tax was simply not provided and must be calculated.
        if _is_explicit_zero_tax(item.get("Tax_Raw", "")) and item.get("Tax", 0.0) == 0.0:
            item["Tax"] = 0.0
            item["Line Total"] = amount
            continue

        corrected_tax = round(amount * (authoritative_rate / 100), 2)
        item["Tax"] = corrected_tax
        item["Line Total"] = round(amount + corrected_tax, 2)
        calculated_total_tax += corrected_tax

    # Update summary - ENSURE BOTH FIELDS ARE FILLED
    structured_data["Tax Percentage"] = authoritative_rate
    structured_data["Total Tax"] = round(calculated_total_tax, 2)
    structured_data["Total Amount"] = round(subtotal + calculated_total_tax, 2)
    structured_data["tax_validated"] = True
    structured_data["tax_authority_source"] = authority_source
    structured_data["original_tax_rate"] = model_tax_rate
    structured_data["original_tax_amount"] = model_tax_amount

    return structured_data
|
| 562 |
+
|
| 563 |
+
|
| 564 |
+
# -----------------------------
|
| 565 |
+
# ORIGINAL (previous) mapping logic — restored verbatim
|
| 566 |
+
# -----------------------------
|
| 567 |
+
def map_prediction_to_ui(pred):
    """Map a raw model prediction onto the invoice dict consumed by the UI.

    ``pred`` may be a JSON string (optionally surrounded by extra text) or an
    already-parsed object. Keys are matched case-insensitively at any nesting
    depth. Line items come from the first list of dicts that looks like an
    item table; if none is found, progressively looser fallbacks are tried.

    Args:
        pred: Model output. Only a dict, or a string that parses to a dict,
            contributes data; anything else yields an empty skeleton.

    Returns:
        A dict with the keys "Invoice Number", "Invoice Date", "Due Date",
        "Currency", "Subtotal", "Tax Percentage", "Total Tax", "Total Amount",
        "Sender", "Recipient", "Sender Name", "Sender Address",
        "Recipient Name", "Recipient Address", "Bank Details" and
        "Itemized Data". Numeric fields are floats (0.0 when missing) and
        "Currency" is always a string.
    """
    import json
    import re
    from collections import defaultdict

    def safe_json_load(s):
        """Parse *s* as JSON, falling back to the first parseable ``{...}`` span."""
        if s is None:
            return None
        if isinstance(s, (dict, list)):
            return s
        if not isinstance(s, str):
            return None
        s = s.strip()
        if s == "":
            return None
        try:
            return json.loads(s)
        except Exception:
            # Best-effort: the model may wrap JSON in prose, so collect every
            # balanced top-level brace span and return the first that parses.
            # Braces inside string values are not treated specially.
            spans = []
            depth = 0
            start = None
            for i, ch in enumerate(s):
                if ch == "{":
                    if depth == 0:
                        start = i
                    depth += 1
                elif ch == "}" and depth:
                    depth -= 1
                    if depth == 0 and start is not None:
                        spans.append(s[start:i + 1])
                        start = None
            for span in spans:
                try:
                    return json.loads(span)
                except Exception:
                    continue
            return None

    def clean_number(x):
        """Coerce *x* to float, stripping currency symbols/commas; 0.0 on failure."""
        if x is None:
            return 0.0
        if isinstance(x, (int, float)):
            return float(x)
        # Keep only digits, dots and minus signs (this also drops commas and
        # whitespace, so no separate pass is needed for those).
        s = re.sub(r"[^\d\.\-]", "", str(x))
        if s in ("", ".", "-", "-."):
            return 0.0
        try:
            return float(s)
        except ValueError:
            return 0.0

    def collect_keys(obj, out):
        """Recursively record every dict value in *out* under its lower-cased key."""
        if isinstance(obj, dict):
            for k, v in obj.items():
                out[str(k).strip().lower()].append(v)
                collect_keys(v, out)
        elif isinstance(obj, list):
            for it in obj:
                collect_keys(it, out)

    def collect_lists_of_dicts(obj, out_lists):
        """Recursively append every non-empty list whose first element is a dict."""
        if isinstance(obj, dict):
            children = obj.values()
        elif isinstance(obj, list):
            children = obj
        else:
            return
        for child in children:
            if isinstance(child, list) and child and isinstance(child[0], dict):
                out_lists.append(child)
            else:
                collect_lists_of_dicts(child, out_lists)

    def map_item_dict(it):
        """Convert one raw line-item dict into a UI row, or None if not a dict."""
        if not isinstance(it, dict):
            return None
        lower = {str(k).strip().lower(): v for k, v in it.items()}
        desc = (lower.get("descriptions") or lower.get("description")
                or lower.get("desc") or lower.get("item") or "")
        qty = lower.get("quantity") or lower.get("qty") or lower.get("count") or ""
        unit_price = lower.get("unit_price") or lower.get("price") or ""
        amount = (lower.get("amount") or lower.get("line_total")
                  or lower.get("line total") or lower.get("total") or "")
        tax = lower.get("tax") or lower.get("tax_amount") or ""
        line_total = lower.get("line_total") or lower.get("line total") or amount

        return {
            "Description": str(desc).strip(),
            "Quantity": clean_number(qty),
            "Unit Price": clean_number(unit_price),
            "Amount": clean_number(amount),
            "Tax": clean_number(tax),
            "Line Total": clean_number(line_total),
        }

    parsed = safe_json_load(pred) if isinstance(pred, str) else pred

    ui = {
        "Invoice Number": "",
        "Invoice Date": "",
        "Due Date": "",
        "Currency": "",
        "Subtotal": 0.0,
        "Tax Percentage": 0.0,
        "Total Tax": 0.0,
        "Total Amount": 0.0,
        "Sender": {"Name": "", "Address": ""},
        "Recipient": {"Name": "", "Address": ""},
        "Sender Name": "",
        "Sender Address": "",
        "Recipient Name": "",
        "Recipient Address": "",
        "Bank Details": {},
        "Itemized Data": [],
    }

    key_map = defaultdict(list)
    list_candidates = []
    # Only a dict prediction carries data; unparseable strings, lists and None
    # leave both lookups empty and produce the empty skeleton above.
    if isinstance(parsed, dict):
        collect_keys(parsed, key_map)
        collect_lists_of_dicts(parsed, list_candidates)

    def pick_first(*candidate_keys):
        """Return the first usable value stored under any of *candidate_keys*.

        Dicts and lists are returned as-is; scalars are returned as stripped,
        non-empty strings. Returns None when nothing usable is found.
        """
        for k in candidate_keys:
            lk = k.strip().lower()
            if lk not in key_map:
                continue
            for v in key_map[lk]:
                if v is None:
                    continue
                if isinstance(v, (dict, list)):
                    return v
                s = str(v).strip()
                if s != "":
                    return s
        return None

    ui["Invoice Number"] = pick_first("invoice_no", "invoice_number", "invoiceid", "invoice id") or ""
    ui["Invoice Date"] = normalize_date(pick_first("invoice_date", "date", "invoice date") or "")
    # "due date" replaces a duplicated "due_date" alias, mirroring "invoice date" above.
    ui["Due Date"] = normalize_date(pick_first("due_date", "due date", "due") or "")
    ui["Sender Name"] = pick_first("sender_name", "sender") or ""
    ui["Sender Address"] = pick_first("sender_addr", "sender_address", "sender addr") or ""
    ui["Recipient Name"] = pick_first("rcpt_name", "recipient_name", "recipient", "rcpt") or ""
    ui["Recipient Address"] = pick_first("rcpt_addr", "recipient_address", "recipient addr") or ""

    bank = {}
    for bk in ("bank_name", "bank_acc_no", "bank_account_number", "bank_acc_name",
               "bank_iban", "bank_swift", "bank_routing", "bank_branch", "iban"):
        # Also try the key without its "bank_" prefix (e.g. "swift").
        val = pick_first(bk, bk.replace("bank_", ""))
        if not val:
            continue
        if bk == "iban":
            bank["bank_iban"] = str(val)
        elif bk == "bank_acc_no":
            # Legacy alias: stored under the canonical account-number key.
            bank["bank_account_number"] = str(val)
        else:
            bank[bk] = str(val)
    ui["Bank Details"] = bank

    ui["Subtotal"] = clean_number(pick_first("subtotal", "sub_total", "sub total") or 0.0)
    ui["Tax Percentage"] = clean_number(pick_first("tax_rate", "tax_percentage", "tax pct", "tax percentage") or 0.0)
    ui["Total Tax"] = clean_number(pick_first("tax_amount", "tax", "total_tax") or 0.0)
    ui["Total Amount"] = clean_number(pick_first("total_amount", "grand_total", "total", "amount") or 0.0)
    # pick_first may hand back a dict/list; only a string is a usable currency
    # (calling .strip() on a container used to raise AttributeError here).
    currency = pick_first("currency")
    ui["Currency"] = currency.strip() if isinstance(currency, str) else ""

    items_rows = []

    def list_looks_like_items(lst):
        """Return True when the first dict of *lst* has at least one item-like key."""
        if not isinstance(lst, list) or not lst:
            return False
        if not isinstance(lst[0], dict):
            return False
        expected = {"descriptions", "description", "desc", "item", "quantity",
                    "qty", "amount", "unit_price", "line_total"}
        keys0 = {str(k).strip().lower() for k in lst[0].keys()}
        return bool(expected.intersection(keys0))

    # 1) First list of dicts that looks like an item table wins.
    for cand in list_candidates:
        if list_looks_like_items(cand):
            for it in cand:
                row = map_item_dict(it)
                if row is not None:
                    items_rows.append(row)
        if items_rows:
            break

    # 2) The top-level dict itself may be a single line item.
    if not items_rows:
        if isinstance(parsed, dict):
            single_candidate_keys = {str(k).strip().lower() for k in parsed}
        else:
            single_candidate_keys = set()
        item_like_keys = {"descriptions", "description", "desc", "item", "quantity",
                          "qty", "unit_price", "unit price", "price", "amount",
                          "line_total", "line total", "sku", "tax", "tax_amount"}
        if single_candidate_keys and single_candidate_keys.intersection(item_like_keys):
            single_row = map_item_dict(parsed)
            if single_row is not None:
                items_rows.append(single_row)

    # 3) Any nested dict value with item-like keys becomes a row.
    if not items_rows:
        nested_item_keys = {"descriptions", "description", "desc", "amount",
                            "line_total", "quantity", "qty", "unit_price"}
        for vals in key_map.values():
            for v in vals:
                if not isinstance(v, dict):
                    continue
                lower_keys = {str(x).strip().lower() for x in v.keys()}
                if lower_keys.intersection(nested_item_keys):
                    row = map_item_dict(v)
                    if row is not None:
                        items_rows.append(row)

    # 4) Last resort: assemble one row from loose scalar keys.
    if not items_rows:
        desc = pick_first("descriptions", "description")
        amt = pick_first("amount", "line_total")
        qty = pick_first("quantity", "qty")
        unit_price = pick_first("unit_price", "price")
        if desc or amt or qty or unit_price:
            items_rows.append({
                "Description": str(desc or ""),
                "Quantity": clean_number(qty),
                "Unit Price": clean_number(unit_price),
                "Amount": clean_number(amt),
                "Tax": clean_number(pick_first("tax", "tax_amount") or 0.0),
                "Line Total": clean_number(amt or 0.0),
            })

    ui["Itemized Data"] = items_rows
    ui["Sender"] = {"Name": ui["Sender Name"], "Address": ui["Sender Address"]}
    ui["Recipient"] = {"Name": ui["Recipient Name"], "Address": ui["Recipient Address"]}

    return ui
|
| 796 |
+
|
| 797 |
+
def flatten_invoice_to_rows(invoice_data) -> list:
    """Flatten one invoice dict into flat row dicts, one per line item.

    Every row repeats the invoice-level fields and the expected bank fields,
    then adds the ``Item *`` columns for a single entry of "Itemized Data".
    An invoice without line items yields exactly one row with placeholder
    item columns.

    Args:
        invoice_data: Invoice dict keyed by the UI field names
            ("Invoice Number", "Bank Details", "Itemized Data", ...).
            ``None`` or an empty dict is treated as an empty invoice.

    Returns:
        A list of row dicts. Blank text fields become "NA"; blank, NaN or
        unparseable amounts become 0.
    """
    import math

    EXPECTED_BANK_FIELDS = [
        "bank_name",
        "bank_account_number",
        "bank_acc_name",
        "bank_iban",
        "bank_swift",
        "bank_routing",
        "bank_branch",
    ]

    def is_missing(value):
        """Return True for None and float NaN."""
        # NOTE(review): NaN is presumably what an empty table-editor cell
        # becomes after a DataFrame round-trip - confirm against the save path.
        return value is None or (isinstance(value, float) and math.isnan(value))

    # Helper to format text fields (empty -> NA)
    def format_text_field(value):
        if is_missing(value):
            return "NA"
        return str(value).strip() or "NA"

    # Helper to format amount fields (empty -> 0)
    def format_amount_field(value):
        if is_missing(value) or (isinstance(value, str) and value.strip() == ""):
            return 0
        try:
            number = float(value)
        except (ValueError, TypeError):
            return 0
        # A string such as "nan" parses to NaN; treat it like an empty amount.
        return 0 if math.isnan(number) else number

    invoice_data = invoice_data or {}
    line_items = invoice_data.get("Itemized Data", []) or []

    # Gather bank fields from the nested dict first (adding the "bank_" prefix
    # where it is missing), then let top-level "bank_*" keys override them.
    bank_details = {}
    nested = invoice_data.get("Bank Details", {}) or {}
    if isinstance(nested, dict):
        for k, v in nested.items():
            key_name = k if str(k).startswith("bank_") else f"bank_{k}"
            bank_details[key_name] = v

    for k, v in invoice_data.items():
        if isinstance(k, str) and k.lower().startswith("bank_"):
            bank_details[k] = v

    # "bank_acc_no" is a legacy alias of "bank_account_number"; fall back to it
    # so the account number is not exported as "NA" when only the alias exists.
    if format_text_field(bank_details.get("bank_account_number")) == "NA":
        bank_details["bank_account_number"] = bank_details.get("bank_acc_no", "")

    for f in EXPECTED_BANK_FIELDS:
        bank_details.setdefault(f, "")

    def party_field(party, field):
        """Read e.g. "Sender Name", falling back to the nested invoice_data["Sender"]["Name"]."""
        flat = invoice_data.get(f"{party} {field}", "")
        if flat:
            return flat
        nested_party = invoice_data.get(party) or {}
        # Guard against a non-dict party value (e.g. a plain string).
        return nested_party.get(field, "") if isinstance(nested_party, dict) else ""

    # Invoice-level and bank columns are identical for every row: build once.
    base_columns = {
        "Invoice Number": format_text_field(invoice_data.get("Invoice Number", "")),
        "Invoice Date": format_text_field(invoice_data.get("Invoice Date", "")),
        "Due Date": format_text_field(invoice_data.get("Due Date", "")),
        "Currency": format_text_field(invoice_data.get("Currency", "")),
        "Subtotal": format_amount_field(invoice_data.get("Subtotal", 0.0)),
        "Tax Percentage": format_amount_field(invoice_data.get("Tax Percentage", 0.0)),
        "Total Tax": format_amount_field(invoice_data.get("Total Tax", 0.0)),
        "Total Amount": format_amount_field(invoice_data.get("Total Amount", 0.0)),
        "Sender Name": format_text_field(party_field("Sender", "Name")),
        "Sender Address": format_text_field(party_field("Sender", "Address")),
        "Recipient Name": format_text_field(party_field("Recipient", "Name")),
        "Recipient Address": format_text_field(party_field("Recipient", "Address")),
    }
    for f in EXPECTED_BANK_FIELDS:
        base_columns[f] = format_text_field(bank_details.get(f, ""))

    if not line_items:
        placeholder = dict(base_columns)
        placeholder.update({
            "Item Description": "NA",
            "Item Quantity": 0,
            "Item Unit Price": 0.0,
            "Item Amount": 0.0,
            "Item Tax": 0.0,
            "Item Line Total": 0.0,
        })
        return [placeholder]

    rows = []
    for item in line_items:
        # Non-dict entries are exported as an empty item row.
        fields = item if isinstance(item, dict) else {}
        row = dict(base_columns)
        row.update({
            "Item Description": format_text_field(fields.get("Description", "")),
            "Item Quantity": format_amount_field(fields.get("Quantity", 0)),
            "Item Unit Price": format_amount_field(fields.get("Unit Price", 0.0)),
            "Item Amount": format_amount_field(fields.get("Amount", 0.0)),
            "Item Tax": format_amount_field(fields.get("Tax", 0.0)),
            "Item Line Total": format_amount_field(fields.get("Line Total", fields.get("Amount", 0.0))),
        })
        rows.append(row)
    return rows
|
| 886 |
+
|
| 887 |
+
# -----------------------------
|
| 888 |
+
# Load model (COMMENTED OUT - Now using vLLM API)
|
| 889 |
+
# -----------------------------
|
| 890 |
+
# try:
|
| 891 |
+
# with st.spinner("Loading model & processor (cached) ..."):
|
| 892 |
+
# processor, model, device, decoder_input_ids = load_model_and_processor(HF_MODEL_ID, TASK_PROMPT)
|
| 893 |
+
# except Exception as e:
|
| 894 |
+
# st.error("Could not load model automatically. See details below.")
|
| 895 |
+
# st.exception(e)
|
| 896 |
+
# st.stop()
|
| 897 |
+
|
| 898 |
+
# -----------------------------
|
| 899 |
+
# Session scaffolding
# -----------------------------
# Seed each session key with its default only when the key is absent, so a
# value that is already stored in the session is never overwritten here.
for _state_key, _state_default in (
    ("batch_results", {}),
    ("current_file_hash", None),
    ("is_processing_batch", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
|
| 907 |
+
|
| 908 |
+
# -----------------------------
|
| 909 |
+
# Pre-mount two-column skeleton to avoid layout jump
|
| 910 |
+
# -----------------------------
|
| 911 |
+
frame_left, frame_right = st.columns([1, 1], vertical_alignment="top")
|
| 912 |
+
|
| 913 |
+
# -----------------------------
|
| 914 |
+
# Upload / Process
|
| 915 |
+
# -----------------------------
|
| 916 |
+
if not st.session_state.is_processing_batch and len(st.session_state.batch_results) == 0:
|
| 917 |
+
with frame_left:
|
| 918 |
+
st.header("📤 Upload Invoices")
|
| 919 |
+
uploaded_files = st.file_uploader(
|
| 920 |
+
"Upload invoice images (png/jpg/jpeg/pdf)",
|
| 921 |
+
type=["png", "jpg", "jpeg", "pdf"],
|
| 922 |
+
accept_multiple_files=True
|
| 923 |
+
)
|
| 924 |
+
|
| 925 |
+
if uploaded_files:
|
| 926 |
+
st.session_state.is_processing_batch = True
|
| 927 |
+
progress_bar = st.progress(0)
|
| 928 |
+
status_text = st.empty()
|
| 929 |
+
|
| 930 |
+
for idx, uploaded_file in enumerate(uploaded_files):
|
| 931 |
+
status_text.text(f"Processing {idx+1}/{len(uploaded_files)}: {uploaded_file.name}")
|
| 932 |
+
uploaded_bytes = uploaded_file.read()
|
| 933 |
+
file_hash = hashlib.sha256(uploaded_bytes).hexdigest()
|
| 934 |
+
|
| 935 |
+
if file_hash in st.session_state.batch_results:
|
| 936 |
+
progress_bar.progress((idx + 1) / len(uploaded_files))
|
| 937 |
+
continue
|
| 938 |
+
|
| 939 |
+
# Load image (first page for PDFs)
|
| 940 |
+
image = None
|
| 941 |
+
is_pdf = uploaded_file.name.lower().endswith('.pdf') or (hasattr(uploaded_file, 'type') and uploaded_file.type == 'application/pdf')
|
| 942 |
+
if is_pdf:
|
| 943 |
+
if convert_from_bytes is None:
|
| 944 |
+
st.warning(f"PDF {uploaded_file.name} could not be rendered (pdf2image/poppler missing).")
|
| 945 |
+
continue
|
| 946 |
+
try:
|
| 947 |
+
pages = convert_from_bytes(uploaded_bytes, dpi=200)
|
| 948 |
+
if len(pages) > 0:
|
| 949 |
+
image = pages[0].convert("RGB")
|
| 950 |
+
else:
|
| 951 |
+
st.warning(f"PDF {uploaded_file.name} has no pages.")
|
| 952 |
+
continue
|
| 953 |
+
except Exception:
|
| 954 |
+
st.warning(f"Could not render PDF {uploaded_file.name}. Ensure 'pdf2image' and poppler are installed.")
|
| 955 |
+
continue
|
| 956 |
+
else:
|
| 957 |
+
try:
|
| 958 |
+
image = Image.open(BytesIO(uploaded_bytes)).convert("RGB")
|
| 959 |
+
except Exception:
|
| 960 |
+
st.warning(f"Failed to open {uploaded_file.name}.")
|
| 961 |
+
continue
|
| 962 |
+
|
| 963 |
+
if image is None:
|
| 964 |
+
continue
|
| 965 |
+
|
| 966 |
+
# vLLM Inference + parsing + tax validation
|
| 967 |
+
try:
|
| 968 |
+
# Call vLLM API
|
| 969 |
+
raw_json = run_inference_vllm(image)
|
| 970 |
+
|
| 971 |
+
if raw_json:
|
| 972 |
+
# Parse JSON response
|
| 973 |
+
parsed_data = parse_vllm_json(raw_json)
|
| 974 |
+
|
| 975 |
+
if parsed_data:
|
| 976 |
+
# Apply tax validation
|
| 977 |
+
mapped = validate_and_calculate_taxes(parsed_data)
|
| 978 |
+
else:
|
| 979 |
+
st.warning(f"Failed to parse JSON for {uploaded_file.name}")
|
| 980 |
+
mapped = {}
|
| 981 |
+
else:
|
| 982 |
+
st.warning(f"No response from vLLM for {uploaded_file.name}")
|
| 983 |
+
mapped = {}
|
| 984 |
+
|
| 985 |
+
pred = raw_json # Store raw JSON for debugging
|
| 986 |
+
except Exception as e:
|
| 987 |
+
st.warning(f"Error processing {uploaded_file.name}: {str(e)}")
|
| 988 |
+
pred = None
|
| 989 |
+
mapped = {}
|
| 990 |
+
|
| 991 |
+
safe_mapped = mapped if isinstance(mapped, dict) else {}
|
| 992 |
+
|
| 993 |
+
st.session_state.batch_results[file_hash] = {
|
| 994 |
+
"file_name": uploaded_file.name,
|
| 995 |
+
"image": image,
|
| 996 |
+
"raw_pred": pred,
|
| 997 |
+
"mapped_data": safe_mapped,
|
| 998 |
+
"edited_data": safe_mapped.copy()
|
| 999 |
+
}
|
| 1000 |
+
|
| 1001 |
+
progress_bar.progress((idx + 1) / len(uploaded_files))
|
| 1002 |
+
|
| 1003 |
+
status_text.text("✅ All files processed!")
|
| 1004 |
+
st.session_state.is_processing_batch = False
|
| 1005 |
+
st.rerun()
|
| 1006 |
+
|
| 1007 |
+
with frame_right:
|
| 1008 |
+
st.caption("Preview & editor will appear here after extraction.")
|
| 1009 |
+
|
| 1010 |
+
elif len(st.session_state.batch_results) > 0:
|
| 1011 |
+
|
| 1012 |
+
# --------- Top row: All-results download + Back button ----------
|
| 1013 |
+
with frame_left:
|
| 1014 |
+
all_rows = []
|
| 1015 |
+
for file_hash, result in st.session_state.batch_results.items():
|
| 1016 |
+
rows = flatten_invoice_to_rows(result["edited_data"])
|
| 1017 |
+
for r in rows:
|
| 1018 |
+
r["Source File"] = result.get("file_name", file_hash)
|
| 1019 |
+
all_rows.extend(rows)
|
| 1020 |
+
|
| 1021 |
+
if all_rows:
|
| 1022 |
+
full_df = pd.DataFrame(all_rows)
|
| 1023 |
+
cols = list(full_df.columns)
|
| 1024 |
+
if "Source File" in cols:
|
| 1025 |
+
cols = ["Source File"] + [c for c in cols if c != "Source File"]
|
| 1026 |
+
full_df = full_df[cols]
|
| 1027 |
+
csv_bytes = full_df.to_csv(index=False).encode("utf-8")
|
| 1028 |
+
st.download_button("📦 Download All Results (CSV)", csv_bytes,
|
| 1029 |
+
file_name="all_extracted_invoices.csv", mime="text/csv", key="download_all_csv")
|
| 1030 |
+
|
| 1031 |
+
with frame_right:
|
| 1032 |
+
if st.button("⬅️ Back to Upload"):
|
| 1033 |
+
st.session_state.batch_results.clear()
|
| 1034 |
+
st.session_state.current_file_hash = None
|
| 1035 |
+
st.session_state.is_processing_batch = False
|
| 1036 |
+
st.rerun()
|
| 1037 |
+
|
| 1038 |
+
# --------- Selector ----------
|
| 1039 |
+
with frame_left:
|
| 1040 |
+
file_options = {f"{v['file_name']} ({k[:6]})": k for k, v in st.session_state.batch_results.items()}
|
| 1041 |
+
selected_display = st.selectbox("Select invoice to view/edit:", options=list(file_options.keys()), index=0, key="file_selector")
|
| 1042 |
+
selected_hash = file_options[selected_display]
|
| 1043 |
+
if st.session_state.current_file_hash != selected_hash:
|
| 1044 |
+
st.session_state.current_file_hash = selected_hash
|
| 1045 |
+
|
| 1046 |
+
current = st.session_state.batch_results[selected_hash]
|
| 1047 |
+
image = current["image"]
|
| 1048 |
+
form_data = current["edited_data"]
|
| 1049 |
+
|
| 1050 |
+
# --------- Initialize widget state - FORCE UPDATE from form_data ----------
|
| 1051 |
+
bank = form_data.get("Bank Details", {}) if isinstance(form_data.get("Bank Details", {}), dict) else {}
|
| 1052 |
+
|
| 1053 |
+
# Always update state from form_data (don't use ensure_state which only sets if not exists)
|
| 1054 |
+
st.session_state[f"Invoice Number_{selected_hash}"] = form_data.get('Invoice Number', '')
|
| 1055 |
+
|
| 1056 |
+
# Parse dates to date objects for date_input widgets
|
| 1057 |
+
invoice_date_obj = parse_date_to_object(form_data.get('Invoice Date', ''))
|
| 1058 |
+
due_date_obj = parse_date_to_object(form_data.get('Due Date', ''))
|
| 1059 |
+
st.session_state[f"Invoice Date_{selected_hash}"] = invoice_date_obj
|
| 1060 |
+
st.session_state[f"Due Date_{selected_hash}"] = due_date_obj
|
| 1061 |
+
|
| 1062 |
+
st.session_state[f"Currency_{selected_hash}"] = form_data.get('Currency', 'USD') or 'USD'
|
| 1063 |
+
st.session_state[f"Currency_Custom_{selected_hash}"] = form_data.get('Currency', '') if form_data.get('Currency') not in ['USD','EUR','GBP','INR'] else ''
|
| 1064 |
+
st.session_state[f"Subtotal_{selected_hash}"] = float(form_data.get('Subtotal', 0.0))
|
| 1065 |
+
st.session_state[f"Tax Percentage_{selected_hash}"] = float(form_data.get('Tax Percentage', 0.0))
|
| 1066 |
+
st.session_state[f"Total Tax_{selected_hash}"] = float(form_data.get('Total Tax', 0.0))
|
| 1067 |
+
st.session_state[f"Total Amount_{selected_hash}"] = float(form_data.get('Total Amount', 0.0))
|
| 1068 |
+
st.session_state[f"Sender Name_{selected_hash}"] = form_data.get('Sender Name', '')
|
| 1069 |
+
st.session_state[f"Sender Address_{selected_hash}"] = form_data.get('Sender Address', '')
|
| 1070 |
+
st.session_state[f"Recipient Name_{selected_hash}"] = form_data.get('Recipient Name', '')
|
| 1071 |
+
st.session_state[f"Recipient Address_{selected_hash}"] = form_data.get('Recipient Address', '')
|
| 1072 |
+
st.session_state[f"Bank_bank_name_{selected_hash}"] = bank.get('bank_name', '')
|
| 1073 |
+
st.session_state[f"Bank_bank_account_number_{selected_hash}"] = bank.get('bank_account_number', '') or bank.get('bank_acc_no', '')
|
| 1074 |
+
st.session_state[f"Bank_bank_acc_name_{selected_hash}"] = bank.get('bank_acc_name', '')
|
| 1075 |
+
st.session_state[f"Bank_bank_iban_{selected_hash}"] = bank.get('bank_iban', '')
|
| 1076 |
+
st.session_state[f"Bank_bank_swift_{selected_hash}"] = bank.get('bank_swift', '')
|
| 1077 |
+
st.session_state[f"Bank_bank_routing_{selected_hash}"] = bank.get('bank_routing', '')
|
| 1078 |
+
st.session_state[f"Bank_bank_branch_{selected_hash}"] = bank.get('bank_branch', '')
|
| 1079 |
+
|
| 1080 |
+
# --------- Display (no wobble) ----------
|
| 1081 |
+
with frame_left:
|
| 1082 |
+
st.image(image, caption=current["file_name"], width=FIXED_IMG_WIDTH)
|
| 1083 |
+
st.write(f"**File Hash:** {selected_hash[:8]}...")
|
| 1084 |
+
if current.get('raw_pred') is not None:
|
| 1085 |
+
with st.expander("🔍 Show raw model output"):
|
| 1086 |
+
st.json(current['raw_pred'])
|
| 1087 |
+
|
| 1088 |
+
if st.button("🔁 Re-Run Inference", key=f"rerun_{selected_hash}"):
|
| 1089 |
+
with st.spinner("Re-running inference..."):
|
| 1090 |
+
try:
|
| 1091 |
+
# Call vLLM API
|
| 1092 |
+
raw_json = run_inference_vllm(image)
|
| 1093 |
+
|
| 1094 |
+
if raw_json:
|
| 1095 |
+
# Parse JSON response
|
| 1096 |
+
parsed_data = parse_vllm_json(raw_json)
|
| 1097 |
+
|
| 1098 |
+
if parsed_data:
|
| 1099 |
+
# Apply tax validation
|
| 1100 |
+
mapped = validate_and_calculate_taxes(parsed_data)
|
| 1101 |
+
else:
|
| 1102 |
+
st.error("Failed to parse JSON response")
|
| 1103 |
+
mapped = {}
|
| 1104 |
+
else:
|
| 1105 |
+
st.error("No response from vLLM")
|
| 1106 |
+
mapped = {}
|
| 1107 |
+
|
| 1108 |
+
safe_mapped = mapped if isinstance(mapped, dict) else {}
|
| 1109 |
+
pred = raw_json # Store raw JSON
|
| 1110 |
+
|
| 1111 |
+
# Update stored results
|
| 1112 |
+
st.session_state.batch_results[selected_hash]["raw_pred"] = pred
|
| 1113 |
+
st.session_state.batch_results[selected_hash]["mapped_data"] = mapped
|
| 1114 |
+
st.session_state.batch_results[selected_hash]["edited_data"] = safe_mapped.copy()
|
| 1115 |
+
|
| 1116 |
+
# Clear widget state for this file so defaults refresh from new mapped data
|
| 1117 |
+
for key in [k for k in st.session_state.keys() if k.endswith(f"_{selected_hash}")]:
|
| 1118 |
+
del st.session_state[key]
|
| 1119 |
+
|
| 1120 |
+
st.success("✅ Re-run complete")
|
| 1121 |
+
st.rerun()
|
| 1122 |
+
except Exception as e:
|
| 1123 |
+
st.error(f"Re-run failed: {e}")
|
| 1124 |
+
|
| 1125 |
+
with frame_right:
|
| 1126 |
+
st.subheader(f"Editable Invoice: {current['file_name']}")
|
| 1127 |
+
|
| 1128 |
+
# Quick swap outside the form (one clean rerun)
|
| 1129 |
+
swap_cols = st.columns([1,1,2])
|
| 1130 |
+
with swap_cols[0]:
|
| 1131 |
+
if st.button("⇄ Swap Sender ↔ Recipient", key=f"swap_{selected_hash}"):
|
| 1132 |
+
sn = f"Sender Name_{selected_hash}"
|
| 1133 |
+
rn = f"Recipient Name_{selected_hash}"
|
| 1134 |
+
sa = f"Sender Address_{selected_hash}"
|
| 1135 |
+
ra = f"Recipient Address_{selected_hash}"
|
| 1136 |
+
st.session_state[sn], st.session_state[rn] = st.session_state[rn], st.session_state[sn]
|
| 1137 |
+
st.session_state[sa], st.session_state[ra] = st.session_state[ra], st.session_state[sa]
|
| 1138 |
+
st.rerun()
|
| 1139 |
+
|
| 1140 |
+
# ----------------- FORM START -----------------
|
| 1141 |
+
with st.form(key=f"edit_form_{selected_hash}", clear_on_submit=False):
|
| 1142 |
+
tabs = st.tabs(["Invoice Details", "Sender/Recipient", "Bank Details", "Line Items"])
|
| 1143 |
+
|
| 1144 |
+
with tabs[0]:
|
| 1145 |
+
st.text_input("Invoice Number", key=f"Invoice Number_{selected_hash}")
|
| 1146 |
+
st.date_input("Invoice Date", key=f"Invoice Date_{selected_hash}", format="DD/MM/YYYY")
|
| 1147 |
+
st.date_input("Due Date", key=f"Due Date_{selected_hash}", format="DD/MM/YYYY")
|
| 1148 |
+
|
| 1149 |
+
curr_options = ['USD', 'EUR', 'GBP', 'INR', 'Other']
|
| 1150 |
+
if st.session_state[f"Currency_{selected_hash}"] not in curr_options:
|
| 1151 |
+
st.session_state[f"Currency_{selected_hash}"] = 'Other'
|
| 1152 |
+
st.selectbox("Currency", options=curr_options, key=f"Currency_{selected_hash}")
|
| 1153 |
+
|
| 1154 |
+
if st.session_state.get(f"Currency_{selected_hash}") == 'Other':
|
| 1155 |
+
st.text_input("Specify Currency", key=f"Currency_Custom_{selected_hash}")
|
| 1156 |
+
|
| 1157 |
+
st.number_input("Subtotal", key=f"Subtotal_{selected_hash}")
|
| 1158 |
+
st.number_input("Tax %", key=f"Tax Percentage_{selected_hash}")
|
| 1159 |
+
st.number_input("Total Tax", key=f"Total Tax_{selected_hash}")
|
| 1160 |
+
st.number_input("Total Amount", key=f"Total Amount_{selected_hash}")
|
| 1161 |
+
|
| 1162 |
+
with tabs[1]:
|
| 1163 |
+
st.text_input("Sender Name", key=f"Sender Name_{selected_hash}")
|
| 1164 |
+
st.text_area("Sender Address", key=f"Sender Address_{selected_hash}", height=80)
|
| 1165 |
+
st.text_input("Recipient Name", key=f"Recipient Name_{selected_hash}")
|
| 1166 |
+
st.text_area("Recipient Address", key=f"Recipient Address_{selected_hash}", height=80)
|
| 1167 |
+
|
| 1168 |
+
with tabs[2]:
|
| 1169 |
+
st.text_input("Bank Name", key=f"Bank_bank_name_{selected_hash}")
|
| 1170 |
+
st.text_input("Account Number", key=f"Bank_bank_account_number_{selected_hash}")
|
| 1171 |
+
st.text_input("Account Name", key=f"Bank_bank_acc_name_{selected_hash}")
|
| 1172 |
+
st.text_input("IBAN", key=f"Bank_bank_iban_{selected_hash}")
|
| 1173 |
+
st.text_input("SWIFT", key=f"Bank_bank_swift_{selected_hash}")
|
| 1174 |
+
st.text_input("Routing", key=f"Bank_bank_routing_{selected_hash}")
|
| 1175 |
+
st.text_input("Branch", key=f"Bank_bank_branch_{selected_hash}")
|
| 1176 |
+
|
| 1177 |
+
with tabs[3]:
|
| 1178 |
+
# Build base DF from current edited_data (not raw mapped) so it's always what the user last saved
|
| 1179 |
+
item_rows = form_data.get('Itemized Data', []) or []
|
| 1180 |
+
normalized = []
|
| 1181 |
+
for it in item_rows:
|
| 1182 |
+
if not isinstance(it, dict):
|
| 1183 |
+
it = {}
|
| 1184 |
+
normalized.append({
|
| 1185 |
+
"Description": it.get("Description", it.get("Item Description", "")),
|
| 1186 |
+
"Quantity": it.get("Quantity", it.get("Item Quantity", 0)),
|
| 1187 |
+
"Unit Price": it.get("Unit Price", it.get("Item Unit Price", 0.0)),
|
| 1188 |
+
"Amount": it.get("Amount", it.get("Item Amount", 0.0)),
|
| 1189 |
+
"Tax": it.get("Tax", it.get("Item Tax", 0.0)),
|
| 1190 |
+
"Line Total": it.get("Line Total", it.get("Item Line Total", 0.0)),
|
| 1191 |
+
})
|
| 1192 |
+
|
| 1193 |
+
items_df = pd.DataFrame(normalized) if normalized else pd.DataFrame(
|
| 1194 |
+
columns=["Description", "Quantity", "Unit Price", "Amount", "Tax", "Line Total"]
|
| 1195 |
+
)
|
| 1196 |
+
|
| 1197 |
+
# Show editor without totals
|
| 1198 |
+
edited_df = st.data_editor(
|
| 1199 |
+
items_df,
|
| 1200 |
+
num_rows="dynamic",
|
| 1201 |
+
key=f"items_editor_{selected_hash}",
|
| 1202 |
+
use_container_width=True,
|
| 1203 |
+
height=DATA_EDITOR_HEIGHT - 50, # Reduce height slightly for totals below
|
| 1204 |
+
)
|
| 1205 |
+
|
| 1206 |
+
# Display non-editable totals row immediately below (looks integrated)
|
| 1207 |
+
if len(edited_df) > 0:
|
| 1208 |
+
total_amount = edited_df["Amount"].sum()
|
| 1209 |
+
total_tax = edited_df["Tax"].sum()
|
| 1210 |
+
total_line_total = edited_df["Line Total"].sum()
|
| 1211 |
+
|
| 1212 |
+
# Create totals display - styled to look like part of the table
|
| 1213 |
+
totals_df = pd.DataFrame([{
|
| 1214 |
+
"Description": "──── TOTAL ────",
|
| 1215 |
+
"Quantity": "",
|
| 1216 |
+
"Unit Price": "",
|
| 1217 |
+
"Amount": f"${total_amount:,.2f}",
|
| 1218 |
+
"Tax": f"${total_tax:,.2f}",
|
| 1219 |
+
"Line Total": f"${total_line_total:,.2f}"
|
| 1220 |
+
}])
|
| 1221 |
+
|
| 1222 |
+
st.dataframe(
|
| 1223 |
+
totals_df,
|
| 1224 |
+
use_container_width=True,
|
| 1225 |
+
hide_index=True,
|
| 1226 |
+
height=38 # Single row height
|
| 1227 |
+
)
|
| 1228 |
+
|
| 1229 |
+
# Submit button: the last element of the edit form.
saved = st.form_submit_button("💾 Save All Edits")
# ----------------- FORM END -----------------

if saved:
    def _field(name, default=''):
        """Read the session-state value stored for *name* on the selected file."""
        return st.session_state.get(f"{name}_{selected_hash}", default)

    def _as_date_text(value):
        """Format a date-like value as dd-Mon-yyyy; '' when missing or not formattable."""
        if value is None:
            return ""
        try:
            return value.strftime("%d-%b-%Y")
        except (AttributeError, ValueError):
            return ""

    # 'Other' means the actual currency code lives in the custom field.
    currency = _field("Currency", 'USD')
    if currency == 'Other':
        currency = _field("Currency_Custom")

    # Convert date objects to normalized strings (dd-MMM-yyyy format)
    invoice_date_str = _as_date_text(_field("Invoice Date", None))
    due_date_str = _as_date_text(_field("Due Date", None))

    # Totals are always derived from the edited line items.
    line_items_list = edited_df.to_dict('records')

    def _column_sum(column):
        """Sum one line-item column, passing every cell through clean_float."""
        return sum(clean_float(row.get(column, 0)) for row in line_items_list)

    calculated_subtotal = _column_sum('Amount')
    calculated_total_tax = _column_sum('Tax')
    calculated_total = _column_sum('Line Total')

    # Effective tax rate; stays 0.0 unless both subtotal and tax are positive
    # (which also avoids dividing by zero).
    if calculated_subtotal > 0 and calculated_total_tax > 0:
        calculated_tax_pct = round((calculated_total_tax / calculated_subtotal) * 100, 4)
    else:
        calculated_tax_pct = 0.0

    sender_name = _field("Sender Name")
    sender_address = _field("Sender Address")
    recipient_name = _field("Recipient Name")
    recipient_address = _field("Recipient Address")

    bank_field_names = (
        'bank_name',
        'bank_account_number',
        'bank_acc_name',
        'bank_iban',
        'bank_swift',
        'bank_routing',
        'bank_branch',
    )

    updated = {
        'Invoice Number': _field("Invoice Number"),
        'Invoice Date': invoice_date_str,
        'Due Date': due_date_str,
        'Currency': currency,
        'Subtotal': calculated_subtotal,  # Auto-calculated from line items
        'Tax Percentage': calculated_tax_pct,  # Auto-calculated
        'Total Tax': calculated_total_tax,  # Auto-calculated from line items
        'Total Amount': calculated_total,  # Auto-calculated from line items
        'Sender Name': sender_name,
        'Sender Address': sender_address,
        'Recipient Name': recipient_name,
        'Recipient Address': recipient_address,
        # Session keys for bank fields look like "Bank_<field>_<hash>".
        'Bank Details': {name: _field(f"Bank_{name}") for name in bank_field_names},
        'Itemized Data': line_items_list,
        # Same sender / recipient values again, in nested form.
        'Sender': {"Name": sender_name, "Address": sender_address},
        'Recipient': {"Name": recipient_name, "Address": recipient_address},
    }

    # Update session state fields to reflect the new calculated values.
    # NOTE(review): if these keys belong to already-instantiated widgets,
    # Streamlit may reject the assignment -- confirm against the form code.
    recalculated = (
        ("Subtotal", calculated_subtotal),
        ("Tax Percentage", calculated_tax_pct),
        ("Total Tax", calculated_total_tax),
        ("Total Amount", calculated_total),
    )
    for state_name, state_value in recalculated:
        st.session_state[f"{state_name}_{selected_hash}"] = state_value

    st.session_state.batch_results[selected_hash]["edited_data"] = updated
    # NOTE(review): this message is emitted right before st.rerun(); confirm
    # it is actually visible to the user once the script restarts.
    st.success(f"✅ Saved: {current['file_name']} | Updated totals: Subtotal=${calculated_subtotal:,.2f}, Tax=${calculated_total_tax:,.2f}, Total=${calculated_total:,.2f}")
    st.rerun()  # Force rerun to show updated totals in the form fields
|
| 1304 |
+
|
| 1305 |
+
# Per-file CSV download (uses the current editor contents even if not saved).
# NOTE(review): only the line items come from the live editor; the four
# totals are read from session state, so they may lag behind unsaved edits --
# confirm that this is intended.
state = st.session_state
key_suffix = f"_{selected_hash}"

# 'Other' means the actual currency code lives in the custom field.
d_currency = state.get(f"Currency{key_suffix}", 'USD')
if d_currency == 'Other':
    d_currency = state.get(f"Currency_Custom{key_suffix}", '')

# Convert date objects to strings for download; anything missing or not
# formattable becomes an empty string.
date_labels = ("Invoice Date", "Due Date")
raw_dates = {label: state.get(f"{label}{key_suffix}", None) for label in date_labels}
date_text = {}
for label in date_labels:
    rendered = ""
    if raw_dates[label] is not None:
        try:
            rendered = raw_dates[label].strftime("%d-%b-%Y")
        except (AttributeError, ValueError):
            rendered = ""
    date_text[label] = rendered

# Build the export record step by step, keeping the original key order.
download_data = {
    'Invoice Number': state.get(f"Invoice Number{key_suffix}", ''),
    'Invoice Date': date_text["Invoice Date"],
    'Due Date': date_text["Due Date"],
    'Currency': d_currency,
}
for amount_field in ('Subtotal', 'Tax Percentage', 'Total Tax', 'Total Amount'):
    download_data[amount_field] = state.get(f"{amount_field}{key_suffix}", 0.0)
for party_field in ('Sender Name', 'Sender Address', 'Recipient Name', 'Recipient Address'):
    download_data[party_field] = state.get(f"{party_field}{key_suffix}", '')
# Session keys for bank fields look like "Bank_<field>_<hash>".
download_data['Bank Details'] = {
    bank_field: state.get(f"Bank_{bank_field}{key_suffix}", '')
    for bank_field in (
        'bank_name',
        'bank_account_number',
        'bank_acc_name',
        'bank_iban',
        'bank_swift',
        'bank_routing',
        'bank_branch',
    )
}
download_data['Itemized Data'] = edited_df.to_dict('records')

# Flatten to rows, serialize as UTF-8 CSV, and offer it for download under
# the source file's stem.
flat_rows = flatten_invoice_to_rows(download_data)
export_df = pd.DataFrame(flat_rows)
csv_payload = export_df.to_csv(index=False).encode("utf-8")
export_name = f"{Path(current['file_name']).stem}_full.csv"
st.download_button(
    "📥 Download This Invoice (CSV)",
    csv_payload,
    file_name=export_name,
    mime="text/csv",
    key=f"dl_{selected_hash}",
)
|
| 1362 |
+
|
| 1363 |
+
elif st.session_state.is_processing_batch:
|
| 1364 |
+
with frame_left:
|
| 1365 |
+
st.info("⏳ Processing batch... Please wait.")
|
| 1366 |
+
st.progress(0)
|
| 1367 |
+
with frame_right:
|
| 1368 |
+
st.caption("Preview & editor will appear here after extraction.")
|
| 1369 |
+
|
| 1370 |
+
else:
|
| 1371 |
+
# Shouldn't happen, but keeps skeleton steady
|
| 1372 |
+
with frame_left:
|
| 1373 |
+
st.caption("Ready when you are.")
|
| 1374 |
+
with frame_right:
|
| 1375 |
+
st.caption("Preview & editor will appear here after extraction.")
|