Upload app.py
Browse filesdeleted unnecessary steps and output size reduced
app.py
ADDED
|
@@ -0,0 +1,2051 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py β POWERGRID Document Auditor (single-file HuggingFace Spaces build)
|
| 3 |
+
=============================================================================
|
| 4 |
+
Single-file Gradio app for AI-powered engineering drawing comparison.
|
| 5 |
+
Designed for POWERGRID (765/400/132kV AIS/GIS vendor drawing audits).
|
| 6 |
+
|
| 7 |
+
Pipeline:
|
| 8 |
+
Stage 1 β Global Alignment : Phase Correlation + ORB/RANSAC homography
|
| 9 |
+
Stage 2 β Region Extraction : Content-aware morphology (no pretrained detector)
|
| 10 |
+
Stage 3 β Semantic Matching : ResNet50 embeddings + cosine similarity (position-agnostic)
|
| 11 |
+
Stage 4 β Siamese Comparison : ResNet50 patch embeddings + GradCAM heatmaps
|
| 12 |
+
|
| 13 |
+
Run locally:
|
| 14 |
+
python app.py
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 18 |
+
# IMPORTS
|
| 19 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
+
|
| 21 |
+
import base64
|
| 22 |
+
import io
|
| 23 |
+
import logging
|
| 24 |
+
import os
|
| 25 |
+
import time
|
| 26 |
+
from dataclasses import dataclass, field
|
| 27 |
+
from typing import Dict, List, Optional, Tuple
|
| 28 |
+
|
| 29 |
+
import cv2
|
| 30 |
+
import fitz # PyMuPDF
|
| 31 |
+
import gradio as gr
|
| 32 |
+
import numpy as np
|
| 33 |
+
import torch
|
| 34 |
+
import torch.nn as nn
|
| 35 |
+
import torch.nn.functional as F
|
| 36 |
+
from PIL import Image
|
| 37 |
+
from scipy.optimize import linear_sum_assignment
|
| 38 |
+
from skimage.metrics import structural_similarity as ssim
|
| 39 |
+
from torchvision import models, transforms
|
| 40 |
+
|
| 41 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 42 |
+
logger = logging.getLogger(__name__)
|
| 43 |
+
|
| 44 |
+
# ββ Logo: embed as base64 so it works on HuggingFace Spaces (no static folder) ββ
|
| 45 |
+
def _load_logo_b64(filename: str = "logo_0.png") -> str:
|
| 46 |
+
"""Return a data-URI string for the logo, or empty string if file not found."""
|
| 47 |
+
logo_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
|
| 48 |
+
if os.path.exists(logo_path):
|
| 49 |
+
with open(logo_path, "rb") as f:
|
| 50 |
+
b64 = base64.b64encode(f.read()).decode("utf-8")
|
| 51 |
+
ext = filename.rsplit(".", 1)[-1].lower()
|
| 52 |
+
mime = "image/png" if ext == "png" else f"image/{ext}"
|
| 53 |
+
return f"data:{mime};base64,{b64}"
|
| 54 |
+
return ""
|
| 55 |
+
|
| 56 |
+
# Data-URI for the POWERGRID logo, computed once at import time ("" when the
# logo file is missing next to this script).
_LOGO_URI = _load_logo_b64("logo_0.png")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 60 |
+
# DATA STRUCTURES
|
| 61 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
+
|
| 63 |
+
@dataclass
class Region:
    """A detected layout region (axis-aligned bounding box).

    Coordinates are in page-pixel space, origin at the top-left corner.
    """
    x: int
    y: int
    w: int
    h: int
    label: str = "text_block"  # text_block | figure | table | margin
    confidence: float = 1.0

    @property
    def bbox(self) -> Tuple[int, int, int, int]:
        """Corner form of the box: (x0, y0, x1, y1)."""
        x0, y0 = self.x, self.y
        return (x0, y0, x0 + self.w, y0 + self.h)

    @property
    def area(self) -> int:
        """Pixel area of the box (width * height)."""
        return self.w * self.h

    @property
    def center(self) -> Tuple[float, float]:
        """Box centre as (cx, cy) floats."""
        cx = self.x + self.w / 2.0
        cy = self.y + self.h / 2.0
        return (cx, cy)

    def iou(self, other: "Region") -> float:
        """Intersection-over-union with another region; 0.0 when disjoint."""
        left = max(self.x, other.x)
        top = max(self.y, other.y)
        right = min(self.x + self.w, other.x + other.w)
        bottom = min(self.y + self.h, other.y + other.h)
        overlap = max(0, right - left) * max(0, bottom - top)
        combined = self.area + other.area - overlap
        return overlap / combined if combined > 0 else 0.0
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
@dataclass
class MatchedPair:
    """A matched region pair between old and new documents.

    The matcher fills the first five fields; the later per-pair difference
    metrics (pixel_diff, ssim_score, semantic_diff, total_change, heatmap)
    keep their defaults until the comparison stage populates them.
    """
    region_old: Region            # region from the OLD page
    region_new: Region            # corresponding region from the NEW page
    match_score: float            # matcher similarity/score for this pair
    position_cost: float          # positional component of the matching cost
    appearance_cost: float        # appearance component of the matching cost
    pixel_diff: float = 0.0       # raw pixel-difference metric (higher = more change)
    ssim_score: float = 1.0       # structural similarity (1.0 = visually identical)
    semantic_diff: float = 0.0    # embedding-space distance between the patches
    total_change: float = 0.0     # aggregate change score for the pair
    heatmap: Optional[np.ndarray] = None  # per-pair difference heatmap, if computed
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@dataclass
class ComparisonResult:
    """Full comparison result for one document page."""
    matched_pairs: List[MatchedPair]
    unmatched_old: List[Region]
    unmatched_new: List[Region]
    global_transform: Optional[np.ndarray]
    total_change_pct: float
    heatmap: np.ndarray
    img_old_aligned: Optional[np.ndarray] = None  # aligned OLD, same coord-space as NEW

    def summary(self) -> str:
        """Build a human-readable multi-line summary of this comparison."""
        alignment = "Applied" if self.global_transform is not None else "Skipped"
        report = [
            f" Global Alignment : {alignment}",
            f" Matched Pairs : {len(self.matched_pairs)}",
            f" Deleted Regions : {len(self.unmatched_old)}",
            f" Added Regions : {len(self.unmatched_new)}",
            f" Total Change : {self.total_change_pct:.1f}%",
        ]
        # Only pairs above a 5% change floor count as "modified" for the average.
        modified = [pair for pair in self.matched_pairs if pair.total_change > 0.05]
        if modified:
            avg_chg = np.mean([pair.total_change for pair in modified])
            report.append(f" Avg Change (modified regions): {avg_chg:.2f}")
        return "\n".join(report)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 137 |
+
# STAGE 1 β GLOBAL ALIGNER
|
| 138 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
+
|
| 140 |
+
class GlobalAligner:
    """Stage 1 -- global page alignment.

    Estimates an OLD->NEW page transform with two complementary techniques:
    FFT phase correlation (pure-translation estimate, always computed) and an
    ORB + RANSAC partial-affine fit (translation/rotation/uniform scale).
    The phase-correlation shift serves as the fallback when ORB cannot find
    enough reliable feature matches.
    """

    def __init__(self, orb_features: int = 2000, ransac_threshold: float = 5.0):
        # Maximum number of ORB keypoints detected in each image.
        self.orb_features = orb_features
        # RANSAC reprojection threshold (pixels) for the affine estimate.
        self.ransac_threshold = ransac_threshold

    def _phase_correlation_shift(self, gray1: np.ndarray, gray2: np.ndarray) -> Tuple[float, float]:
        """Estimate a pure (dx, dy) translation via FFT phase correlation.

        Computes the normalised cross-power spectrum of the two grayscale
        images; the inverse FFT of that spectrum peaks at the circular shift
        between them.  The returned values are negated so they express the
        shift that maps gray1 onto gray2.
        """
        f1 = np.fft.fft2(gray1.astype(np.float32))
        f2 = np.fft.fft2(gray2.astype(np.float32))
        denom = np.abs(f1 * np.conj(f2)) + 1e-10  # epsilon guards divide-by-zero
        cross = (f1 * np.conj(f2)) / denom        # normalised cross-power spectrum
        corr = np.fft.ifft2(cross).real
        y_shift, x_shift = np.unravel_index(np.argmax(corr), corr.shape)
        h, w = gray1.shape
        # Correlation is circular: peak positions past the half-size actually
        # represent negative shifts, so wrap them back into the signed range.
        if y_shift > h // 2:
            y_shift -= h
        if x_shift > w // 2:
            x_shift -= w
        return float(-x_shift), float(-y_shift)

    def _orb_affine(self, gray_old: np.ndarray, gray_new: np.ndarray) -> Optional[np.ndarray]:
        """Fit a 2x3 partial-affine OLD->NEW matrix from ORB feature matches.

        Returns None when fewer than 10 keypoints or matches are available,
        signalling the caller to fall back to phase correlation.
        """
        orb = cv2.ORB_create(nfeatures=self.orb_features)
        kp1, des1 = orb.detectAndCompute(gray_old, None)
        kp2, des2 = orb.detectAndCompute(gray_new, None)
        if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
            return None
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)
        if len(matches) < 10:
            return None
        top_k = min(200, len(matches))
        # src = OLD keypoints, dst = NEW keypoints.
        # This ordering makes M map OLD->NEW (forward transform), which is
        # exactly what warpAffine expects: warpAffine(img_old, M, size)
        # places OLD pixels at their NEW positions.
        # BUG that was here previously: src/dst were swapped (kp2/NEW as src,
        # kp1/OLD as dst), producing M that mapped NEW->OLD.  warpAffine then
        # doubled the displacement instead of correcting it, causing the
        # full-image red/cyan fringe seen in the Alignment Check view.
        src_pts = np.float32([kp1[m.queryIdx].pt for m in matches[:top_k]]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches[:top_k]]).reshape(-1, 1, 2)
        M, mask = cv2.estimateAffinePartial2D(
            src_pts, dst_pts, method=cv2.RANSAC,
            ransacReprojThreshold=self.ransac_threshold,
        )
        return M

    def align(self, img_old: np.ndarray, img_new: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Warp img_old into img_new's coordinate space.

        Returns (aligned_old, M) where M is the 2x3 affine actually applied.
        White border fill keeps out-of-frame areas from reading as content
        on white-background engineering drawings.
        """
        g_old = cv2.cvtColor(img_old, cv2.COLOR_RGB2GRAY)
        g_new = cv2.cvtColor(img_new, cv2.COLOR_RGB2GRAY)
        dx, dy = self._phase_correlation_shift(g_old, g_new)
        M = self._orb_affine(g_old, g_new)
        if M is None:
            # ORB failed: fall back to a pure translation from phase correlation.
            M = np.array([[1.0, 0.0, dx], [0.0, 1.0, dy]], dtype=np.float32)
        h, w = img_old.shape[:2]
        aligned = cv2.warpAffine(
            img_old, M, (w, h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),  # white, matching the paper background
        )
        return aligned, M
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 203 |
+
# STAGE 2 β LAYOUT REGION EXTRACTOR
|
| 204 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 205 |
+
|
| 206 |
+
class LayoutRegionExtractor:
    """Stage 2 -- content-aware layout region extraction (no pretrained detector).

    Pipeline: Otsu binarisation -> staged morphological dilation so nearby ink
    fuses into solid blocks -> external contours become candidate regions ->
    heuristic classification -> greedy IoU merge -> reading-order sort.
    """

    def __init__(
        self,
        min_area_ratio: float = 0.0003,
        max_area_ratio: float = 0.92,
        dilation_kernel: Tuple[int, int] = (8, 2),
        dilation_iters: int = 2,
        merge_iou_threshold: float = 0.40,
    ):
        # Regions smaller than this fraction of the page are dropped as noise.
        self.min_area_ratio = min_area_ratio
        # Regions larger than this fraction are dropped (page frame / border).
        self.max_area_ratio = max_area_ratio
        # Base structuring-element size (width, height) for the first dilation.
        self.dilation_kernel = dilation_kernel
        self.dilation_iters = dilation_iters
        # Expanded boxes overlapping another box above this IoU are merged.
        self.merge_iou_threshold = merge_iou_threshold

    def _binarise(self, gray: np.ndarray) -> np.ndarray:
        """Otsu-threshold the blurred page; ink becomes foreground (255)."""
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        return binary

    def _dilate(self, binary: np.ndarray) -> np.ndarray:
        """Fuse nearby ink into solid blocks via staged dilation and closing."""
        k = cv2.getStructuringElement(cv2.MORPH_RECT, self.dilation_kernel)
        dilated = cv2.dilate(binary, k, iterations=self.dilation_iters)
        # Wide 15x1 kernel joins words along the same text line.
        k_line = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
        dilated = cv2.dilate(dilated, k_line, iterations=1)
        # Narrow/tall closing bridges adjacent text lines vertically.
        k_vert = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
        return cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, k_vert)

    def _classify(self, patch_gray: np.ndarray, w: int, h: int) -> str:
        """Heuristically label a patch from its aspect ratio and ink density.

        Returns one of "text_block" | "figure" | "table" | "margin";
        callers drop "margin" patches.  Thresholds are hand-tuned
        heuristics, not learned parameters.
        """
        aspect = w / max(h, 1)
        _, binary = cv2.threshold(patch_gray, 127, 255, cv2.THRESH_BINARY_INV)
        density = np.sum(binary > 0) / max(w * h, 1)
        if density < 0.02:
            # Nearly empty patch: call it margin unless it holds several shapes.
            contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) < 3:
                return "margin"
        if aspect > 4.0 and density > 0.06:
            return "text_block"  # wide and reasonably inked: a text line/block
        if 0.4 < aspect < 2.8 and density < 0.25:
            return "figure"      # roughly square and sparse: a drawing/figure
        if density > 0.18 and aspect > 1.0:
            return "table"       # dense and wider than tall: likely a table
        return "text_block"

    def _merge_overlapping(self, regions: List[Region]) -> List[Region]:
        """Greedily merge regions whose growing union box overlaps others.

        Each pass grows a seed box by absorbing any region whose IoU with the
        current union exceeds merge_iou_threshold; repeats until a full pass
        makes no merges.  NOTE(review): merged boxes are rebuilt with the
        default label ("text_block"); original labels are not preserved.
        """
        changed = True
        while changed:
            changed = False
            used = [False] * len(regions)
            merged: List[Region] = []
            for i, r1 in enumerate(regions):
                if used[i]:
                    continue
                x0, y0 = r1.x, r1.y
                x1, y1 = r1.x + r1.w, r1.y + r1.h
                for j, r2 in enumerate(regions):
                    if i == j or used[j]:
                        continue
                    expanded = Region(x0, y0, x1 - x0, y1 - y0)
                    if expanded.iou(r2) > self.merge_iou_threshold:
                        # Grow the union box to cover r2 as well.
                        x0 = min(x0, r2.x)
                        y0 = min(y0, r2.y)
                        x1 = max(x1, r2.x + r2.w)
                        y1 = max(y1, r2.y + r2.h)
                        used[j] = True
                        changed = True
                merged.append(Region(x0, y0, x1 - x0, y1 - y0))
                used[i] = True
            regions = merged
        return regions

    def extract(self, img_rgb: np.ndarray) -> List[Region]:
        """Detect layout regions on an RGB page image.

        Returns regions sorted into rough reading order (50-px horizontal
        bands top-to-bottom, then left-to-right within a band); patches
        classified as "margin" are excluded.
        """
        h, w = img_rgb.shape[:2]
        page_area = h * w
        gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
        binary = self._binarise(gray)
        dilated = self._dilate(binary)
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        candidates: List[Region] = []
        for cnt in contours:
            rx, ry, rw, rh = cv2.boundingRect(cnt)
            area = rw * rh
            if area < page_area * self.min_area_ratio:
                continue  # too small: speckle noise
            if area > page_area * self.max_area_ratio:
                continue  # too large: page frame / whole-page box
            patch = gray[ry: ry + rh, rx: rx + rw]
            label = self._classify(patch, rw, rh)
            if label == "margin":
                continue
            candidates.append(Region(rx, ry, rw, rh, label=label))
        regions = self._merge_overlapping(candidates)
        regions.sort(key=lambda r: (r.y // 50, r.x))
        logger.info("LayoutExtractor: %d regions detected", len(regions))
        return regions
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 304 |
+
# STAGE 3 β HUNGARIAN REGION MATCHER
|
| 305 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 306 |
+
|
| 307 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 308 |
+
# STAGE 3 β SEMANTIC RETRIEVAL MATCHER (position-agnostic)
|
| 309 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 310 |
+
|
| 311 |
+
class SemanticRetrievalMatcher:
    """Stage 3 -- position-agnostic region matching via embeddings.

    Replaces a Hungarian position-based matcher for layout-shift-robust
    document comparison.

    Strategy
    --------
    1. Crop every region patch from the OLD and NEW pages.
    2. Encode each patch with the shared ResNet50 backbone into a 128-d
       L2-normalised vector.
    3. Build an (N_new x N_old) cosine-similarity matrix.
    4. Run scipy.optimize.linear_sum_assignment on -similarity
       (i.e. maximise total similarity).
    5. Accept a pair only when its similarity >= min_similarity.

    Because position never enters the cost, a region that has *moved*
    (different x/y) but is otherwise identical still gets similarity ~1.0
    and is matched correctly.
    """

    def __init__(
        self,
        encoder: "_SiameseEncoder",
        device: torch.device,
        min_similarity: float = 0.50,
        thumbnail_size: Tuple[int, int] = (224, 224),
    ):
        # Shared backbone -- the same instance SiamesePatchComparator reuses.
        self.encoder = encoder
        self.device = device
        # Pairs below this cosine similarity are treated as unmatched.
        self.min_similarity = min_similarity
        # ImageNet preprocessing, matching the pretrained ResNet50 weights.
        self._transform = transforms.Compose([
            transforms.Resize(thumbnail_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    # ------------------------------------------------------------------
    def _patch(self, region: Region, img: np.ndarray) -> np.ndarray:
        """Crop a region from the image; returns a white 64x64 patch if empty."""
        p = img[region.y: region.y + region.h, region.x: region.x + region.w]
        if p.size == 0:
            p = np.full((64, 64, 3), 255, dtype=np.uint8)
        return p

    def _embed(self, patches: List[np.ndarray]) -> torch.Tensor:
        """
        Batch-encode a list of patches into an (N, 128) L2-normalised
        embedding tensor.  Runs entirely on self.device with no gradient.
        """
        tensors = [
            self._transform(Image.fromarray(p)) for p in patches
        ]
        batch = torch.stack(tensors).to(self.device)  # (N, 3, 224, 224)
        with torch.no_grad():
            embeddings, _ = self.encoder.encode(batch)  # (N, 128), already L2-normed
        return embeddings

    # ------------------------------------------------------------------
    def match(
        self,
        regions_old: List[Region],
        regions_new: List[Region],
        img_old: np.ndarray,
        img_new: np.ndarray,
    ) -> Tuple[List[MatchedPair], List[Region], List[Region]]:
        """Match regions between pages.

        Returns (matched_pairs, unmatched_old, unmatched_new), where
        unmatched_old are regions deleted from the OLD page and
        unmatched_new are regions added on the NEW page.
        """
        n_old, n_new = len(regions_old), len(regions_new)
        if n_old == 0 or n_new == 0:
            # Nothing to pair up: everything on either side is unmatched.
            return [], list(regions_old), list(regions_new)

        # -- 1. Encode both sets of patches --------------------------------
        patches_old = [self._patch(r, img_old) for r in regions_old]
        patches_new = [self._patch(r, img_new) for r in regions_new]

        emb_old = self._embed(patches_old)  # (n_old, 128)
        emb_new = self._embed(patches_new)  # (n_new, 128)

        # -- 2. Cosine similarity matrix: rows=NEW, cols=OLD ---------------
        # Embeddings are L2-normed, so the dot product equals cosine similarity.
        sim_mat = torch.mm(emb_new, emb_old.T).cpu().numpy()  # (n_new, n_old)

        # -- 3. Hungarian assignment on -similarity ------------------------
        row_ind, col_ind = linear_sum_assignment(-sim_mat)  # maximise similarity

        matched_pairs: List[MatchedPair] = []
        matched_old_idx: set = set()
        matched_new_idx: set = set()

        for ri, ci in zip(row_ind, col_ind):
            # ri indexes NEW regions (rows), ci indexes OLD regions (cols).
            sim = float(sim_mat[ri, ci])
            if sim < self.min_similarity:
                continue  # below threshold: treat both regions as unmatched
            matched_pairs.append(MatchedPair(
                region_old = regions_old[ci],
                region_new = regions_new[ri],
                match_score = sim,
                position_cost = 0.0,  # no position penalty by design
                appearance_cost= max(0.0, 1.0 - sim),
            ))
            matched_old_idx.add(ci)
            matched_new_idx.add(ri)

        unmatched_old = [regions_old[i] for i in range(n_old) if i not in matched_old_idx]
        unmatched_new = [regions_new[j] for j in range(n_new) if j not in matched_new_idx]

        logger.info(
            "SemanticRetrieval: %d matched | %d deleted | %d added "
            "(min_sim=%.2f)",
            len(matched_pairs), len(unmatched_old), len(unmatched_new),
            self.min_similarity,
        )
        return matched_pairs, unmatched_old, unmatched_new
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 424 |
+
# STAGE 4 β SIAMESE PATCH COMPARATOR
|
| 425 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 426 |
+
|
| 427 |
+
class _SiameseEncoder(nn.Module):
    """ResNet50 backbone shared by Stage 3 (matching) and Stage 4 (comparison).

    ``encode`` yields both a 128-d L2-normalised embedding (for cosine
    similarity) and the final convolutional feature map (kept for GradCAM).
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        # Everything up to (but excluding) avgpool + fc: spatial feature extractor.
        self.features = nn.Sequential(*list(backbone.children())[:-2])
        self.pool = backbone.avgpool
        # Projection head: 2048-d pooled features -> 128-d embedding space.
        self.embed = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
        )

    def encode(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (L2-normalised 128-d embedding, raw conv feature map)."""
        feat_map = self.features(x)
        flat = torch.flatten(self.pool(feat_map), 1)
        vec = F.normalize(self.embed(flat), p=2, dim=1)
        return vec, feat_map

    def forward(self, x1: torch.Tensor, x2: torch.Tensor):
        """Encode both branches of a Siamese pair in one call."""
        emb_a, map_a = self.encode(x1)
        emb_b, map_b = self.encode(x2)
        return emb_a, emb_b, map_a, map_b
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
class SiamesePatchComparator:
    """
    Stage 4 — fine-grained comparison of two already-matched patches.

    Combines three signals per patch pair:
      • pixel_diff    — fraction of pixels whose grayscale delta exceeds 8
      • ssim_cost     — 1 - SSIM (structural dissimilarity)
      • semantic_diff — scaled L2 distance between 128-d embeddings
    and additionally produces a Grad-CAM map localising WHERE inside the
    patch the embedding difference comes from.
    """

    def __init__(
        self,
        device: Optional[torch.device] = None,
        encoder: Optional[_SiameseEncoder] = None,  # optional shared encoder
    ):
        # Device preference order: CUDA > Apple MPS > CPU.
        if device is None:
            if torch.cuda.is_available():
                device = torch.device("cuda")
            elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                device = torch.device("mps")
            else:
                device = torch.device("cpu")
        self.device = device
        # Reuse the encoder from SemanticRetrievalMatcher if provided —
        # avoids loading ResNet50 weights a second time.
        if encoder is not None:
            self.model = encoder
            logger.info("SiamesePatchComparator: reusing shared encoder on %s", device)
        else:
            self.model = _SiameseEncoder().to(device).eval()
            logger.info("SiamesePatchComparator: created new encoder on %s", device)
        # Standard ImageNet preprocessing expected by the ResNet50 backbone.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def _to_tensor(self, patch_rgb: np.ndarray) -> torch.Tensor:
        # (H, W, 3) uint8 RGB → (1, 3, 224, 224) normalised tensor on self.device.
        return self.transform(Image.fromarray(patch_rgb)).unsqueeze(0).to(self.device)

    def _grad_cam(
        self,
        patch_old: np.ndarray,
        patch_new: np.ndarray,
        target_hw: Tuple[int, int],
    ) -> np.ndarray:
        """
        Grad-CAM spatial change map — WHERE inside the patch the embedding differs.

        Method
        ------
        1. Forward patch_old (no grad) -> embedding e_old.
        2. Forward patch_new (with grad; hooks on last conv block) -> e_new,
           with the feature map captured by a forward hook.
        3. Scalar loss = pairwise_distance(e_old.detach(), e_new).
        4. loss.backward() -> gradient w.r.t. the feature map captured by the
           backward hook.
        5. Grad-CAM = ReLU(mean_c(grad) * feature_map), normalised to [0, 1]
           and upsampled to the patch resolution.

        Pixels with HIGH activation moved the embedding the most — i.e. the
        actual edits.

        Returns
        -------
        np.ndarray of shape target_hw, float32, values in [0, 1]. Returns an
        all-zero map when the hooks captured nothing or the CAM is flat.
        """
        t_old = self._to_tensor(patch_old)
        t_new = self._to_tensor(patch_new)

        feat_store: Dict[str, torch.Tensor] = {}
        grad_store: Dict[str, torch.Tensor] = {}

        # Hook on the last convolutional block of the shared ResNet50.
        last_block = self.model.features[-1]

        def _fwd(module, inp, out):
            feat_store["f"] = out  # activation of the last conv block

        def _bwd(module, grad_in, grad_out):
            grad_store["g"] = grad_out[0]  # gradient w.r.t. that activation

        h_fwd = last_block.register_forward_hook(_fwd)
        h_bwd = last_block.register_full_backward_hook(_bwd)

        try:
            # e_old — no gradient needed, just a reference point.
            with torch.no_grad():
                e_old, _ = self.model.encode(t_old)

            # e_new — gradient flows through this path only.
            with torch.enable_grad():
                self.model.zero_grad()
                e_new, _ = self.model.encode(t_new)
                dist = F.pairwise_distance(e_old.detach(), e_new)
                dist.backward()
        finally:
            # Always detach the hooks, even if backward raises.
            h_fwd.remove()
            h_bwd.remove()

        if "f" not in feat_store or "g" not in grad_store:
            return np.zeros(target_hw, dtype=np.float32)

        # Grad-CAM: global-average-pool the gradients, weight the feature maps.
        weights = grad_store["g"].mean(dim=[2, 3], keepdim=True)
        cam = (weights * feat_store["f"]).sum(dim=1).squeeze()
        cam = F.relu(cam)

        cam_max = cam.max()
        if cam_max < 1e-8:
            # Flat CAM — nothing meaningful to localise.
            return np.zeros(target_hw, dtype=np.float32)

        cam = (cam / cam_max).detach().cpu().numpy()  # normalised to [0, 1]

        # Upsample to the original patch resolution.
        h, w = target_hw
        cam_up = cv2.resize(cam, (w, h), interpolation=cv2.INTER_LINEAR)
        return np.clip(cam_up, 0.0, 1.0).astype(np.float32)

    def compare(self, patch_old: np.ndarray, patch_new: np.ndarray) -> Dict[str, object]:
        """Compute the per-patch change metrics plus a Grad-CAM map.

        Both patches must already have identical (H, W) — see compare_pair,
        which pads them to a common size before calling this.
        """
        g_old = cv2.cvtColor(patch_old, cv2.COLOR_RGB2GRAY).astype(np.float32)
        g_new = cv2.cvtColor(patch_new, cv2.COLOR_RGB2GRAY).astype(np.float32)
        diff_map = np.abs(g_old - g_new)
        # Threshold of 8 (was 15) — CAD drawings have fine lines and small
        # text; a dimension change may shift only a handful of pixels slightly.
        changed_pixels = np.sum(diff_map > 8.0)
        pixel_diff = float(changed_pixels) / max(g_old.size, 1)
        ssim_val = float(ssim(g_old, g_new, data_range=255.0))
        ssim_cost = max(0.0, 1.0 - ssim_val)
        with torch.no_grad():
            t1 = self._to_tensor(patch_old)
            t2 = self._to_tensor(patch_new)
            e1, e2, _, _ = self.model(t1, t2)
            l2_dist = float(F.pairwise_distance(e1, e2).item())
        # Heuristic scaling of the embedding distance into [0, 1].
        semantic_diff = min(l2_dist / 10.0, 1.0)
        # Fixed blend of the three signals; weights sum to 1.
        total = 0.30 * pixel_diff + 0.40 * ssim_cost + 0.30 * semantic_diff

        # Grad-CAM: spatial map showing WHERE inside this patch the change is.
        h, w = patch_new.shape[:2]
        grad_cam_map = self._grad_cam(patch_old, patch_new, (h, w))

        return {
            "pixel_diff": pixel_diff,
            "ssim_score": ssim_val,
            "semantic_diff": semantic_diff,
            "total_change": min(float(total), 1.0),
            "grad_cam": grad_cam_map,  # (h, w) float32 in [0, 1]
        }

    def compare_pair(self, pair: MatchedPair, img_old: np.ndarray, img_new: np.ndarray) -> MatchedPair:
        """Fill the change metrics of a MatchedPair in place and return it.

        Crops both regions from their full-page images, pads them to a common
        size on a white canvas (CAD background is white), runs compare(), and
        stores the Grad-CAM map resized back to the NEW region's dimensions.
        """
        ro, rn = pair.region_old, pair.region_new
        patch_old = img_old[ro.y: ro.y + ro.h, ro.x: ro.x + ro.w]
        patch_new = img_new[rn.y: rn.y + rn.h, rn.x: rn.x + rn.w]
        if patch_old.size == 0 or patch_new.size == 0:
            # Degenerate crop (region outside the page) — leave pair untouched.
            return pair
        target_h = max(patch_old.shape[0], patch_new.shape[0])
        target_w = max(patch_old.shape[1], patch_new.shape[1])

        def _pad_white(patch: np.ndarray, th: int, tw: int) -> np.ndarray:
            # Top-left anchor the patch on a white (th, tw) canvas.
            canvas = np.full((th, tw, patch.shape[2]), 255, dtype=np.uint8)
            canvas[:patch.shape[0], :patch.shape[1]] = patch
            return canvas

        patch_old_p = _pad_white(patch_old, target_h, target_w)
        patch_new_p = _pad_white(patch_new, target_h, target_w)
        metrics = self.compare(patch_old_p, patch_new_p)
        pair.pixel_diff = metrics["pixel_diff"]
        pair.ssim_score = metrics["ssim_score"]
        pair.semantic_diff = metrics["semantic_diff"]
        pair.total_change = metrics["total_change"]
        # Store Grad-CAM map (sized to the new patch, not the padded version).
        raw_cam = metrics.get("grad_cam")
        if raw_cam is not None:
            rn = pair.region_new
            pair.heatmap = cv2.resize(raw_cam, (rn.w, rn.h),
                                      interpolation=cv2.INTER_LINEAR)
        return pair
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 618 |
+
# HEATMAP GENERATOR
|
| 619 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 620 |
+
|
| 621 |
+
class HeatmapGenerator:
    """
    Render per-region change intensities into a 4-channel float32 layer stack.

    Channel layout of the returned (H, W, 4) array (each channel in [0, 1]):
      0 — moderate change ("yellow")   1 — major change ("red")
      2 — added ("green")              3 — deleted ("purple")
    Channels 2 and 3 are intentionally left empty by generate() — see the
    comment in its body.
    """

    # RGB colour constants for downstream compositing of the four channels.
    _COLOUR_CHANGED = np.array([255, 220, 0], dtype=np.float32)
    _COLOUR_MAJOR = np.array([230, 30, 30], dtype=np.float32)
    _COLOUR_ADDED = np.array([ 30, 200, 60], dtype=np.float32)
    _COLOUR_DELETED = np.array([200, 30, 200], dtype=np.float32)

    @staticmethod
    def _project_region(r: Region, M_inv: Optional[np.ndarray], w: int, h: int) -> Tuple[int, int, int, int]:
        # Project the region's four corners through M_inv (assumed to be a
        # 2x3 affine matrix — TODO confirm against GlobalAligner) and return
        # the clamped axis-aligned bounding box (x0, y0, x1, y1).
        if M_inv is not None:
            corners = np.array([
                [r.x, r.y ],
                [r.x + r.w, r.y ],
                [r.x, r.y + r.h],
                [r.x + r.w, r.y + r.h],
            ], dtype=np.float32)
            ones = np.ones((4, 1), dtype=np.float32)
            projected = (M_inv @ np.hstack([corners, ones]).T).T
            x0 = int(np.clip(projected[:, 0].min(), 0, w - 1))
            y0 = int(np.clip(projected[:, 1].min(), 0, h - 1))
            x1 = int(np.clip(projected[:, 0].max(), 0, w - 1))
            y1 = int(np.clip(projected[:, 1].max(), 0, h - 1))
        else:
            # No transform — the region's own coordinates are already valid.
            x0, y0, x1, y1 = r.x, r.y, r.x + r.w, r.y + r.h
        return x0, y0, x1, y1

    @staticmethod
    def generate(
        img_shape: Tuple[int, int],
        matched_pairs: List[MatchedPair],
        unmatched_old: List[Region],
        unmatched_new: List[Region],
        smooth_kernel: int = 11,
        M_inv: Optional[np.ndarray] = None,
        change_threshold: float = 0.05,
    ) -> np.ndarray:
        """Build the (H, W, 4) intensity stack for the Heatmap tab.

        Only matched pairs whose total_change exceeds change_threshold are
        painted; per-pair Grad-CAM maps localise the paint when available,
        otherwise the whole bounding box is flooded at the pair's intensity.
        unmatched_old / unmatched_new are accepted for interface symmetry but
        not painted here (see the in-body comment).
        """
        h, w = img_shape
        layers = np.zeros((h, w, 4), dtype=np.float32)
        for pair in matched_pairs:
            chg = float(pair.total_change)
            if chg <= change_threshold:
                continue  # below the display threshold — skip
            r = pair.region_new
            ch = 0 if chg <= 0.40 else 1  # yellow channel vs red channel

            if pair.heatmap is not None:
                # —— Grad-CAM path: paint only the pixels that actually changed ——
                # pair.heatmap is (r.h, r.w) float32 in [0, 1].
                # Scale by total_change so brighter = more changed.
                cam = pair.heatmap
                if cam.shape != (r.h, r.w):
                    cam = cv2.resize(cam, (r.w, r.h),
                                     interpolation=cv2.INTER_LINEAR)
                intensity = np.clip(cam * chg, 0.0, 1.0)
                # np.maximum so overlapping regions keep their strongest value.
                layers[r.y:r.y + r.h, r.x:r.x + r.w, ch] = np.maximum(
                    layers[r.y:r.y + r.h, r.x:r.x + r.w, ch], intensity)
            else:
                # —— Fallback: flood the whole bounding box (no Grad-CAM available) ——
                layers[r.y:r.y + r.h, r.x:r.x + r.w, ch] = np.maximum(
                    layers[r.y:r.y + r.h, r.x:r.x + r.w, ch], chg)
        # Channels 2 (added/green) and 3 (deleted/purple) intentionally omitted.
        # The Heatmap tab shows only modification intensity via yellow gradient.
        # Added / deleted regions are visible in the Match Canvas thermal view.
        if smooth_kernel > 0:
            # GaussianBlur requires an odd kernel size — round up if even.
            ksize = smooth_kernel if smooth_kernel % 2 == 1 else smooth_kernel + 1
            for ch in range(4):
                if layers[:, :, ch].max() > 0:
                    layers[:, :, ch] = cv2.GaussianBlur(layers[:, :, ch], (ksize, ksize), sigmaX=3.0)
        # Gamma-lift (x**0.6) the two change channels so faint edits stay visible.
        for ch in range(2):
            if layers[:, :, ch].max() > 0:
                layers[:, :, ch] = np.power(layers[:, :, ch], 0.6)
        return layers
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 695 |
+
# VISUALISER
|
| 696 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 697 |
+
|
| 698 |
+
class Visualiser:
    """Colour palette and diagnostic overlays for the comparison UI."""

    # Region-category / status colours (RGB) used across the UI renders.
    COLOURS: Dict[str, Tuple[int, int, int]] = {
        "text_block": (30, 144, 255),
        "figure": (255, 165, 0),
        "table": (50, 205, 50),
        "unknown": (180, 180, 180),
        "deleted": (220, 50, 50),
        "added": (50, 220, 80),
        "changed": (255, 200, 0),
        "unchanged": (80, 220, 80),
    }

    @staticmethod
    def draw_alignment_check(
        img_old_aligned: np.ndarray,
        img_new: np.ndarray,
    ) -> np.ndarray:
        """
        Red-cyan anaglyph overlay for the Alignment Check tab.

        The aligned OLD page goes into the red channel; the NEW page fills
        both green and blue (i.e. cyan). Reading the result:

          * content at the same pixel in both      -> gray (R == G == B)
          * OLD-only content (residual drift)      -> red fringe
          * NEW-only content (residual drift)      -> cyan fringe
          * white background in both               -> white

        A mostly gray/white overlay with no fringes means the alignment is
        good; visible red/cyan fringes indicate residual misalignment.
        """
        gray_old = cv2.cvtColor(img_old_aligned, cv2.COLOR_RGB2GRAY)
        gray_new = cv2.cvtColor(img_new, cv2.COLOR_RGB2GRAY)
        # R carries the old page, G and B carry the new page.
        channels = [gray_old, gray_new, gray_new]
        return np.stack(channels, axis=2)
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 738 |
+
# HELPER β unmatched region visual-change check
|
| 739 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 740 |
+
|
| 741 |
+
# Mean-abs pixel diff below this threshold → region is visually identical
# despite not being paired by the matcher; excluded from the change score.
# Units: mean absolute grayscale difference on the 0–255 scale, measured on
# 64×64 thumbnails by _region_mean_diff().
_UNMATCHED_PIXEL_THR: float = 12.0  # on 0–255 grayscale scale
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
def _region_mean_diff(
    r: Region,
    img_a: np.ndarray,
    candidates: List[Region],
    img_b: np.ndarray,
    thumb: int = 64,
) -> float:
    """
    Minimum mean-abs grayscale difference (0–255) between region `r` of
    `img_a` and its spatially nearest candidate regions in `img_b`.

    "Spatially nearest" means smallest Euclidean centre-to-centre distance;
    only the three closest candidates are examined for speed. With no usable
    candidates the function returns 255.0 (maximally different).
    """
    if not candidates:
        return 255.0

    patch_a = img_a[r.y: r.y + r.h, r.x: r.x + r.w]
    if patch_a.size == 0:
        return 255.0

    def _thumbnail(patch: np.ndarray) -> np.ndarray:
        # Grayscale, shrink to a fixed square, float for subtraction.
        gray = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY)
        return cv2.resize(gray, (thumb, thumb)).astype(np.float32)

    thumb_a = _thumbnail(patch_a)

    cx, cy = r.center
    # Keep only the 3 candidates closest to r's centre.
    nearest = sorted(
        candidates,
        key=lambda c: (c.center[0] - cx) ** 2 + (c.center[1] - cy) ** 2,
    )[:3]

    best = 255.0
    for cand in nearest:
        patch_b = img_b[cand.y: cand.y + cand.h, cand.x: cand.x + cand.w]
        if patch_b.size == 0:
            continue  # degenerate crop — skip this candidate
        diff = float(np.mean(np.abs(thumb_a - _thumbnail(patch_b))))
        best = min(best, diff)
    return best
|
| 786 |
+
|
| 787 |
+
|
| 788 |
+
def _is_truly_changed(
    r: Region,
    candidates: List[Region],
    img_a: np.ndarray,
    img_b: np.ndarray,
) -> bool:
    """
    Decide whether an unmatched region represents a genuine add/delete.

    Returns True only when region `r` (cropped from img_a) looks visually
    different from its nearest spatial counterpart among `candidates`
    (cropped from img_b). This separates "matcher failed to pair identical
    regions" from "content was genuinely added or deleted".
    """
    diff = _region_mean_diff(r, img_a, candidates, img_b)
    return diff >= _UNMATCHED_PIXEL_THR
|
| 802 |
+
|
| 803 |
+
|
| 804 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 805 |
+
# MAIN PIPELINE
|
| 806 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 807 |
+
|
| 808 |
+
class CoarseToFinePipeline:
    """
    End-to-end page comparison: align → extract regions → match → compare.

    Stages (each timed in compare()):
      1. GlobalAligner warps the OLD page onto the NEW page (optional).
      2. LayoutRegionExtractor finds candidate regions on both pages.
      3. The matcher pairs regions between pages.
      4. SiamesePatchComparator scores each matched pair and localises edits.
    """

    def __init__(
        self,
        align: bool = True,
        device: Optional[torch.device] = None,
        region_extractor: Optional[LayoutRegionExtractor] = None,
        matcher=None,  # SemanticRetrievalMatcher or HungarianRegionMatcher
        comparator: Optional[SiamesePatchComparator] = None,
        min_similarity: float = 0.50,  # used only when matcher=None (auto-build)
    ):
        # Resolve device once here so both sub-modules share it.
        if device is None:
            if torch.cuda.is_available():
                device = torch.device("cuda")
            elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                device = torch.device("mps")
            else:
                device = torch.device("cpu")
        self._device = device

        self.aligner = GlobalAligner() if align else None
        self.extractor = region_extractor or LayoutRegionExtractor()

        if matcher is not None:
            # Caller supplied a custom matcher — use it as-is.
            self.matcher = matcher
            self.comparator = comparator or SiamesePatchComparator(device=device)
        else:
            # —— Default path: shared ResNet50 encoder ——
            # Build the encoder once; hand the same object to both
            # SemanticRetrievalMatcher (Stage 3) and SiamesePatchComparator (Stage 4).
            # This halves model-load time and GPU/CPU RAM usage.
            shared_encoder = _SiameseEncoder().to(device).eval()
            logger.info("Pipeline: shared ResNet50 encoder on %s", device)

            self.matcher = SemanticRetrievalMatcher(
                encoder = shared_encoder,
                device = device,
                min_similarity = min_similarity,
            )
            self.comparator = comparator or SiamesePatchComparator(
                device = device,
                encoder = shared_encoder,  # reuse — no second load
            )

    def compare(self, img_old: np.ndarray, img_new: np.ndarray, verbose: bool = True) -> ComparisonResult:
        """Run the full coarse-to-fine comparison of one page pair.

        Parameters: img_old / img_new are full-page RGB arrays in NEW-page
        resolution (img_old is warped onto img_new when alignment is on).
        Returns a ComparisonResult with matched pairs, unmatched regions,
        the global transform, the heatmap stack, and an overall change %.
        """
        timings: Dict[str, float] = {}
        t = time.time()
        M = None
        if self.aligner is not None:
            img_old_aligned, M = self.aligner.align(img_old, img_new)
        else:
            # Alignment disabled — copy so later stages never mutate the input.
            img_old_aligned = img_old.copy()
        timings["alignment"] = time.time() - t

        t = time.time()
        regions_old = self.extractor.extract(img_old_aligned)
        regions_new = self.extractor.extract(img_new)
        timings["extraction"] = time.time() - t

        t = time.time()
        matched, unmatched_old, unmatched_new = self.matcher.match(
            regions_old, regions_new, img_old_aligned, img_new)
        timings["matching"] = time.time() - t

        t = time.time()
        # Fine-grained scoring: fill metrics on each matched pair in place.
        for i, pair in enumerate(matched):
            matched[i] = self.comparator.compare_pair(pair, img_old_aligned, img_new)
        timings["siamese"] = time.time() - t

        if verbose:
            logger.info("Timings β align: %.2fs | extract: %.2fs | match: %.2fs | siamese: %.2fs",
                        timings["alignment"], timings["extraction"],
                        timings["matching"], timings["siamese"])

        h, w = img_new.shape[:2]
        # After the ORB fix, M maps OLD→NEW (forward).
        # _project_region uses this matrix to map unmatched OLD region corners
        # into NEW-page coordinates for heatmap rendering — so pass M directly,
        # NOT its inverse. (Previously M mapped NEW→OLD so the inverse was
        # needed; now the roles are corrected.)
        heatmap = HeatmapGenerator.generate(
            (h, w), matched, unmatched_old, unmatched_new,
            M_inv=M, change_threshold=0.05,
        )
        # —— Change % calculation (two-part fix) ——
        #
        # Part A — pixel-diff gate on unmatched regions.
        # Unmatched regions are NOT automatically "added/deleted".
        # They may simply be regions the matcher failed to pair even though
        # the content is identical. We compare each unmatched region to its
        # nearest spatial counterpart in the opposite list; only those whose
        # pixel diff exceeds _UNMATCHED_PIXEL_THR are counted as truly changed.
        #
        # Part B — normalise against full page area (not just detected regions).
        # Using content_area as denominator collapses to 100% when all regions
        # are unmatched. Using h*w gives a stable baseline independent of
        # how many regions were detected or matched.

        truly_deleted = [
            r for r in unmatched_old
            if _is_truly_changed(r, unmatched_new, img_old_aligned, img_new)
        ]
        truly_added = [
            r for r in unmatched_new
            if _is_truly_changed(r, unmatched_old, img_new, img_old_aligned)
        ]

        page_area = max(h * w, 1)  # Part B denominator
        # Only pairs above the 0.05 display threshold count towards change area.
        changed_area = sum(p.region_new.area for p in matched if p.total_change > 0.05)
        deleted_area = sum(r.area for r in truly_deleted)
        added_area = sum(r.area for r in truly_added)
        total_pct = min(100.0 * (changed_area + added_area + deleted_area) / page_area, 100.0)

        return ComparisonResult(
            matched_pairs=matched,
            unmatched_old=unmatched_old,
            unmatched_new=unmatched_new,
            global_transform=M,
            total_change_pct=total_pct,
            heatmap=heatmap,
            img_old_aligned=img_old_aligned,  # stored for thermal overlay
        )
|
| 931 |
+
|
| 932 |
+
|
| 933 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 934 |
+
# GRADIO APP β HELPERS
|
| 935 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 936 |
+
|
| 937 |
+
def _pick_device() -> torch.device:
|
| 938 |
+
if torch.cuda.is_available():
|
| 939 |
+
return torch.device("cuda")
|
| 940 |
+
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
| 941 |
+
return torch.device("mps")
|
| 942 |
+
return torch.device("cpu")
|
| 943 |
+
|
| 944 |
+
|
| 945 |
+
def _page_to_rgb(doc: fitz.Document, idx: int, dpi: int) -> np.ndarray:
    """Render page `idx` of `doc` at `dpi` and return an (H, W, 3) uint8 RGB array."""
    pixmap = doc[idx].get_pixmap(dpi=dpi)
    flat = np.frombuffer(pixmap.samples, np.uint8)
    # NOTE(review): assumes a 3-channel (no-alpha) pixmap — the default for
    # get_pixmap without alpha; confirm if alpha rendering is ever enabled.
    return flat.reshape(pixmap.height, pixmap.width, 3)
|
| 948 |
+
|
| 949 |
+
|
| 950 |
+
def _build_summary(
|
| 951 |
+
page_results: list,
|
| 952 |
+
aligned: bool,
|
| 953 |
+
skip_old_p1: bool = False,
|
| 954 |
+
skip_new_p1: bool = False,
|
| 955 |
+
) -> str:
|
| 956 |
+
total_changes = [pr["total_change_pct"] for pr in page_results]
|
| 957 |
+
|
| 958 |
+
lines = [
|
| 959 |
+
"ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ",
|
| 960 |
+
"β POWERGRID DOCUMENT AUDIT β CHANGE REPORT β",
|
| 961 |
+
"ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ",
|
| 962 |
+
"",
|
| 963 |
+
f" Total Pages Analysed : {len(page_results)}",
|
| 964 |
+
f" Overall Avg Change : {np.mean(total_changes):.2f}%",
|
| 965 |
+
"",
|
| 966 |
+
"ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ",
|
| 967 |
+
" PAGE-WISE CHANGE SUMMARY",
|
| 968 |
+
"ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ",
|
| 969 |
+
]
|
| 970 |
+
|
| 971 |
+
for pr in page_results:
|
| 972 |
+
pct = pr["total_change_pct"]
|
| 973 |
+
status = "β
MINIMAL" if pct < 5 else "β οΈ MODERATE" if pct < 20 else "π΄ SIGNIFICANT"
|
| 974 |
+
lines.append(f" Page {pr['page']:>3} β {pct:>5.1f}% β {status}")
|
| 975 |
+
|
| 976 |
+
significant = [pr["page"] for pr in page_results if pr["total_change_pct"] > 20]
|
| 977 |
+
if significant:
|
| 978 |
+
lines += [
|
| 979 |
+
"",
|
| 980 |
+
f" β οΈ Pages with significant changes (>20%): {significant}",
|
| 981 |
+
]
|
| 982 |
+
|
| 983 |
+
return "\n".join(lines)
|
| 984 |
+
|
| 985 |
+
|
| 986 |
+
def _build_output_pdf(page_results: list, output_path: str,
                      process_dpi: int = 400) -> str:
    """
    Assemble the per-page overlay images into one output PDF at full pixel depth.

    PyMuPDF page dimensions are in points (1 pt = 1/72 inch). Each overlay
    was rendered at `process_dpi`, so sizing the page as
    pixels * 72 / process_dpi points makes insert_image() place the PNG 1:1
    onto the page rect — no downsampling or upsampling occurs, preserving
    every pixel.

    Returns `output_path`.
    """
    report = fitz.open()
    pt_per_px = 72.0 / process_dpi  # pixel → point conversion factor
    for entry in page_results:
        overlay = entry["align_check"].convert("RGB")
        px_w, px_h = overlay.size
        page = report.new_page(width=px_w * pt_per_px, height=px_h * pt_per_px)
        png_buf = io.BytesIO()
        overlay.save(png_buf, format="PNG", optimize=True)  # lossless — no JPEG ringing
        page.insert_image(page.rect, stream=png_buf.getvalue())
    report.save(output_path, deflate=True, garbage=4, clean=True)
    report.close()
    return output_path
|
| 1015 |
+
|
| 1016 |
+
|
| 1017 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1018 |
+
# SPECIFIC-REGION HELPER β semantic global search in OLD document
|
| 1019 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1020 |
+
|
| 1021 |
+
# ImageNet normalisation reused from SemanticRetrievalMatcher.
# Resizes any RGB patch to the 224×224 input expected by the ResNet50
# encoder and applies standard ImageNet channel statistics so embeddings
# are directly comparable with those produced elsewhere in the pipeline.
_REGION_TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
|
| 1028 |
+
|
| 1029 |
+
|
| 1030 |
+
def _embed_patch(patch_rgb: np.ndarray,
                 encoder: "_SiameseEncoder",
                 device: torch.device) -> torch.Tensor:
    """Encode a single RGB numpy patch → (128,) L2-normalised embedding."""
    batch = _REGION_TRANSFORM(Image.fromarray(patch_rgb)).unsqueeze(0).to(device)
    with torch.no_grad():
        embeddings, _ = encoder.encode(batch)  # embeddings: (1, 128)
    return embeddings[0]
|
| 1038 |
+
|
| 1039 |
+
|
| 1040 |
+
def _find_matching_region_in_old(
    new_crop: np.ndarray,
    img_old_full: np.ndarray,
    encoder: "_SiameseEncoder",
    device: torch.device,
) -> Tuple[int, int, int, int]:
    """
    Locate where new_crop (user-selected patch from the NEW page) sits inside
    img_old_full (the complete OLD page).

    Method — semantic sliding-window search
    ---------------------------------------
    1. Encode new_crop with the shared ResNet50 encoder → 128-d embedding.
    2. Slide a window across img_old_full at multiple scales (±30 % of the
       crop size). Step = 50 % of the window size so adjacent windows
       overlap; window positions flush with the right and bottom page edges
       are always included (see BUGFIX below) so no location is missed.
    3. Encode every window patch in mini-batches and compute cosine
       similarity with the query embedding; pick the best window.
    4. Clamp the winning box to page bounds and return it.

    Why semantic (not pixel-level):
      • ResNet50 encodes *what* is in a region, not pixel values, so two
        revisions of the same table/panel/diagram embed near-identically
        even when text values changed slightly.
      • The multi-scale sweep handles content that was resized.
      • The full-page sweep finds content anywhere on the OLD page.

    Returns (x1, y1, x2, y2) in img_old_full pixel space.
    """
    crop_h, crop_w = new_crop.shape[:2]
    old_h, old_w = img_old_full.shape[:2]

    def _clamp_box(bx: int, by: int, bw: int, bh: int
                   ) -> Tuple[int, int, int, int]:
        # Clamp origin inside the page, then shrink width/height to fit.
        bx = max(0, min(bx, old_w - 1))
        by = max(0, min(by, old_h - 1))
        bw = max(1, min(bw, old_w - bx))
        bh = max(1, min(bh, old_h - by))
        return bx, by, bx + bw, by + bh

    # —— Step 1: encode the query (NEW crop) ——
    q_emb = _embed_patch(new_crop, encoder, device)  # (128,)

    # —— Step 2: build candidate windows across scales ——
    # Scales are relative to the crop's own size: a 600 px wide crop is
    # tested at 420 … 780 px widths.
    scales = (0.70, 0.85, 1.00, 1.15, 1.30)

    candidates: List[Tuple[int, int, int, int]] = []  # (x, y, w, h)

    for sc in scales:
        win_w = max(32, int(crop_w * sc))
        win_h = max(32, int(crop_h * sc))
        if win_w > old_w or win_h > old_h:
            continue  # window bigger than the page at this scale
        step_x = max(1, win_w // 2)
        step_y = max(1, win_h // 2)
        # BUGFIX: a plain range(0, dim - win + 1, step) sweep can stop up to
        # step-1 px short of the right/bottom edge, so content flush with the
        # page edge was never scanned. Always include the flush-edge position.
        xs = sorted(set(range(0, old_w - win_w + 1, step_x)) | {old_w - win_w})
        ys = sorted(set(range(0, old_h - win_h + 1, step_y)) | {old_h - win_h})
        for y in ys:
            for x in xs:
                candidates.append((x, y, win_w, win_h))

    logger.info(
        "_find_matching_region_in_old: %d candidate windows across %d scales",
        len(candidates), len(scales),
    )

    if not candidates:
        # Entire crop is bigger than the old page — return full page.
        logger.warning("_find_matching_region_in_old: crop >= page; returning full page box.")
        return _clamp_box(0, 0, old_w, old_h)

    # —— Step 3: batch-encode all windows, find best cosine similarity ——
    # Mini-batches of 64 to avoid OOM on large pages.
    BATCH = 64
    best_sim: float = -1.0
    best_box: Tuple[int, int, int, int] = candidates[0]

    for start in range(0, len(candidates), BATCH):
        batch_cands = candidates[start: start + BATCH]
        patches = [
            img_old_full[cy: cy + ch, cx: cx + cw]
            for (cx, cy, cw, ch) in batch_cands
        ]

        tensors = [_REGION_TRANSFORM(Image.fromarray(p)) for p in patches]
        batch_t = torch.stack(tensors).to(device)  # (B, 3, 224, 224)
        with torch.no_grad():
            embs, _ = encoder.encode(batch_t)  # (B, 128)

        # Cosine similarity: q_emb and embs are all L2-normalised already,
        # so the dot product IS the cosine.
        sims = (embs @ q_emb).cpu().numpy()  # (B,)

        idx = int(sims.argmax())
        if sims[idx] > best_sim:
            best_sim = float(sims[idx])
            best_box = batch_cands[idx]

    bx, by, bw, bh = best_box
    x1o, y1o, x2o, y2o = _clamp_box(bx, by, bw, bh)

    logger.info(
        "_find_matching_region_in_old: best cosine=%.4f OLD box (%d,%d)β(%d,%d)",
        best_sim, x1o, y1o, x2o, y2o,
    )
    return (x1o, y1o, x2o, y2o)
|
| 1150 |
+
|
| 1151 |
+
|
| 1152 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1153 |
+
# CORE PROCESSING
|
| 1154 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1155 |
+
|
| 1156 |
+
def run_comparison(
    pdf_old_file,
    pdf_new_file,
    skip_old_p1: bool,
    skip_new_p1: bool,
    enable_align: bool,
    compare_mode: str,
    page_old_input: int,
    page_new_input: int,
    page_compare_mode: str = "Full Page",
    region_coords=None,
    display_dpi: int = 72,
    progress=gr.Progress(),
):
    """Compare two PDF revisions and build the per-page overlay results.

    Parameters
    ----------
    pdf_old_file / pdf_new_file : uploaded file objects exposing ``.name`` (path).
    skip_old_p1 / skip_new_p1   : skip each document's cover page (Full Document mode).
    enable_align                : let the pipeline auto-align pages before diffing.
    compare_mode                : "Full Document" or "Specific Pages".
    page_old_input / page_new_input : 1-based page numbers (Specific Pages mode).
    page_compare_mode           : "Full Page" or "Specific Region".
    region_coords               : {x, y, width, height} in preview pixels, or None.
    display_dpi                 : DPI the region preview was rendered at.
    progress                    : Gradio progress callback.

    Returns
    -------
    (page_results, summary_text, output_pdf_path, 1, page_slider_update)

    Raises
    ------
    gr.Error
        If either PDF is missing.
    """
    dpi = 400  # process DPI - higher = more pixel depth in overlay output

    if pdf_old_file is None or pdf_new_file is None:
        raise gr.Error("Please upload both Previous Revision and New Document PDF files.")

    device = _pick_device()

    pipeline = CoarseToFinePipeline(
        align=enable_align,
        device=device,
        min_similarity=0.50,
    )

    progress(0, desc="Opening PDF files β¦")
    doc_old = fitz.open(pdf_old_file.name)
    try:
        doc_new = fitz.open(pdf_new_file.name)
    except Exception:
        doc_old.close()  # do not leak the first document if the second fails to open
        raise

    # try/finally guarantees both PyMuPDF documents are released even if
    # rendering or comparison raises part-way through (previously an
    # exception here leaked both open documents).
    try:
        # -- Build the list of (old_page_idx, new_page_idx) pairs to process --
        if compare_mode == "Specific Pages":
            # Convert 1-based user input to 0-based index, clamped to range.
            old_idx_req = int(page_old_input or 1) - 1
            new_idx_req = int(page_new_input or 1) - 1
            old_idx_req = max(0, min(old_idx_req, len(doc_old) - 1))
            new_idx_req = max(0, min(new_idx_req, len(doc_new) - 1))
            page_pairs = [(old_idx_req, new_idx_req)]
        else:
            # Full document mode: pair pages in order after optional cover skips.
            old_start = 1 if skip_old_p1 else 0
            new_start = 1 if skip_new_p1 else 0
            old_pages = len(doc_old) - old_start
            new_pages = len(doc_new) - new_start
            num_pages = min(old_pages, new_pages)

            if skip_old_p1:
                gr.Info("Skipping cover page of Previous Revision.")
            if skip_new_p1:
                gr.Info("Skipping cover page of New Document.")
            if old_pages != new_pages:
                gr.Warning(
                    f"Page count mismatch: Previous Revision={old_pages}, New Document={new_pages}. "
                    f"Processing {num_pages} pages."
                )
            page_pairs = [(pg + old_start, pg + new_start) for pg in range(num_pages)]

        num_pairs = len(page_pairs)
        page_results = []

        for i, (old_idx, new_idx) in enumerate(page_pairs):
            progress(i / num_pairs, desc=f"Processing page {i + 1} / {num_pairs} β¦")
            img_old = _page_to_rgb(doc_old, old_idx, dpi)
            img_new = _page_to_rgb(doc_new, new_idx, dpi)

            # Normalise page dimensions before any cropping: both pages must
            # share the same pixel dimensions so one pixel box selects the same
            # physical region in both documents.
            if img_old.shape != img_new.shape:
                img_old = cv2.resize(img_old, (img_new.shape[1], img_new.shape[0]))

            # Specific-region crop: the user drew a box on the NEW-doc preview
            # (rendered at display_dpi). Scale those drag coordinates up to
            # process DPI, crop the NEW page, then semantically locate the
            # corresponding region on the OLD page.
            if (compare_mode == "Specific Pages"
                    and page_compare_mode == "Specific Region"
                    and region_coords):
                rx = region_coords.get("x", 0)
                ry = region_coords.get("y", 0)
                rw = region_coords.get("width", img_new.shape[1])
                rh = region_coords.get("height", img_new.shape[0])
                sf = dpi / float(display_dpi or 72)  # preview px -> process DPI px
                x1 = max(0, int(rx * sf))
                y1 = max(0, int(ry * sf))
                x2 = min(img_new.shape[1], int((rx + rw) * sf))
                y2 = min(img_new.shape[0], int((ry + rh) * sf))

                logger.info(
                    "Specific Region: display_dpi=%d sf=%.3f "
                    "preview-box (%d,%d,%d,%d) β process-px (%d,%d)β(%d,%d)",
                    display_dpi, sf, rx, ry, rw, rh, x1, y1, x2, y2,
                )

                if x2 > x1 and y2 > y1:
                    # Crop the selected region from the NEW page.
                    img_new_crop = img_new[y1:y2, x1:x2]

                    # Semantic global search: encode the NEW crop, slide windows
                    # over the FULL OLD page at multiple scales and pick the
                    # highest cosine-similarity window as the matching region.
                    ox1, oy1, ox2, oy2 = _find_matching_region_in_old(
                        new_crop=img_new_crop,
                        img_old_full=img_old,
                        encoder=pipeline.matcher.encoder,
                        device=device,
                    )
                    logger.info(
                        "Specific Region: NEW (%d,%d)β(%d,%d) β OLD (%d,%d)β(%d,%d)",
                        x1, y1, x2, y2, ox1, oy1, ox2, oy2,
                    )

                    # Crop OLD at the found location; resize to exactly match the
                    # NEW crop so pipeline.compare() gets equal-size inputs.
                    img_old_raw = img_old[oy1:oy2, ox1:ox2]
                    nh, nw = img_new_crop.shape[:2]
                    if img_old_raw.shape[:2] != (nh, nw):
                        img_old_crop = cv2.resize(
                            img_old_raw, (nw, nh), interpolation=cv2.INTER_LINEAR,
                        )
                    else:
                        img_old_crop = img_old_raw

                    # Overlay is scoped to the selected region only.
                    img_old = img_old_crop
                    img_new = img_new_crop

            result = pipeline.compare(img_old, img_new)

            # Fall back to the raw old page if the pipeline produced no aligned image.
            old_aligned_for_check = (
                result.img_old_aligned if result.img_old_aligned is not None
                else img_old
            )
            align_check = Visualiser.draw_alignment_check(old_aligned_for_check, img_new)

            page_results.append({
                "page": i + 1,
                "result": result,
                "align_check": Image.fromarray(align_check),
                "original": Image.fromarray(img_old),
                "revised": Image.fromarray(img_new),
                "total_change_pct": result.total_change_pct,
            })
    finally:
        doc_old.close()
        doc_new.close()

    progress(0.95, desc="Generating report PDF β¦")
    output_pdf = _build_output_pdf(page_results, "ctf_output.pdf", process_dpi=dpi)
    summary = _build_summary(page_results, enable_align, skip_old_p1, skip_new_p1)

    progress(1.0, desc="Done!")
    return page_results, summary, output_pdf, 1, gr.update(maximum=num_pairs, value=1)
|
| 1318 |
+
|
| 1319 |
+
|
| 1320 |
+
def get_page_view(page_num, pages_data, view_mode, rotation: int = 0,
                  nudge_x: int = 0, nudge_y: int = 0, nudge_scale: float = 1.0):
    """Return the requested page image for the current view mode.

    Clamps ``page_num`` into range, picks the stored image for ``view_mode``,
    applies the manual nudge (Auto-Overlay only) and then any rotation.
    Returns None when there is no data or no image for the view.
    """
    if not pages_data:
        return None

    page_idx = min(max(int(page_num) - 1, 0), len(pages_data) - 1)
    entry = pages_data[page_idx]

    view_keys = {
        "Auto-Overlay": "align_check",
        "Previous Revision": "original",
        "New Document": "revised",
    }
    img = entry.get(view_keys.get(view_mode, "align_check"))
    if img is None:
        return None

    # Manual fine-tune only affects the Auto-Overlay view.
    effective_scale = float(nudge_scale) if nudge_scale else 1.0
    nudged = nudge_x != 0 or nudge_y != 0 or abs(effective_scale - 1.0) > 1e-4
    if view_mode == "Auto-Overlay" and nudged:
        img = _apply_nudge_overlay(entry, nudge_x, nudge_y, effective_scale)

    if img is not None and rotation % 360 != 0:
        img = img.rotate(rotation, expand=True)
    return img
|
| 1344 |
+
|
| 1345 |
+
|
| 1346 |
+
def _apply_nudge_overlay(pr: dict, dx: int, dy: int, scale: float = 1.0) -> Image.Image:
    """
    Re-render the Auto-Overlay with the NEW (red) layer shifted by (dx, dy) pixels
    and scaled by `scale` around the image centre.

    Cyan channel stays fixed (Previous Revision aligned).
    Red channel = New Doc with nudge translate + scale applied.
    """
    base = pr.get("align_check")
    if base is None:
        return None

    # Recover the two source layers from the stored overlay image:
    # R channel carries the old (cyan) page, G channel the new (red) page.
    rgb = np.array(base.convert("RGB"))
    old_layer = rgb[:, :, 0]
    new_layer = rgb[:, :, 1]

    h, w = old_layer.shape
    centre_x = w / 2.0
    centre_y = h / 2.0
    factor = float(scale) if scale and scale > 0 else 1.0

    # Single 2x3 affine: scale about the centre, then translate by (dx, dy).
    affine = np.float32([
        [factor, 0, dx + centre_x * (1 - factor)],
        [0, factor, dy + centre_y * (1 - factor)],
    ])

    shifted_new = cv2.warpAffine(
        new_layer, affine, (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=255,
    )

    # R=old (cyan base), G=B=transformed new (red fringe where they differ).
    merged = np.stack([old_layer, shifted_new, shifted_new], axis=2)
    return Image.fromarray(merged.astype(np.uint8))
|
| 1384 |
+
|
| 1385 |
+
|
| 1386 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1387 |
+
# GRADIO UI
|
| 1388 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1389 |
+
|
| 1390 |
+
# Load the stylesheet that ships next to this script (passed to launch(), not Blocks()).
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "styles.css"),
          encoding="utf-8") as _css_f:
    _CSS = _css_f.read()

# Shared Gradio theme: blue primary, gray neutrals, Inter font with sans fallback.
_THEME = gr.themes.Base(
    primary_hue=gr.themes.colors.blue,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
)
|
| 1399 |
+
|
| 1400 |
+
# Gradio 6+: theme & css are passed to launch(), not Blocks()
|
| 1401 |
+
with gr.Blocks(title="POWERGRID Document Auditor") as demo:
|
| 1402 |
+
|
| 1403 |
+
# ββ Header βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1404 |
+
_logo_tag = (
|
| 1405 |
+
f'<img src="{_LOGO_URI}" alt="POWERGRID Logo" />'
|
| 1406 |
+
if _LOGO_URI else
|
| 1407 |
+
'<span style="font-size:1.4rem;font-weight:900;color:#003087;letter-spacing:-1px;">PG</span>'
|
| 1408 |
+
)
|
| 1409 |
+
gr.HTML(f"""
|
| 1410 |
+
<div id="app-header">
|
| 1411 |
+
<div id="app-header-inner">
|
| 1412 |
+
<div id="app-header-logo">{_logo_tag}</div>
|
| 1413 |
+
<div id="app-header-text">
|
| 1414 |
+
<h1>POWERGRID Document Auditor</h1>
|
| 1415 |
+
<p>Power Grid Corporation of India Limited — AI-Powered Document Comparison</p>
|
| 1416 |
+
</div>
|
| 1417 |
+
</div>
|
| 1418 |
+
</div>
|
| 1419 |
+
""")
|
| 1420 |
+
|
| 1421 |
+
# (JS injected via demo.load below β see end of Blocks context)
|
| 1422 |
+
|
| 1423 |
+
# ββ Shared State βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1424 |
+
pages_state = gr.State(value=None)
|
| 1425 |
+
rotation_state = gr.State(value=0)
|
| 1426 |
+
nudge_x_state = gr.State(value=0) # manual X offset for red (New Doc) layer
|
| 1427 |
+
nudge_y_state = gr.State(value=0) # manual Y offset for red (New Doc) layer
|
| 1428 |
+
nudge_scale_state = gr.State(value=1.0) # manual scale for red (New Doc) layer
|
| 1429 |
+
region_coords_state = gr.State(value=None) # {x,y,width,height} in preview px; None = full page
|
| 1430 |
+
display_dpi_state = gr.State(value=72) # DPI used when rendering the region preview
|
| 1431 |
+
|
| 1432 |
+
# ββ Layout βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1433 |
+
with gr.Row(equal_height=False):
|
| 1434 |
+
|
| 1435 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1436 |
+
# LEFT PANE β inputs
|
| 1437 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1438 |
+
with gr.Column(scale=1, min_width=290, elem_id="left-panel"):
|
| 1439 |
+
|
| 1440 |
+
gr.HTML('<div class="section-label">Documents</div>')
|
| 1441 |
+
pdf_old = gr.File(label="Previous Revision PDF", file_types=[".pdf"])
|
| 1442 |
+
skip_old_p1 = gr.Checkbox(
|
| 1443 |
+
value=False,
|
| 1444 |
+
label="Skip cover page of Previous Revision",
|
| 1445 |
+
interactive=False,
|
| 1446 |
+
elem_classes=["skip-cb"],
|
| 1447 |
+
)
|
| 1448 |
+
|
| 1449 |
+
gr.HTML('<div class="section-divider"></div>')
|
| 1450 |
+
pdf_new = gr.File(label="Revised (New) PDF", file_types=[".pdf"])
|
| 1451 |
+
skip_new_p1 = gr.Checkbox(
|
| 1452 |
+
value=False,
|
| 1453 |
+
label="Skip cover page of New Revision",
|
| 1454 |
+
interactive=False,
|
| 1455 |
+
elem_classes=["skip-cb"],
|
| 1456 |
+
)
|
| 1457 |
+
|
| 1458 |
+
gr.HTML('<div class="section-divider"></div>')
|
| 1459 |
+
gr.HTML('<div class="section-label">Options</div>')
|
| 1460 |
+
enable_align = gr.Checkbox(
|
| 1461 |
+
value=True,
|
| 1462 |
+
label="Auto-align pages before comparing",
|
| 1463 |
+
info="Enable if documents were scanned or printed at different positions or scales.",
|
| 1464 |
+
)
|
| 1465 |
+
|
| 1466 |
+
gr.HTML('<div class="section-divider"></div>')
|
| 1467 |
+
gr.HTML('<div class="section-label">Compare Mode</div>')
|
| 1468 |
+
compare_mode = gr.Radio(
|
| 1469 |
+
choices=["Full Document", "Specific Pages"],
|
| 1470 |
+
value="Full Document",
|
| 1471 |
+
label="Compare Mode",
|
| 1472 |
+
show_label=False,
|
| 1473 |
+
elem_id="compare-mode-radio",
|
| 1474 |
+
)
|
| 1475 |
+
with gr.Row(visible=False, elem_id="specific-pages-row") as specific_pages_row:
|
| 1476 |
+
page_old_input = gr.Number(
|
| 1477 |
+
value=1, minimum=1, step=1, precision=0,
|
| 1478 |
+
label="Prev. Revision Page",
|
| 1479 |
+
elem_id="page-old-input",
|
| 1480 |
+
)
|
| 1481 |
+
page_new_input = gr.Number(
|
| 1482 |
+
value=1, minimum=1, step=1, precision=0,
|
| 1483 |
+
label="New Document Page",
|
| 1484 |
+
elem_id="page-new-input",
|
| 1485 |
+
)
|
| 1486 |
+
|
| 1487 |
+
# Sub-options shown when "Specific Pages" is selected
|
| 1488 |
+
with gr.Column(visible=False, elem_id="region-col") as region_col:
|
| 1489 |
+
page_compare_mode = gr.Radio(
|
| 1490 |
+
choices=["Full Page", "Specific Region"],
|
| 1491 |
+
value="Full Page",
|
| 1492 |
+
label="Page Comparison",
|
| 1493 |
+
show_label=True,
|
| 1494 |
+
elem_id="page-compare-mode-radio",
|
| 1495 |
+
)
|
| 1496 |
+
|
| 1497 |
+
# Region selection β gr.Image shows the page; canvas overlay captures bbox drag
|
| 1498 |
+
with gr.Column(visible=False, elem_id="region-preview-col") as region_preview_col:
|
| 1499 |
+
region_readout = gr.HTML(
|
| 1500 |
+
value='<div id="region-readout">No region selected β full page will be used</div>',
|
| 1501 |
+
elem_id="region-readout",
|
| 1502 |
+
)
|
| 1503 |
+
# gr.Image: Python pushes the page PIL image here (always visible in DOM)
|
| 1504 |
+
region_page_img = gr.Image(
|
| 1505 |
+
value=None,
|
| 1506 |
+
label=None,
|
| 1507 |
+
show_label=False,
|
| 1508 |
+
type="pil",
|
| 1509 |
+
interactive=False,
|
| 1510 |
+
elem_id="region-page-img",
|
| 1511 |
+
height=380,
|
| 1512 |
+
)
|
| 1513 |
+
# Coords textbox: JSβPython bridge β visible but CSS-collapsed to 0px
|
| 1514 |
+
region_coords_txt = gr.Textbox(
|
| 1515 |
+
value="",
|
| 1516 |
+
label=None,
|
| 1517 |
+
show_label=False,
|
| 1518 |
+
elem_id="region-coords-txt",
|
| 1519 |
+
elem_classes=["region-coords-hidden"],
|
| 1520 |
+
)
|
| 1521 |
+
clear_region_btn = gr.Button(
|
| 1522 |
+
"β Clear Region",
|
| 1523 |
+
size="sm",
|
| 1524 |
+
elem_id="clear-region-btn",
|
| 1525 |
+
)
|
| 1526 |
+
|
| 1527 |
+
gr.HTML('<div class="section-divider"></div>')
|
| 1528 |
+
run_btn = gr.Button("Run Audit", variant="primary", size="lg", elem_id="run-btn")
|
| 1529 |
+
|
| 1530 |
+
gr.HTML('<div class="section-divider"></div>')
|
| 1531 |
+
gr.HTML('<div class="section-label">Fine-Tune Alignment</div>')
|
| 1532 |
+
|
| 1533 |
+
# ββ MacBook-style arrow key D-pad βββββββββββββββββββββββββ
|
| 1534 |
+
# Row 1: [ β² ] (centred, half-row)
|
| 1535 |
+
with gr.Row(equal_height=True, elem_id="nudge-row-top"):
|
| 1536 |
+
gr.HTML('<div style="flex:1;min-width:0"></div>')
|
| 1537 |
+
nudge_up_btn = gr.Button("β²", elem_id="nudge-up", min_width=44, scale=0)
|
| 1538 |
+
gr.HTML('<div style="flex:1;min-width:0"></div>')
|
| 1539 |
+
|
| 1540 |
+
# Row 2: [ β ][ βΌ ][ βΆ ]
|
| 1541 |
+
with gr.Row(equal_height=True, elem_id="nudge-row-bot"):
|
| 1542 |
+
nudge_left_btn = gr.Button("β", elem_id="nudge-left", min_width=44, scale=0)
|
| 1543 |
+
nudge_down_btn = gr.Button("βΌ", elem_id="nudge-down", min_width=44, scale=0)
|
| 1544 |
+
nudge_right_btn = gr.Button("βΆ", elem_id="nudge-right", min_width=44, scale=0)
|
| 1545 |
+
|
| 1546 |
+
gr.HTML('<p class="nudge-tip">Tip: Run Audit resets alignment</p>')
|
| 1547 |
+
|
| 1548 |
+
nudge_step = gr.Number(
|
| 1549 |
+
value=1, minimum=1, maximum=100, step=1,
|
| 1550 |
+
label="Step Size (px)", precision=0,
|
| 1551 |
+
elem_id="nudge-step",
|
| 1552 |
+
)
|
| 1553 |
+
nudge_scale = gr.Number(
|
| 1554 |
+
value=1.0, minimum=0.10, maximum=10.0, step=0.005,
|
| 1555 |
+
label="Scale β Red Layer", precision=3,
|
| 1556 |
+
elem_id="nudge-scale",
|
| 1557 |
+
)
|
| 1558 |
+
nudge_readout = gr.HTML(
|
| 1559 |
+
value='<div id="nudge-readout-wrap">x = +0 px<br>y = +0 px<br>scale = 1.000</div>',
|
| 1560 |
+
elem_id="nudge-readout",
|
| 1561 |
+
)
|
| 1562 |
+
|
| 1563 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1564 |
+
# RIGHT PANE β results
|
| 1565 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1566 |
+
with gr.Column(scale=3, elem_id="right-panel"):
|
| 1567 |
+
|
| 1568 |
+
# ββ Toolbar: view tabs | rotation buttons ββ
|
| 1569 |
+
with gr.Row(elem_id="toolbar-row"):
|
| 1570 |
+
view_mode = gr.Radio(
|
| 1571 |
+
choices=["Auto-Overlay", "Previous Revision", "New Document"],
|
| 1572 |
+
value="Auto-Overlay",
|
| 1573 |
+
label="View",
|
| 1574 |
+
show_label=False,
|
| 1575 |
+
scale=1,
|
| 1576 |
+
min_width=320,
|
| 1577 |
+
elem_id="view-mode-radio",
|
| 1578 |
+
)
|
| 1579 |
+
gr.HTML('<div class="toolbar-sep"></div>')
|
| 1580 |
+
rot_left_btn = gr.Button("βΊ", scale=0, elem_id="rot-left", min_width=38)
|
| 1581 |
+
rot_right_btn = gr.Button("β»", scale=0, elem_id="rot-right", min_width=38)
|
| 1582 |
+
|
| 1583 |
+
# ββ Page slider (shown only after audit runs) ββββββββββββββ
|
| 1584 |
+
page_slider = gr.Slider(
|
| 1585 |
+
minimum=1, maximum=1, value=1, step=1,
|
| 1586 |
+
label="Page",
|
| 1587 |
+
visible=False,
|
| 1588 |
+
elem_id="page-slider",
|
| 1589 |
+
)
|
| 1590 |
+
|
| 1591 |
+
# Hidden state
|
| 1592 |
+
page_num_state = gr.State(value=1)
|
| 1593 |
+
total_pages_state = gr.State(value=1)
|
| 1594 |
+
|
| 1595 |
+
result_image = gr.Image(
|
| 1596 |
+
label="",
|
| 1597 |
+
type="pil",
|
| 1598 |
+
height=720,
|
| 1599 |
+
interactive=False,
|
| 1600 |
+
elem_id="result-image",
|
| 1601 |
+
)
|
| 1602 |
+
|
| 1603 |
+
gr.HTML("""
|
| 1604 |
+
<div id="legend-bar" style="display:flex; gap:18px; flex-wrap:wrap; align-items:center;">
|
| 1605 |
+
<span style="font-size:0.60rem;font-weight:700;color:#8BA0BB;text-transform:uppercase;
|
| 1606 |
+
letter-spacing:0.11em;white-space:nowrap;flex-shrink:0;">Overlay Legend</span>
|
| 1607 |
+
<span style="display:flex;align-items:center;gap:6px;">
|
| 1608 |
+
<span style="width:12px;height:12px;border-radius:3px;background:#7A7A7A;
|
| 1609 |
+
flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
|
| 1610 |
+
<span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
|
| 1611 |
+
<b style="color:#0F1C2E;font-weight:600;">Gray</b> — Unchanged</span>
|
| 1612 |
+
</span>
|
| 1613 |
+
<span style="display:flex;align-items:center;gap:6px;">
|
| 1614 |
+
<span style="width:12px;height:12px;border-radius:3px;background:#00BBBB;
|
| 1615 |
+
flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
|
| 1616 |
+
<span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
|
| 1617 |
+
<b style="color:#007070;font-weight:600;">Cyan</b> — Previous Revision</span>
|
| 1618 |
+
</span>
|
| 1619 |
+
<span style="display:flex;align-items:center;gap:6px;">
|
| 1620 |
+
<span style="width:12px;height:12px;border-radius:3px;background:#EE3333;
|
| 1621 |
+
flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
|
| 1622 |
+
<span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
|
| 1623 |
+
<b style="color:#BB0000;font-weight:600;">Red</b> — New Document</span>
|
| 1624 |
+
</span>
|
| 1625 |
+
</div>
|
| 1626 |
+
""")
|
| 1627 |
+
|
| 1628 |
+
with gr.Row():
|
| 1629 |
+
pdf_output = gr.File(label="β¬οΈ Download Result PDF")
|
| 1630 |
+
|
| 1631 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1632 |
+
# EVENT HANDLERS
|
| 1633 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1634 |
+
|
| 1635 |
+
def on_pdf_upload(pdf_file):
    """Enable the skip-cover-page checkbox only when the PDF has more than one page."""
    if pdf_file is None:
        return gr.update(interactive=False, value=False)
    try:
        doc = fitz.open(pdf_file.name)
        page_count = len(doc)
        doc.close()
    except Exception:
        # Unreadable here - leave the checkbox usable; Run will surface the real error.
        return gr.update(interactive=True)
    if page_count > 1:
        return gr.update(interactive=True)
    return gr.update(interactive=False, value=False)
|
| 1649 |
+
|
| 1650 |
+
def _readout_html(nx: int, ny: int, ns: float) -> str:
|
| 1651 |
+
return (
|
| 1652 |
+
f'<div id="nudge-readout-wrap">'
|
| 1653 |
+
f'x = {nx:+d} px<br>'
|
| 1654 |
+
f'y = {ny:+d} px<br>'
|
| 1655 |
+
f'scale = {ns:.3f}'
|
| 1656 |
+
f'</div>'
|
| 1657 |
+
)
|
| 1658 |
+
|
| 1659 |
+
def on_compare_mode_change(mode):
    """Reveal the specific-page inputs and region sub-options only in Specific Pages mode."""
    visible = mode == "Specific Pages"
    return gr.update(visible=visible), gr.update(visible=visible)
|
| 1663 |
+
|
| 1664 |
+
def on_load_preview(pdf_new_f, pg_new):
    """Render the requested New-Doc page at 72 DPI for the inline region preview."""
    if pdf_new_f is None:
        raise gr.Error("Please upload the New Document PDF first.")
    preview_dpi = 72
    doc = fitz.open(pdf_new_f.name)
    page_idx = min(max(0, int(pg_new or 1) - 1), len(doc) - 1)
    rgb = _page_to_rgb(doc, page_idx, preview_dpi)
    doc.close()
    preview = Image.fromarray(rgb)
    hint = '<div id="region-readout">Draw a box on the image below to select a region</div>'
    # Outputs: preview image, coords-textbox reset, coords-state reset, preview DPI, readout.
    return preview, "", None, preview_dpi, hint
|
| 1678 |
+
|
| 1679 |
+
def on_region_coords_change(coords_txt):
    """Parse 'x,y,w,h' string written by JS canvas into region_coords_state dict.

    Returns (coords_dict_or_None, readout_html). Any parse failure, empty
    input, or a box smaller than 5x5 px resets the selection to None
    (i.e. full page is used).
    """
    if not coords_txt or coords_txt.strip() == "":
        return None, '<div id="region-readout">No region selected β full page will be used</div>'
    try:
        # Parse via float() first so fractional values from the canvas survive,
        # then truncate to integer pixels.
        parts = [float(v) for v in coords_txt.strip().split(",")]
        x, y, w, h = int(parts[0]), int(parts[1]), int(parts[2]), int(parts[3])
        if w < 5 or h < 5:
            return None, '<div id="region-readout">Region too small β drag a larger area</div>'
        coords = {"x": x, "y": y, "width": w, "height": h}
        readout = (
            f'<div id="region-readout">'
            f'β Region: ({x}, {y}) β ({x+w}, {y+h})'
            f' | {w}×{h} px'
            f'</div>'
        )
        return coords, readout
    except Exception:
        # Malformed string from the JS bridge - treat as "no selection".
        return None, '<div id="region-readout">Invalid region β drag again</div>'
|
| 1698 |
+
|
| 1699 |
+
def on_clear_region():
    """Reset region selection: clear coords textbox and state (JS clears the canvas overlay)."""
    prompt = '<div id="region-readout">Draw a box on the image below to select a region</div>'
    return "", None, prompt
|
| 1702 |
+
|
| 1703 |
+
def on_run(pdf_old_f, pdf_new_f, skip_old, skip_new, align,
           cmp_mode, pg_old, pg_new,
           pg_cmp_mode, region_coords, display_dpi,
           progress=gr.Progress()):
    """Run the audit, then reset all per-view state (rotation, nudge, page)."""
    page_results, _summary, pdf_path, _, _ = run_comparison(
        pdf_old_f, pdf_new_f, skip_old, skip_new, align,
        cmp_mode, pg_old, pg_new,
        pg_cmp_mode, region_coords, display_dpi,
        progress
    )
    n_pages = len(page_results)
    first_img = page_results[0]["align_check"] if page_results else None
    slider_update = gr.update(visible=n_pages > 1, minimum=1, maximum=n_pages, value=1)
    # Order: pages, rotation, nudge_x, nudge_y, nudge_scale, page_num,
    #        total_pages, pdf file, displayed image, readout HTML, slider.
    return (
        page_results,
        0,
        0,
        0,
        1.0,
        1,
        n_pages,
        pdf_path,
        first_img,
        _readout_html(0, 0, 1.0),
        slider_update,
    )
|
| 1728 |
+
|
| 1729 |
+
def on_view_change(view, pg, total, pages_data, rot, nx, ny, ns):
    """Switching the view tab resets rotation to 0."""
    refreshed = get_page_view(pg, pages_data, view, 0, nx, ny, ns)
    return refreshed, 0
|
| 1731 |
+
|
| 1732 |
+
def on_rot_left(pg, total, pages_data, view, rot, nx, ny, ns):
    """Advance rotation by +90 degrees (mod 360) and refresh the view."""
    updated = (rot + 90) % 360
    return get_page_view(pg, pages_data, view, updated, nx, ny, ns), updated
|
| 1735 |
+
|
| 1736 |
+
def on_rot_right(pg, total, pages_data, view, rot, nx, ny, ns):
    """Advance rotation by -90 degrees (mod 360) and refresh the view."""
    updated = (rot - 90) % 360
    return get_page_view(pg, pages_data, view, updated, nx, ny, ns), updated
|
| 1739 |
+
|
| 1740 |
+
def on_pg_slide(pg, total, pages_data, view, rot, nx, ny, ns):
    """Jump to the slider's page; returns (image, page_number)."""
    page = int(pg or 1)
    return get_page_view(page, pages_data, view, rot, nx, ny, ns), page
|
| 1744 |
+
|
| 1745 |
+
# ββ Nudge handlers (arrow buttons + scale change) βββββββββββββββββ
|
| 1746 |
+
def on_nudge(direction: str, pg, total, pages_data, view, rot, nx, ny, ns, step):
    """Shift the red (New Doc) layer by `step` pixels in the given direction."""
    amount = int(step or 1)
    # Unit deltas per direction; unknown directions leave the offsets untouched.
    deltas = {"left": (-1, 0), "right": (1, 0), "up": (0, -1), "down": (0, 1)}
    ux, uy = deltas.get(direction, (0, 0))
    nx += ux * amount
    ny += uy * amount
    refreshed = get_page_view(pg, pages_data, view, rot, nx, ny, ns)
    return refreshed, nx, ny, ns, _readout_html(nx, ny, ns)
|
| 1754 |
+
|
| 1755 |
+
def on_scale_change(sc, pg, total, pages_data, view, rot, nx, ny):
    """Apply a new red-layer scale factor and refresh the view + readout."""
    new_scale = float(sc) if sc else 1.0
    refreshed = get_page_view(pg, pages_data, view, rot, nx, ny, new_scale)
    return refreshed, new_scale, _readout_html(nx, ny, new_scale)
|
| 1759 |
+
|
| 1760 |
+
pdf_old.change(fn=on_pdf_upload, inputs=[pdf_old], outputs=[skip_old_p1])
|
| 1761 |
+
pdf_new.change(fn=on_pdf_upload, inputs=[pdf_new], outputs=[skip_new_p1])
|
| 1762 |
+
|
| 1763 |
+
# Show / hide specific-page inputs and region sub-options when compare mode changes
|
| 1764 |
+
compare_mode.change(
|
| 1765 |
+
fn=on_compare_mode_change,
|
| 1766 |
+
inputs=[compare_mode],
|
| 1767 |
+
outputs=[specific_pages_row, region_col],
|
| 1768 |
+
)
|
| 1769 |
+
|
| 1770 |
+
# Show / hide the region preview block AND auto-load the preview
|
| 1771 |
+
# _preview_outputs: [region_page_img, region_coords_txt, coords_state, display_dpi_state, region_readout]
|
| 1772 |
+
_preview_outputs = [region_page_img, region_coords_txt,
|
| 1773 |
+
region_coords_state, display_dpi_state, region_readout]
|
| 1774 |
+
|
| 1775 |
+
def on_page_compare_mode_change(sub_mode, pdf_new_f, pg_new):
    """Toggle the region-preview column; auto-load the preview when entering region mode."""
    show = sub_mode == "Specific Region"
    col_update = gr.update(visible=show)
    if show:
        try:
            # on_load_preview returns (img, coords_txt, coords, dpi, readout).
            return (col_update, *on_load_preview(pdf_new_f, pg_new))
        except Exception:
            pass  # best-effort: fall through to the blank state below
    blank_readout = '<div id="region-readout">No region selected β full page will be used</div>'
    return col_update, None, "", None, 72, blank_readout
|
| 1786 |
+
|
| 1787 |
+
page_compare_mode.change(
|
| 1788 |
+
fn=on_page_compare_mode_change,
|
| 1789 |
+
inputs=[page_compare_mode, pdf_new, page_new_input],
|
| 1790 |
+
outputs=[region_preview_col] + _preview_outputs,
|
| 1791 |
+
)
|
| 1792 |
+
|
| 1793 |
+
# Re-load preview when the New Doc page number changes (if Specific Region is active)
|
| 1794 |
+
def on_page_new_change(pg_new, pdf_new_f, sub_mode):
    """Refresh the region preview after the New-Doc page number changes.

    Only acts while the "Specific Region" sub-mode is active and a New-Doc
    PDF is loaded; otherwise (or on any preview failure) returns the blank
    preview outputs.
    """
    region_active = sub_mode == "Specific Region" and pdf_new_f is not None
    if region_active:
        try:
            return on_load_preview(pdf_new_f, pg_new)
        except Exception:
            # Best-effort reload; fall through to the blank outputs.
            pass
    blank_readout = '<div id="region-readout">No region selected β full page will be used</div>'
    return None, "", None, 72, blank_readout
# Re-load the region preview when the New-Doc page number changes
# (only takes effect while "Specific Region" is active — see handler).
page_new_input.change(
    fn=on_page_new_change,
    inputs=[page_new_input, pdf_new, page_compare_mode],
    outputs=_preview_outputs,
)

# JS canvas overlay writes "x,y,w,h" into region_coords_txt when a drag
# ends — parse that string into the coords dict kept in region_coords_state.
region_coords_txt.change(
    fn=on_region_coords_change,
    inputs=[region_coords_txt],
    outputs=[region_coords_state, region_readout],
    show_progress="hidden",  # silent update — no spinner over the preview
    show_progress_on=[],
)

# Clear-region button wipes the coords; the JS overlay notices the emptied
# textbox on its next poll and erases the drawn rectangle itself.
clear_region_btn.click(
    fn=on_clear_region,
    inputs=None,
    outputs=[region_coords_txt, region_coords_state, region_readout],
)

# Main run button: performs the comparison and fills all result state,
# the output viewers, and the page slider.
run_btn.click(
    fn=on_run,
    inputs=[pdf_old, pdf_new, skip_old_p1, skip_new_p1, enable_align,
            compare_mode, page_old_input, page_new_input,
            page_compare_mode, region_coords_state, display_dpi_state],
    outputs=[pages_state, rotation_state, nudge_x_state, nudge_y_state, nudge_scale_state,
             page_num_state, total_pages_state,
             pdf_output, result_image, nudge_readout, page_slider],
)

# View-mode tab change — re-render the current page in the chosen view.
view_mode.change(
    fn=on_view_change,
    inputs=[view_mode, page_num_state, total_pages_state, pages_state, rotation_state,
            nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, rotation_state],
    show_progress="hidden",
    show_progress_on=[],
)

# Rotation buttons — re-render with the updated rotation state.
rot_left_btn.click(
    fn=on_rot_left,
    inputs=[page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
            nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, rotation_state],
    show_progress="hidden",
    show_progress_on=[],
)
rot_right_btn.click(
    fn=on_rot_right,
    inputs=[page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
            nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, rotation_state],
    show_progress="hidden",
    show_progress_on=[],
)

# Page slider — navigate to another page of the compared documents.
page_slider.change(
    fn=on_pg_slide,
    inputs=[page_slider, total_pages_state, pages_state, view_mode,
            rotation_state, nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, page_num_state],
    show_progress="hidden",
    show_progress_on=[],
)

# ── Nudge arrow buttons ───────────────────────────────────────────────
# All four arrows share the same input/output lists; the direction string
# is baked into each lambda's first argument to on_nudge.
_nudge_inputs = [page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
                 nudge_x_state, nudge_y_state, nudge_scale_state, nudge_step]
_nudge_outputs = [result_image, nudge_x_state, nudge_y_state,
                  nudge_scale_state, nudge_readout]

nudge_left_btn.click(
    fn=lambda *a: on_nudge("left", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])
nudge_right_btn.click(
    fn=lambda *a: on_nudge("right", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])
nudge_up_btn.click(
    fn=lambda *a: on_nudge("up", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])
nudge_down_btn.click(
    fn=lambda *a: on_nudge("down", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])

# ── Scale number input (live update on change) ────────────────────────
nudge_scale.change(
    fn=on_scale_change,
    inputs=[nudge_scale, page_num_state, total_pages_state, pages_state, view_mode,
            rotation_state, nudge_x_state, nudge_y_state],
    outputs=[result_image, nudge_scale_state, nudge_readout],
    show_progress="hidden",
    show_progress_on=[],
)
# ── Inline canvas JS — overlays a transparent draw canvas on the gr.Image ──
# Executed once in the browser via demo.load(js=...).  It installs a <canvas>
# on top of the region preview image, lets the user drag a rectangle, and
# pushes the selection (scaled back to natural-image pixels) into the
# region_coords_txt textarea as "x,y,w,h" so the Python side can parse it.
_INLINE_CANVAS_JS = """
() => {
  let _overlay = null, _ctx = null;
  let _dragging = false, _sx = 0, _sy = 0, _sel = null;
  let _lastCoords = '';

  function getImgEl() {
    // The rendered <img> inside the gr.Image component
    const wrap = document.getElementById('region-page-img');
    return wrap ? wrap.querySelector('img') : null;
  }

  function getCoordsEl() {
    const wrap = document.getElementById('region-coords-txt');
    return wrap ? wrap.querySelector('textarea') : null;
  }

  function syncOverlay() {
    if (!_overlay) return;
    const img = getImgEl();
    if (!img || !img.src || img.src.startsWith('data:image/gif')) return;
    const r = img.getBoundingClientRect();
    const pr = img.parentElement.getBoundingClientRect();
    _overlay.style.left = (r.left - pr.left) + 'px';
    _overlay.style.top = (r.top - pr.top) + 'px';
    _overlay.style.width = r.width + 'px';
    _overlay.style.height = r.height + 'px';
    if (_overlay.width !== Math.round(r.width) || _overlay.height !== Math.round(r.height)) {
      _overlay.width = Math.round(r.width);
      _overlay.height = Math.round(r.height);
      redraw();
    }
  }

  function toCanvas(cx, cy) {
    const r = _overlay.getBoundingClientRect();
    return { x: (cx - r.left) * _overlay.width / r.width,
             y: (cy - r.top) * _overlay.height / r.height };
  }

  function redraw() {
    if (!_ctx || !_overlay.width) return;
    _ctx.clearRect(0, 0, _overlay.width, _overlay.height);
    if (_sel) {
      _ctx.strokeStyle = '#00BBBB';
      _ctx.lineWidth = Math.max(2, _overlay.width / 400);
      _ctx.strokeRect(_sel.x, _sel.y, _sel.w, _sel.h);
      _ctx.fillStyle = 'rgba(0,187,187,0.15)';
      _ctx.fillRect(_sel.x, _sel.y, _sel.w, _sel.h);
    }
  }

  function pushCoords() {
    const el = getCoordsEl();
    if (!el || !_sel) return;
    // Scale from display px back to natural image px
    const img = getImgEl();
    if (!img) return;
    const scaleX = img.naturalWidth / _overlay.width;
    const scaleY = img.naturalHeight / _overlay.height;
    const val = Math.round(_sel.x * scaleX) + ',' +
                Math.round(_sel.y * scaleY) + ',' +
                Math.round(_sel.w * scaleX) + ',' +
                Math.round(_sel.h * scaleY);
    const setter = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value').set;
    setter.call(el, val);
    el.dispatchEvent(new Event('input', { bubbles: true }));
  }

  function setupOverlay() {
    const imgWrap = document.getElementById('region-page-img');
    if (!imgWrap) return false;
    // Make sure parent is positioned
    const parent = imgWrap.querySelector('.image-container') || imgWrap;
    if (getComputedStyle(parent).position === 'static') parent.style.position = 'relative';

    if (!_overlay) {
      _overlay = document.createElement('canvas');
      _overlay.id = 'region-draw-overlay';
      _overlay.style.cssText = 'position:absolute;top:0;left:0;cursor:crosshair;z-index:10;pointer-events:all;';
      parent.appendChild(_overlay);
      _ctx = _overlay.getContext('2d');

      _overlay.addEventListener('mousedown', function(e) {
        const p = toCanvas(e.clientX, e.clientY);
        _sx = p.x; _sy = p.y; _sel = null; _dragging = true; e.preventDefault();
      });
      _overlay.addEventListener('mousemove', function(e) {
        if (!_dragging) return;
        const p = toCanvas(e.clientX, e.clientY);
        _sel = { x: Math.min(_sx, p.x), y: Math.min(_sy, p.y),
                 w: Math.abs(p.x - _sx), h: Math.abs(p.y - _sy) };
        redraw(); e.preventDefault();
      });
      _overlay.addEventListener('mouseup', function(e) {
        if (!_dragging) return; _dragging = false;
        if (!_sel || _sel.w < 5 || _sel.h < 5) { _sel = null; redraw(); return; }
        redraw(); pushCoords(); e.preventDefault();
      });
    }
    return true;
  }

  // Poll every 300ms: sync overlay size, watch for cleared coords
  setInterval(function() {
    setupOverlay();
    syncOverlay();

    // Clear overlay when coords textbox is wiped by Clear button
    const el = getCoordsEl();
    if (el) {
      const cur = el.value;
      if (cur !== _lastCoords) {
        _lastCoords = cur;
        if (cur === '') { _sel = null; redraw(); }
      }
    }
  }, 300);
}
"""
# fn=None: nothing runs server-side — the js snippet executes client-side
# once when the page loads.
demo.load(fn=None, js=_INLINE_CANVAS_JS)
| 2025 |
+
|
| 2026 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2027 |
+
# ENTRY POINT
|
| 2028 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2029 |
+
|
| 2030 |
+
if __name__ == "__main__":
    import socket as _socket

    def _find_free_port(start: int = 7860, end: int = 7880) -> int:
        """Return the first TCP port in [start, end] that accepts a local bind.

        Falls back to `start` when the whole range is busy; in that case
        Gradio itself raises a clear "port in use" error at launch time.
        """
        for p in range(start, end + 1):
            with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as s:
                try:
                    s.bind(("", p))
                    return p
                except OSError:
                    continue
        return start  # fallback — Gradio will error with a clear message

    _port = _find_free_port()
    print(f"\nπ POWERGRID Document Auditor β http://localhost:{_port}\n")
    # BUG FIX: Blocks.launch() has no `theme` / `css` keyword parameters —
    # those belong to the gr.Blocks(...) constructor — so passing them here
    # raised TypeError at startup.  NOTE(review): confirm the gr.Blocks(...)
    # call already receives theme=_THEME and css=_CSS.
    demo.queue(default_concurrency_limit=20).launch(
        server_name="0.0.0.0",
        server_port=_port,
        share=False,
        show_error=True,
    )
|