Spaces:
Running
Running
manpreet88 commited on
Commit ·
320fafc
1
Parent(s): f55ed04
Create gradio_interface.py
Browse files- PolyAgent/gradio_interface.py +1417 -0
PolyAgent/gradio_interface.py
ADDED
|
@@ -0,0 +1,1417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import re
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any, Dict, List, Tuple, Optional
|
| 8 |
+
from urllib.parse import urlparse
|
| 9 |
+
|
| 10 |
+
# Load .env if present so OPENAI_API_KEY/OPENAI_MODEL are available
|
| 11 |
+
try:
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
|
| 14 |
+
load_dotenv()
|
| 15 |
+
except Exception:
|
| 16 |
+
pass
|
| 17 |
+
|
| 18 |
+
import gradio as gr
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
from orchestrator_updated import PolymerOrchestrator, OrchestratorConfig
|
| 22 |
+
except Exception as e:
|
| 23 |
+
raise ImportError(
|
| 24 |
+
"Could not import PolymerOrchestrator from orchestrator_updated.py. "
|
| 25 |
+
"Ensure the updated orchestrator file is present. "
|
| 26 |
+
f"Original error: {e}"
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# =============================================================================
|
| 31 |
+
# DOI NORMALIZATION HELPERS (UI layer must match orchestrator behavior)
|
| 32 |
+
# =============================================================================
|
| 33 |
+
# Pattern for a well-formed bare DOI ("10.<registrant>/<object>").
_DOI_RE = re.compile(r"^10\.\d{4,9}/\S+$", re.IGNORECASE)


def normalize_doi(raw: str) -> Optional[str]:
    """Strip common DOI decorations and return the bare DOI, or None if invalid.

    Accepts plain DOIs, "doi:" prefixes, and https://doi.org/ (or dx.doi.org)
    URLs. Trailing punctuation that often clings to DOIs in prose is removed
    before validation against _DOI_RE.
    """
    if not isinstance(raw, str):
        return None
    candidate = raw.strip()
    if not candidate:
        return None
    # Peel off the resolver-URL prefix, then a "doi:" prefix, then trailing
    # punctuation commonly left by copy/paste from running text.
    candidate = re.sub(r"^(?:https?://(?:dx\.)?doi\.org/)", "", candidate, flags=re.IGNORECASE)
    candidate = re.sub(r"^doi:\s*", "", candidate, flags=re.IGNORECASE)
    candidate = candidate.rstrip(").,;]}")
    if _DOI_RE.match(candidate):
        return candidate
    return None


def doi_to_url(doi: str) -> str:
    """Return the canonical https://doi.org/ resolver URL for *doi*."""
    return "https://doi.org/" + doi
|
| 48 |
+
|
| 49 |
+
# -----------------------------------------------------------------------------
# Console defaults (no UI controls for these)
# -----------------------------------------------------------------------------
# Default case brief injected when the console user supplies none; it frames a
# high-barrier packaging design task and asks the planner to ground answers in
# recent literature via web_search.
DEFAULT_CASE_BRIEF = (
    "We are developing a polymer film for high-barrier flexible packaging (food-contact). "
    "We need improved oxygen and water-vapor barrier while maintaining practical melt-processability "
    "(film extrusion/cast). Please use web_search to ground your recommendations in recent literature "
    "(last 5–10 years) on barrier improvement strategies (e.g., copolymerization, aromatic content, "
    "rigid side groups, crystallinity control, chain stiffness, and compatibilization). "
    "Constraints: avoid halogens; prioritize monomers with existing commercial suppliers; "
    "avoid overly brittle formulations."
)

# Default property / seed polymer / literature query used to pre-populate runs.
DEFAULT_PROPERTY_NAME = "glass transition"
DEFAULT_SEED_PSMILES = "[*]CC(=O)OCCOCCOC(=O)C[*]"
DEFAULT_LITERATURE_QUERY = (
    "high barrier flexible packaging polyester copolymer Tg tuning oxygen permeability water vapor "
    "rigid aromatic units side groups 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025"
)
# Default inverse-design target and sampling sizes.
DEFAULT_TARGET_VALUE = 60.0
DEFAULT_NUM_GEN_SAMPLES = 6
DEFAULT_FETCH_TOP_N = 3

# Increased to help ensure >=10 citations in typical runs
DEFAULT_SEARCH_ROWS = 12

# Property-specific fallback targets (ONLY used when generation is requested but target not found in questions)
DEFAULT_TARGET_BY_PROPERTY = {
    "glass transition": 60.0,  # °C (example placeholder)
    "density": 1.20,  # g/cm^3 (example placeholder)
    "melting": 150.0,  # °C (example placeholder)
    "specific volume": 0.85,  # cm^3/g (example placeholder)
    "thermal decomposition": 350.0,  # °C (example placeholder)
}
|
| 83 |
+
|
| 84 |
+
# -----------------------------------------------------------------------------
# NEW: Run instructions bubble (shown on load and retained)
# -----------------------------------------------------------------------------
# One-time setup / usage guide rendered as the first chat bubble in the UI.
# NOTE: this is a runtime string displayed to users; do not edit casually.
RUN_INSTRUCTIONS_MD = (
    "### How to run PolyAgent (one-time setup)\n"
    "\n"
    "**1) Environment**\n"
    "- Activate your conda/venv where this repository is installed.\n"
    "- Install required Python packages (project-specific; example):\n"
    " - `pip install gradio torch transformers numpy joblib sentencepiece requests beautifulsoup4`\n"
    " - If you want PNG visuals and RDKit validation: install RDKit (recommended via conda-forge).\n"
    "\n"
    "**2) Required model/artifact paths**\n"
    "- Ensure these paths exist and contain the expected artifacts (as configured in `OrchestratorConfig`):\n"
    " - `cl_weights_path` (CL encoder weights)\n"
    " - `DOWNSTREAM_BESTWEIGHTS_5M_DIR` (property heads)\n"
    " - `INVERSE_DESIGN_5M_DIR` (inverse design generator bundles)\n"
    " - `spm_5M.model` (SentencePiece model)\n"
    "\n"
    "**3) Required environment variables**\n"
    "- `OPENAI_API_KEY` (required for planning + composed answers)\n"
    "- Optional:\n"
    " - `OPENAI_MODEL` (defaults to `gpt-4.1` in config)\n"
    " - `HF_TOKEN` (recommended for `materials.selfies-ted` model downloads)\n"
    " - `SPRINGER_NATURE_API_KEY`, `SEMANTIC_SCHOLAR_API_KEY` (improves web_search coverage)\n"
    "\n"
    "**4) Start the interface**\n"
    "- Run:\n"
    " - `python gradio_interface_for_polymer_orchestrator.py --server-name 0.0.0.0 --server-port 7860`\n"
    "\n"
    "**5) How to prompt in the Console**\n"
    "- To trigger inverse design: include a generation intent (e.g., “generate”, “inverse design”) and a target value.\n"
    "- You can specify `target_value` in text (examples): `target_value=60`, `target: 60`, `Tg 60`.\n"
    "- To seed with a polymer, include a pSMILES in a code block or via `seed_psmiles:`.\n"
    "- To control citation count: ask explicitly (e.g., “cite 10 papers”).\n"
    "\n"
    "**Notes**\n"
    "- Tool facts are cited as `[T]`.\n"
    "- Literature/web/RAG citations appear inline as clickable DOI links (e.g., `[https://doi.org/...](https://doi.org/...)`) next to the claim.\n"
)
|
| 124 |
+
|
| 125 |
+
def pretty_json(x: Any) -> str:
    """Render *x* as indented JSON; fall back to str() for non-serializable values."""
    try:
        rendered = json.dumps(x, indent=2, ensure_ascii=False)
    except Exception:
        rendered = str(x)
    return rendered
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# -----------------------------------------------------------------------------
|
| 133 |
+
# Display normalization (MINIMAL): convert bracketed [At] endpoints to [*]
|
| 134 |
+
# -----------------------------------------------------------------------------
|
| 135 |
+
_AT_BRACKET_RE = re.compile(r"\[(at)\]", flags=re.IGNORECASE)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _convert_at_to_star(psmiles: str) -> str:
|
| 139 |
+
"""
|
| 140 |
+
Minimal, display-only conversion:
|
| 141 |
+
- "[At]" / "[AT]" / ... -> "[*]"
|
| 142 |
+
"""
|
| 143 |
+
if not isinstance(psmiles, str) or not psmiles:
|
| 144 |
+
return psmiles
|
| 145 |
+
return _AT_BRACKET_RE.sub("[*]", psmiles)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _normalize_seed_inputs_for_display(obj: Any) -> Any:
|
| 149 |
+
"""
|
| 150 |
+
Recursively normalize ONLY seed/input pSMILES fields for display.
|
| 151 |
+
We do NOT touch generation outputs here to preserve exact tool-returned strings.
|
| 152 |
+
"""
|
| 153 |
+
if isinstance(obj, str):
|
| 154 |
+
if "[" in obj and "]" in obj and ("At" in obj or "AT" in obj or "at" in obj):
|
| 155 |
+
return _convert_at_to_star(obj)
|
| 156 |
+
return obj
|
| 157 |
+
|
| 158 |
+
if isinstance(obj, list):
|
| 159 |
+
return [_normalize_seed_inputs_for_display(x) for x in obj]
|
| 160 |
+
|
| 161 |
+
if isinstance(obj, dict):
|
| 162 |
+
out = {}
|
| 163 |
+
for k, v in obj.items():
|
| 164 |
+
if k in ("psmiles", "seed_psmiles", "seed_psmiles_used", "canonical_psmiles"):
|
| 165 |
+
out[k] = _normalize_seed_inputs_for_display(v)
|
| 166 |
+
else:
|
| 167 |
+
out[k] = _normalize_seed_inputs_for_display(v)
|
| 168 |
+
return out
|
| 169 |
+
|
| 170 |
+
return obj
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# -----------------------------------------------------------------------------
|
| 174 |
+
# Markdown safety: keep polymer endpoint token "[*]" from being rendered as "[]"
|
| 175 |
+
# -----------------------------------------------------------------------------
|
| 176 |
+
# Markdown safety: keep polymer endpoint token "[*]" from being rendered as "[]"
|
| 177 |
+
_ENDPOINT_TOKEN_RE = re.compile(r"\[\*\]")
|
| 178 |
+
|
| 179 |
+
def _escape_endpoint_tokens_for_markdown(text: str) -> str:
|
| 180 |
+
"""
|
| 181 |
+
Escape '[*]' ONLY outside code blocks and inline code.
|
| 182 |
+
This avoids turning '[*]' into '[\\*]' inside ```...``` where the backslash would show.
|
| 183 |
+
"""
|
| 184 |
+
if not isinstance(text, str) or not text:
|
| 185 |
+
return text
|
| 186 |
+
|
| 187 |
+
# Split by fenced code blocks, keep delimiters
|
| 188 |
+
parts = re.split(r"(```[\s\S]*?```)", text)
|
| 189 |
+
out_parts = []
|
| 190 |
+
|
| 191 |
+
for part in parts:
|
| 192 |
+
# If this is a fenced code block, leave untouched
|
| 193 |
+
if part.startswith("```") and part.endswith("```"):
|
| 194 |
+
out_parts.append(part)
|
| 195 |
+
continue
|
| 196 |
+
|
| 197 |
+
# Split by inline code, keep delimiters
|
| 198 |
+
subparts = re.split(r"(`[^`]*`)", part)
|
| 199 |
+
for i, sp in enumerate(subparts):
|
| 200 |
+
if sp.startswith("`") and sp.endswith("`"):
|
| 201 |
+
continue
|
| 202 |
+
subparts[i] = _ENDPOINT_TOKEN_RE.sub(r"[\\*]", sp)
|
| 203 |
+
|
| 204 |
+
out_parts.append("".join(subparts))
|
| 205 |
+
|
| 206 |
+
return "".join(out_parts)
|
| 207 |
+
|
| 208 |
+
# -----------------------------------------------------------------------------
|
| 209 |
+
# NEW: Auto-detect property / target_value / seed from Questions (NO GUI CHANGES)
|
| 210 |
+
# -----------------------------------------------------------------------------
|
| 211 |
+
_NUM_RE = r"[-+]?\d+(?:\.\d+)?"
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _infer_property_from_questions(q: str) -> Optional[str]:
|
| 215 |
+
"""
|
| 216 |
+
Infer canonical property name from free-text questions.
|
| 217 |
+
Canonical keys must match orchestrator's PROPERTY_HEAD_PATHS/GENERATOR_DIRS keys.
|
| 218 |
+
"""
|
| 219 |
+
s = (q or "").lower()
|
| 220 |
+
|
| 221 |
+
# Allow explicit "property:" forms
|
| 222 |
+
m = re.search(r"\bproperty\b\s*[:=]\s*([a-zA-Z _-]+)", s)
|
| 223 |
+
if m:
|
| 224 |
+
cand = m.group(1).strip().lower()
|
| 225 |
+
# map common variants
|
| 226 |
+
if "glass" in cand or re.search(r"\btg\b", cand):
|
| 227 |
+
return "glass transition"
|
| 228 |
+
if "density" in cand or re.search(r"\brho\b", cand):
|
| 229 |
+
return "density"
|
| 230 |
+
if "melting" in cand or re.search(r"\btm\b", cand):
|
| 231 |
+
return "melting"
|
| 232 |
+
if "specific" in cand or re.search(r"\bsv\b", cand):
|
| 233 |
+
return "specific volume"
|
| 234 |
+
if "decomp" in cand or "decomposition" in cand or re.search(r"\btd\b", cand):
|
| 235 |
+
return "thermal decomposition"
|
| 236 |
+
|
| 237 |
+
# Token-based inference
|
| 238 |
+
if "thermal decomposition" in s or "decomposition temperature" in s or "decomposition" in s or re.search(r"\btd\b", s):
|
| 239 |
+
return "thermal decomposition"
|
| 240 |
+
if "specific volume" in s or re.search(r"\bsv\b", s):
|
| 241 |
+
return "specific volume"
|
| 242 |
+
if "glass transition" in s or "glass-transition" in s or re.search(r"\btg\b", s):
|
| 243 |
+
return "glass transition"
|
| 244 |
+
if "melting" in s or "melt temperature" in s or re.search(r"\btm\b", s):
|
| 245 |
+
return "melting"
|
| 246 |
+
if "density" in s or re.search(r"\brho\b", s):
|
| 247 |
+
return "density"
|
| 248 |
+
|
| 249 |
+
return None
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _infer_target_value_from_questions(q: str, prop: Optional[str]) -> Optional[float]:
|
| 253 |
+
"""
|
| 254 |
+
Infer numeric target_value from free-text questions.
|
| 255 |
+
- supports explicit: target_value=..., target: ..., tgt ...
|
| 256 |
+
- supports property-attached: Tg 60, density 1.25, Td=380, sv 0.85, Tm 180
|
| 257 |
+
"""
|
| 258 |
+
sl = (q or "").lower()
|
| 259 |
+
|
| 260 |
+
# Explicit
|
| 261 |
+
m = re.search(rf"\b(target_value|target|tgt)\b\s*[:=]?\s*({_NUM_RE})", sl)
|
| 262 |
+
if m:
|
| 263 |
+
try:
|
| 264 |
+
return float(m.group(2))
|
| 265 |
+
except Exception:
|
| 266 |
+
pass
|
| 267 |
+
|
| 268 |
+
prop = (prop or "").strip().lower()
|
| 269 |
+
prop_patterns: List[str] = []
|
| 270 |
+
|
| 271 |
+
if prop == "glass transition":
|
| 272 |
+
prop_patterns = [rf"\b(tg|glass\s*transition)\b\s*[:=]?\s*({_NUM_RE})"]
|
| 273 |
+
elif prop == "density":
|
| 274 |
+
prop_patterns = [rf"\b(density|rho)\b\s*[:=]?\s*({_NUM_RE})"]
|
| 275 |
+
elif prop == "melting":
|
| 276 |
+
prop_patterns = [rf"\b(tm|melting)\b\s*[:=]?\s*({_NUM_RE})"]
|
| 277 |
+
elif prop == "specific volume":
|
| 278 |
+
prop_patterns = [rf"\b(specific\s*volume|sv)\b\s*[:=]?\s*({_NUM_RE})"]
|
| 279 |
+
elif prop == "thermal decomposition":
|
| 280 |
+
prop_patterns = [rf"\b(td|thermal\s*decomposition|decomposition)\b\s*[:=]?\s*({_NUM_RE})"]
|
| 281 |
+
|
| 282 |
+
for pat in prop_patterns:
|
| 283 |
+
m = re.search(pat, sl)
|
| 284 |
+
if m:
|
| 285 |
+
try:
|
| 286 |
+
return float(m.group(m.lastindex))
|
| 287 |
+
except Exception:
|
| 288 |
+
pass
|
| 289 |
+
|
| 290 |
+
# Token-near-number fallback: pick first number within 80 chars after property token
|
| 291 |
+
tokens: List[str] = []
|
| 292 |
+
if prop == "glass transition":
|
| 293 |
+
tokens = ["tg", "glass transition"]
|
| 294 |
+
elif prop == "density":
|
| 295 |
+
tokens = ["density", "rho"]
|
| 296 |
+
elif prop == "melting":
|
| 297 |
+
tokens = ["tm", "melting"]
|
| 298 |
+
elif prop == "specific volume":
|
| 299 |
+
tokens = ["specific volume", "sv"]
|
| 300 |
+
elif prop == "thermal decomposition":
|
| 301 |
+
tokens = ["td", "thermal decomposition", "decomposition"]
|
| 302 |
+
|
| 303 |
+
for tok in tokens:
|
| 304 |
+
for mt in re.finditer(re.escape(tok), sl):
|
| 305 |
+
window = sl[mt.end():mt.end() + 80]
|
| 306 |
+
mn = re.search(rf"({_NUM_RE})", window)
|
| 307 |
+
if mn:
|
| 308 |
+
try:
|
| 309 |
+
return float(mn.group(1))
|
| 310 |
+
except Exception:
|
| 311 |
+
pass
|
| 312 |
+
|
| 313 |
+
return None
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def _infer_generate_intent(q: str) -> bool:
|
| 317 |
+
"""
|
| 318 |
+
Decide if the user is asking for inverse design / generation.
|
| 319 |
+
Conservative: only true when generation-ish verbs appear.
|
| 320 |
+
"""
|
| 321 |
+
s = (q or "").lower()
|
| 322 |
+
triggers = [
|
| 323 |
+
"generate",
|
| 324 |
+
"inverse design",
|
| 325 |
+
"inverse-design",
|
| 326 |
+
"design candidates",
|
| 327 |
+
"propose candidates",
|
| 328 |
+
"suggest candidates",
|
| 329 |
+
"design polymer",
|
| 330 |
+
"design polymers",
|
| 331 |
+
"synthesize candidates",
|
| 332 |
+
"optimize",
|
| 333 |
+
]
|
| 334 |
+
return any(t in s for t in triggers)
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def _infer_seed_psmiles_from_questions(q: str) -> Optional[str]:
|
| 338 |
+
"""
|
| 339 |
+
Best-effort extraction of seed pSMILES from the Questions text without GUI changes.
|
| 340 |
+
Supports:
|
| 341 |
+
- seed_psmiles: <token>
|
| 342 |
+
- psmiles=...
|
| 343 |
+
- smiles=...
|
| 344 |
+
- code block containing a single pSMILES/SMILES line
|
| 345 |
+
"""
|
| 346 |
+
text = (q or "").strip()
|
| 347 |
+
if not text:
|
| 348 |
+
return None
|
| 349 |
+
|
| 350 |
+
# 1) Prefer code block content (first non-empty line)
|
| 351 |
+
code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text)
|
| 352 |
+
for block in code_blocks:
|
| 353 |
+
for line in (block or "").splitlines():
|
| 354 |
+
line = line.strip()
|
| 355 |
+
if not line:
|
| 356 |
+
continue
|
| 357 |
+
# Heuristic: polymer pSMILES often includes [*] or [At]
|
| 358 |
+
if "[*]" in line or "[At]" in line or "[AT]" in line or "*" in line or "[" in line:
|
| 359 |
+
return line
|
| 360 |
+
|
| 361 |
+
# 2) Keyed patterns
|
| 362 |
+
m = re.search(r"(seed_psmiles|seed|psmiles|smiles)\s*[:=]\s*([^\s]+)", text, flags=re.IGNORECASE)
|
| 363 |
+
if m:
|
| 364 |
+
return m.group(2).strip()
|
| 365 |
+
|
| 366 |
+
return None
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
# -----------------------------------------------------------------------------
|
| 370 |
+
# Domain normalization: show ROOT domain like nature.com, springer.com, etc.
|
| 371 |
+
# -----------------------------------------------------------------------------
|
| 372 |
+
_SECOND_LEVEL_TLDS = {
|
| 373 |
+
"co.uk",
|
| 374 |
+
"ac.uk",
|
| 375 |
+
"gov.uk",
|
| 376 |
+
"org.uk",
|
| 377 |
+
"co.jp",
|
| 378 |
+
"ne.jp",
|
| 379 |
+
"or.jp",
|
| 380 |
+
"com.au",
|
| 381 |
+
"net.au",
|
| 382 |
+
"org.au",
|
| 383 |
+
"edu.au",
|
| 384 |
+
"co.in",
|
| 385 |
+
"com.br",
|
| 386 |
+
"com.cn",
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def _root_domain(netloc: str) -> str:
|
| 391 |
+
netloc = (netloc or "").strip().lower()
|
| 392 |
+
if netloc.startswith("www."):
|
| 393 |
+
netloc = netloc[4:]
|
| 394 |
+
parts = [p for p in netloc.split(".") if p]
|
| 395 |
+
if len(parts) <= 2:
|
| 396 |
+
return netloc
|
| 397 |
+
last2 = ".".join(parts[-2:])
|
| 398 |
+
last3 = ".".join(parts[-3:])
|
| 399 |
+
# handle second-level public suffixes
|
| 400 |
+
if last2 in _SECOND_LEVEL_TLDS and len(parts) >= 3:
|
| 401 |
+
return last3
|
| 402 |
+
if ".".join(parts[-2:]) in _SECOND_LEVEL_TLDS and len(parts) >= 3:
|
| 403 |
+
return last3
|
| 404 |
+
# if suffix looks like co.uk style
|
| 405 |
+
if last2 in _SECOND_LEVEL_TLDS:
|
| 406 |
+
return last3
|
| 407 |
+
if last2.endswith(".uk") and len(parts) >= 3:
|
| 408 |
+
if ".".join(parts[-2:]) in _SECOND_LEVEL_TLDS:
|
| 409 |
+
return last3
|
| 410 |
+
return last2
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
def _url_to_domain(url: str) -> Optional[str]:
|
| 414 |
+
if not isinstance(url, str) or not url.strip():
|
| 415 |
+
return None
|
| 416 |
+
try:
|
| 417 |
+
u = url.strip()
|
| 418 |
+
if not (u.startswith("http://") or u.startswith("https://")):
|
| 419 |
+
return None
|
| 420 |
+
netloc = urlparse(u).netloc.strip().lower()
|
| 421 |
+
if not netloc:
|
| 422 |
+
return None
|
| 423 |
+
return _root_domain(netloc)
|
| 424 |
+
except Exception:
|
| 425 |
+
return None
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def _attach_source_domains(obj: Any) -> Any:
|
| 429 |
+
"""
|
| 430 |
+
Recursively add a short source/domain field for RAG + web_search items where URLs are present.
|
| 431 |
+
"""
|
| 432 |
+
if isinstance(obj, list):
|
| 433 |
+
return [_attach_source_domains(x) for x in obj]
|
| 434 |
+
|
| 435 |
+
if isinstance(obj, dict):
|
| 436 |
+
out: Dict[str, Any] = {}
|
| 437 |
+
for k, v in obj.items():
|
| 438 |
+
out[k] = _attach_source_domains(v)
|
| 439 |
+
|
| 440 |
+
for url_key in ("url", "landing_page", "landingPage", "doi_url", "pdf_url", "link", "href"):
|
| 441 |
+
v = out.get(url_key)
|
| 442 |
+
dom = _url_to_domain(v) if isinstance(v, str) else None
|
| 443 |
+
if dom:
|
| 444 |
+
out.setdefault("source_domain", dom)
|
| 445 |
+
break
|
| 446 |
+
|
| 447 |
+
return out
|
| 448 |
+
|
| 449 |
+
return obj
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def _index_citable_sources(report: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build a compact citation index for web_search + rag retrieval items.

    Requirement:
    - Tag format is STRICTLY: COMPLETE DOI URL (https://doi.org/...) when DOI exists,
      otherwise the best available http(s) URL.
    - No numbered citations.

    Walks the whole report, stamps a "cite_tag" on every citable dict, and
    appends a summary record per citable item to report["citation_index"].
    Returns the tagged report (the input is mutated only in the non-dict
    fallback at the bottom).
    """
    citation_index: Dict[str, Any] = {"sources": []}

    def is_citable_item(d: Dict[str, Any]) -> bool:
        # A dict is citable when it carries any http(s) URL field or a
        # non-empty DOI string.
        if not isinstance(d, dict):
            return False
        for k in ("url", "landing_page", "landingPage", "doi_url", "pdf_url", "link", "href"):
            if isinstance(d.get(k), str) and (d[k].startswith("http://") or d[k].startswith("https://")):
                return True
        if isinstance(d.get("doi"), str) and d["doi"].strip():
            return True
        return False

    def get_best_url(d: Dict[str, Any]) -> Optional[str]:
        # DOI-first: a valid DOI always wins over raw URL fields.
        doi = normalize_doi(d.get("doi", ""))
        if doi:
            return doi_to_url(doi)
        for k in ("url", "landing_page", "landingPage", "doi_url", "pdf_url", "link", "href"):
            v = d.get(k)
            if isinstance(v, str) and (v.startswith("http://") or v.startswith("https://")):
                return v
        return None

    def walk_and_tag(node: Any) -> Any:
        # Depth-first rebuild of the report; children are tagged before the
        # parent is inspected.
        if isinstance(node, list):
            return [walk_and_tag(x) for x in node]

        if isinstance(node, dict):
            out = {k: walk_and_tag(v) for k, v in node.items()}

            if is_citable_item(out):
                url = get_best_url(out)
                dom = out.get("source_domain") or (_url_to_domain(url) if url else None) or "source"
                tag = url.strip() if isinstance(url, str) and url.strip() else "source"
                # cite_tag must be DOI URL or URL fallback; keep an existing
                # tag only when it is already an http(s) URL.
                cur = out.get("cite_tag")
                if not (isinstance(cur, str) and cur.strip().startswith(("http://", "https://"))):
                    out["cite_tag"] = tag

                citation_index["sources"].append(
                    {
                        "tag": out.get("cite_tag"),
                        "domain": dom,
                        "title": out.get("title") or out.get("name") or "Untitled",
                        "url": url,
                        "doi": out.get("doi"),
                    }
                )
            return out

        return node

    tagged = walk_and_tag(report)
    if isinstance(tagged, dict):
        tagged["citation_index"] = citation_index
        return tagged

    # Fallback for non-dict inputs: attach the index to the original object.
    # (Only reachable when *report* is not a dict despite the annotation.)
    report["citation_index"] = citation_index
    return report
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
def ensure_orch(state: Dict[str, Any]) -> Tuple[PolymerOrchestrator, Dict[str, Any]]:
    """
    Lazily construct the orchestrator (plus a fresh context dict) inside the
    per-session state and return both.

    On first construction, any OpenAI-unavailability diagnostic exposed by the
    orchestrator is printed so misconfiguration is visible in the console.
    """
    if state.get("orch") is None:
        state["orch"] = PolymerOrchestrator(OrchestratorConfig())
        state["ctx"] = {}
        diagnostic = getattr(state["orch"], "_openai_unavailable_reason", None)
        if diagnostic:
            print("[OpenAI diagnostic]", diagnostic)
    # Older sessions may predate the ctx slot; repair defensively.
    state.setdefault("ctx", {})
    return state["orch"], state["ctx"]
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
# -----------------------------------------------------------------------------
|
| 536 |
+
# NEW: extract tool output so the PLAN (execute_plan) drives the final report
|
| 537 |
+
# -----------------------------------------------------------------------------
|
| 538 |
+
def _extract_tool_output(exec_res: Dict[str, Any], tool_name: str) -> Optional[Any]:
|
| 539 |
+
"""
|
| 540 |
+
Best-effort extraction of a tool output from execute_plan() results.
|
| 541 |
+
|
| 542 |
+
Supports a variety of common shapes:
|
| 543 |
+
exec_res["steps"] = [{"tool": "...", "output": {...}}, ...]
|
| 544 |
+
exec_res["steps"] = [{"tool": "...", "result": {...}}, ...]
|
| 545 |
+
exec_res["steps"] = [{"tool": "...", "data": {...}}, ...]
|
| 546 |
+
"""
|
| 547 |
+
if not isinstance(exec_res, dict):
|
| 548 |
+
return None
|
| 549 |
+
steps = exec_res.get("steps")
|
| 550 |
+
if not isinstance(steps, list):
|
| 551 |
+
return None
|
| 552 |
+
|
| 553 |
+
tool_name = (tool_name or "").strip()
|
| 554 |
+
if not tool_name:
|
| 555 |
+
return None
|
| 556 |
+
|
| 557 |
+
for s in steps:
|
| 558 |
+
if not isinstance(s, dict):
|
| 559 |
+
continue
|
| 560 |
+
t = str(s.get("tool") or s.get("name") or "").strip()
|
| 561 |
+
if t != tool_name:
|
| 562 |
+
continue
|
| 563 |
+
for k in ("output", "result", "data", "payload"):
|
| 564 |
+
if k in s:
|
| 565 |
+
return s.get(k)
|
| 566 |
+
# fallback: sometimes the step dict itself is the output
|
| 567 |
+
return s
|
| 568 |
+
|
| 569 |
+
return None
|
| 570 |
+
|
| 571 |
+
|
| 572 |
+
def _compose_planner_prompt(
|
| 573 |
+
case_brief: str,
|
| 574 |
+
questions: str,
|
| 575 |
+
property_name: str,
|
| 576 |
+
seed_psmiles: str,
|
| 577 |
+
literature_query: str,
|
| 578 |
+
target_value: Optional[float],
|
| 579 |
+
) -> str:
|
| 580 |
+
"""
|
| 581 |
+
Planner prompt updated to enforce:
|
| 582 |
+
- per-question coverage
|
| 583 |
+
- explicit mapping Qi -> steps
|
| 584 |
+
- report_generation included as a planned step
|
| 585 |
+
"""
|
| 586 |
+
lines = []
|
| 587 |
+
lines.append("### CASE / CONTEXT (POLYMER SYSTEM)")
|
| 588 |
+
if case_brief.strip():
|
| 589 |
+
lines.append(case_brief.strip())
|
| 590 |
+
if seed_psmiles.strip():
|
| 591 |
+
lines.append(f"Seed pSMILES: {seed_psmiles.strip()}")
|
| 592 |
+
if property_name.strip():
|
| 593 |
+
lines.append(f"Primary property of interest: {property_name.strip()}")
|
| 594 |
+
if target_value is not None:
|
| 595 |
+
lines.append(f"Inverse-design target_value (required for generation): {target_value}")
|
| 596 |
+
if literature_query.strip():
|
| 597 |
+
lines.append(f"Literature query hint (optional): {literature_query.strip()}")
|
| 598 |
+
|
| 599 |
+
lines.append("\n### USER QUESTIONS (ANSWER THESE)")
|
| 600 |
+
q = questions.strip()
|
| 601 |
+
if q:
|
| 602 |
+
lines.append(q)
|
| 603 |
+
else:
|
| 604 |
+
lines.append(
|
| 605 |
+
"Q1. Interpret the current formulation and key properties.\n"
|
| 606 |
+
"Q2. Analyze structure–property relationships and root causes.\n"
|
| 607 |
+
"Q3. Propose and (if possible) generate candidate polymers.\n"
|
| 608 |
+
"Q4. Summarize evidence, limitations, and next experiments."
|
| 609 |
+
)
|
| 610 |
+
|
| 611 |
+
lines.append("\n### TOOLING REQUIREMENTS")
|
| 612 |
+
lines.append(
|
| 613 |
+
"- Select from tools: data_extraction, cl_encoding, property_prediction, polymer_generation,\n"
|
| 614 |
+
" rag_retrieval, web_search, report_generation, and PNG-only visual tools.\n"
|
| 615 |
+
"- Plan a small, ordered tool chain (2–10 steps) that answers the USER QUESTIONS.\n"
|
| 616 |
+
"- Ensure property_prediction uses cl_encoding output when possible.\n"
|
| 617 |
+
"- polymer_generation is inverse design and REQUIRES target_value.\n"
|
| 618 |
+
"- Do NOT answer the scientific questions yourself; only plan which tools to run."
|
| 619 |
+
)
|
| 620 |
+
|
| 621 |
+
# Critical: make the plan sensitive to the questions, not a fixed recipe
|
| 622 |
+
lines.append("\n### PLANNING RULES (STRICT)")
|
| 623 |
+
lines.append(
|
| 624 |
+
"- Create an explicit mapping: for each question Qi, list the step numbers that address it.\n"
|
| 625 |
+
"- Every planned step must contribute to at least one Qi.\n"
|
| 626 |
+
"- If a Qi needs literature evidence, include web_search and/or rag_retrieval steps.\n"
|
| 627 |
+
"- Include a final report_generation step that synthesizes tool outputs into answers for each Qi.\n"
|
| 628 |
+
"- If a Qi cannot be answered from tools, plan to state 'not available' for missing numeric values "
|
| 629 |
+
"and provide clearly labeled qualitative expectations where appropriate."
|
| 630 |
+
)
|
| 631 |
+
|
| 632 |
+
return "\n".join(lines)
|
| 633 |
+
|
| 634 |
+
|
| 635 |
+
def _seed_inputs(
|
| 636 |
+
property_name: str,
|
| 637 |
+
seed_psmiles: str,
|
| 638 |
+
literature_query: str,
|
| 639 |
+
target_value: Optional[float],
|
| 640 |
+
questions: str,
|
| 641 |
+
) -> Dict[str, Any]:
|
| 642 |
+
"""
|
| 643 |
+
Provide user_inputs to execute_plan(). Include questions so the orchestrator/tools
|
| 644 |
+
can condition retrieval and synthesis on the actual user ask.
|
| 645 |
+
"""
|
| 646 |
+
payload: Dict[str, Any] = {}
|
| 647 |
+
if property_name.strip():
|
| 648 |
+
payload["property"] = property_name.strip()
|
| 649 |
+
if seed_psmiles.strip():
|
| 650 |
+
payload["psmiles"] = seed_psmiles.strip()
|
| 651 |
+
if literature_query.strip():
|
| 652 |
+
payload["literature_query"] = literature_query.strip()
|
| 653 |
+
payload["query"] = literature_query.strip()
|
| 654 |
+
if target_value is not None:
|
| 655 |
+
payload["target_value"] = float(target_value)
|
| 656 |
+
payload["num_samples"] = int(DEFAULT_NUM_GEN_SAMPLES)
|
| 657 |
+
if isinstance(questions, str) and questions.strip():
|
| 658 |
+
payload["questions"] = questions.strip()
|
| 659 |
+
return payload
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
def _maybe_add_artifacts(
    orch: PolymerOrchestrator,
    report: Dict[str, Any],
    seed_psmiles_fallback: Optional[str] = None,
    property_name_fallback: Optional[str] = None,
) -> Tuple[List[str], Dict[str, Any]]:
    """Best-effort rendering of PNG artifacts (generation grid, seed molecule,
    attribution heatmap) from a finished report.

    Returns (image_paths, extras) where extras holds the raw tool outputs or
    an "<name>_error" string per section. Each section is independently
    wrapped in try/except so one failure never blocks the others.
    """
    imgs: List[str] = []
    extras: Dict[str, Any] = {}

    # Generation grid (unchanged)
    # NOTE(review): this reads summary["generation"], while other code paths use
    # the key "polymer_generation" — confirm which key generate_report emits.
    try:
        gen = (report.get("summary", {}) or {}).get("generation", {})
        if isinstance(gen, dict) and gen.get("generated_psmiles"):
            grid = orch._run_gen_grid({}, {"polymer_generation": gen})
            # Only keep artifacts whose PNG actually exists on disk.
            if isinstance(grid, dict) and grid.get("png_path") and Path(grid["png_path"]).exists():
                imgs.append(grid["png_path"])
                extras["gen_grid"] = grid
    except Exception as e:
        extras["gen_grid_error"] = str(e)

    # Molecule render (seed) (unchanged but you may also want fallback)
    try:
        seed_psmiles = ((report.get("summary", {}) or {}).get("property_prediction", {}) or {}).get("psmiles")
        if not seed_psmiles:
            seed_psmiles = seed_psmiles_fallback
        if seed_psmiles:
            mol_png = orch._run_mol_render({}, {"psmiles": seed_psmiles, "view": "2d"})
            if isinstance(mol_png, dict) and mol_png.get("png_path") and Path(mol_png["png_path"]).exists():
                imgs.append(mol_png["png_path"])
                extras["mol_render"] = mol_png
    except Exception as e:
        extras["mol_render_error"] = str(e)

    # Explainability heatmap (NOW ALWAYS ATTEMPTED WHEN WE HAVE ANY pSMILES)
    try:
        summary = report.get("summary", {}) or {}
        tool_outputs = report.get("tool_outputs", {}) or {}

        prop_pred = summary.get("property_prediction", {}) or {}
        # data_extraction may live in either section depending on the plan shape.
        data_ex = summary.get("data_extraction", {}) or tool_outputs.get("data_extraction", {}) or {}

        # Preference order: predicted psmiles -> canonicalized -> caller fallback.
        seed_psmiles = (
            prop_pred.get("psmiles")
            or data_ex.get("canonical_psmiles")
            or seed_psmiles_fallback
        )

        prop_name = (
            prop_pred.get("property")
            or property_name_fallback
            or DEFAULT_PROPERTY_NAME
        )

        if seed_psmiles:
            expl_payload = {"psmiles": seed_psmiles, "top_k_atoms": 12, "property": prop_name}
            expl = orch._run_prop_attribution({}, expl_payload)
            if isinstance(expl, dict) and expl.get("png_path") and Path(expl["png_path"]).exists():
                imgs.append(expl["png_path"])
                extras["prop_attribution"] = expl
            else:
                extras["prop_attribution_error"] = expl.get("error") if isinstance(expl, dict) else "unknown"
        else:
            extras["prop_attribution_error"] = "No seed pSMILES available for attribution."
    except Exception as e:
        extras["prop_attribution_error"] = str(e)

    return imgs, extras
|
| 729 |
+
|
| 730 |
+
def _requested_citation_count(questions: str, default_n: int = 10) -> int:
|
| 731 |
+
"""
|
| 732 |
+
If the user explicitly asks for N citations/papers/sources/references, honor that.
|
| 733 |
+
Otherwise, default to 10.
|
| 734 |
+
"""
|
| 735 |
+
q = (questions or "").lower()
|
| 736 |
+
|
| 737 |
+
patterns = [
|
| 738 |
+
r"(?:at\s+least\s+)?(\d{1,3})\s*(?:citations|citation|papers|paper|sources|source|references|reference)\b",
|
| 739 |
+
r"\bcite\s+(\d{1,3})\s*(?:papers|paper|sources|source|references|reference|citations|citation)\b",
|
| 740 |
+
r"\b(\d{1,3})\s*(?:papers|paper|sources|source|references|reference|citations|citation)\s*(?:minimum|min)\b",
|
| 741 |
+
]
|
| 742 |
+
for pat in patterns:
|
| 743 |
+
m = re.search(pat, q, flags=re.IGNORECASE)
|
| 744 |
+
if m:
|
| 745 |
+
try:
|
| 746 |
+
n = int(m.group(1))
|
| 747 |
+
return max(1, min(n, 200))
|
| 748 |
+
except Exception:
|
| 749 |
+
pass
|
| 750 |
+
return max(1, default_n)
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
def _collect_citations(report: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Collect citations from report['citation_index']['sources'] if present; otherwise walk the report.
    Deduplicate by DOI (preferred) or URL.
    """
    if not isinstance(report, dict):
        return []

    sources = []
    ci = report.get("citation_index")
    if isinstance(ci, dict) and isinstance(ci.get("sources"), list):
        for s in ci["sources"]:
            if isinstance(s, dict):
                sources.append(s)

    # fallback walk
    # Only used when the citation index is absent/empty: recursively scan every
    # dict in the report that carries a "url" or "doi" key.
    if not sources:
        def walk(node: Any):
            if isinstance(node, dict):
                if "url" in node or "doi" in node:
                    doi = normalize_doi(node.get("doi", "")) or ""
                    url = None
                    # A normalized DOI always wins over the raw URL.
                    if doi:
                        url = doi_to_url(doi)
                    else:
                        url = node.get("url")
                    sources.append({
                        "domain": node.get("source_domain") or _url_to_domain(node.get("url") or ""),
                        "title": node.get("title") or node.get("name") or "Untitled",
                        "url": url,
                        "doi": doi,
                        "tag": url,
                    })
                # Keep descending even after recording this node — nested
                # structures may hold further citable entries.
                for v in node.values():
                    walk(v)
            elif isinstance(node, list):
                for x in node:
                    walk(x)
        walk(report)

    # normalize + dedupe
    # Last occurrence wins for a given DOI/URL key; entries without any usable
    # DOI or URL are dropped entirely.
    dedup: Dict[str, Dict[str, Any]] = {}
    for s in sources:
        if not isinstance(s, dict):
            continue
        url = s.get("url")
        doi = normalize_doi(s.get("doi", "")) or ""

        # Requirement: label should be COMPLETE DOI URL (preferred) else URL.
        tag = s.get("tag")
        if doi:
            cite_url = doi_to_url(doi)
        elif isinstance(url, str) and url.strip():
            cite_url = url.strip()
        else:
            continue

        key = None
        if doi:
            key = "doi:" + doi.lower()
        elif isinstance(cite_url, str) and cite_url.strip():
            key = "url:" + cite_url.strip()
        else:
            continue

        title = s.get("title") or "Untitled"

        dedup[key] = {
            # Keep key name "domain" for UI compatibility, but it now holds the DOI URL / URL text requirement.
            "domain": cite_url,
            "title": title,
            "url": cite_url,
            "doi": doi,
            "tag": cite_url if isinstance(cite_url, str) else tag,
        }

    # stable-ish ordering: prefer items that have a URL and non-generic domain
    def _rank(x: Dict[str, Any]) -> Tuple[int, int, str]:
        dom = (x.get("domain") or "").lower()
        url = x.get("url") or ""
        generic = int(dom in ("source", "doi.org"))
        has_url = 0 if (isinstance(url, str) and url.startswith("http")) else 1
        return (generic, has_url, dom)

    out = list(dedup.values())
    out.sort(key=_rank)
    return out
|
| 840 |
+
|
| 841 |
+
|
| 842 |
+
def _build_sources_section(citations: List[Dict[str, Any]], n_needed: int) -> str:
|
| 843 |
+
"""
|
| 844 |
+
Deterministic clickable source list.
|
| 845 |
+
|
| 846 |
+
Requirement:
|
| 847 |
+
- link text must be the COMPLETE DOI URL (preferred) else URL.
|
| 848 |
+
Bullet format:
|
| 849 |
+
- [https://doi.org/...](https://doi.org/...) — Title
|
| 850 |
+
"""
|
| 851 |
+
if n_needed < 1:
|
| 852 |
+
n_needed = 1
|
| 853 |
+
|
| 854 |
+
picked: List[Dict[str, Any]] = []
|
| 855 |
+
seen_urls: set = set()
|
| 856 |
+
for c in citations:
|
| 857 |
+
url = c.get("url")
|
| 858 |
+
if not isinstance(url, str) or not url.startswith("http"):
|
| 859 |
+
continue
|
| 860 |
+
if url in seen_urls:
|
| 861 |
+
continue
|
| 862 |
+
seen_urls.add(url)
|
| 863 |
+
picked.append(c)
|
| 864 |
+
if len(picked) >= n_needed:
|
| 865 |
+
break
|
| 866 |
+
|
| 867 |
+
lines = []
|
| 868 |
+
lines.append("\n\n---\n\n### Sources (clickable)\n")
|
| 869 |
+
if not picked:
|
| 870 |
+
lines.append("_No citable web/RAG sources were available in the report output._\n")
|
| 871 |
+
return "".join(lines)
|
| 872 |
+
|
| 873 |
+
if len(picked) < n_needed:
|
| 874 |
+
lines.append(f"_Only {len(picked)} unique sources were available; target was {n_needed}._\n\n")
|
| 875 |
+
|
| 876 |
+
for c in picked:
|
| 877 |
+
cite_text = (c.get("domain") or c.get("url") or "source").strip()
|
| 878 |
+
url = c.get("url")
|
| 879 |
+
title = (c.get("title") or "Untitled").strip()
|
| 880 |
+
lines.append(f"- [{cite_text}]({url}) — {title}\n")
|
| 881 |
+
|
| 882 |
+
return "".join(lines)
|
| 883 |
+
|
| 884 |
+
|
| 885 |
+
def _augment_questions_for_grounding(questions: str, n_citations: int) -> str:
|
| 886 |
+
"""
|
| 887 |
+
Updated grounding constraints:
|
| 888 |
+
- Tool citations MUST be [T] only.
|
| 889 |
+
- Paper citations MUST be clickable hyperlinks whose link text is the COMPLETE DOI URL (preferred).
|
| 890 |
+
- Ensure at least n_citations unique citations unless user asked otherwise.
|
| 891 |
+
- Do not repeat the same DOI/URL more than once.
|
| 892 |
+
"""
|
| 893 |
+
constraints = (
|
| 894 |
+
"\n\nCONSTRAINTS FOR THE ANSWER:\n"
|
| 895 |
+
"- Do NOT manufacture DOIs or sources. Use only URLs/DOIs present in the provided report.\n"
|
| 896 |
+
"- Tool-derived facts: cite inline using [T] (exactly; do NOT use [T1], [T2], etc.).\n"
|
| 897 |
+
"- Literature/web/RAG citations: cite as clickable hyperlinks where the bracket text is the COMPLETE DOI URL "
|
| 898 |
+
"(https://doi.org/...) when DOI is available; otherwise use the best available URL.\n"
|
| 899 |
+
"- Do NOT use numbered bracket citations like [1], [2].\n"
|
| 900 |
+
"- You are FORBIDDEN from adding a separate references list/section (e.g., 'References', 'Sources').\n"
|
| 901 |
+
"- All literature citations must be inline hyperlinks: [https://doi.org/...](https://doi.org/...) placed immediately after the claim.\n"
|
| 902 |
+
"- Distribute citations across the answer (do not cluster them in one place).\n"
|
| 903 |
+
"- NON-DUPLICATES: Do not repeat the same paper link. Each DOI/URL may appear at most once in the entire answer.\n"
|
| 904 |
+
"- Each major section should include at least 1 inline literature citation when relevant.\n"
|
| 905 |
+
"- Numeric values: only use numeric values that appear in tool outputs; otherwise state 'not available'.\n"
|
| 906 |
+
"- Qualitative expectations are allowed when numeric outputs are not available; label them clearly as qualitative.\n"
|
| 907 |
+
"- When presenting polymer_generation outputs (e.g., generated_psmiles), reproduce them verbatim exactly as returned.\n"
|
| 908 |
+
"- Polymer endpoint tokens: preserve attachment-point placeholders exactly as '[*]' in any pSMILES/SMILES shown.\n"
|
| 909 |
+
" Do NOT drop the '*' or render it as empty brackets '[]'.\n"
|
| 910 |
+
f"- Citation minimum: include at least {int(n_citations)} NON-DUPLICATE literature citations (unique by URL/DOI), "
|
| 911 |
+
"unless the user explicitly requested a different number.\n"
|
| 912 |
+
)
|
| 913 |
+
q = (questions or "").rstrip()
|
| 914 |
+
return q + constraints
|
| 915 |
+
|
| 916 |
+
|
| 917 |
+
def _assign_tool_tags(plan: Dict[str, Any], exec_res: Dict[str, Any], report: Dict[str, Any]) -> None:
|
| 918 |
+
"""
|
| 919 |
+
Tool tags are ALWAYS [T] (single tag only).
|
| 920 |
+
"""
|
| 921 |
+
try:
|
| 922 |
+
steps_executed = (exec_res or {}).get("steps", []) or []
|
| 923 |
+
for s in steps_executed:
|
| 924 |
+
if isinstance(s, dict):
|
| 925 |
+
s["cite_tag"] = "[T]"
|
| 926 |
+
except Exception:
|
| 927 |
+
pass
|
| 928 |
+
|
| 929 |
+
try:
|
| 930 |
+
summary = report.get("summary", {}) if isinstance(report, dict) else {}
|
| 931 |
+
if isinstance(summary, dict):
|
| 932 |
+
for k, v in list(summary.items()):
|
| 933 |
+
if isinstance(v, dict):
|
| 934 |
+
v["cite_tag"] = "[T]"
|
| 935 |
+
except Exception:
|
| 936 |
+
pass
|
| 937 |
+
|
| 938 |
+
try:
|
| 939 |
+
tool_outputs = report.get("tool_outputs", {}) if isinstance(report, dict) else {}
|
| 940 |
+
if isinstance(tool_outputs, dict):
|
| 941 |
+
for _, v in tool_outputs.items():
|
| 942 |
+
if isinstance(v, dict):
|
| 943 |
+
v["cite_tag"] = "[T]"
|
| 944 |
+
except Exception:
|
| 945 |
+
pass
|
| 946 |
+
|
| 947 |
+
|
| 948 |
+
# -----------------------------------------------------------------------------
|
| 949 |
+
# PolyAgent Console: corrected run (plan drives tools; report comes from execute_plan)
|
| 950 |
+
# -----------------------------------------------------------------------------
|
| 951 |
+
def run_agent(state: Dict[str, Any], questions: str) -> Tuple[str, List[str]]:
    """End-to-end PolyAgent run: infer inputs from the question text, plan and
    execute a tool chain, build/ground the report, and compose the final answer.

    Returns (final_markdown, image_paths). All intermediate artifacts are
    cached in the session context dict for later inspection.
    """
    orch, ctx = ensure_orch(state)

    # ---------- AUTO-DETECTION (NO GUI CHANGES) ----------
    qtxt = questions or ""

    inferred_prop = _infer_property_from_questions(qtxt) or DEFAULT_PROPERTY_NAME

    # Seed pSMILES: prefer one mentioned in the question; normalize '@' -> '[*]'.
    inferred_seed = _infer_seed_psmiles_from_questions(qtxt)
    seed_psmiles = _convert_at_to_star(inferred_seed) if inferred_seed else _convert_at_to_star(DEFAULT_SEED_PSMILES)

    want_generation = _infer_generate_intent(qtxt)

    inferred_target = _infer_target_value_from_questions(qtxt, inferred_prop)

    # Only default a target when the user appears to want generation but omitted an explicit value
    if inferred_target is None and want_generation:
        inferred_target = float(DEFAULT_TARGET_BY_PROPERTY.get(inferred_prop, DEFAULT_TARGET_VALUE))

    target_value: Optional[float] = float(inferred_target) if inferred_target is not None else None

    # Literature query: keep your existing behavior (fallback to default unless questions long enough)
    literature_query_default = DEFAULT_LITERATURE_QUERY
    case_brief = DEFAULT_CASE_BRIEF
    property_name = inferred_prop

    # Planner prompt
    planner_prompt = _compose_planner_prompt(
        case_brief=case_brief,
        questions=qtxt,
        property_name=property_name,
        seed_psmiles=seed_psmiles,
        literature_query=literature_query_default,
        target_value=target_value,
    )
    plan = orch.analyze_query(planner_prompt)
    ctx["last_plan"] = plan

    # Execute plan with inferred inputs
    exec_inputs = _seed_inputs(
        property_name=property_name,
        seed_psmiles=seed_psmiles,
        literature_query=literature_query_default,
        target_value=target_value,
        questions=qtxt,
    )
    exec_res = orch.execute_plan(plan, user_inputs=exec_inputs)
    ctx["last_exec"] = exec_res

    # IMPORTANT: Prefer report_generation output from execute_plan (plan-driven)
    report = _extract_tool_output(exec_res, "report_generation")

    # Fallback if orchestrator didn't include report_generation in the executed plan
    if report is None:
        # A sufficiently long question doubles as the literature query.
        qhint = (qtxt or "").strip()
        if len(qhint) >= 20:
            lit_query = qhint
        else:
            lit_query = literature_query_default

        rep_inputs: Dict[str, Any] = {
            "questions": qtxt,
            "literature_query": lit_query,
            "query": lit_query,
            "psmiles": seed_psmiles,
            "property": property_name,
            "rows": int(DEFAULT_SEARCH_ROWS),
            "fetch_top_n": int(DEFAULT_FETCH_TOP_N),
            "fetch_top_n_arxiv": 1,
            "num_samples": int(DEFAULT_NUM_GEN_SAMPLES),
        }

        # Only request generation if we have a target_value (or generation intent + fallback target above)
        if target_value is not None:
            rep_inputs["generate"] = True
            rep_inputs["target_value"] = float(target_value)

        report = orch.generate_report(rep_inputs)

    # Last-resort wrapping so downstream code can always treat report as a dict.
    if not isinstance(report, dict):
        report = {"summary": {"report_generation": {"text": str(report)}}}

    # Attach domains/citations; do NOT normalize generation outputs here
    report = _attach_source_domains(report)
    report = _index_citable_sources(report)

    # Tool tags: ALWAYS [T]
    _assign_tool_tags(plan=plan, exec_res=exec_res, report=report)

    # Normalize seed-related pSMILES for display only
    report = _normalize_seed_inputs_for_display(report)
    ctx["last_report"] = report

    # Artifacts
    imgs, extras = _maybe_add_artifacts(
        orch,
        report,
        seed_psmiles_fallback=seed_psmiles,
        property_name_fallback=property_name,
    )
    ctx.update(extras)

    # Decide required citation count (default 10 unless user asked otherwise)
    n_citations = _requested_citation_count(qtxt, default_n=10)
    ctx["required_citations"] = n_citations

    # Collect citations deterministically for an explicit clickable list
    citations = _collect_citations(report)
    ctx["citations_collected"] = len(citations)

    # Compose final answer with strict constraints
    guarded_questions = _augment_questions_for_grounding(qtxt, n_citations=n_citations)
    final_md, composer_imgs = orch.compose_gpt_style_answer(
        report,
        case_brief=case_brief,
        questions=guarded_questions,
    )

    final_md = _escape_endpoint_tokens_for_markdown(final_md)

    # Append deterministic source list to GUARANTEE explicit clickable citations
    # final_md = final_md.rstrip() + _build_sources_section(citations, n_needed=n_citations)

    # Merge composer images, keeping order and skipping missing/duplicate paths.
    for p in composer_imgs:
        if p not in imgs and Path(p).exists():
            imgs.append(p)

    return final_md, imgs
|
| 1079 |
+
|
| 1080 |
+
|
| 1081 |
+
# ----------------------------- Advanced Tools (optional tab) ----------------------------- #
|
| 1082 |
+
def tool_data_extraction(state: Dict[str, Any], psmiles: str) -> Tuple[str, List[str]]:
    """Run the data_extraction tool and render structure + attribution PNGs for the canonical pSMILES."""
    orch, ctx = ensure_orch(state)
    result = orch._run_data_extraction({"step": 1}, {"psmiles": _convert_at_to_star(psmiles)})
    ctx["data_extraction"] = result
    pngs: List[str] = []

    if isinstance(result, dict) and result.get("canonical_psmiles"):
        canonical = result["canonical_psmiles"]

        rendered = orch._run_mol_render({}, {"psmiles": canonical, "view": "2d"})
        if isinstance(rendered, dict) and rendered.get("png_path") and Path(rendered["png_path"]).exists():
            pngs.append(rendered["png_path"])

        attribution = orch._run_prop_attribution({}, {"psmiles": canonical, "top_k_atoms": 12})
        if isinstance(attribution, dict) and attribution.get("png_path") and Path(attribution["png_path"]).exists():
            pngs.append(attribution["png_path"])

    return pretty_json(result), pngs
|
| 1099 |
+
|
| 1100 |
+
|
| 1101 |
+
def tool_property_prediction(state: Dict[str, Any], property_name: str, psmiles: Optional[str]) -> str:
    """Run property_prediction, reusing cached data_extraction / cl_encoding context when present."""
    orch, ctx = ensure_orch(state)
    payload: Dict[str, Any] = {"property": property_name}
    if psmiles:
        payload["psmiles"] = _convert_at_to_star(psmiles)
    for cached_key in ("data_extraction", "cl_encoding"):
        if ctx.get(cached_key):
            payload[cached_key] = ctx[cached_key]
    result = orch._run_property_prediction({"step": 3}, payload)
    ctx["property_prediction"] = result
    return pretty_json(result)
|
| 1113 |
+
|
| 1114 |
+
|
| 1115 |
+
def tool_polymer_generation(
    state: Dict[str, Any], property_name: str, target_value: float, num_samples: int
) -> Tuple[str, List[str]]:
    """Run inverse-design polymer_generation and render a candidate grid PNG when possible."""
    orch, ctx = ensure_orch(state)
    result = orch._run_polymer_generation(
        {"step": 4},
        {
            "property": property_name,
            "target_value": float(target_value),
            "num_samples": int(num_samples),
        },
    )
    ctx["polymer_generation"] = result

    pngs: List[str] = []
    try:
        grid = orch._run_gen_grid({}, {"polymer_generation": result})
        if isinstance(grid, dict) and grid.get("png_path") and Path(grid["png_path"]).exists():
            pngs.append(grid["png_path"])
    except Exception:
        # Grid rendering is best-effort; the JSON result is still returned.
        pass

    return pretty_json(result), pngs
|
| 1136 |
+
|
| 1137 |
+
|
| 1138 |
+
def tool_web_search(state: Dict[str, Any], source: str, query: str, rows: int) -> Tuple[str, List[str]]:
    """Run web_search for one source and index any citable results into the output."""
    orch, ctx = ensure_orch(state)
    result = orch._run_web_search({"step": 5}, {"source": source, "query": query, "rows": rows})
    result = _attach_source_domains(result)
    if isinstance(result, dict):
        result = _index_citable_sources(result)
    ctx.setdefault("web_search", {})[source] = result
    return pretty_json(result), []
|
| 1145 |
+
|
| 1146 |
+
|
| 1147 |
+
def tool_rag_retrieval(state: Dict[str, Any], query: str) -> str:
    """Run rag_retrieval and index any citable results into the output."""
    orch, ctx = ensure_orch(state)
    result = orch._run_rag_retrieval({"step": 7}, {"query": query})
    result = _attach_source_domains(result)
    if isinstance(result, dict):
        result = _index_citable_sources(result)
    ctx["rag_retrieval"] = result
    return pretty_json(result)
|
| 1154 |
+
|
| 1155 |
+
|
| 1156 |
+
def tool_explainability(state: Dict[str, Any], psmiles: str, property_name: str) -> Tuple[str, List[str]]:
    """Run the attribution/explainability tool and collect its heatmap PNG if produced."""
    orch, ctx = ensure_orch(state)
    payload: Dict[str, Any] = {"psmiles": _convert_at_to_star(psmiles), "top_k_atoms": 12}
    if property_name:
        payload["property"] = property_name
    result = orch._run_prop_attribution({"step": 8}, payload)
    pngs: List[str] = []
    if isinstance(result, dict) and result.get("png_path") and Path(result["png_path"]).exists():
        pngs.append(result["png_path"])
    return pretty_json(result), pngs
|
| 1167 |
+
|
| 1168 |
+
|
| 1169 |
+
def tool_openai_probe(state: Dict[str, Any]) -> str:
    """
    Ping the configured OpenAI chat model with a trivial JSON-mode request.

    Returns the raw model content on success, or a pretty-printed
    {"ok": False, ...} diagnostic when the client is missing or the call fails.
    """
    orch, _ = ensure_orch(state)
    # Fix: getattr(..., None) already yields None for a missing attribute, so the
    # original's second `or orch.openai_client is None` test was redundant.
    if getattr(orch, "openai_client", None) is None:
        return pretty_json({"ok": False, "reason": getattr(orch, "_openai_unavailable_reason", "OpenAI client not available")})

    try:
        resp = orch.openai_client.chat.completions.create(
            model=orch.config.model,
            messages=[
                {"role": "system", "content": 'Return a tiny JSON object {"ok":true} and nothing else.'},
                {"role": "user", "content": "ping"},
            ],
            # JSON mode guarantees well-formed JSON in the reply.
            response_format={"type": "json_object"},
        )
        return resp.choices[0].message.content
    except Exception as e:
        return pretty_json({"ok": False, "error": str(e)})
|
| 1186 |
+
|
| 1187 |
+
|
| 1188 |
+
# ----------------------------- GPT-only ----------------------------- #
|
| 1189 |
+
def gpt_only_answer(state: Dict[str, Any], prompt: str) -> str:
    """
    Pure GPT-only responses. This function will not call orchestrator tools or perform web search.

    Returns the model's answer text, "Please provide a prompt." for empty input,
    or a pretty-printed {"ok": False, ...} diagnostic when the client is missing
    or the API call fails.
    """
    orch, _ = ensure_orch(state)
    # Fix: getattr(..., None) already yields None for a missing attribute, so the
    # original's second `or orch.openai_client is None` test was redundant.
    if getattr(orch, "openai_client", None) is None:
        return pretty_json({"ok": False, "reason": getattr(orch, "_openai_unavailable_reason", "OpenAI client not available")})

    p = (prompt or "").strip()
    if not p:
        return "Please provide a prompt."

    try:
        resp = orch.openai_client.chat.completions.create(
            model=orch.config.model,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a polymer R&D assistant. Answer directly and clearly. "
                        "Do not call tools or run web searches. If you are uncertain, state uncertainty."
                    ),
                },
                {"role": "user", "content": p},
            ],
        )
        # The API may return None content; normalize to an empty string.
        return resp.choices[0].message.content or ""
    except Exception as e:
        return pretty_json({"ok": False, "error": str(e)})
|
| 1218 |
+
|
| 1219 |
+
|
| 1220 |
+
# ----------------------------- Other LLMs (Hugging Face Inference) ----------------------------- #
|
| 1221 |
+
def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
    """
    LLM-only responses using Hugging Face Inference API for non-GPT models.
    """
    ensure_orch(state)

    import os
    from huggingface_hub import InferenceClient

    token = (os.getenv("HF_TOKEN") or "").strip()
    if not token:
        return pretty_json({"ok": False, "error": "HF_TOKEN is not set. Add HF_TOKEN=hf_... to your .env or env vars."})

    # UI selection name -> Hugging Face model repo id.
    model_map = {
        "mixtral-8x22b-instruct": "mistralai/Mixtral-8x22B-Instruct-v0.1",
        "llama-3.1-8b-instruct": "meta-llama/Llama-3.1-8B-Instruct",
    }

    selection = (model_name or "").strip()
    user_prompt = (prompt or "").strip()
    if not user_prompt:
        return "Please provide a prompt."
    if not selection:
        return "Please select a model."

    model_id = model_map.get(selection)
    if not model_id:
        return pretty_json({"ok": False, "error": f"Unsupported model selection: {selection}", "supported": list(model_map.keys())})

    client = InferenceClient(model=model_id, token=token)

    try:
        resp = client.chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a polymer R&D assistant. Answer directly and clearly. "
                        "Do not call tools or run web searches. If you are uncertain, state uncertainty."
                    ),
                },
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=900,
            temperature=0.7,
        )
        return resp.choices[0].message.content or ""
    except Exception as e:
        return pretty_json({"ok": False, "error": str(e), "model_id": model_id})
|
| 1270 |
+
|
| 1271 |
+
|
| 1272 |
+
def build_ui() -> gr.Blocks:
    """
    Construct the full Gradio Blocks UI.

    Layout: an instruction bubble, then three tabs — the main PolyAgent
    console, a per-tool debugging tab, and a direct "Other LLMs" tab.

    Returns:
        The assembled (but not yet launched) ``gr.Blocks`` app.
    """
    with gr.Blocks(
        css="""
        .mono {font-family: ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,'Liberation Mono','Courier New',monospace}
        .info-bubble {
          border: 1px solid rgba(15, 23, 42, 0.18);
          background: rgba(15, 23, 42, 0.04);
          border-radius: 18px;
          padding: 16px 18px;
          margin: 10px 0 14px 0;
        }
        """
    ) as demo:
        # Per-session state dict shared by every tool callback below.
        state = gr.State({})

        gr.Markdown("## PolyAgent\n")

        # Big bubble shown on load and retained (no dismiss / no state gating).
        gr.Markdown(RUN_INSTRUCTIONS_MD, elem_classes=["info-bubble"])

        with gr.Tabs():
            # --- Tab 1: main agent console (question in, structured answer + PNGs out) ---
            with gr.Tab("PolyAgent Console"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Questions")
                        questions = gr.Textbox(
                            label="Ask your questions",
                            lines=16,
                            placeholder=(
                                "Example:\n"
                                "1) For high-barrier flexible packaging films, what polymer design strategies improve OTR/WVTR?\n"
                                "2) What recent (2015–2025) literature supports these strategies? (cite 10 papers)\n"
                                "3) Suggest candidate polyester families and practical next experiments.\n"
                            ),
                        )
                        btn_run = gr.Button("Run PolyAgent", variant="primary")

                    with gr.Column(scale=1):
                        gr.Markdown("### PolyAgent Answer")
                        final_answer = gr.Markdown("PolyAgent will respond here with a single structured answer.")
                        gr.Markdown("### PNG Artifacts (Molecule, Grid, Explainability)")
                        ev_imgs = gr.Gallery(label="", columns=3, height=260)

                btn_run.click(
                    fn=run_agent,
                    inputs=[state, questions],
                    outputs=[final_answer, ev_imgs],
                )

            # --- Tab 2: individual tools for debugging / ad-hoc use ---
            with gr.Tab("Tools"):
                gr.Markdown("Run individual tools for debugging/ad-hoc usage. Visuals are PNG-only.")

                # pSMILES -> extracted data + molecule/explainability PNGs.
                with gr.Accordion("Data Extraction", open=True):
                    psm_in = gr.Textbox(label="pSMILES")
                    btn_ex = gr.Button("Extract", variant="primary")
                    ex_json = gr.Code(label="Output", language="json", elem_classes=["mono"])
                    ex_imgs = gr.Gallery(label="PNG (molecule + explainability)", columns=3, height=220)
                    btn_ex.click(tool_data_extraction, [state, psm_in], [ex_json, ex_imgs])

                # Predict one property; pSMILES is optional if a prior extraction exists in state.
                with gr.Accordion("Property Prediction", open=False):
                    prop = gr.Dropdown(
                        label="Property",
                        choices=["density", "glass transition", "melting", "specific volume", "thermal decomposition"],
                        value="glass transition",
                    )
                    psm_pred = gr.Textbox(label="Optional pSMILES (if not using previous extraction)")
                    btn_pred = gr.Button("Predict", variant="primary")
                    pred_json = gr.Code(label="Output", language="json", elem_classes=["mono"])
                    btn_pred.click(tool_property_prediction, [state, prop, psm_pred], [pred_json])

                # Inverse design: sample candidate polymers around a target property value.
                with gr.Accordion("Polymer Generation (inverse design)", open=False):
                    prop_g = gr.Dropdown(
                        label="Property (select generator)",
                        choices=["density", "glass transition", "melting", "specific volume", "thermal decomposition"],
                        value="glass transition",
                    )
                    tgt = gr.Number(label="target_value (required)", value=60.0, precision=4)
                    ns = gr.Slider(1, 24, value=4, step=1, label="# Samples")
                    btn_gen = gr.Button("Generate", variant="primary")
                    gen_json = gr.Code(label="Output", language="json", elem_classes=["mono"])
                    gen_imgs = gr.Gallery(label="PNG (generation grid)", columns=3, height=220)
                    btn_gen.click(tool_polymer_generation, [state, prop_g, tgt, ns], [gen_json, gen_imgs])

                # Literature search (multiple scholarly sources) plus local-KB RAG retrieval.
                with gr.Accordion("Web / RAG", open=False):
                    src = gr.Dropdown(
                        label="Source",
                        choices=["crossref", "openalex", "epmc", "arxiv", "semanticscholar", "springer", "internetarchive", "all"],
                        value="all",
                    )
                    query = gr.Textbox(label="Query")
                    rows = gr.Slider(1, 50, value=12, step=1, label="rows")
                    btn_ws = gr.Button("Search", variant="primary")
                    ws_json = gr.Code(label="Output", language="json", elem_classes=["mono"])
                    # Placeholder gallery so tool_web_search keeps the same output arity
                    # as the image-producing tools; intentionally tiny.
                    ws_imgs = gr.Gallery(label="(not used)", columns=3, height=10)
                    btn_ws.click(tool_web_search, [state, src, query, rows], [ws_json, ws_imgs])

                    rag_q = gr.Textbox(label="RAG query (local polymer KB)")
                    btn_rag = gr.Button("Retrieve (RAG)", variant="secondary")
                    rag_json = gr.Code(label="Output", language="json", elem_classes=["mono"])
                    btn_rag.click(tool_rag_retrieval, [state, rag_q], [rag_json])

                # Atom-level attribution heatmap for a chosen property.
                with gr.Accordion("Explainability (top-K atom occlusion)", open=False):
                    psm_expl = gr.Textbox(label="pSMILES")
                    prop_expl = gr.Dropdown(
                        label="Property (for attribution)",
                        choices=["density", "glass transition", "melting", "specific volume", "thermal decomposition"],
                        value="glass transition",
                    )
                    btn_expl = gr.Button("Explain", variant="primary")
                    expl_json = gr.Code(label="Attribution data (JSON)", language="json", elem_classes=["mono"])
                    expl_imgs = gr.Gallery(label="PNG (heatmap)", columns=2, height=220)
                    btn_expl.click(tool_explainability, [state, psm_expl, prop_expl], [expl_json, expl_imgs])

                # Quick connectivity check for the OpenAI client.
                with gr.Accordion("Diagnostics", open=False):
                    btn_probe = gr.Button("Probe OpenAI (JSON ping)")
                    probe_json = gr.Code(label="Result", language="json", elem_classes=["mono"])
                    btn_probe.click(tool_openai_probe, [state], [probe_json])

            # --- Tab 3: direct non-GPT model responses via Hugging Face Inference ---
            with gr.Tab("Other LLMs"):
                gr.Markdown("Run a direct LLM-only response (no tools, no web search) using a non-GPT model name.")

                llm_model = gr.Dropdown(
                    label="Model",
                    choices=["mixtral-8x22b-instruct", "llama-3.1-8b-instruct"],
                    value="mixtral-8x22b-instruct",
                )
                llm_prompt = gr.Textbox(label="Prompt", lines=10, placeholder="Enter your polymer question/prompt.")
                llm_btn = gr.Button("Run LLM", variant="primary")
                llm_out = gr.Markdown("The model response will appear here.")
                llm_btn.click(fn=llm_only_answer, inputs=[state, llm_model, llm_prompt], outputs=[llm_out])

    return demo
+
|
| 1406 |
+
def main():
    """CLI entry point: parse optional server options and launch the Gradio app."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--server-name", type=str, default=None)
    parser.add_argument("--server-port", type=int, default=None)
    opts = parser.parse_args()

    app = build_ui()
    # show_api=False hides the auto-generated API page; share=True requests a
    # public share link in addition to the local server.
    app.launch(server_name=opts.server_name, server_port=opts.server_port, show_api=False, share=True)


if __name__ == "__main__":
    main()