Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,1466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# 1. CONSOLIDATED IMPORTS
|
| 3 |
+
# ============================================================================
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import spacy
|
| 6 |
+
from spacy import displacy
|
| 7 |
+
import base64
|
| 8 |
+
import traceback
|
| 9 |
+
import subprocess
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import importlib
|
| 14 |
+
import site
|
| 15 |
+
import threading
|
| 16 |
+
import queue
|
| 17 |
+
from dataclasses import dataclass
|
| 18 |
+
from enum import Enum
|
| 19 |
+
from typing import Dict, Any, List, Set, Optional, Tuple
|
| 20 |
+
|
| 21 |
+
# --- LanguageTool Import ---
|
| 22 |
+
try:
|
| 23 |
+
import language_tool_python
|
| 24 |
+
LT_AVAILABLE = True
|
| 25 |
+
except ImportError:
|
| 26 |
+
LT_AVAILABLE = False
|
| 27 |
+
print("="*70)
|
| 28 |
+
print("CRITICAL WARNING: `language-tool-python` library not found.")
|
| 29 |
+
print("The 'German Grammar Check' tab will not function.")
|
| 30 |
+
print("="*70)
|
| 31 |
+
|
| 32 |
+
# --- OdeNet (wn) Import ---
|
| 33 |
+
try:
|
| 34 |
+
import wn
|
| 35 |
+
WN_AVAILABLE = True
|
| 36 |
+
except ImportError:
|
| 37 |
+
WN_AVAILABLE = False
|
| 38 |
+
print("="*70)
|
| 39 |
+
print("CRITICAL WARNING: `wn` library not found.")
|
| 40 |
+
print("The 'German Thesaurus' tab will not function.")
|
| 41 |
+
print("="*70)
|
| 42 |
+
|
| 43 |
+
# --- Pattern.de Import ---
|
| 44 |
+
try:
|
| 45 |
+
from pattern.de import (
|
| 46 |
+
pluralize, singularize, conjugate, tenses, lemma, lexeme,
|
| 47 |
+
attributive, predicative,
|
| 48 |
+
article, gender, MALE, FEMALE, NEUTRAL, PLURAL,
|
| 49 |
+
INFINITIVE, PRESENT, PAST, PARTICIPLE,
|
| 50 |
+
FIRST, SECOND, THIRD, SINGULAR, PLURAL as PL,
|
| 51 |
+
INDICATIVE, IMPERATIVE, SUBJUNCTIVE,
|
| 52 |
+
NOMINATIVE, ACCUSATIVE, DATIVE, GENITIVE,
|
| 53 |
+
SUBJECT, OBJECT, INDIRECT, PROPERTY,
|
| 54 |
+
DEFINITE, INDEFINITE,
|
| 55 |
+
comparative, superlative,
|
| 56 |
+
NOUN, VERB, ADJECTIVE,
|
| 57 |
+
parse, split
|
| 58 |
+
)
|
| 59 |
+
PATTERN_DE_AVAILABLE = True
|
| 60 |
+
except ImportError as e:
|
| 61 |
+
PATTERN_DE_AVAILABLE = False
|
| 62 |
+
print("="*70)
|
| 63 |
+
print(f"CRITICAL WARNING: `pattern.de` library not found: {e}")
|
| 64 |
+
print("The 'German Inflections' tab will not function.")
|
| 65 |
+
print("="*70)
|
| 66 |
+
|
| 67 |
+
# ============================================================================
|
| 68 |
+
# 2. SHARED GLOBALS & CONFIG
|
| 69 |
+
# ============================================================================
|
| 70 |
+
VERBOSE = True  # Enable verbose debug output for Pattern.de

def log(msg):
    """Emit a debug line to stdout, but only while VERBOSE is enabled."""
    if not VERBOSE:
        return
    print(f"[DEBUG] {msg}")
|
| 75 |
+
|
| 76 |
+
# --- NEW HELPER ---
|
| 77 |
+
def _html_wrap(content: str, line_height: str = "2.0") -> str:
|
| 78 |
+
"""Wraps displaCy HTML in a consistent, scrollable div."""
|
| 79 |
+
return f'<div style="overflow-x:auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; line-height: {line_height};">{content}</div>'
|
| 80 |
+
|
| 81 |
+
# --- NEW HELPER for SVA ---
|
| 82 |
+
def _conjugate_to_person_number(verb_lemma: str, person: str, number: str) -> Optional[str]:
    """
    Return the present-tense finite form of *verb_lemma* for a given person/number.

    Args:
        verb_lemma: Infinitive/lemma of a German verb (e.g. "gehen").
        person: One of '1', '2', '3'.
        number: One of 'sg', 'pl'.

    Returns:
        The conjugated form, or None when pattern.de is unavailable, the
        person/number combination is invalid, or conjugation itself fails.
    """
    if not PATTERN_DE_AVAILABLE:
        return None
    # pattern.de's conjugate() accepts person/number aliases such as "3sg"
    # directly; the original identity-mapping dict added nothing, so simply
    # validate the combination instead (preserving the None-on-invalid result).
    alias = f"{person}{number}"
    if alias not in {"1sg", "2sg", "3sg", "1pl", "2pl", "3pl"}:
        return None
    try:
        return conjugate(verb_lemma, alias)
    except Exception:
        return None
|
| 94 |
+
|
| 95 |
+
# ============================================================================
|
| 96 |
+
# 3. SPACY ANALYZER LOGIC (from spacy-app.py)
|
| 97 |
+
# ============================================================================
|
| 98 |
+
|
| 99 |
+
# --- Globals & Config for spaCy ---
|
| 100 |
+
# Maps a model key (as shown in the UI) to a (display name, package name, installer) triple.
# installer is "spacy" for models fetched via `python -m spacy download`, or
# "grecy" for Ancient Greek wheels fetched from the greCy GitHub release.
SPACY_MODEL_INFO: Dict[str, Tuple[str, str, str]] = {
    "de": ("German", "de_core_news_md", "spacy"),
    "en": ("English", "en_core_web_md", "spacy"),
    "es": ("Spanish", "es_core_news_md", "spacy"),
    "grc-proiel-trf": ("Ancient Greek (PROIEL TRF)", "grc_proiel_trf", "grecy"),
    "grc-perseus-trf": ("Ancient Greek (Perseus TRF)", "grc_perseus_trf", "grecy"),
    "grc_ner_trf": ("Ancient Greek (NER TRF)", "grc_ner_trf", "grecy"),
    "grc-proiel-lg": ("Ancient Greek (PROIEL LG)", "grc_proiel_lg", "grecy"),
    "grc-perseus-lg": ("Ancient Greek (Perseus LG)", "grc_perseus_lg", "grecy"),
    "grc-proiel-sm": ("Ancient Greek (PROIEL SM)", "grc_proiel_sm", "grecy"),
    "grc-perseus-sm": ("Ancient Greek (Perseus SM)", "grc_perseus_sm", "grecy"),
}
|
| 112 |
+
|
| 113 |
+
# Per-language UI strings for the spaCy analyzer tab, keyed by interface
# language code. All three sub-dicts share the same key set; lookups fall
# back to "en" (see spacy_get_analysis / spacy_update_ui).
SPACY_UI_TEXT = {
    "de": {
        "title": "# 🔍 Mehrsprachiger Morpho-Syntaktischer Analysator",
        "subtitle": "Analysieren Sie Texte auf Deutsch, Englisch, Spanisch und Altgriechisch",
        "ui_lang_label": "Benutzeroberflächensprache",
        "model_lang_label": "Textsprache für Analyse",
        "input_label": "Text eingeben",
        "input_placeholder": "Geben Sie hier Ihren Text ein...",
        "button_text": "Text analysieren",
        "button_processing_text": "Verarbeitung läuft...",
        "tab_graphic": "Grafische Darstellung",
        "tab_table": "Tabelle",
        "tab_json": "JSON",
        "tab_ner": "Entitäten",
        "html_label": "Abhängigkeitsparsing",
        "table_label": "Morphologische Analyse",
        "table_headers": ["Wort", "Lemma", "POS", "Tag", "Morphologie", "Abhängigkeit"],
        "json_label": "JSON-Ausgabe",
        "ner_label": "Benannte Entitäten",
        "error_message": "Fehler: "
    },
    "en": {
        "title": "# 🔍 Multilingual Morpho-Syntactic Analyzer",
        "subtitle": "Analyze texts in German, English, Spanish, and Ancient Greek",
        "ui_lang_label": "Interface Language",
        "model_lang_label": "Text Language for Analysis",
        "input_label": "Enter Text",
        "input_placeholder": "Enter your text here...",
        "button_text": "Analyze Text",
        "button_processing_text": "Processing...",
        "tab_graphic": "Graphic View",
        "tab_table": "Table",
        "tab_json": "JSON",
        "tab_ner": "Entities",
        "html_label": "Dependency Parsing",
        "table_label": "Morphological Analysis",
        "table_headers": ["Word", "Lemma", "POS", "Tag", "Morphology", "Dependency"],
        "json_label": "JSON Output",
        "ner_label": "Named Entities",
        "error_message": "Error: "
    },
    "es": {
        "title": "# 🔍 Analizador Morfo-Sintáctico Multilingüe",
        "subtitle": "Analice textos en alemán, inglés, español y griego antiguo",
        "ui_lang_label": "Idioma de la Interfaz",
        "model_lang_label": "Idioma del Texto para Análisis",
        "input_label": "Introducir Texto",
        "input_placeholder": "Ingrese su texto aquí...",
        "button_text": "Analizar Texto",
        "button_processing_text": "Procesando...",
        "tab_graphic": "Vista Gráfica",
        "tab_table": "Tabla",
        "tab_json": "JSON",
        "tab_ner": "Entidades",
        "html_label": "Análisis de Dependencias",
        "table_label": "Análisis Morfológico",
        "table_headers": ["Palabra", "Lema", "POS", "Etiqueta", "Morfología", "Dependencia"],
        "json_label": "Salida JSON",
        "ner_label": "Entidades Nombradas",
        "error_message": "Error: "
    }
}
# Cache of loaded pipelines, keyed like SPACY_MODEL_INFO. A value of None means
# "known model, not loaded yet" (lazy load happens in spacy_get_analysis).
SPACY_MODELS: Dict[str, Optional[spacy.Language]] = {}
|
| 176 |
+
|
| 177 |
+
# --- Dependency Installation ---
|
| 178 |
+
def spacy_install_spacy_transformers_once():
    """Install spacy-transformers (needed by every _trf model), guarded by a marker file."""
    sentinel = Path(".spacy_transformers_installed")
    if sentinel.exists():
        print("✓ spacy-transformers already installed (marker found)")
        return True

    print("Installing spacy-transformers (for _trf models)...")
    pip_cmd = [sys.executable, "-m", "pip", "install", "spacy-transformers"]
    try:
        # Generous timeout: transformer wheels are large downloads.
        subprocess.run(pip_cmd, capture_output=True, text=True, check=True, timeout=900)
        print("✓ Successfully installed spacy-transformers")
        sentinel.touch()
        return True
    except Exception as e:
        print(f"✗ FAILED to install spacy-transformers: {e}")
        if hasattr(e, 'stdout'): print(f"STDOUT: {e.stdout}")
        if hasattr(e, 'stderr'): print(f"STDERR: {e.stderr}")
        return False
|
| 197 |
+
|
| 198 |
+
def spacy_install_grecy_model_from_github(model_name: str) -> bool:
    """Install a greCy model wheel from its GitHub release, guarded by a marker file."""
    marker = Path(f".{model_name}_installed")
    if marker.exists():
        print(f"✓ {model_name} already installed (marker found)")
        return True

    print(f"Installing grecy model: {model_name}...")

    # The PROIEL transformer wheel carries a real version; the other models
    # ship as 0.0.0 wheels under the same release tag.
    generic_models = {
        "grc_perseus_trf", "grc_proiel_lg", "grc_perseus_lg",
        "grc_proiel_sm", "grc_perseus_sm", "grc_ner_trf",
    }
    if model_name == "grc_proiel_trf":
        wheel = "grc_proiel_trf-3.7.5-py3-none-any.whl"
    elif model_name in generic_models:
        wheel = f"{model_name}-0.0.0-py3-none-any.whl"
    else:
        print(f"✗ Unknown grecy model: {model_name}")
        return False

    url = f"https://github.com/CrispStrobe/greCy/releases/download/v1.0-models/{wheel}"
    cmd = [sys.executable, "-m", "pip", "install", url, "--no-deps"]
    print(f"Running: {' '.join(cmd)}")
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=900)
        if proc.stdout: print("STDOUT:", proc.stdout)
        if proc.stderr: print("STDERR:", proc.stderr)
        print(f"✓ Successfully installed {model_name} from GitHub")
        marker.touch()
        return True
    except subprocess.CalledProcessError as e:
        print(f"✗ Installation subprocess FAILED with code {e.returncode}")
        print("STDOUT:", e.stdout)
        print("STDERR:", e.stderr)
        return False
    except Exception as e:
        print(f"✗ Installation exception: {e}")
        traceback.print_exc()
        return False
|
| 236 |
+
|
| 237 |
+
# --- Model Loading (Lazy Loading) ---
|
| 238 |
+
def spacy_load_spacy_model(model_name: str) -> Optional[spacy.Language]:
    """Load a standard spaCy model, downloading it on demand if it is missing."""
    try:
        return spacy.load(model_name)
    except OSError:
        pass  # not installed yet — fall through to the download path

    print(f"Installing {model_name}...")
    try:
        subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
        return spacy.load(model_name)
    except Exception as e:
        print(f"✗ Failed to install {model_name}: {e}")
        if hasattr(e, 'stderr'): print(f"STDERR: {e.stderr}")
        return None
|
| 251 |
+
|
| 252 |
+
def spacy_load_grecy_model(model_name: str) -> Optional[spacy.Language]:
    """Load a greCy model, installing its wheel from GitHub first when needed."""
    if not spacy_install_grecy_model_from_github(model_name):
        print(f"✗ Cannot load {model_name} because installation failed.")
        return None

    try:
        # A wheel installed mid-process is invisible until import caches refresh.
        print("Refreshing importlib to find new package...")
        importlib.invalidate_caches()
        try:
            importlib.reload(site)
        except Exception:
            pass

        print(f"Trying: spacy.load('{model_name}')")
        pipeline = spacy.load(model_name)
        print(f"✓ Successfully loaded {model_name}")
        return pipeline
    except Exception as e:
        print(f"✗ Model {model_name} is installed but FAILED to load.")
        print(f" Error: {e}")
        traceback.print_exc()
        return None
|
| 272 |
+
|
| 273 |
+
def spacy_initialize_models():
    """Eagerly pre-load the standard spaCy models; greCy models stay lazy."""
    banner = "=" * 70
    print("\n" + banner)
    print("INITIALIZING SPACY MODELS")
    print(banner + "\n")

    spacy_install_spacy_transformers_once()

    preloaded = 0
    standard_total = 0

    for lang_code, (lang_name, model_name, model_type) in SPACY_MODEL_INFO.items():
        if model_type != "spacy":
            # greCy models are loaded on first request, not at startup.
            print(f"✓ {lang_name} ({model_name}) will be loaded on first use.\n")
            SPACY_MODELS[lang_code] = None
            continue

        standard_total += 1
        print(f"Loading {lang_name} ({model_name})...")
        nlp = spacy_load_spacy_model(model_name)
        SPACY_MODELS[lang_code] = nlp
        if nlp:
            print(f"✓ {lang_name} ready\n")
            preloaded += 1
        else:
            print(f"✗ {lang_name} FAILED\n")

    print(f"Pre-loaded {preloaded}/{standard_total} standard models.")
    print(banner + "\n")
|
| 301 |
+
|
| 302 |
+
# --- Analysis Logic ---
|
| 303 |
+
def spacy_get_analysis(ui_lang: str, model_lang_key: str, text: str):
    """
    Run the spaCy pipeline selected by `model_lang_key` over `text`.

    Returns a 5-tuple matching the Gradio outputs:
    (dataframe rows, JSON token list, dependency-parse HTML, NER HTML,
    button update re-enabling the analyze button).
    Models are lazy-loaded on first use and cached in SPACY_MODELS.
    """
    ui_config = SPACY_UI_TEXT.get(ui_lang.lower(), SPACY_UI_TEXT["en"])
    error_prefix = ui_config["error_message"]

    try:
        if not text.strip():
            # Return empty values for all outputs
            return ([], [], "<p style='color: orange;'>No text provided.</p>", "<p>No text provided.</p>",
                    gr.Button(value=ui_config["button_text"], interactive=True))

        nlp = SPACY_MODELS.get(model_lang_key)

        if nlp is None:
            print(f"First use of {model_lang_key}. Loading model...")
            if model_lang_key not in SPACY_MODEL_INFO:
                # Check key before access
                raise ValueError(f"Unknown model key: {model_lang_key}")
            _, model_name, model_type = SPACY_MODEL_INFO[model_lang_key]

            if model_type == "grecy":
                nlp = spacy_load_grecy_model(model_name)
            else:
                nlp = spacy_load_spacy_model(model_name)

            if nlp is None:
                SPACY_MODELS.pop(model_lang_key, None)
                err_msg = f"Model for {model_lang_key} ({model_name}) FAILED to load. Check logs."
                err_html = f"<p style='color: red;'>{err_msg}</p>"
                # Return error tuple on load failure
                return ([], {"error": err_msg}, err_html, err_html,
                        gr.Button(value=ui_config["button_text"], interactive=True))
            else:
                SPACY_MODELS[model_lang_key] = nlp
                print(f"✓ {model_lang_key} is now loaded and cached.")

        doc = nlp(text)

        # BUGFIX: `Doc.is_parsed` was removed in spaCy 3.x and raises
        # AttributeError there; `Doc.has_annotation("DEP")` is the documented
        # replacement. Previously the AttributeError was swallowed by the
        # broad `except` below, failing the entire analysis.
        is_parsed = doc.has_annotation("DEP")

        dataframe_output = []
        json_output = []

        for token in doc:
            # Robust attribute access: some pipelines omit components, so
            # normalize missing annotations to empty strings.
            lemma_str = token.lemma_
            morph_str = str(token.morph) if token.morph else ''
            dep_str = token.dep_ if is_parsed else ''
            tag_str = token.tag_ or ''
            pos_str = token.pos_ or ''

            json_output.append({
                "word": token.text, "lemma": lemma_str, "pos": pos_str,
                "tag": tag_str, "morphology": morph_str, "dependency": dep_str,
                "is_stopword": token.is_stop
            })
            dataframe_output.append([token.text, lemma_str, pos_str, tag_str, morph_str, dep_str])

        # --- Dependency parse visualization (direct HTML, no base64) ---
        if "parser" in nlp.pipe_names and is_parsed:
            try:
                options = {"compact": True, "bg": "#ffffff", "color": "#000000", "font": "Source Sans Pro"}
                html_svg = displacy.render(doc, style="dep", jupyter=False, options=options)
                html_dep_out = _html_wrap(html_svg, line_height="2.5")
            except Exception as e:
                html_dep_out = f"<p style='color: orange;'>Visualization error (DEP): {e}</p>"
        else:
            html_dep_out = "<p style='color: orange;'>Dependency parsing ('parser') not available or doc not parsed.</p>"

        # --- Named entity visualization ---
        if "ner" in nlp.pipe_names:
            if doc.ents:
                try:
                    html_ner = displacy.render(doc, style="ent", jupyter=False)
                    html_ner_out = _html_wrap(html_ner, line_height="2.5")
                except Exception as e:
                    html_ner_out = f"<p style='color: orange;'>Visualization error (NER): {e}</p>"
            else:
                html_ner_out = "<p>No named entities found in this text.</p>"
        else:
            html_ner_out = "<p style='color: orange;'>Named Entity Recognition ('ner') not available for this model.</p>"

        return (dataframe_output, json_output, html_dep_out, html_ner_out,
                gr.Button(value=ui_config["button_text"], interactive=True))

    except Exception as e:
        traceback.print_exc()
        error_html = f"<div style='color: red; border: 1px solid red; padding: 10px; border-radius: 5px; background-color: #fff5f5;'><strong>{error_prefix}</strong> {str(e)}</div>"
        # Consistent error return shape
        return ([], {"error": str(e)}, error_html, error_html,
                gr.Button(value=ui_config["button_text"], interactive=True))
|
| 397 |
+
|
| 398 |
+
# --- UI Update Logic ---
|
| 399 |
+
def spacy_update_ui(ui_lang: str):
    """
    Return the list of component updates that relabel the spaCy tab for the
    chosen interface language (order must match the Gradio outputs list).
    """
    ui_config = SPACY_UI_TEXT.get(ui_lang.lower(), SPACY_UI_TEXT["en"])
    # BUGFIX/consistency: use gr.update(...) for every entry. The class-level
    # helpers (gr.Markdown.update, gr.Textbox.update, gr.Button.update,
    # gr.HTML.update, gr.DataFrame.update, gr.JSON.update) were removed in
    # Gradio 4.x and raise AttributeError; the original list already mixed
    # both forms.
    return [
        gr.update(value=ui_config["title"]),
        gr.update(value=ui_config["subtitle"]),
        gr.update(label=ui_config["ui_lang_label"]),
        gr.update(label=ui_config["model_lang_label"]),
        gr.update(label=ui_config["input_label"], placeholder=ui_config["input_placeholder"]),
        gr.update(value=ui_config["button_text"]),
        gr.update(label=ui_config["tab_graphic"]),
        gr.update(label=ui_config["tab_table"]),
        gr.update(label=ui_config["tab_json"]),
        gr.update(label=ui_config["tab_ner"]),
        gr.update(label=ui_config["html_label"]),
        gr.update(label=ui_config["table_label"], headers=ui_config["table_headers"]),
        gr.update(label=ui_config["json_label"]),
        gr.update(label=ui_config["ner_label"])
    ]
|
| 419 |
+
# --- End component updates ---
|
| 420 |
+
|
| 421 |
+
# ============================================================================
|
| 422 |
+
# 4. LANGUAGETOOL LOGIC (from languagetool-server.py)
|
| 423 |
+
# ============================================================================
|
| 424 |
+
|
| 425 |
+
# --- Globals for LanguageTool ---
|
| 426 |
+
# BUGFIX: the annotation must be a forward reference (string). Module-level
# annotations are evaluated at import time, so a bare
# `language_tool_python.LanguageTool` raised NameError whenever the library
# was missing — defeating the LT_AVAILABLE import guard entirely. Optional
# also reflects that the instance starts as (and may stay) None.
LT_TOOL_INSTANCE: Optional["language_tool_python.LanguageTool"] = None
LT_TOOL_LOCK = threading.Lock()  # guards one-time initialization of the instance
|
| 428 |
+
|
| 429 |
+
def lt_get_language_tool() -> Optional["language_tool_python.LanguageTool"]:
    """
    Return the shared LanguageTool instance, creating it on first call.

    Thread-safe via double-checked locking. Returns None if initialization
    fails. The return annotation is a forward reference (string): function
    annotations are evaluated at definition time, so a bare
    `language_tool_python.LanguageTool` would raise NameError at import when
    the library is missing, even though LT_AVAILABLE guards the call path.

    Raises:
        ImportError: if language-tool-python is not installed.
    """
    global LT_TOOL_INSTANCE

    if not LT_AVAILABLE:
        raise ImportError("language-tool-python library is not installed.")

    # Fast path: already initialized — no locking needed.
    if LT_TOOL_INSTANCE:
        return LT_TOOL_INSTANCE

    # Slow path: initialize under the lock.
    with LT_TOOL_LOCK:
        # Double-check inside the lock: another thread may have won the race.
        if LT_TOOL_INSTANCE:
            return LT_TOOL_INSTANCE

        try:
            print("Initializing LanguageTool for German (de-DE)...")
            tool = language_tool_python.LanguageTool('de-DE')
            try:
                tool.picky = True  # enable stricter rules when the version supports it
            except Exception:
                pass
            # Warm-up to ensure rules are loaded
            _ = tool.check("Dies ist ein Test.")
            print("LanguageTool (local server) initialized successfully.")
            LT_TOOL_INSTANCE = tool
            return LT_TOOL_INSTANCE
        except Exception as e:
            print(f"CRITICAL ERROR: Failed to initialize LanguageTool: {e}")
            return None
|
| 463 |
+
|
| 464 |
+
# --- Grammar Checking Logic ---
|
| 465 |
+
def lt_check_grammar(text: str) -> List[Dict[str, Any]]:
    """Run LanguageTool over a German text and return its matches as a JSON-able list."""
    try:
        tool = lt_get_language_tool()
        if tool is None:
            return [{"error": "LanguageTool service failed to initialize."}]
        if not text or not text.strip():
            return [{"info": "No text provided to check."}]

        print(f"Checking text: {text}")
        matches = tool.check(text)

        # Retry once in picky mode before declaring the text error-free.
        if not matches:
            try:
                tool.picky = True
                matches = tool.check(text)
            except Exception:
                pass

        if not matches:
            return [{"info": "No errors found!", "status": "perfect"}]

        def _match_to_dict(m):
            # Serialize one match; getattr guards fields that vary across
            # language_tool_python versions.
            return {
                "message": m.message,
                "rule_id": m.ruleId,
                "category": getattr(m.category, 'name', m.category),
                "incorrect_text": text[m.offset : m.offset + m.errorLength],
                "replacements": m.replacements,
                "offset": m.offset,
                "length": m.errorLength,
                "context": getattr(m, "context", None),
                "short_message": getattr(m, "shortMessage", None)
            }

        errors_list = [_match_to_dict(m) for m in matches]
        print(f"Found {len(errors_list)} errors.")
        return errors_list

    except Exception as e:
        traceback.print_exc()
        return [{"error": f"An unexpected error occurred: {str(e)}"}]
|
| 515 |
+
|
| 516 |
+
# ============================================================================
|
| 517 |
+
# 5. ODENET THESAURUS LOGIC (from odenet_app.py)
|
| 518 |
+
# ============================================================================
|
| 519 |
+
|
| 520 |
+
# --- Globals & Classes for OdeNet ---
|
| 521 |
+
@dataclass
class OdeNetWorkItem:
    """Represents a lookup request."""
    # The German word to look up in OdeNet.
    word: str
    # Private per-request queue on which the worker thread delivers a
    # ("success", result_dict) or ("error", message) tuple back to the caller.
    response_queue: queue.Queue
|
| 526 |
+
|
| 527 |
+
class OdeNetWorkerState(Enum):
    """Lifecycle states of the OdeNet worker thread."""
    NOT_STARTED = 1   # worker thread has never been launched
    INITIALIZING = 2  # thread running; downloading / opening WordNet data
    READY = 3         # WordNet instance created; lookups are accepted
    ERROR = 4         # initialization failed or a fatal error occurred
|
| 532 |
+
|
| 533 |
+
# Shared state for the OdeNet subsystem.  These globals are only mutated by
# odenet_start_worker() and the worker thread itself (odenet_worker_loop).
odenet_worker_state = OdeNetWorkerState.NOT_STARTED
odenet_worker_thread = None  # threading.Thread once started; None before
odenet_work_queue = queue.Queue()  # OdeNetWorkItem requests for the worker
odenet_de_wn = None # Single WordNet instance for the worker thread
|
| 537 |
+
|
| 538 |
+
# --- Worker Thread Logic ---
|
| 539 |
+
def odenet_download_wordnet_data():
    """Download WordNet data. Called once by worker thread.

    Returns True when the data is (or already was) available, and False
    when the 'wn' library is missing or the download step failed outright.
    """
    if not WN_AVAILABLE:
        print("[OdeNet Worker] 'wn' library not available. Skipping download.")
        return False

    try:
        print("[OdeNet Worker] Downloading WordNet data...")
        # Each dataset is fetched best-effort: a failure (e.g. the data is
        # already installed) is logged but does not block the other dataset.
        for spec in ('odenet:1.4', 'cili:1.0'):
            label = spec.split(':')[0]
            try:
                wn.download(spec)
            except Exception as e:
                print(f"[OdeNet Worker] Note: {label} download: {e}")
        print("[OdeNet Worker] ✓ WordNet data ready")
        return True
    except Exception as e:
        print(f"[OdeNet Worker] ✗ Failed to download WordNet data: {e}")
        return False
|
| 561 |
+
|
| 562 |
+
def odenet_worker_loop():
    """
    Worker thread main loop.
    This is the ONLY thread that accesses the SQLite database.

    Transitions odenet_worker_state through INITIALIZING -> READY (or ERROR),
    then serves OdeNetWorkItem requests from odenet_work_queue forever.
    """
    global odenet_worker_state, odenet_de_wn

    if not WN_AVAILABLE:
        print("[OdeNet Worker] 'wn' library not available. Worker cannot start.")
        odenet_worker_state = OdeNetWorkerState.ERROR
        return

    try:
        print("[OdeNet Worker] Starting worker thread...")
        odenet_worker_state = OdeNetWorkerState.INITIALIZING

        if not odenet_download_wordnet_data():
            odenet_worker_state = OdeNetWorkerState.ERROR
            print("[OdeNet Worker] Failed to initialize")
            return

        print("[OdeNet Worker] Creating WordNet instance...")
        # The single shared instance; all lookups go through this thread.
        odenet_de_wn = wn.Wordnet('odenet:1.4')
        odenet_worker_state = OdeNetWorkerState.READY
        print("[OdeNet Worker] Ready to process requests")

        while True:
            try:
                # Short timeout keeps the loop responsive (it re-checks the
                # queue once per second instead of blocking indefinitely).
                item: OdeNetWorkItem = odenet_work_queue.get(timeout=1)

                try:
                    result = odenet_process_word_lookup(item.word)
                    item.response_queue.put(("success", result))
                except Exception as e:
                    # Report the failure to the waiting caller instead of
                    # letting the worker thread die.
                    traceback.print_exc()
                    item.response_queue.put(("error", str(e)))
                finally:
                    odenet_work_queue.task_done()

            except queue.Empty:
                continue

    except Exception as e:
        # Any error escaping the loop is fatal for the worker.
        print(f"[OdeNet Worker] Fatal error: {e}")
        traceback.print_exc()
        odenet_worker_state = OdeNetWorkerState.ERROR
|
| 608 |
+
|
| 609 |
+
def odenet_process_word_lookup(word: str) -> Dict[str, Any]:
    """
    Process a single word lookup. Runs in the worker thread.

    Returns either {"info": ...} when the input is empty or unknown, or a
    dict with "input_word" and a "senses" list describing synonyms,
    antonyms and semantic relations for each OdeNet sense of the word.
    """
    global odenet_de_wn

    if not word or not word.strip():
        return {"info": "No word provided to check."}

    # Lookups are performed case-insensitively on the trimmed word.
    word = word.strip().lower()

    senses = odenet_de_wn.senses(word)

    if not senses:
        return {"info": f"The word '{word}' was not found in the thesaurus."}

    # FIX: hoisted out of the per-sense loop — it was re-defined on every
    # iteration.  Collects the unique lemmas of the given synsets,
    # optionally excluding the query word itself (for synonym lists).
    def get_lemmas(synsets, remove_self=False):
        lemmas: Set[str] = set()
        for s in synsets:
            for lemma in s.lemmas():
                if not (remove_self and lemma == word):
                    lemmas.add(lemma)
        return sorted(list(lemmas))

    results: Dict[str, Any] = {
        "input_word": word,
        "senses": []
    }

    for sense in senses:
        synset = sense.synset()

        # Antonyms are sense-level relations; collection is best-effort
        # because not every sense carries antonym data.
        antonym_words: Set[str] = set()
        try:
            for ant_sense in sense.get_related('antonym'):
                antonym_words.add(ant_sense.word().lemma())
        except Exception:
            pass

        sense_info = {
            "pos": synset.pos,
            "definition": synset.definition() or "No definition available.",
            "synonyms": get_lemmas([synset], remove_self=True),
            "antonyms": sorted(list(antonym_words)),
            "hypernyms (is a type of)": get_lemmas(synset.hypernyms()),
            "hyponyms (examples are)": get_lemmas(synset.hyponyms()),
            "holonyms (is part of)": get_lemmas(synset.holonyms()),
            "meronyms (has parts)": get_lemmas(synset.meronyms()),
        }

        results["senses"].append(sense_info)

    print(f"[OdeNet Worker] Found {len(results['senses'])} senses for '{word}'")
    return results
|
| 663 |
+
|
| 664 |
+
def odenet_start_worker():
    """Start the worker thread if not already started.

    Idempotent: only the first call spawns the thread.  Blocks up to 30
    seconds waiting for the worker to reach READY and raises Exception if
    initialization fails or times out.
    """
    global odenet_worker_thread, odenet_worker_state

    if odenet_worker_state != OdeNetWorkerState.NOT_STARTED:
        return

    if not WN_AVAILABLE:
        print("[OdeNet] 'wn' library not available. Worker will not be started.")
        odenet_worker_state = OdeNetWorkerState.ERROR
        return

    odenet_worker_thread = threading.Thread(target=odenet_worker_loop, daemon=True, name="OdeNetWorker")
    odenet_worker_thread.start()

    # Poll for readiness in 0.1 s steps.
    # FIX: reuse one Event as a sleep primitive instead of allocating a
    # fresh threading.Event() on every loop iteration.
    timeout = 30  # seconds
    pause = threading.Event()
    for _ in range(timeout * 10):
        if odenet_worker_state in (OdeNetWorkerState.READY, OdeNetWorkerState.ERROR):
            break
        pause.wait(0.1)

    if odenet_worker_state != OdeNetWorkerState.READY:
        raise Exception("OdeNet Worker failed to initialize")
|
| 687 |
+
|
| 688 |
+
# --- Public API (Called by Gradio) ---
|
| 689 |
+
def odenet_get_thesaurus_info(word: str) -> Dict[str, Any]:
    """
    Public API: Finds thesaurus info for a German word. Thread-safe.

    Hands the lookup to the single OdeNet worker thread via the shared
    work queue and waits (up to 30 s) for the reply on a private queue.
    """
    if not WN_AVAILABLE:
        return {"error": "WordNet (wn) library is not available."}

    if odenet_worker_state != OdeNetWorkerState.READY:
        return {"error": "WordNet service is not ready yet. Please try again in a moment."}

    try:
        reply: queue.Queue = queue.Queue()
        odenet_work_queue.put(OdeNetWorkItem(word=word, response_queue=reply))

        try:
            status, payload = reply.get(timeout=30)
        except queue.Empty:
            return {"error": "Request timed out"}

        if status == "success":
            return payload
        return {"error": f"Lookup failed: {payload}"}

    except Exception as e:
        traceback.print_exc()
        return {"error": f"An unexpected error occurred: {str(e)}"}
|
| 717 |
+
|
| 718 |
+
# ============================================================================
|
| 719 |
+
# 6. PATTERN INFLECTION LOGIC (from pattern-app.py)
|
| 720 |
+
# ============================================================================
|
| 721 |
+
|
| 722 |
+
# --- Word Type Detection ---
|
| 723 |
+
def pattern_detect_word_type(word: str) -> Dict[str, Any]:
    """
    Use pattern.de's parser as a hint.

    Returns {'pos': tag-or-None, 'lemma': str, 'type': 'noun'|'verb'|
    'adjective'|'unknown'|None}.  'unknown' means the word was empty,
    pure punctuation, or the pattern library is missing; None means the
    parser ran but did not classify the word as a content type.
    """
    if not PATTERN_DE_AVAILABLE:
        return {'pos': None, 'lemma': word, 'type': 'unknown'}

    # --- FIX: Replaced special ellipsis '…' with '...' ---
    # Reject empty input and tokens made only of punctuation characters.
    if not word or not word.strip() or all(ch in ".,;:!?()[]{}-–—'.../\|" for ch in word):
        return {'pos': None, 'lemma': word, 'type': 'unknown'}
    word_norm = word.strip()

    log(f"Detecting type for: {word_norm}")
    parser_result = {'pos': None, 'lemma': word_norm, 'type': None}

    try:
        parsed = parse(word_norm, lemmata=True)
        for sentence in split(parsed):
            # --- Robust attribute access ---
            # Only the first word of the (single-token) input is inspected.
            if hasattr(sentence, "words") and sentence.words:
                w = sentence.words[0]
                w_type = getattr(w, "type", None) or getattr(w, "pos", None)
                w_lemma = (getattr(w, "lemma", None) or word_norm)

                # Ignore common German function POS from pattern.de
                non_content_prefixes = ("DT","ART","IN","APPR","APPRART","APPO","APZR","PTK","PRP","PPER","PPOS","PDS","PIS","KOUI","KON","$,","$.")
                if w_type and any(w_type.startswith(p) for p in non_content_prefixes):
                    # Function word: report the tag but no content type.
                    return {'pos': w_type, 'lemma': w_lemma, 'type': None}

                parser_result['pos'] = w_type or ""
                parser_result['lemma'] = w_lemma

                # Map Penn-style tag prefixes onto coarse content types.
                if w_type and w_type.startswith('NN'):
                    parser_result['type'] = 'noun'
                elif w_type and w_type.startswith('VB'):
                    parser_result['type'] = 'verb'
                elif w_type and w_type.startswith('JJ'):
                    parser_result['type'] = 'adjective'

                log(f" Parser says: POS={w_type}, lemma={w_lemma}, type={parser_result['type']}")
            # --- End robust access ---
    except Exception as e:
        # Parser failure is non-fatal: the caller gets the default result.
        log(f" Parser failed: {e}")

    return parser_result
|
| 768 |
+
|
| 769 |
+
def pattern_is_good_analysis(analysis, analysis_type):
    """Check if an analysis has meaningful data."""
    if not analysis:
        return False

    if analysis_type == 'noun':
        # A usable noun analysis carries at least four declension entries.
        return len(analysis.get('declension', {})) >= 4

    if analysis_type == 'verb':
        # Require a mostly complete present tense whose forms actually vary
        # (all-identical forms indicate a degenerate conjugation).
        praesens = analysis.get('conjugation', {}).get('Präsens', {})
        return len(praesens) >= 4 and len(set(praesens.values())) >= 2

    if analysis_type == 'adjective':
        # Any attributive paradigm at all counts as meaningful.
        return len(analysis.get('attributive', {})) > 0

    return False
|
| 787 |
+
|
| 788 |
+
# --- Inflection Generators ---
|
| 789 |
+
def pattern_analyze_as_noun(word: str, hint_lemma: str = None) -> Dict[str, Any]:
    """Comprehensive noun inflection analysis.

    Determines a base form (via pluralize/singularize round-trips or the
    parser's lemma hint), guesses grammatical gender, and builds a full
    definite/indefinite/bare declension table.  Returns None when no base
    form can be determined.
    """
    log(f" Analyzing as noun (hint_lemma={hint_lemma})")
    analysis = {}

    singular = singularize(word)
    plural = pluralize(word)
    log(f" singularize({word}) = {singular}")
    log(f" pluralize({word}) = {plural}")

    # Heuristic base-form detection: a word that changes under both
    # operations is assumed to already be the singular base form.
    if plural != word and singular != word:
        base = word
        log(f" Word changes when pluralized => base = {base}")
    elif singular != word:
        base = singular
        log(f" Word changes when singularized => base = {base}")
    elif hint_lemma and hint_lemma != word:
        base = hint_lemma
        log(f" Using hint lemma => base = {base}")
    else:
        log(f" Cannot determine base form")
        return None

    g = gender(base, pos=NOUN)
    log(f" gender({base}) = {g}")

    if g is None:
        # Unknown gender: default so article generation still works.
        g = MALE
        log(f" Gender unknown, defaulting to MALE")

    gender_str = {MALE: "Masculine", FEMALE: "Feminine", NEUTRAL: "Neuter"}.get(g, "Unknown")

    analysis["base_form"] = base
    analysis["gender"] = gender_str
    analysis["plural"] = pluralize(base)
    analysis["singular"] = base
    analysis["declension"] = {}

    # Build the 2 (number) x 4 (case) declension table.
    for number, number_name in [(SINGULAR, "Singular"), (PLURAL, "Plural")]:
        word_form = base if number == SINGULAR else pluralize(base)
        # --- FIX: Capitalize noun form for presentation ---
        word_form_cap = word_form.capitalize()
        # pattern.de's article() takes PLURAL in place of a gender for
        # plural forms.
        gender_for_article = g if number == SINGULAR else PLURAL

        for case, case_name in [(NOMINATIVE, "Nominativ"), (ACCUSATIVE, "Akkusativ"),
                                (DATIVE, "Dativ"), (GENITIVE, "Genitiv")]:
            try:
                def_art = article(word_form, DEFINITE, gender_for_article, case)
                indef_art = article(word_form, INDEFINITE, gender_for_article, case)

                indef_form = f"{indef_art} {word_form_cap}" if indef_art else word_form_cap

                # --- FIX: German has no plural indefinite article, set to '—' ---
                if number == PLURAL:
                    indef_form = "—"

                analysis["declension"][f"{case_name} {number_name}"] = {
                    "definite": f"{def_art} {word_form_cap}" if def_art else word_form_cap,
                    "indefinite": indef_form,
                    "bare": word_form_cap
                }
            except Exception as e:
                # Missing article data for one cell leaves that cell out.
                log(f" Failed to get article for {case_name} {number_name}: {e}")

    log(f" Generated {len(analysis.get('declension', {}))} declension forms")
    return analysis
|
| 855 |
+
|
| 856 |
+
def pattern_analyze_as_verb(word: str, hint_lemma: str = None) -> Dict[str, Any]:
    """Comprehensive verb conjugation analysis.

    Builds the infinitive, lexeme, conjugation tables (Präsens,
    Präteritum, Imperativ, Konjunktiv I/II) and participles via
    pattern.de's conjugate().  Returns None when the word does not
    conjugate like a verb (fewer than 4 present-tense forms).
    """
    log(f" Analyzing as verb (hint_lemma={hint_lemma})")

    verb_lemma = lemma(word)
    log(f" lemma({word}) = {verb_lemma}")

    # Fall back to the parser's lemma hint when pattern.de cannot produce
    # a lemma (or returns the word unchanged).
    if not verb_lemma or verb_lemma == word:
        if hint_lemma and hint_lemma != word:
            verb_lemma = hint_lemma
            log(f" Using hint lemma: {verb_lemma}")
        elif not verb_lemma:
            log(f" No lemma found")
            return None

    analysis = {"infinitive": verb_lemma}

    try:
        lex = lexeme(verb_lemma)
        if lex and len(lex) > 1:
            analysis["lexeme"] = lex
            log(f" lexeme has {len(lex)} forms")
    except Exception as e:
        log(f" Failed to get lexeme: {e}")

    def fill_table(table, aliases):
        # Conjugate verb_lemma for each (alias, pronoun) pair, storing
        # successful forms under the pronoun label; returns how many
        # forms were produced.
        filled = 0
        for alias, name in aliases:
            try:
                form = conjugate(verb_lemma, alias)
            except Exception as e:  # FIX: was a bare `except:`
                log(f" Failed conjugate({verb_lemma}, {alias}): {e}")
                continue
            if form:
                table[name] = form
                filled += 1
        return filled

    analysis["conjugation"] = {}
    analysis["conjugation"]["Präsens"] = {}
    present_count = fill_table(
        analysis["conjugation"]["Präsens"],
        [("1sg", "ich"), ("2sg", "du"), ("3sg", "er/sie/es"),
         ("1pl", "wir"), ("2pl", "ihr"), ("3pl", "sie/Sie")])

    log(f" Generated {present_count} present tense forms")
    if present_count < 4:
        # Words that barely conjugate are rejected as non-verbs.
        log(f" Too few present forms, not a valid verb")
        return None

    analysis["conjugation"]["Präteritum"] = {}
    fill_table(analysis["conjugation"]["Präteritum"],
               [("1sgp", "ich"), ("2sgp", "du"), ("3sgp", "er/sie/es"),
                ("1ppl", "wir"), ("2ppl", "ihr"), ("3ppl", "sie/Sie")])

    # Participles use dedicated aliases and their own labels.
    analysis["participles"] = {}
    for alias, label in [("part", "Partizip Präsens"), ("ppart", "Partizip Perfekt")]:
        try:
            form = conjugate(verb_lemma, alias)
            if form:
                analysis["participles"][label] = form
        except Exception:  # FIX: was a bare `except:`
            pass

    analysis["conjugation"]["Imperativ"] = {}
    fill_table(analysis["conjugation"]["Imperativ"],
               [("2sg!", "du"), ("2pl!", "ihr")])

    analysis["conjugation"]["Konjunktiv I"] = {}
    fill_table(analysis["conjugation"]["Konjunktiv I"],
               [("1sg?", "ich"), ("2sg?", "du"), ("3sg?", "er/sie/es"),
                ("1pl?", "wir"), ("2pl?", "ihr"), ("3pl?", "sie/Sie")])

    analysis["conjugation"]["Konjunktiv II"] = {}
    fill_table(analysis["conjugation"]["Konjunktiv II"],
               [("1sgp?", "ich"), ("2sgp?", "du"), ("3sgp?", "er/sie/es"),
                ("1ppl?", "wir"), ("2ppl?", "ihr"), ("3ppl?", "sie/Sie")])

    return analysis
|
| 942 |
+
|
| 943 |
+
def pattern_analyze_as_adjective(word: str, hint_lemma: str = None) -> Dict[str, Any]:
    """Comprehensive adjective inflection analysis.

    Builds predicative, comparative and superlative forms plus the full
    attributive paradigm (Strong/Mixed/Weak x gender x case).  Returns
    None when no attributive form can be generated at all.
    """
    log(f" Analyzing as adjective (hint_lemma={hint_lemma})")

    base = predicative(word)
    log(f" predicative({word}) = {base}")

    # If predicative() was effectively a no-op, fall back to the parser's
    # lemma hint.
    if base == word.lower() and hint_lemma and hint_lemma != word:
        base = hint_lemma
        log(f" Using hint lemma: {base}")

    analysis = {}
    analysis["predicative"] = base
    analysis["comparative"] = comparative(base)
    analysis["superlative"] = superlative(base)
    log(f" comparative = {analysis['comparative']}")
    log(f" superlative = {analysis['superlative']}")

    analysis["attributive"] = {}
    attr_count = 0
    for article_type, article_name in [(None, "Strong"), (INDEFINITE, "Mixed"), (DEFINITE, "Weak")]:
        analysis["attributive"][article_name] = {}
        # FIX: loop variable renamed from `gender` to `gen` so it no longer
        # shadows the imported pattern.de gender() function.
        for gen, gender_name in [(MALE, "Masculine"), (FEMALE, "Feminine"),
                                 (NEUTRAL, "Neuter"), (PLURAL, "Plural")]:
            analysis["attributive"][article_name][gender_name] = {}
            for case, case_name in [(NOMINATIVE, "Nom"), (ACCUSATIVE, "Acc"),
                                    (DATIVE, "Dat"), (GENITIVE, "Gen")]:
                try:
                    attr_form = attributive(base, gen, case, article_type)
                    if article_type:
                        # "_" is a placeholder noun just to obtain the article.
                        art = article("_", article_type, gen, case)
                        full_form = f"{art} {attr_form} [Noun]" if art else f"{attr_form} [Noun]"
                    else:
                        full_form = f"{attr_form} [Noun]"
                    analysis["attributive"][article_name][gender_name][case_name] = {
                        "form": attr_form, "example": full_form
                    }
                    attr_count += 1
                except Exception as e:
                    log(f" Failed attributive for {article_name}/{gender_name}/{case_name}: {e}")

    log(f" Generated {attr_count} attributive forms")
    if attr_count == 0:
        return None
    return analysis
|
| 988 |
+
|
| 989 |
+
# --- Public API (Called by Gradio) ---
|
| 990 |
+
def _pattern_run_analysis(word: str, lemma_hint: str, analysis_type: str):
    """Run one analyzer ('noun'|'verb'|'adjective') on `word` and return its
    result only if pattern_is_good_analysis accepts it, else None."""
    analyzers = {
        'noun': pattern_analyze_as_noun,
        'verb': pattern_analyze_as_verb,
        'adjective': pattern_analyze_as_adjective,
    }
    result = analyzers[analysis_type](word, lemma_hint)
    if result and pattern_is_good_analysis(result, analysis_type):
        return result
    return None

def pattern_get_all_inflections(word: str) -> Dict[str, Any]:
    """
    Generates ALL possible inflections for a German word.

    Uses pattern.de's parser to guess the word type; when the type is
    known, only that analyzer runs, otherwise all three are attempted.
    Returns {"input_word", "parser_hint", "analyses"} (plus "info" when
    nothing succeeded) or {"error"/"info"} on failure/empty input.
    """
    if not PATTERN_DE_AVAILABLE:
        return {"error": "`PatternLite` library not available."}

    if not word or not word.strip():
        return {"info": "Please enter a word."}

    word = word.strip()
    log("="*70); log(f"ANALYZING: {word}"); log("="*70)

    detection = pattern_detect_word_type(word)
    results: Dict[str, Any] = {
        "input_word": word,
        "parser_hint": {
            "pos": detection['pos'],
            "lemma": detection['lemma'],
            "type": detection['type']
        },
        "analyses": {}
    }

    try:
        detected_type = detection['type']

        # FIX: the per-type branches and the unknown-type fallback used to
        # duplicate the analyze-then-validate logic; it now lives in
        # _pattern_run_analysis.
        if detected_type in ('noun', 'verb', 'adjective'):
            # Trust the parser: run only the analyzer it suggested.
            log(f"\n--- {detected_type.upper()} DETECTED - Analyzing as {detected_type} ---")
            analysis = _pattern_run_analysis(word, detection['lemma'], detected_type)
            if analysis:
                log(f"✓ {detected_type.capitalize()} analysis successful")
                results["analyses"][detected_type] = analysis
            else:
                log(f"✗ {detected_type.capitalize()} analysis failed")
        else:
            # Type unknown: try every analyzer and keep all that succeed.
            log("\n--- TYPE UNKNOWN - Trying all analyses ---")
            for analysis_type in ('noun', 'verb', 'adjective'):
                analysis = _pattern_run_analysis(word, detection['lemma'], analysis_type)
                if analysis:
                    log(f"✓ {analysis_type.capitalize()} analysis is good")
                    results["analyses"][analysis_type] = analysis

        if not results["analyses"]:
            results["info"] = "Word could not be analyzed."

        log(f"\nFinal result: {len(results['analyses'])} analysis/analyses")
        return results

    except Exception as e:
        log(f"\nERROR: {e}")
        traceback.print_exc()
        return {"error": f"An unexpected error occurred: {str(e)}"}
|
| 1068 |
+
|
| 1069 |
+
# ============================================================================
|
| 1070 |
+
# 7. NEW: COMBINED ANALYZER LOGIC
|
| 1071 |
+
# ============================================================================
|
| 1072 |
+
|
| 1073 |
+
def comprehensive_german_analysis(text: str) -> Dict[str, Any]:
    """
    Combines all available tools for a deep analysis of German text.

    Pipeline: (1) LanguageTool grammar check, (2) spaCy morpho-syntactic
    analysis, (2b) a heuristic subject–verb-agreement check when
    LanguageTool reported no errors, (3) a per-token deep dive using
    pattern.de inflections and the OdeNet thesaurus.
    """
    if not text or not text.strip():
        return {"info": "Please enter text to analyze."}

    print(f"\n[Comprehensive Analysis] Starting analysis for: \"{text}\"")
    results: Dict[str, Any] = {"input_text": text}

    # --- 1. LanguageTool Grammar Check ---
    print("[Comprehensive Analysis] Running LanguageTool...")
    if LT_AVAILABLE:
        try:
            results["grammar_check"] = lt_check_grammar(text)
        except Exception as e:
            results["grammar_check"] = {"error": f"LanguageTool failed: {e}"}
    else:
        results["grammar_check"] = {"error": "LanguageTool not available."}

    # --- 2. spaCy Morpho-Syntactic Backbone ---
    print("[Comprehensive Analysis] Running spaCy...")
    spacy_json_output = []
    # --- Always call spacy_get_analysis to handle lazy-loading ---
    try:
        # Only the JSON token list from the 5-tuple is needed here.
        _, spacy_json, _, _, _ = spacy_get_analysis("en", "de", text)
        if isinstance(spacy_json, list):
            spacy_json_output = spacy_json
            results["spacy_analysis"] = spacy_json_output
        else:
            results["spacy_analysis"] = spacy_json  # Could be an error dict
    except Exception as e:
        results["spacy_analysis"] = {"error": f"spaCy analysis failed: {e}"}
    # --- End lazy-load call ---

    # --- 2b. Heuristic SVA check if LT missed it ---
    # Only runs when LanguageTool reported a "perfect" sentence; looks for
    # a singular subject combined with a plural finite verb.
    try:
        if isinstance(results.get("grammar_check"), list) and any(d.get("status") == "perfect" for d in results["grammar_check"]):
            # Find subject and finite verb
            # NOTE(review): only the LAST matching subject and LAST finite
            # verb in the token stream are kept — multi-clause sentences may
            # pair the wrong subject/verb.
            subj_num = None
            verb_num = None
            verb_token = None
            subj_token = None
            for tok in spacy_json_output:
                if tok.get("dependency") in {"sb", "nsubj"}:
                    m = tok.get("morphology","")
                    if "Number=Sing" in m:
                        subj_num = "Sing"
                        subj_token = tok

                spacy_pos_up = (tok.get("pos") or "").upper()
                if (spacy_pos_up in {"VERB", "AUX"}) and ("VerbForm=Fin" in tok.get("morphology","")):
                    verb_token = tok
                    m = tok.get("morphology","")
                    if "Number=Plur" in m:
                        verb_num = "Plur"

            if subj_num == "Sing" and verb_num == "Plur":
                # --- Build suggestions ---
                # Two candidate corrections: conjugate the verb to 3sg, or
                # pluralize the subject; each is a full replacement sentence.
                corrected_sentence_sg = None
                corrected_sentence_pl = None
                replacements = []

                v_lemma = verb_token.get("lemma")
                v_word = verb_token.get("word")
                v_3sg = _conjugate_to_person_number(v_lemma, "3", "sg") if v_lemma else None
                if v_3sg and v_word:
                    # Replace only the first occurrence of the verb form.
                    corrected_sentence_sg = text.replace(v_word, v_3sg, 1)
                    replacements.append(corrected_sentence_sg)

                subj_word = subj_token.get("word") if subj_token else None
                subj_pl = None
                if subj_word and PATTERN_DE_AVAILABLE:
                    try:
                        subj_pl = pluralize(subj_word)
                    except Exception:
                        subj_pl = None

                if subj_word and subj_pl and subj_pl != subj_word:
                    corrected_sentence_pl = text.replace(subj_word, subj_pl, 1)
                    replacements.append(corrected_sentence_pl)
                # --- End build suggestions ---

                # Synthetic match dict shaped like a LanguageTool result.
                sva = {
                    "message": "Möglicher Kongruenzfehler: Singular-Subjekt mit pluralischer Verbform.",
                    "rule_id": "HEURISTIC_SUBJ_VERB_AGREEMENT",
                    "category": "Grammar",
                    "incorrect_text": f"{verb_token.get('word')}" if verb_token else "",
                    "replacements": replacements,
                    "offset": None,
                    "length": None,
                    "context": None,
                    "short_message": "Subjekt–Verb-Kongruenz"
                }
                # Replaces the previous "perfect" grammar_check result.
                results["grammar_check"] = [sva]
    except Exception as e:
        print(f"SVA Heuristic failed: {e}") # Don't crash, just log
        pass

    # --- 3. Token-by-Token Deep Dive (Pattern & OdeNet) ---
    print("[Comprehensive Analysis] Running Token Deep Dive...")
    # Universal POS tags treated as function words / non-content tokens.
    FUNCTION_POS = {"DET","ADP","AUX","PUNCT","SCONJ","CCONJ","PART","PRON","NUM","SYM","X"}
    deep_dive = []

    if not spacy_json_output:
        print("[Comprehensive Analysis] No spaCy tokens to analyze. Skipping deep dive.")
    else:
        for token in spacy_json_output:
            word = token.get("word")
            lemma = token.get("lemma")
            pos = (token.get("pos") or "").upper()
            if not word:
                continue

            print(f"[Deep Dive] Analyzing token: '{word}' (Lemma: '{lemma}')")
            token_analysis = {"word": word, "spacy_pos": pos, "spacy_lemma": lemma}

            # "--" appears to be spaCy's placeholder for a missing lemma.
            skip_for_pattern = pos in FUNCTION_POS
            skip_for_wn = pos in FUNCTION_POS or lemma in {None, "", "--"}

            # 3a. Pattern.de Inflection Analysis
            if PATTERN_DE_AVAILABLE and not skip_for_pattern:
                try:
                    # --- FIX: Use spaCy POS to guide Pattern analysis ---
                    desired_type = None
                    if pos in {"VERB","AUX"}: desired_type = "verb"
                    elif pos in {"ADJ","ADV"}: desired_type = "adjective"
                    elif pos in {"NOUN","PROPN"}: desired_type = "noun"

                    # Verbs/adjectives inflect from the lemma; nouns from the
                    # surface form.
                    pattern_word = lemma if desired_type in {"verb","adjective"} and lemma and lemma != "--" else word

                    pattern_info = pattern_get_all_inflections(pattern_word)
                    token_analysis["pattern_hint"] = pattern_info.get("parser_hint", {})

                    analyses = pattern_info.get("analyses", {})
                    if desired_type and desired_type in analyses:
                        # Filter to only the analysis that matches spaCy's POS
                        token_analysis["pattern_analyses"] = { desired_type: analyses[desired_type] }
                    else:
                        # Otherwise, show all (or none if empty)
                        token_analysis["pattern_analyses"] = analyses
                    # --- End Fix ---

                except Exception as e:
                    token_analysis["pattern_analyses"] = {"error": f"Pattern.de failed: {e}"}
            else:
                token_analysis["pattern_analyses"] = {"info": "Skipped (function word or non-content POS)"}

            # 3b. OdeNet Thesaurus (using the lemma)
            lookup_word = lemma if lemma and lemma != "--" else word # Use lemma, but fallback to word
            if WN_AVAILABLE and not skip_for_wn:
                try:
                    thesaurus_info = odenet_get_thesaurus_info(lookup_word)
                    token_analysis["thesaurus_senses"] = thesaurus_info.get("senses", [])
                    if not token_analysis["thesaurus_senses"]:
                        token_analysis["thesaurus_info"] = thesaurus_info.get("info", "No senses found.")
                except Exception as e:
                    token_analysis["thesaurus_senses"] = {"error": f"OdeNet failed: {e}"}
            else:
                token_analysis["thesaurus_senses"] = []
                token_analysis["thesaurus_info"] = "Skipped (function word or missing lemma)."

            deep_dive.append(token_analysis)

    results["token_deep_dive"] = deep_dive
    print("[Comprehensive Analysis] Analysis complete.")
    return results
|
| 1240 |
+
|
| 1241 |
+
# ============================================================================
|
| 1242 |
+
# 8. GRADIO UI CREATION
|
| 1243 |
+
# ============================================================================
|
| 1244 |
+
|
| 1245 |
+
def create_spacy_tab():
    """Builds the multilingual spaCy analysis tab.

    Components are created in render order. The UI-language radio drives
    `spacy_update_ui`, which re-labels every component in this tab; the
    analyze button runs `spacy_get_analysis` under the preserved
    `get_morphology` API name.
    """
    labels = SPACY_UI_TEXT["en"]
    available_models = list(SPACY_MODEL_INFO.keys())

    with gr.Row():
        ui_lang = gr.Radio(["DE", "EN", "ES"], label=labels["ui_lang_label"], value="EN")
        model_lang = gr.Radio(
            choices=[(SPACY_MODEL_INFO[key][0], key) for key in available_models],
            label=labels["model_lang_label"],
            value=available_models[0],
        )

    title_md = gr.Markdown(labels["title"])
    subtitle_md = gr.Markdown(labels["subtitle"])
    sentence_box = gr.Textbox(label=labels["input_label"], placeholder=labels["input_placeholder"], lines=5)
    run_button = gr.Button(labels["button_text"], variant="primary")

    with gr.Tabs():
        with gr.Tab(labels["tab_graphic"]) as graphic_tab:
            dep_html = gr.HTML(label=labels["html_label"])
        with gr.Tab(labels["tab_ner"]) as ner_tab:
            ner_html = gr.HTML(label=labels["ner_label"])
        with gr.Tab(labels["tab_table"]) as table_tab:
            token_table = gr.DataFrame(label=labels["table_label"], headers=labels["table_headers"], interactive=False)
        with gr.Tab(labels["tab_json"]) as json_tab:
            json_view = gr.JSON(label=labels["json_label"])

    run_button.click(
        fn=spacy_get_analysis,
        inputs=[ui_lang, model_lang, sentence_box],
        outputs=[token_table, json_view, dep_html, ner_html, run_button],
        api_name="get_morphology",  # Preserved original API name
    )

    # Output order must match what spacy_update_ui returns.
    ui_lang.change(
        fn=spacy_update_ui,
        inputs=ui_lang,
        outputs=[
            title_md, subtitle_md, ui_lang, model_lang,
            sentence_box, run_button, graphic_tab, table_tab, json_tab, ner_tab,
            dep_html, token_table, json_view, ner_html,
        ],
    )
def create_languagetool_tab():
    """Builds the German grammar/spelling checker tab (LanguageTool).

    One textbox is wired through `lt_check_grammar`; the handler keeps its
    historical `check_grammar` API name.
    """
    gr.Markdown("# 🇩🇪 German Grammar & Spelling Checker")
    gr.Markdown("Powered by `language-tool-python`. This service checks German text for grammatical errors and spelling mistakes.")

    with gr.Column():
        grammar_input = gr.Textbox(
            label="German Text to Check",
            placeholder="e.g., Ich sehe dem Mann. Das ist ein Huas.",
            lines=5,
        )
        run_check = gr.Button("Check Text", variant="primary")

    errors_json = gr.JSON(label="Detected Errors (JSON)")

    run_check.click(
        fn=lt_check_grammar,
        inputs=[grammar_input],
        outputs=[errors_json],
        api_name="check_grammar",  # Preserved original API name
    )

    # Sample sentences containing typical spelling/agreement/comma errors.
    sample_sentences = [
        ["Das ist ein Huas."],
        ["Ich sehe dem Mann."],
        ["Die Katze schlafen auf dem Tisch."],
        ["Er fragt ob er gehen kann."],
    ]
    gr.Examples(sample_sentences, inputs=[grammar_input], outputs=[errors_json], fn=lt_check_grammar)
def create_odenet_tab():
    """Builds the German thesaurus tab backed by OdeNet/WordNet.

    A single word input is wired through `odenet_get_thesaurus_info`; the
    handler keeps its historical `get_thesaurus` API name.
    """
    gr.Markdown("# 🇩🇪 German Thesaurus (WordNet) Service")
    gr.Markdown("Powered by `wn` and `OdeNet (odenet:1.4)`. Finds synonyms, antonyms, and other semantic relations for German words.")

    with gr.Column():
        query_word = gr.Textbox(
            label="German Word",
            placeholder="e.g., Haus, schnell, gut, Katze",
        )
        lookup_button = gr.Button("Find Relations", variant="primary")

    relations_json = gr.JSON(label="Thesaurus Information (JSON)")

    lookup_button.click(
        fn=odenet_get_thesaurus_info,
        inputs=[query_word],
        outputs=[relations_json],
        api_name="get_thesaurus",  # Preserved original API name
    )

    # A mix of nouns, adjectives, and verbs to showcase different relations.
    sample_words = [["Hund"], ["gut"], ["laufen"], ["Haus"], ["schnell"]]
    gr.Examples(sample_words, inputs=[query_word], outputs=[relations_json], fn=odenet_get_thesaurus_info)
def create_pattern_tab():
    """Builds the German inflection-table tab backed by Pattern.de.

    A single word input is wired through `pattern_get_all_inflections`; the
    handler keeps its historical `get_all_inflections` API name.
    """
    gr.Markdown("# 🇩🇪 Complete German Word Inflection System")
    gr.Markdown("Powered by `PatternLite`. Generates complete inflection tables (declension, conjugation) for German words.")

    with gr.Column():
        inflect_word = gr.Textbox(
            label="German Word",
            placeholder="z.B. Haus, gehen, schön, besser, lief",
        )
        inflect_button = gr.Button("Generate All Forms", variant="primary")

    inflections_json = gr.JSON(label="Complete Inflection Analysis")

    inflect_button.click(
        fn=pattern_get_all_inflections,
        inputs=[inflect_word],
        outputs=[inflections_json],
        api_name="get_all_inflections",  # Preserved original API name
    )

    # Includes base forms as well as already-inflected forms (ging, schnellem).
    sample_words = [["Haus"], ["gehen"], ["schön"], ["besser"], ["ging"], ["schnellem"], ["Katze"]]
    gr.Examples(sample_words, inputs=[inflect_word], outputs=[inflections_json], fn=pattern_get_all_inflections)
def create_combined_tab():
    """Builds the combined (all-libraries) German analyzer tab.

    Runs `comprehensive_german_analysis` on free text and exposes it under
    the `comprehensive_analysis` API endpoint.
    """
    gr.Markdown("# 🇩🇪 Comprehensive German Text Analyzer")
    gr.Markdown("This tool combines all available libraries (spaCy, LanguageTool, Pattern, OdeNet) to give a deep analysis of a German text. Results are in JSON format.")
    with gr.Column():
        analysis_input = gr.Textbox(
            label="German Text",
            placeholder="e.g., Die schnelle Katze springt über den faulen Hund.",
            lines=5,
        )
        run_analysis = gr.Button("Run Comprehensive Analysis", variant="primary")

    analysis_json = gr.JSON(label="Comprehensive Analysis (JSON)")

    run_analysis.click(
        fn=comprehensive_german_analysis,
        inputs=[analysis_input],
        outputs=[analysis_json],
        api_name="comprehensive_analysis",  # New API endpoint
    )

    sample_texts = [
        ["Die Katze schlafen auf dem Tisch."],
        ["Das ist ein Huas."],
        ["Ich laufe schnell."],
    ]
    gr.Examples(sample_texts, inputs=[analysis_input], outputs=[analysis_json], fn=comprehensive_german_analysis)
+
# --- Main UI Builder ---
|
| 1391 |
+
def create_consolidated_interface():
    """Assembles the full Gradio app: one tab per linguistic tool.

    Returns:
        The `gr.Blocks` instance, ready for `.launch()`.
    """
    with gr.Blocks(title="Consolidated Linguistics Hub", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🏛️ Consolidated Linguistics Hub")
        gr.Markdown("One interface for all linguistic tools: spaCy, LanguageTool, Pattern.de, and OdeNet.")

        # (tab label, builder) pairs in display order; the combined
        # analyzer is deliberately listed first.
        tab_builders = [
            ("🚀 Comprehensive Analyzer (DE)", create_combined_tab),
            ("🔬 spaCy Analyzer (Multi-lingual)", create_spacy_tab),
            ("✅ Grammar Check (DE)", create_languagetool_tab),
            ("📚 Inflections (DE)", create_pattern_tab),
            ("📖 Thesaurus (DE)", create_odenet_tab),
        ]
        with gr.Tabs():
            for tab_label, build_tab in tab_builders:
                with gr.Tab(tab_label):
                    build_tab()

    return demo
|
| 1417 |
+
# ============================================================================
|
| 1418 |
+
# 9. MAIN EXECUTION BLOCK
|
| 1419 |
+
# ============================================================================
|
| 1420 |
+
|
| 1421 |
+
if __name__ == "__main__":
    banner = "=" * 70
    print("\n" + banner)
    print("CONSOLIDATED LINGUISTICS HUB (STARTING)")
    print(banner + "\n")

    # 1. spaCy models are loaded eagerly so the first request is fast.
    print("--- Initializing spaCy Models ---")
    spacy_initialize_models()
    print("--- spaCy Done ---\n")

    # 2. OdeNet runs via a worker; start it only if the `wn` library loaded.
    print("--- Initializing OdeNet Worker ---")
    if not WN_AVAILABLE:
        print("INFO: OdeNet ('wn') library not available, skipping worker.")
    else:
        try:
            odenet_start_worker()
            print("✓ OdeNet worker is starting/ready.")
        except Exception as e:
            # Non-fatal: the app still launches, but two tabs degrade.
            print(f"✗ FAILED to start OdeNet worker: {e}")
            print(" The 'Thesaurus' and 'Comprehensive' tabs may fail.")
    print("--- OdeNet Done ---\n")

    # 3. LanguageTool lazy-loads on first use; only report availability here.
    print("--- Checking LanguageTool ---")
    if LT_AVAILABLE:
        print("✓ LanguageTool library is available (will lazy-load on first use).")
    else:
        print("WARNING: language-tool-python not available. 'Grammar' tab will fail.")
    print("--- LanguageTool Done ---\n")

    # 4. Pattern.de availability check.
    print("--- Checking Pattern.de ---")
    if PATTERN_DE_AVAILABLE:
        print("✓ Pattern.de library is available.")
    else:
        print("WARNING: pattern.de library not available. 'Inflections' tab will fail.")
    print("--- Pattern.de Done ---\n")

    print(banner)
    print("All services initialized. Launching Gradio Hub...")
    print(banner + "\n")

    # 5. Bind to all interfaces so the app is reachable inside a container.
    demo = create_consolidated_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)