cstr committed on
Commit
ed93d76
·
verified ·
1 Parent(s): 5c5cf64

update intro, singleton conceptnet client

Browse files
Files changed (1) hide show
  1. app.py +92 -16
app.py CHANGED
@@ -1,25 +1,62 @@
1
  # ============================================================================
2
- # GERMAN LINGUISTICS HUB (CONSOLIDATED APP V3)
3
  #
4
  # This script combines multiple NLP tools into a single Gradio interface.
5
  #
 
6
  # TABS & FUNCTIONALITY:
7
- # 1. Comprehensive Analyzer (DE):
8
- # - CONTEXTUAL analysis of full sentences.
9
- # - Ranks all semantics by relevance to the sentence.
10
- # 2. Word Encyclopedia (DE): (NEW!)
 
11
  # - NON-CONTEXTUAL analysis of single words.
12
- # - Finds ALL grammatical (Pattern) and semantic (OdeNet, ConceptNet)
13
- # possibilities, cross-validated and grouped by Part-of-Speech.
14
- # - Ideal for enriching word lists.
15
- # 3. spaCy Analyzer (Multi-lingual): Direct spaCy output.
16
- # 4. Grammar Check (DE): LanguageTool.
17
- # 5. Inflections (DE): Direct Pattern.de output.
18
- # 6. Thesaurus (DE): Direct OdeNet output.
19
- # 7. ConceptNet (Direct): Direct ConceptNet API output.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # ============================================================================
21
 
22
-
23
  # ============================================================================
24
  # 1. CONSOLIDATED IMPORTS
25
  # ============================================================================
@@ -188,6 +225,9 @@ WIKTIONARY_AVAILABLE = False
188
  CONCEPTNET_CACHE: Dict[Tuple[str, str], Any] = {}
189
  CONCEPTNET_LOCK = threading.Lock()
190
 
 
 
 
191
  # --- HanTa Tagger Cache & Lock ---
192
  HANTA_TAGGER_INSTANCE: Optional[HanoverTagger] = None
193
  HANTA_TAGGER_LOCK = threading.Lock()
@@ -1320,6 +1360,29 @@ def word_appears_in_inflections(word: str, inflections: Dict[str, Any], pos_type
1320
  # ============================================================================
1321
  # 6b. CONCEPTNET HELPER LOGIC (V2 - ROBUST PARSER)
1322
  # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1323
  def conceptnet_get_relations(word: str, language: str = 'de') -> Dict[str, Any]:
1324
  """
1325
  Fetches relations from the cstr/conceptnet_normalized Gradio API.
@@ -1346,7 +1409,9 @@ def conceptnet_get_relations(word: str, language: str = 'de') -> Dict[str, Any]:
1346
 
1347
  try:
1348
  # --- 2. Call Gradio API ---
1349
- client = Client("cstr/conceptnet_normalized")
 
 
1350
 
1351
  selected_relations = [
1352
  "RelatedTo", "IsA", "PartOf", "HasA", "UsedFor",
@@ -3521,10 +3586,21 @@ if __name__ == "__main__":
3521
  print("✓ Requests library is available.")
3522
  print("--- Requests Done ---\n")
3523
 
 
 
 
 
 
 
 
 
 
 
 
3524
  print("="*70)
3525
  print("All services initialized. Launching Gradio Hub...")
3526
  print("="*70 + "\n")
3527
 
3528
- # --- 8. Launch Gradio ---
3529
  demo = create_consolidated_interface()
3530
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
1
  # ============================================================================
2
+ # GERMAN LINGUISTICS HUB (CONSOLIDATED APP V22)
3
  #
4
  # This script combines multiple NLP tools into a single Gradio interface.
5
  #
6
+ # ============================================================================
7
  # TABS & FUNCTIONALITY:
8
+ # ============================================================================
9
+ #
10
+ # --- PRIMARY TABS ---
11
+ #
12
+ # 1. Word Encyclopedia (DE):
13
  # - NON-CONTEXTUAL analysis of single words.
14
+ # - Multi-engine dispatcher with user selection and automatic fallback:
15
+ # (Wiktionary -> DWDSmor -> HanTa -> IWNLP)
16
+ # - Aggregates all grammatical (Wiktionary, Pattern) and semantic
17
+ # (Wiktionary, OdeNet, ConceptNet) possibilities, grouped by Part-of-Speech.
18
+ # - Validates and filters artifacts (e.g., "abgeschnitten", "lauf").
19
+ #
20
+ # 2. Comprehensive Analyzer (DE):
21
+ # - CONTEXTUAL analysis of full sentences.
22
+ # - Uses the Word Encyclopedia's dispatcher for robust lemma analysis.
23
+ # - Ranks all semantic senses (Wiktionary, OdeNet) by relevance to the sentence.
24
+ #
25
+ # --- STANDALONE TOOL TABS ---
26
+ #
27
+ # 3. spaCy Analyzer (Multi-lingual):
28
+ # - Direct, raw spaCy output (NER, POS, dependencies) for multiple languages.
29
+ #
30
+ # 4. Grammar Check (DE):
31
+ # - Direct LanguageTool output.
32
+ #
33
+ # --- RAW ENGINE TABS (for debugging & comparison) ---
34
+ #
35
+ # 5. Engine: Wiktionary (DE):
36
+ # - Standalone access to the Wiktionary DB (Primary) engine.
37
+ #
38
+ # 6. Engine: DWDSmor (DE):
39
+ # - Standalone access to the DWDSmor (Fallback 1) engine.
40
+ #
41
+ # 7. Engine: HanTa (DE):
42
+ # - Standalone access to the HanTa (Fallback 2) engine.
43
+ #
44
+ # 8. Engine: IWNLP-spaCy (DE):
45
+ # - Standalone access to the IWNLP-spaCy (Fallback 3) engine.
46
+ #
47
+ # --- RAW COMPONENT TABS (for debugging & comparison) ---
48
+ #
49
+ # 9. Component: Inflections (DE):
50
+ # - Direct access to the `pattern.de` library.
51
+ #
52
+ # 10. Component: Thesaurus (DE):
53
+ # - Direct access to the `OdeNet` library.
54
+ #
55
+ # 11. Component: ConceptNet (Direct):
56
+ # - Direct access to the ConceptNet API.
57
+ #
58
  # ============================================================================
59
 
 
60
  # ============================================================================
61
  # 1. CONSOLIDATED IMPORTS
62
  # ============================================================================
 
225
  CONCEPTNET_CACHE: Dict[Tuple[str, str], Any] = {}
226
  CONCEPTNET_LOCK = threading.Lock()
227
 
228
+ CONCEPTNET_CLIENT: Optional[Client] = None
229
+ CONCEPTNET_CLIENT_LOCK = threading.Lock()
230
+
231
  # --- HanTa Tagger Cache & Lock ---
232
  HANTA_TAGGER_INSTANCE: Optional[HanoverTagger] = None
233
  HANTA_TAGGER_LOCK = threading.Lock()
 
1360
  # ============================================================================
1361
  # 6b. CONCEPTNET HELPER LOGIC (V2 - ROBUST PARSER)
1362
  # ============================================================================
1363
+ def get_conceptnet_client() -> Optional[Client]:
1364
+ """ Thread-safe function to get a single instance of the Gradio Client. """
1365
+ global CONCEPTNET_CLIENT
1366
+ if not GRADIO_CLIENT_AVAILABLE:
1367
+ return None
1368
+
1369
+ if CONCEPTNET_CLIENT:
1370
+ return CONCEPTNET_CLIENT
1371
+
1372
+ with CONCEPTNET_CLIENT_LOCK:
1373
+ if CONCEPTNET_CLIENT:
1374
+ return CONCEPTNET_CLIENT
1375
+ try:
1376
+ print("Initializing Gradio Client for ConceptNet...")
1377
+ client = Client("cstr/conceptnet_normalized")
1378
+ print("✓ Gradio Client for ConceptNet initialized.")
1379
+ CONCEPTNET_CLIENT = client
1380
+ return CONCEPTNET_CLIENT
1381
+ except Exception as e:
1382
+ print(f"✗ CRITICAL: Failed to initialize ConceptNet Gradio Client: {e}")
1383
+ traceback.print_exc()
1384
+ return None
1385
+
1386
  def conceptnet_get_relations(word: str, language: str = 'de') -> Dict[str, Any]:
1387
  """
1388
  Fetches relations from the cstr/conceptnet_normalized Gradio API.
 
1409
 
1410
  try:
1411
  # --- 2. Call Gradio API ---
1412
+ client = get_conceptnet_client() # <-- USE HELPER
1413
+ if not client:
1414
+ return {"error": "ConceptNet Gradio Client is not available."}
1415
 
1416
  selected_relations = [
1417
  "RelatedTo", "IsA", "PartOf", "HasA", "UsedFor",
 
3586
  print("✓ Requests library is available.")
3587
  print("--- Requests Done ---\n")
3588
 
3589
+ # --- 8. Initialize ConceptNet Client ---
3590
+ print("--- Initializing ConceptNet Client ---")
3591
+ if GRADIO_CLIENT_AVAILABLE:
3592
+ try:
3593
+ get_conceptnet_client() # Call the function to load the client
3594
+ except Exception as e:
3595
+ print(f"✗ FAILED to start ConceptNet Client: {e}")
3596
+ else:
3597
+ print("INFO: gradio_client not available, skipping ConceptNet client.")
3598
+ print("--- ConceptNet Client Done ---\n")
3599
+
3600
  print("="*70)
3601
  print("All services initialized. Launching Gradio Hub...")
3602
  print("="*70 + "\n")
3603
 
3604
+ # --- 9. Launch Gradio ---
3605
  demo = create_consolidated_interface()
3606
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)