Pengfa Li committed on
Commit
49d1d0a
·
verified ·
1 Parent(s): bf7222e

Update Gradio_en.py

Browse files
Files changed (1) hide show
  1. Gradio_en.py +260 -115
Gradio_en.py CHANGED
@@ -3,9 +3,126 @@ import gradio as gr
3
  import pandas as pd
4
  import json
5
  import random
 
 
 
 
 
6
  from LLM import zero_shot
7
  from prompt_generate import generate_prompt_with_examples as generate_prompt
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def get_model_options():
10
  """Get available model series options"""
11
  return ['gpt', 'llama', 'qwen', 'deepSeek', 'gemini', 'claude']
@@ -26,7 +143,7 @@ def get_prompt_templates():
26
  """Get predefined prompt templates"""
27
  templates = {
28
  "Custom": "",
29
- "Zero-shot Basic Extraction": """You are a professional and experienced expert in engineering geology. Your task is to extract "entity-relation-entity" triples from the given input text. There are 24 types of relations: "Lithology", "Paleontological", "Thickness", "Outcrop", "Develop", "Exposed", "Distribution pattern", "Contained", "Coordinates", "Age", "Integration of contacts", "Unconformity contact", "Fault contact", "Belongs to", "Elevation", "Located at", "Exposed area", "Geotectonic position", "Stratigraphical zoning", "Consolidated contact", "Administrative area", "Engulfed", "Length", "Invade", Please follow these specifications for extraction:
30
  1. Output format:
31
  Strictly follow JSON array format, no additional text, each element contains:
32
  [
@@ -39,7 +156,7 @@ Strictly follow JSON array format, no additional text, each element contains:
39
  2. Complex relationship handling:
40
  - If the same entity participates in multiple relationships, list different triples separately""",
41
 
42
- "knowledge-injected Enhanced Extraction": """You are a professional and experienced expert in engineering geology. Your task is to extract "entity-relation-entity" triples from the given input text. There are 24 types of relations: "Lithology", "Paleontological", "Thickness", "Outcrop", "Develop", "Exposed", "Distribution pattern", "Contained", "Coordinates", "Age", "Integration of contacts", "Unconformity contact", "Fault contact", "Belongs to", "Elevation", "Located at", "Exposed area", "Geotectonic position", "Stratigraphical zoning", "Consolidated contact", "Administrative area", "Engulfed", "Length", "Invade", Please follow these specifications for extraction:
43
  1. Output format:
44
  Strictly follow JSON array format, no additional text, each element contains:
45
  [
@@ -52,53 +169,53 @@ Strictly follow JSON array format, no additional text, each element contains:
52
  2. Complex relationship handling:
53
  - If the same entity participates in multiple relationships, list different triples separately
54
  3. Relationship explanations:
55
- Outcrop at: Refers to rocks or strata exposed at the surface or near-surface, not covered or buried. Example: (Late Ordovician-Silurian intrusive rocks, outcrop at, southern investigation area).
56
 
57
- Located in: Establishes the subordinate relationship of geological units within a larger spatial framework (administrative region/tectonic unit). Example: (Kumuqi Silurian basaltic basic rocks, located in, central-western investigation area)
58
 
59
- Conformable contact: Indicates contact relationships formed by continuous deposition of upper and lower strata, reflecting gradational lithological characteristics without significant depositional hiatus. Example: (Solake Formation, conformable contact, Middle Ordovician Lin Formation).
60
 
61
- Unconformable contact: Describes stratigraphic contact interfaces with depositional gaps, including contact features with angular differences or lithological abrupt changes. Example: (Tongziyan Formation, unconformable contact, Maokou Formation).
62
 
63
- Paraconformable contact: Specifically refers to parallel unconformity types with consistent attitudes, emphasizing depositional sequence interruption but without structural deformation. Example: (Solake Formation, paraconformable contact, Middle Ordovician Lin Formation).
64
 
65
- Fault contact: Two strata are separated by fault zones or fault planes, often accompanied by dynamic crushing and other structural phenomena. Example: (Solake Formation, fault contact, Upper Ordovician Lapai Spring Formation).
66
 
67
- Distribution pattern: Depicts spatial distribution characteristics of geological units, including geometric morphology and extension direction combinations. Example: (Carboniferous, distribution pattern, banded).
68
 
69
- Tectonic position: Locates geological units' attribution in plate tectonic framework, associated with orogenic belts or tectonic unit divisions. Example: (Carboniferous, tectonic position, northern margin of Gondwana tectonic belt).
70
 
71
- Stratigraphic division: Characterizes hierarchical attribution and zoning attributes of stratigraphic units in regional stratigraphic division systems. Example: (Carboniferous, stratigraphic division, Gondwana).
72
 
73
- Exposed strata: Specifically refers to actually exposed stratigraphic entities in a region, emphasizing observable surface geological units. Example: (Hongliugou gold-copper mining area, exposed strata, Nanhua-Lower Ordovician Hongliugou Group).
74
 
75
  Lithology: Defines material composition and structural characteristics of rocks, including hierarchical descriptive elements of composite lithology. Example: (Late Ordovician-Silurian syenite, lithology, altered syenite).
76
 
77
  Thickness: Quantifies vertical dimensions of strata/rock bodies, including dimensional expressions with absolute values and relative descriptions. Example: (syenite, thickness, 35.60 m).
78
 
79
- Area: Characterizes horizontal distribution range of geological units, presented in standardized form combining numerical values and units. Example: (intrusive rocks, outcrop area, 54 m2)
80
 
81
  Coordinates: Specifically refers to geographical spatial positioning data recording geological feature points. Example: (Solake copper-gold mine site, coordinates, 90°11′47″E).
82
 
83
  Length: Describes spatial extension dimensions of linear geological bodies. Example: Triple (Shibien fault zone, length, 20m) can be extracted.
84
 
85
- Contains: Indicates compositional inclusion relationships of main materials, specifically referring to mineral composition or fossil occurrence states, different from everyday meaning. Example: (medium gray-black massive chert, contains, chert bands).
86
 
87
  Age: Establishes correspondence between geological units and standard geological chronological systems. Example: (Hongliugou gold-copper mining area, age, Early-Middle Permian).
88
 
89
- Administrative division: Defines subordinate hierarchy and territorial attribution of geological entities in administrative management systems. Example: (investigation area, administrative division, Chayang County).
90
 
91
- Development: Describes manifestation degree and formation state intensity of geological structures or depositional features. Example: (Lanhuaweng Formation, development, horizontal bedding).
92
 
93
- Paleontology: Records fossil biological information occurring in strata, requiring complete Latin scientific names and classification features. Example: (strata, paleontology, Lumu et al).
94
 
95
  Elevation: Quantifies elevation data of geological feature points relative to sea level, retaining measurement reference identification. Example: (Solake copper-gold mine site, elevation, 2800m).
96
 
97
- Belongs to: Establishes type attribution of geological units in classification systems. Example: (mining area, belongs to, polymetallic mineralization subarea).
98
 
99
- Engulf: Characterizes spatial replacement processes of intrusive bodies on country rocks, reflecting transformation effects of magmatic activities. Example: (Nintendo Rock Formation, engulf, Jurassic granite).
100
 
101
- Intrude: Describes geological processes of magmatic rock bodies penetrating country rocks, including accompanying phenomena such as contact metamorphism. Example: (Gaozhou Shell Stone Formation, intrude, gneissic granite).
102
 
103
  4. Other key points:
104
  All triple relationships must be one of the above 24 types
@@ -110,10 +227,10 @@ def get_qa_prompt_templates():
110
  """Get QA module prompt templates"""
111
  templates = {
112
  "Custom": "",
113
- "Zero-shot True/False": "Please judge true or false based on the given text.",
114
- "Zero-shot Q&A": "Please answer the question based on the given text.",
115
- "COT True/False": "Please first judge true or false, and provide your reasoning basis.",
116
- "COT Q&A": "Please first answer the question, and provide your reasoning basis.",
117
  }
118
  return templates
119
 
@@ -131,37 +248,76 @@ def load_train_data():
131
  _text_series = _train_data['text']
132
  _label_series = _train_data['triple_list']
133
  except Exception as e:
134
- print(f"Failed to load training data: {e}")
135
  return False
136
  return True
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  def generate_random_context_prompt(user_text, num_examples):
139
- """Generate random context prompts"""
140
  if not load_train_data():
141
  return "Unable to load training data"
142
 
143
  try:
144
  random_prompt = generate_prompt(_text_series, _label_series, num_examples)
 
145
  return f"Here are geological description text and triple extraction examples:\n\n{random_prompt}\nPlease extract triples based on the examples:\n{user_text}"
146
  except Exception as e:
147
- return f"Failed to generate random context prompt: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  def update_model_names(model_series):
150
  """Update model name dropdown list when model series changes"""
151
  names = get_common_model_names(model_series)
152
- return gr.Dropdown(choices=names, value=names[0] if names else "", label="Model Name", allow_custom_value=True)
153
 
154
  def update_prompt_content(template_name):
155
  """Update content when prompt template changes"""
156
  templates = get_prompt_templates()
157
  content = templates.get(template_name, "")
158
- return gr.Textbox(value=content, label="Prompt Content", lines=15, max_lines=25)
159
 
160
  def update_qa_prompt_content(template_name):
161
  """Update content when QA prompt template changes"""
162
  templates = get_qa_prompt_templates()
163
  content = templates.get(template_name, "")
164
- return gr.Textbox(value=content, label="QA Prompt Content", lines=3, max_lines=10)
165
 
166
  def call_llm_model(model_series, model_name, prompt_content, user_content, context_type, num_examples):
167
  """LLM model wrapper function (triple extraction)"""
@@ -172,18 +328,24 @@ def call_llm_model(model_series, model_name, prompt_content, user_content, conte
172
  if not user_content:
173
  return "Please input text content to process"
174
 
175
- # Combine complete input content based on context type
176
- if context_type == "No Context":
177
  if prompt_content.strip():
178
  full_content = prompt_content.strip() + "\n\n" + user_content
179
  else:
180
  full_content = user_content
181
- elif context_type == "Random Context":
182
  context_prompt = generate_random_context_prompt(user_content, num_examples)
183
  if prompt_content.strip():
184
  full_content = prompt_content.strip() + "\n\n" + context_prompt
185
  else:
186
  full_content = context_prompt
 
 
 
 
 
 
187
  else:
188
  if prompt_content.strip():
189
  full_content = prompt_content.strip() + "\n\n" + user_content
@@ -215,18 +377,18 @@ def call_qa_model(model_series, model_name, qa_prompt_content, geological_text,
215
  return "Please input geological text"
216
 
217
  if not question_or_statement:
218
- if qa_type == "True/False":
219
  return "Please input factual statement to judge"
220
  else:
221
  return "Please input question to answer"
222
 
223
  # Combine complete input content
224
- if qa_type == "True/False":
225
  if qa_prompt_content.strip():
226
  full_content = f"{qa_prompt_content.strip()}\n\nGeological text:\n{geological_text}\n\nStatement to judge:\n{question_or_statement}"
227
  else:
228
  full_content = f"Geological text:\n{geological_text}\n\nStatement to judge:\n{question_or_statement}"
229
- else: # Q&A
230
  if qa_prompt_content.strip():
231
  full_content = f"{qa_prompt_content.strip()}\n\nGeological text:\n{geological_text}\n\nQuestion:\n{question_or_statement}"
232
  else:
@@ -251,13 +413,13 @@ def create_interface():
251
  """Create Gradio interface"""
252
 
253
  with gr.Blocks(title="GeoLLM Model Interface", theme=gr.themes.Soft()) as demo:
254
- gr.Markdown("# 🚀 GeoLLM Geological Intelligence Platform")
255
- gr.Markdown("Professional geological text analysis tool integrating triple extraction and intelligent Q&A functions")
256
 
257
  # Add tabs
258
  with gr.Tabs():
259
  # Triple extraction module
260
- with gr.TabItem("🔗 Triple Extraction", elem_id="triple_extraction"):
261
  with gr.Row():
262
  with gr.Column(scale=1):
263
  # Model selection area
@@ -272,18 +434,11 @@ def create_interface():
272
  model_name = gr.Dropdown(
273
  choices=get_common_model_names("gpt"),
274
  value="gpt-3.5-turbo",
275
- label="Model Name",
276
- info="Select specific model name, or input manually",
277
  allow_custom_value=True
278
  )
279
 
280
- # Custom model name input box
281
- custom_model_name = gr.Textbox(
282
- label="Custom Model Name (Optional)",
283
- placeholder="If your desired model is not in the options above, please input here",
284
- info="Input here will override the selection above"
285
- )
286
-
287
  # Prompt template selection
288
  gr.Markdown("## 📝 Prompt Template")
289
  prompt_template = gr.Dropdown(
@@ -293,13 +448,13 @@ def create_interface():
293
  info="Select predefined prompt template or customize"
294
  )
295
 
296
- # Context type selection
297
- gr.Markdown("## 🎯 Context Configuration")
298
  context_type = gr.Dropdown(
299
- choices=["No Context", "Random Context"],
300
- value="No Context",
301
- label="Context Type",
302
- info="Choose whether to use context examples"
303
  )
304
 
305
  num_examples = gr.Slider(
@@ -312,14 +467,14 @@ def create_interface():
312
  )
313
 
314
  with gr.Column(scale=2):
315
- # Prompt content area
316
- gr.Markdown("## 🎯 Prompt Content")
317
  prompt_content = gr.Textbox(
318
- label="Prompt Content",
319
  placeholder="Select template or customize your prompt...",
320
  lines=15,
321
  max_lines=25,
322
- info="Will be sent to the model as system prompt"
323
  )
324
 
325
  # User input area
@@ -349,19 +504,18 @@ def create_interface():
349
  gr.Markdown("## 💡 Usage Examples")
350
  gr.Examples(
351
  examples=[
352
- ["gpt", "gpt-3.5-turbo", "No Context", 2, "The Noriba Gari Bao Formation originally refers to gray-green thick-bedded medium- to fine-grained lithic feldspar sandstone, feldspar quartz sandstone, feldspar sandstone occasionally interbedded with siltstone, clay rock and micritic limestone, only bivalve fossils are seen, and continuous deposition with the overlying Ninety Road Class Formation."],
353
- ["gemini", "gemini-1.5-pro-002", "Random Context", 3, "The Quemo Cuo Formation has only a small outcrop in the Sewang Yongqu area in the southwest corner of the map sheet within the survey area, with an area of less than 10m2 and a thickness greater than 29.25m."],
354
- ["claude", "claude-3-5-haiku-20241022", "No Context", 2, "Hecosmilia sp. scabbard coral was collected from limestone; Complexastraea sp. and Radulopccten sp. scraping sea fan; Oscillopha sp., dated to the Middle Jurassic."],
355
- ["deepSeek", "deepseek-ai/DeepSeek-V3", "Random Context", 3, "Late Triassic granite is mainly distributed in the Ladi Gongma Mianche Ri Ahri Qu area of the survey area. Regionally controlled by NW-SE trending regional faults within the structural melange zone, it is distributed in long strips. The intrusive bodies have good gregariousness and excellent zonal extensibility, with 8 exposed intrusive bodies covering an area of about 227m2."],
356
  ],
357
  inputs=[model_series, model_name, context_type, num_examples, user_content]
358
  )
359
 
360
  # Event handling
361
- def submit_request(series, name, custom_name, template, prompt, content, ctx_type, num_ex):
362
- # Use custom model name if provided
363
- final_model_name = custom_name.strip() if custom_name.strip() else name
364
- return call_llm_model(series, final_model_name, prompt, content, ctx_type, num_ex)
365
 
366
  # Update model name options
367
  model_series.change(
@@ -370,7 +524,7 @@ def create_interface():
370
  outputs=[model_name]
371
  )
372
 
373
- # Update prompt content
374
  prompt_template.change(
375
  fn=update_prompt_content,
376
  inputs=[prompt_template],
@@ -380,7 +534,7 @@ def create_interface():
380
  # Submit button event
381
  submit_btn.click(
382
  fn=submit_request,
383
- inputs=[model_series, model_name, custom_model_name, prompt_template, prompt_content, user_content, context_type, num_examples],
384
  outputs=[output]
385
  )
386
 
@@ -393,12 +547,12 @@ def create_interface():
393
  # Enter key submission
394
  user_content.submit(
395
  fn=submit_request,
396
- inputs=[model_series, model_name, custom_model_name, prompt_template, prompt_content, user_content, context_type, num_examples],
397
  outputs=[output]
398
  )
399
 
400
  # QA module
401
- with gr.TabItem("❓ Intelligent Q&A", elem_id="qa_module"):
402
  with gr.Row():
403
  with gr.Column(scale=1):
404
  # Model selection area
@@ -413,23 +567,16 @@ def create_interface():
413
  qa_model_name = gr.Dropdown(
414
  choices=get_common_model_names("gpt"),
415
  value="gpt-3.5-turbo",
416
- label="Model Name",
417
- info="Select specific model name, or input manually",
418
  allow_custom_value=True
419
  )
420
 
421
- # Custom model name input box
422
- qa_custom_model_name = gr.Textbox(
423
- label="Custom Model Name (Optional)",
424
- placeholder="If your desired model is not in the options above, please input here",
425
- info="Input here will override the selection above"
426
- )
427
-
428
  # QA type selection
429
- gr.Markdown("## 🎯 Q&A Type")
430
  qa_type = gr.Dropdown(
431
- choices=["True/False", "Q&A"],
432
- value="True/False",
433
  label="Task Type",
434
  info="Choose between judging true/false or answering questions"
435
  )
@@ -438,31 +585,31 @@ def create_interface():
438
  gr.Markdown("## 📝 Prompt Template")
439
  qa_prompt_template = gr.Dropdown(
440
  choices=list(get_qa_prompt_templates().keys()),
441
- value="Zero-shot True/False",
442
  label="Select QA Prompt Template",
443
  info="Select predefined prompt template or customize"
444
  )
445
 
446
  with gr.Column(scale=2):
447
- # QA Prompt content area
448
- gr.Markdown("## 🎯 Prompt Content")
449
  qa_prompt_content = gr.Textbox(
450
- label="QA Prompt Content",
451
  value="Please judge true or false based on the given text.",
452
  placeholder="Select template or customize your prompt...",
453
  lines=3,
454
  max_lines=10,
455
- info="Will be sent to the model as system prompt"
456
  )
457
 
458
  # Geological text input area
459
  gr.Markdown("## 📄 Geological Text")
460
  geological_text = gr.Textbox(
461
- label="Geological Background Text",
462
  placeholder="Please input geological description text as background...",
463
  lines=8,
464
  max_lines=15,
465
- info="Provides contextual information for answering questions or judging facts"
466
  )
467
 
468
  # Question or statement input area
@@ -492,50 +639,49 @@ def create_interface():
492
  # Example area
493
  gr.Markdown("## 💡 Usage Examples")
494
 
495
- # True/False examples
496
- with gr.Accordion("True/False Examples", open=False):
497
  gr.Examples(
498
  examples=[
499
- ["gpt", "gpt-3.5-turbo", "True/False", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes. There are 58 newly discovered geological disaster points, accounting for 30.5% of the total. Among the 190 collapses, landslides, debris flows and other sudden geological disasters in Huoshan County, most are caused by human factors. There are 163 geological disasters caused by human factors, accounting for 85.8%; there are 27 disasters formed by natural factors, accounting for 14.2%.", "In the sudden geological disasters in Huoshan County, the number of landslides exceeds the number of collapses."],
500
- ["deepSeek", "deepseek-ai/DeepSeek-V3", "True/False", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes.", "The total number of geological disaster points in Huoshan County exceeds 200."],
501
  ],
502
  inputs=[qa_model_series, qa_model_name, qa_type, geological_text, question_or_statement]
503
  )
504
 
505
- # Q&A examples
506
- with gr.Accordion("Q&A Examples", open=False):
507
  gr.Examples(
508
  examples=[
509
- ["gpt", "gpt-3.5-turbo", "Q&A", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes. There are 58 newly discovered geological disaster points, accounting for 30.5% of the total.", "How many sudden geological disaster points are there in Huoshan County in total?"],
510
- ["claude", "claude-3-5-haiku-20241022", "Q&A", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes.", "Among the geological disasters in Huoshan County, which type of disaster has the largest number?"],
511
  ],
512
  inputs=[qa_model_series, qa_model_name, qa_type, geological_text, question_or_statement]
513
  )
514
 
515
  # QA event handling
516
- def submit_qa_request(series, name, custom_name, q_type, template, prompt, geo_text, question):
517
- # Use custom model name if provided
518
- final_model_name = custom_name.strip() if custom_name.strip() else name
519
- return call_qa_model(series, final_model_name, prompt, geo_text, question, q_type)
520
 
521
  def update_qa_prompt_on_type_change(qa_type_value):
522
  """Update prompt template options and content when QA type changes"""
523
- if qa_type_value == "True/False":
524
- new_choices = ["Custom", "Zero-shot True/False", "COT True/False"]
525
- new_value = "Zero-shot True/False"
526
  new_prompt = "Please judge true or false based on the given text."
527
  new_placeholder = "Please input statement to judge..."
528
  new_label = "Statement"
529
- else: # Q&A
530
- new_choices = ["Custom", "Zero-shot Q&A", "COT Q&A"]
531
- new_value = "Zero-shot Q&A"
532
  new_prompt = "Please answer the question based on the given text."
533
  new_placeholder = "Please input question to answer..."
534
  new_label = "Question"
535
 
536
  return (
537
  gr.Dropdown(choices=new_choices, value=new_value, label="Select QA Prompt Template"),
538
- gr.Textbox(value=new_prompt, label="QA Prompt Content", lines=3, max_lines=10),
539
  gr.Textbox(label=new_label, placeholder=new_placeholder, lines=3, max_lines=8)
540
  )
541
 
@@ -546,7 +692,7 @@ def create_interface():
546
  outputs=[qa_model_name]
547
  )
548
 
549
- # Update QA prompt content
550
  qa_prompt_template.change(
551
  fn=update_qa_prompt_content,
552
  inputs=[qa_prompt_template],
@@ -563,7 +709,7 @@ def create_interface():
563
  # QA submit button event
564
  qa_submit_btn.click(
565
  fn=submit_qa_request,
566
- inputs=[qa_model_series, qa_model_name, qa_custom_model_name, qa_type, qa_prompt_template, qa_prompt_content, geological_text, question_or_statement],
567
  outputs=[qa_output]
568
  )
569
 
@@ -573,16 +719,15 @@ def create_interface():
573
  outputs=[geological_text, question_or_statement, qa_output]
574
  )
575
 
576
- # QA enter key submission
577
  question_or_statement.submit(
578
  fn=submit_qa_request,
579
- inputs=[qa_model_series, qa_model_name, qa_custom_model_name, qa_type, qa_prompt_template, qa_prompt_content, geological_text, question_or_statement],
580
  outputs=[qa_output]
581
  )
582
 
583
  return demo
584
 
585
-
586
  if __name__ == "__main__":
587
  # Launch interface with password protection
588
  demo = create_interface()
@@ -590,6 +735,6 @@ if __name__ == "__main__":
590
  server_port=7860,
591
  share=True,
592
  debug=True,
593
- auth=("geollm", "research2025"),
594
- auth_message="Please enter credentials to access GeoLLM Geological Intelligence Platform"
595
  )
 
3
  import pandas as pd
4
  import json
5
  import random
6
+ import numpy as np
7
+ import faiss
8
+ from transformers import AutoTokenizer, AutoModel
9
+ import torch
10
+ from collections import defaultdict
11
  from LLM import zero_shot
12
  from prompt_generate import generate_prompt_with_examples as generate_prompt
13
 
14
+ # Global KNN retriever instance
15
+ _knn_retriever = None
16
+
17
class EntityLevelRetriever:
    """Retrieve training examples whose entities resemble those in a query.

    Each entity mention found in a training text is embedded by mean-pooling
    the encoder's last hidden states over the tokens covering the mention.
    The embeddings are stored in a flat L2 FAISS index; ``metadata[i]``
    describes the vector stored at index row ``i``.
    """

    def __init__(self, model_name='bert-base-uncased'):
        """Load the tokenizer/encoder and create an empty FAISS index.

        Args:
            model_name: Name or path passed to ``from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        # Size the index from the loaded encoder instead of assuming 768, so
        # encoders with another hidden width still work. Falls back to 768
        # (the previous hard-coded value) if the config has no such field.
        hidden_size = getattr(self.model.config, 'hidden_size', 768)
        self.index = faiss.IndexFlatL2(hidden_size)
        self.entity_db = []   # embeddings, in the order they were indexed
        self.metadata = []    # one dict per embedding, same order
        self.train_data = []  # items used by the latest build_index call

    def _get_entity_span(self, text, entity):
        """Return ``(start, end)`` of the first occurrence of *entity*.

        Returns ``None`` when *entity* is empty or does not occur in *text*.
        """
        # An empty string "matches" at offset 0 and would yield a zero-width
        # span, which later turns into a lookup at character index -1.
        if not entity:
            return None
        start = text.find(entity)
        if start == -1:
            return None
        return (start, start + len(entity))

    def _generate_entity_embedding(self, text, entity):
        """Return a float32 embedding for *entity* as it appears in *text*.

        Returns ``None`` when the entity is not found in the text or when
        either end of its span cannot be mapped to a token (for example
        because that part of the text was truncated away).
        """
        span = self._get_entity_span(text, entity)
        if span is None:
            return None

        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        with torch.no_grad():
            outputs = self.model(**inputs)

        start_token = inputs.char_to_token(span[0])
        end_token = inputs.char_to_token(span[1] - 1)

        # Compare against None explicitly: a token index of 0 is falsy but
        # is still a valid position.
        if start_token is None or end_token is None:
            return None

        token_states = outputs.last_hidden_state[0, start_token:end_token + 1]
        return token_states.mean(dim=0).numpy().astype('float32')

    def build_index(self, train_path, start=500, end=1000):
        """Embed the entities of a slice of the training file and index them.

        Args:
            train_path: Path to a JSON file holding a list of items, each
                with a ``'text'`` and a ``'triple_list'`` entry.
            start: First item (0-based, inclusive) to use. Defaults to 500.
            end: Item at which to stop (exclusive). Defaults to 1000.
        """
        with open(train_path, 'r', encoding='utf-8') as f:
            dataset = json.load(f)

        self.train_data = dataset[start:end]

        # Collect only the vectors produced by this call. Adding the whole
        # of entity_db again would duplicate earlier vectors in the index
        # and break the row <-> metadata correspondence on a second call.
        new_embeddings = []
        for item in self.train_data:
            text = item['text']
            for triple in item['triple_list']:
                # Head and tail of the triple; the middle element is stored
                # under 'type' below.
                for entity in (triple[0], triple[2]):
                    embedding = self._generate_entity_embedding(text, entity)
                    if embedding is None:
                        continue
                    new_embeddings.append(embedding)
                    self.entity_db.append(embedding)
                    self.metadata.append({
                        'entity': entity,
                        'type': triple[1],
                        'context': text,
                        'full_item': item
                    })

        if new_embeddings:
            self.index.add(np.asarray(new_embeddings, dtype='float32'))

    def search_similar_texts(self, query_text, top_k=3):
        """Search for similar texts based on entity embeddings.

        Candidate entities are pulled from *query_text*, each is matched
        against the index, and the training texts that own the matched
        entities are ranked by the summed score ``1 / (1 + distance)``.

        Args:
            query_text: Text to find similar training items for.
            top_k: Maximum number of training items to return.

        Returns:
            Up to *top_k* training items, best first. Empty when nothing has
            been indexed, *top_k* is not positive, or no entity matched.
        """
        if not self.train_data or top_k <= 0 or self.index.ntotal == 0:
            return []

        # Extract entities from query text (simplified approach)
        # For better results, you might want to use NER or other entity extraction methods
        query_entities = self._extract_potential_entities(query_text)

        context_scores = defaultdict(float)
        context_items = {}

        for entity in query_entities:
            embedding = self._generate_entity_embedding(query_text, entity)
            if embedding is None:
                continue

            # Over-fetch so that several hits landing in the same text do
            # not crowd out other candidate texts.
            distances, indices = self.index.search(np.array([embedding]), top_k * 2)

            for distance, idx in zip(distances[0], indices[0]):
                # Skip anything outside the metadata range (this also covers
                # negative placeholder indices).
                if not 0 <= idx < len(self.metadata):
                    continue
                ctx_info = self.metadata[idx]
                context = ctx_info['context']

                # Weight by inverse distance
                context_scores[context] += 1 / (1 + float(distance))
                context_items[context] = ctx_info['full_item']

        # Sort by score and return top_k
        scored_contexts = sorted(context_scores.items(), key=lambda x: x[1], reverse=True)
        return [context_items[context] for context, _ in scored_contexts[:top_k]]

    def _extract_potential_entities(self, text):
        """Simple entity extraction - you can improve this with better NER.

        Returns the distinct candidates in order of first appearance:
        capitalised word sequences first, then words ending in
        stone/rock/formation/group/member (case-insensitive).
        """
        import re
        # Extract potential geological terms (capitalized words, formations, etc.)
        entities = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', text)
        # Also include technical terms
        entities.extend(re.findall(r'\b\w+(?:stone|rock|formation|group|member)\b', text, re.IGNORECASE))
        # dict.fromkeys removes duplicates while keeping a stable order, so
        # repeated queries process entities in the same sequence.
        return list(dict.fromkeys(entities))
125
+
126
  def get_model_options():
127
  """Get available model series options"""
128
  return ['gpt', 'llama', 'qwen', 'deepSeek', 'gemini', 'claude']
 
143
  """Get predefined prompt templates"""
144
  templates = {
145
  "Custom": "",
146
+ "Zero-shot prompting": """You are a professional and experienced expert in engineering geology. Your task is to extract "entity-relation-entity" triples from the given input text. There are 24 types of relations: "Lithology", "Paleontological", "Thickness", "Outcrop", "Develop", "Exposed", "Distribution pattern", "Contained", "Coordinates", "Age", "Integration of contacts", "Unconformity contact", "Fault contact", "Belongs to", "Elevation", "Located at", "Exposed area", "Geotectonic position", "Stratigraphical zoning", "Consolidated contact", "Administrative area", "Engulfed", "Length", "Invade", Please follow these specifications for extraction:
147
  1. Output format:
148
  Strictly follow JSON array format, no additional text, each element contains:
149
  [
 
156
  2. Complex relationship handling:
157
  - If the same entity participates in multiple relationships, list different triples separately""",
158
 
159
+ "Knowledge-injected prompting": """You are a professional and experienced expert in engineering geology. Your task is to extract "entity-relation-entity" triples from the given input text. There are 24 types of relations: "Lithology", "Paleontological", "Thickness", "Outcrop", "Develop", "Exposed", "Distribution pattern", "Contained", "Coordinates", "Age", "Integration of contacts", "Unconformity contact", "Fault contact", "Belongs to", "Elevation", "Located at", "Exposed area", "Geotectonic position", "Stratigraphical zoning", "Consolidated contact", "Administrative area", "Engulfed", "Length", "Invade", Please follow these specifications for extraction:
160
  1. Output format:
161
  Strictly follow JSON array format, no additional text, each element contains:
162
  [
 
169
  2. Complex relationship handling:
170
  - If the same entity participates in multiple relationships, list different triples separately
171
  3. Relationship explanations:
172
+ Exposed: Refers to rocks or strata exposed at the surface or near-surface, not covered or buried. Example: (Late Ordovician-Silurian intrusive rocks, Exposed, southern investigation area).
173
 
174
+ Located at: Establishes the subordinate relationship of geological units within a larger spatial framework (administrative region/tectonic unit). Example: (Kumuqi Silurian basaltic basic rocks, Located at, central-western investigation area)
175
 
176
+ Integration of contacts: Indicates contact relationships formed by continuous deposition of upper and lower strata, reflecting gradational lithological characteristics without significant depositional hiatus. Example: (Solake Formation, Integration of contacts, Middle Ordovician Lin Formation).
177
 
178
+ Unconformity contact: Describes stratigraphic contact interfaces with depositional gaps, including contact features with angular differences or lithological abrupt changes. Example: (Tongziyan Formation, Unconformity contact, Maokou Formation).
179
 
180
+ Consolidated contact: Specifically refers to parallel unconformity types with consistent attitudes, emphasizing depositional sequence interruption but without structural deformation. Example: (Solake Formation, Consolidated contact, Middle Ordovician Lin Formation).
181
 
182
+ Fault contact: Two strata are separated by fault zones or fault planes, often accompanied by dynamic crushing and other structural phenomena. Example: (Solake Formation, Fault contact, Upper Ordovician Lapai Spring Formation).
183
 
184
+ Distribution pattern: Depicts spatial distribution characteristics of geological units, including geometric morphology and extension direction combinations. Example: (Carboniferous, Distribution pattern, banded).
185
 
186
+ Geotectonic position: Locates geological units' attribution in plate tectonic framework, associated with orogenic belts or tectonic unit divisions. Example: (Carboniferous, Geotectonic position, northern margin of Gondwana tectonic belt).
187
 
188
+ Stratigraphical zoning: Characterizes hierarchical attribution and zoning attributes of stratigraphic units in regional stratigraphic division systems. Example: (Carboniferous, Stratigraphical zoning, Gondwana).
189
 
190
+ Outcrop: Specifically refers to actually exposed stratigraphic entities in a region, emphasizing observable surface geological units. Example: (Hongliugou gold-copper mining area, Outcrop, Nanhua-Lower Ordovician Hongliugou Group).
191
 
192
  Lithology: Defines material composition and structural characteristics of rocks, including hierarchical descriptive elements of composite lithology. Example: (Late Ordovician-Silurian syenite, lithology, altered syenite).
193
 
194
  Thickness: Quantifies vertical dimensions of strata/rock bodies, including dimensional expressions with absolute values and relative descriptions. Example: (syenite, thickness, 35.60 m).
195
 
196
+ Exposed area: Characterizes horizontal distribution range of geological units, presented in standardized form combining numerical values and units. Example: (intrusive rocks, Exposed area, 54 m2)
197
 
198
  Coordinates: Specifically refers to geographical spatial positioning data recording geological feature points. Example: (Solake copper-gold mine site, coordinates, 90°11′47″E).
199
 
200
  Length: Describes spatial extension dimensions of linear geological bodies. Example: Triple (Shibien fault zone, length, 20m) can be extracted.
201
 
202
+ Contained: Indicates compositional inclusion relationships of main materials, specifically referring to mineral composition or fossil occurrence states, different from everyday meaning. Example: (medium gray-black massive chert, Contained, chert bands).
203
 
204
  Age: Establishes correspondence between geological units and standard geological chronological systems. Example: (Hongliugou gold-copper mining area, age, Early-Middle Permian).
205
 
206
+ Administrative area: Defines subordinate hierarchy and territorial attribution of geological entities in administrative management systems. Example: (investigation area, Administrative area, Chayang County).
207
 
208
+ Develop: Describes manifestation degree and formation state intensity of geological structures or depositional features. Example: (Lanhuaweng Formation, Develop, horizontal bedding).
209
 
210
+ Paleontological: Records fossil biological information occurring in strata, requiring complete Latin scientific names and classification features. Example: (strata, Paleontological, Lumu et al).
211
 
212
  Elevation: Quantifies elevation data of geological feature points relative to sea level, retaining measurement reference identification. Example: (Solake copper-gold mine site, elevation, 2800m).
213
 
214
+ Belongs to: Establishes type attribution of geological units in classification systems. Example: (mining area, Belongs to, polymetallic mineralization subarea).
215
 
216
+ Engulfed: Characterizes spatial replacement processes of intrusive bodies on country rocks, reflecting transformation effects of magmatic activities. Example: (Nintendo Rock Formation, Engulfed, Jurassic granite).
217
 
218
+ Invade: Describes geological processes of magmatic rock bodies penetrating country rocks, including accompanying phenomena such as contact metamorphism. Example: (Gaozhou Shell Stone Formation, Invade, gneissic granite).
219
 
220
  4. Other key points:
221
  All triple relationships must be one of the above 24 types
 
227
  """Get QA module prompt templates"""
228
  templates = {
229
  "Custom": "",
230
+ "Yes/No QA": "Please judge true or false based on the given text.",
231
+ "Factoid QA": "Please answer the question based on the given text.",
232
+ "CoT prompting of Yes/No QA": "Please first judge true or false, and provide your reasoning basis.",
233
+ "CoT prompting of Factoid QA": "Please first answer the question, and provide your reasoning basis.",
234
  }
235
  return templates
236
 
 
248
  _text_series = _train_data['text']
249
  _label_series = _train_data['triple_list']
250
  except Exception as e:
251
+ # print(f"Failed to load training data: {e}")
252
  return False
253
  return True
254
 
255
def initialize_knn_retriever():
    """Lazily create the module-level KNN retriever and report whether it is usable.

    On the first successful call an ``EntityLevelRetriever`` is constructed,
    its index is built from ``./data/train_triples.json`` and the instance is
    stored in the module-level ``_knn_retriever``. Later calls reuse it.

    Returns:
        bool: True when ``_knn_retriever`` holds a retriever, False when
        construction or index building raised. A failed attempt leaves
        ``_knn_retriever`` as None, so the next call retries.
    """
    # Function-scope import keeps this block self-contained regardless of
    # what the module's import section provides.
    import logging

    global _knn_retriever
    if _knn_retriever is not None:
        return True

    try:
        # Build into a local first so the global never points at a
        # retriever whose index has not been built yet.
        retriever = EntityLevelRetriever()
        retriever.build_index('./data/train_triples.json')
    except Exception:
        # Still best-effort (callers only get False), but record the cause
        # instead of discarding it silently.
        logging.getLogger(__name__).exception("Failed to initialize KNN retriever")
        _knn_retriever = None
        return False

    _knn_retriever = retriever
    return True
268
+
269
  def generate_random_context_prompt(user_text, num_examples):
270
+ """Generate Random sampling prompts"""
271
  if not load_train_data():
272
  return "Unable to load training data"
273
 
274
  try:
275
  random_prompt = generate_prompt(_text_series, _label_series, num_examples)
276
+ print(random_prompt)
277
  return f"Here are geological description text and triple extraction examples:\n\n{random_prompt}\nPlease extract triples based on the examples:\n{user_text}"
278
  except Exception as e:
279
+ return f"Failed to generate Random sampling prompt: {e}"
280
+
281
def generate_knn_context_prompt(user_text, num_examples):
    """Build a few-shot prompt from the retriever's results for ``user_text``.

    Args:
        user_text: Text the model should extract triples from; also used as
            the retrieval query.
        num_examples: Passed to ``search_similar_texts`` as the second
            argument (presumably the number of neighbours to return).

    Returns:
        str: The assembled prompt. Error conditions are reported as message
        strings rather than exceptions: retriever unavailable, no similar
        examples found (the text is then passed through with a zero-shot
        notice), or any exception raised during retrieval or formatting.
    """
    if not initialize_knn_retriever():
        return "Unable to initialize KNN retriever"

    try:
        neighbours = _knn_retriever.search_similar_texts(user_text, num_examples)
        if not neighbours:
            return f"No similar examples found, performing zero-shot extraction:\n{user_text}"

        # One numbered "Example / Text / Triples" stanza per retrieved item.
        demo_block = "".join(
            f"Example {position}:\n"
            f"Text: {entry['text']}\n"
            f"Triples: {json.dumps(entry['triple_list'], ensure_ascii=False)}\n\n"
            for position, entry in enumerate(neighbours, start=1)
        )
        # Echo the selected demonstrations to stdout.
        print(demo_block)
        return f"Here are geological description text and triple extraction examples based on KNN similarity:\n\n{demo_block}Please extract triples based on the examples:\n{user_text}"
    except Exception as err:
        return f"Failed to generate KNN-based sampling prompt: {err}"
304
 
305
  def update_model_names(model_series):
306
  """Update model name dropdown list when model series changes"""
307
  names = get_common_model_names(model_series)
308
+ return gr.Dropdown(choices=names, value=names[0] if names else "", label="Select the specific model", allow_custom_value=True)
309
 
310
  def update_prompt_content(template_name):
311
  """Update content when prompt template changes"""
312
  templates = get_prompt_templates()
313
  content = templates.get(template_name, "")
314
+ return gr.Textbox(value=content, label="Prompt ", lines=15, max_lines=25)
315
 
316
  def update_qa_prompt_content(template_name):
317
  """Update content when QA prompt template changes"""
318
  templates = get_qa_prompt_templates()
319
  content = templates.get(template_name, "")
320
+ return gr.Textbox(value=content, label="QA Prompt ", lines=3, max_lines=10)
321
 
322
  def call_llm_model(model_series, model_name, prompt_content, user_content, context_type, num_examples):
323
  """LLM model wrapper function (triple extraction)"""
 
328
  if not user_content:
329
  return "Please input text content to process"
330
 
331
+ # Combine complete input content based on Demonstration type
332
+ if context_type == "No demonstration":
333
  if prompt_content.strip():
334
  full_content = prompt_content.strip() + "\n\n" + user_content
335
  else:
336
  full_content = user_content
337
+ elif context_type == "Random sampling":
338
  context_prompt = generate_random_context_prompt(user_content, num_examples)
339
  if prompt_content.strip():
340
  full_content = prompt_content.strip() + "\n\n" + context_prompt
341
  else:
342
  full_content = context_prompt
343
+ elif context_type == "KNN-based sampling":
344
+ context_prompt = generate_knn_context_prompt(user_content, num_examples)
345
+ if prompt_content.strip():
346
+ full_content = prompt_content.strip() + "\n\n" + context_prompt
347
+ else:
348
+ full_content = context_prompt
349
  else:
350
  if prompt_content.strip():
351
  full_content = prompt_content.strip() + "\n\n" + user_content
 
377
  return "Please input geological text"
378
 
379
  if not question_or_statement:
380
+ if qa_type == "Yes/No QA":
381
  return "Please input factual statement to judge"
382
  else:
383
  return "Please input question to answer"
384
 
385
  # Combine complete input content
386
+ if qa_type == "Yes/No QA":
387
  if qa_prompt_content.strip():
388
  full_content = f"{qa_prompt_content.strip()}\n\nGeological text:\n{geological_text}\n\nStatement to judge:\n{question_or_statement}"
389
  else:
390
  full_content = f"Geological text:\n{geological_text}\n\nStatement to judge:\n{question_or_statement}"
391
+ else: # Factoid QA
392
  if qa_prompt_content.strip():
393
  full_content = f"{qa_prompt_content.strip()}\n\nGeological text:\n{geological_text}\n\nQuestion:\n{question_or_statement}"
394
  else:
 
413
  """Create Gradio interface"""
414
 
415
  with gr.Blocks(title="GeoLLM Model Interface", theme=gr.themes.Soft()) as demo:
416
+ gr.Markdown("# 🚀 GeoLLM-Toolkit: An Interactive Platform for Geological Text Understanding and Knowledge Extraction")
417
+ gr.Markdown("A domain-specific, modular framework designed to operationalize large language models (LLMs) for advanced geological natural language processing")
418
 
419
  # Add tabs
420
  with gr.Tabs():
421
  # Triple extraction module
422
+ with gr.TabItem("🔗 KG Triple Extraction", elem_id="triple_extraction"):
423
  with gr.Row():
424
  with gr.Column(scale=1):
425
  # Model selection area
 
434
  model_name = gr.Dropdown(
435
  choices=get_common_model_names("gpt"),
436
  value="gpt-3.5-turbo",
437
+ label="Select the specific model",
438
+ info="Select specific model name",
439
  allow_custom_value=True
440
  )
441
 
 
 
 
 
 
 
 
442
  # Prompt template selection
443
  gr.Markdown("## 📝 Prompt Template")
444
  prompt_template = gr.Dropdown(
 
448
  info="Select predefined prompt template or customize"
449
  )
450
 
451
+ # Demonstration type selection
452
+ gr.Markdown("## 🎯 Few-Shot Prompting Settings")
453
  context_type = gr.Dropdown(
454
+ choices=["No demonstration", "Random sampling", "KNN-based sampling"],
455
+ value="No demonstration",
456
+ label="Demonstration type",
457
+ info="Choose the method for selecting context examples"
458
  )
459
 
460
  num_examples = gr.Slider(
 
467
  )
468
 
469
  with gr.Column(scale=2):
470
+ # Prompt area
471
+ gr.Markdown("## 🎯 Prompt")
472
  prompt_content = gr.Textbox(
473
+ label="Prompt ",
474
  placeholder="Select template or customize your prompt...",
475
  lines=15,
476
  max_lines=25,
477
+ info="Defines the task-specific prompt during geological NLP tasks"
478
  )
479
 
480
  # User input area
 
504
  gr.Markdown("## 💡 Usage Examples")
505
  gr.Examples(
506
  examples=[
507
+ ["gpt", "gpt-3.5-turbo", "No demonstration", 2, "The Noriba Gari Bao Formation originally refers to gray-green thick-bedded medium- to fine-grained lithic feldspar sandstone, feldspar quartz sandstone, feldspar sandstone occasionally interbedded with siltstone, clay rock and micritic limestone, only bivalve fossils are seen, and continuous deposition with the overlying Ninety Road Class Formation."],
508
+ ["gemini", "gemini-1.5-pro-002", "Random sampling", 3, "The Quemo Cuo Formation has only a small outcrop in the Sewang Yongqu area in the southwest corner of the map sheet within the survey area, with an area of less than 10m2 and a thickness greater than 29.25m."],
509
+ ["claude", "claude-3-5-haiku-20241022", "KNN-based sampling", 2, "Hecosmilia sp. scabbard coral was collected from limestone; Complexastraea sp. and Radulopccten sp. scraping sea fan; Oscillopha sp., dated to the Middle Jurassic."],
510
+ ["deepSeek", "deepseek-ai/DeepSeek-V3", "KNN-based sampling", 3, "Late Triassic granite is mainly distributed in the Ladi Gongma Mianche Ri Ahri Qu area of the survey area. Regionally controlled by NW-SE trending regional faults within the structural melange zone, it is distributed in long strips. The intrusive bodies have good gregariousness and excellent zonal extensibility, with 8 exposed intrusive bodies covering an area of about 227m2."],
511
  ],
512
  inputs=[model_series, model_name, context_type, num_examples, user_content]
513
  )
514
 
515
  # Event handling
516
def submit_request(series, name, template, prompt, content, ctx_type, num_ex):
    """Forward the triple-extraction form values to ``call_llm_model``.

    ``template`` is accepted because the event wiring passes the template
    dropdown as an input, but it is not used: only the prompt text itself
    is forwarded. The selected model name is used as-is.
    """
    return call_llm_model(
        model_series=series,
        model_name=name,
        prompt_content=prompt,
        user_content=content,
        context_type=ctx_type,
        num_examples=num_ex,
    )
 
519
 
520
  # Update model name options
521
  model_series.change(
 
524
  outputs=[model_name]
525
  )
526
 
527
+ # Update Prompt
528
  prompt_template.change(
529
  fn=update_prompt_content,
530
  inputs=[prompt_template],
 
534
  # Submit button event
535
  submit_btn.click(
536
  fn=submit_request,
537
+ inputs=[model_series, model_name, prompt_template, prompt_content, user_content, context_type, num_examples],
538
  outputs=[output]
539
  )
540
 
 
547
  # Enter key submission
548
  user_content.submit(
549
  fn=submit_request,
550
+ inputs=[model_series, model_name, prompt_template, prompt_content, user_content, context_type, num_examples],
551
  outputs=[output]
552
  )
553
 
554
  # QA module
555
+ with gr.TabItem("❓ Geological Q&A", elem_id="qa_module"):
556
  with gr.Row():
557
  with gr.Column(scale=1):
558
  # Model selection area
 
567
  qa_model_name = gr.Dropdown(
568
  choices=get_common_model_names("gpt"),
569
  value="gpt-3.5-turbo",
570
+ label="Select the specific model",
571
+ info="Select specific model name",
572
  allow_custom_value=True
573
  )
574
 
 
 
 
 
 
 
 
575
  # QA type selection
576
+ gr.Markdown("## 🎯 Geological Q&A")
577
  qa_type = gr.Dropdown(
578
+ choices=["Yes/No QA", "Factoid QA"],
579
+ value="Yes/No QA",
580
  label="Task Type",
581
  info="Choose between judging true/false or answering questions"
582
  )
 
585
  gr.Markdown("## 📝 Prompt Template")
586
  qa_prompt_template = gr.Dropdown(
587
  choices=list(get_qa_prompt_templates().keys()),
588
+ value="Yes/No QA",
589
  label="Select QA Prompt Template",
590
  info="Select predefined prompt template or customize"
591
  )
592
 
593
  with gr.Column(scale=2):
594
+ # QA Prompt area
595
+ gr.Markdown("## 🎯 Prompt")
596
  qa_prompt_content = gr.Textbox(
597
+ label="QA Prompt ",
598
  value="Please judge true or false based on the given text.",
599
  placeholder="Select template or customize your prompt...",
600
  lines=3,
601
  max_lines=10,
602
+ info="Defines the task-specific prompt during geological NLP tasks"
603
  )
604
 
605
  # Geological text input area
606
  gr.Markdown("## 📄 Geological Text")
607
  geological_text = gr.Textbox(
608
+ label="Contextual Text in Geological Domain",
609
  placeholder="Please input geological description text as background...",
610
  lines=8,
611
  max_lines=15,
612
+ info="Provides contextual information for Q&A tasks"
613
  )
614
 
615
  # Question or statement input area
 
639
  # Example area
640
  gr.Markdown("## 💡 Usage Examples")
641
 
642
+ # Yes/No QA examples
643
+ with gr.Accordion("Yes/No QA Examples", open=False):
644
  gr.Examples(
645
  examples=[
646
+ ["gpt", "gpt-3.5-turbo", "Yes/No QA", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes. There are 58 newly discovered geological disaster points, accounting for 30.5% of the total. Among the 190 collapses, landslides, debris flows and other sudden geological disasters in Huoshan County, most are caused by human factors. There are 163 geological disasters caused by human factors, accounting for 85.8%; there are 27 disasters formed by natural factors, accounting for 14.2%.", "In the sudden geological disasters in Huoshan County, the number of landslides exceeds the number of collapses."],
647
+ ["deepSeek", "deepseek-ai/DeepSeek-V3", "Yes/No QA", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes.", "The total number of geological disaster points in Huoshan County exceeds 200."],
648
  ],
649
  inputs=[qa_model_series, qa_model_name, qa_type, geological_text, question_or_statement]
650
  )
651
 
652
+ # Factoid QA examples
653
+ with gr.Accordion("Factoid QA Examples", open=False):
654
  gr.Examples(
655
  examples=[
656
+ ["gpt", "gpt-3.5-turbo", "Factoid QA", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes. There are 58 newly discovered geological disaster points, accounting for 30.5% of the total.", "How many sudden geological disaster points are there in Huoshan County in total?"],
657
+ ["claude", "claude-3-5-haiku-20241022", "Factoid QA", "Sudden geological disasters in Huoshan County are mainly collapses, landslides, and debris flows. A total of 190 sudden geological disaster points (including hidden danger points) have been identified, including 74 collapses, 96 landslides, 14 debris flows, and 6 unstable slopes.", "Among the geological disasters in Huoshan County, which type of disaster has the largest number?"],
658
  ],
659
  inputs=[qa_model_series, qa_model_name, qa_type, geological_text, question_or_statement]
660
  )
661
 
662
  # QA event handling
663
def submit_qa_request(series, name, q_type, template, prompt, geo_text, question):
    """Forward the QA form values to ``call_qa_model``.

    ``template`` is accepted because the event wiring passes the template
    dropdown as an input, but it is not used: only the prompt text itself
    is forwarded. The selected model name is used as-is (no custom-name
    override).
    """
    answer = call_qa_model(
        series,
        name,
        prompt,
        geo_text,
        question,
        q_type,
    )
    return answer
 
666
 
667
  def update_qa_prompt_on_type_change(qa_type_value):
668
  """Update prompt template options and content when QA type changes"""
669
+ if qa_type_value == "Yes/No QA":
670
+ new_choices = ["Custom", "Yes/No QA", "CoT prompting of Yes/No QA"]
671
+ new_value = "Yes/No QA"
672
  new_prompt = "Please judge true or false based on the given text."
673
  new_placeholder = "Please input statement to judge..."
674
  new_label = "Statement"
675
+ else: # Factoid QA
676
+ new_choices = ["Custom", "Factoid QA", "CoT prompting of Factoid QA"]
677
+ new_value = "Factoid QA"
678
  new_prompt = "Please answer the question based on the given text."
679
  new_placeholder = "Please input question to answer..."
680
  new_label = "Question"
681
 
682
  return (
683
  gr.Dropdown(choices=new_choices, value=new_value, label="Select QA Prompt Template"),
684
+ gr.Textbox(value=new_prompt, label="QA Prompt ", lines=3, max_lines=10),
685
  gr.Textbox(label=new_label, placeholder=new_placeholder, lines=3, max_lines=8)
686
  )
687
 
 
692
  outputs=[qa_model_name]
693
  )
694
 
695
+ # Update QA Prompt
696
  qa_prompt_template.change(
697
  fn=update_qa_prompt_content,
698
  inputs=[qa_prompt_template],
 
709
  # QA submit button event
710
  qa_submit_btn.click(
711
  fn=submit_qa_request,
712
+ inputs=[qa_model_series, qa_model_name, qa_type, qa_prompt_template, qa_prompt_content, geological_text, question_or_statement],
713
  outputs=[qa_output]
714
  )
715
 
 
719
  outputs=[geological_text, question_or_statement, qa_output]
720
  )
721
 
722
+ # QA Enter key submission
723
  question_or_statement.submit(
724
  fn=submit_qa_request,
725
+ inputs=[qa_model_series, qa_model_name, qa_type, qa_prompt_template, qa_prompt_content, geological_text, question_or_statement],
726
  outputs=[qa_output]
727
  )
728
 
729
  return demo
730
 
 
731
  if __name__ == "__main__":
732
  # Launch interface (password protection is currently disabled: the auth arguments below are commented out)
733
  demo = create_interface()
 
735
  server_port=7860,
736
  share=True,
737
  debug=True,
738
+ # auth=("geollm", "research2025"),
739
+ # auth_message="Please enter credentials to access GeoLLM Geological Intelligence Platform"
740
  )