# GeoLLM / app.py — HuggingFace Space entry point
# Commit 228915a (AshkanTaghipour): switch map from PNG (3.2MB) to JPG (521KB)
# for faster loading.
"""
GeoLLM Demo — Mineral Exploration Geology Assistant
Gradio chat interface for GeoLLM-Qwen3.5-0.8B running on CPU with transformers.
Designed for HuggingFace Spaces free tier (2 vCPUs, 16 GB RAM).
"""
import threading
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# HuggingFace Hub ID of the fine-tuned checkpoint served by this Space.
MODEL_ID = "AshkanTaghipour/GeoLLM-Qwen3.5-0.8B"
# Persona injected as the first message of every conversation; it fixes the
# model's domain focus (WA/QLD mineral exploration) and answer tone.
SYSTEM_PROMPT = (
    "You are a specialist geologist and exploration consultant with over "
    "10 years of experience in Western Australian and Queensland mineral "
    "exploration. You provide expert advice on geological interpretation, "
    "exploration methods, deposit models, geochemistry, geophysics, and "
    "drilling strategies. You answer like a knowledgeable colleague — concise, "
    "technically specific, and grounded in real geological data."
)
# Canned questions rendered as one-click buttons below the chat box.
EXAMPLES = [
    "What geophysical methods target komatiite-hosted nickel sulphides in the Eastern Goldfields?",
    "What are the key pathfinder elements for orogenic gold in the Yilgarn Craton?",
    "How would you design a soil geochemistry survey for lithium pegmatite exploration?",
    "What structural controls are important for VMS base metal deposits?",
    "Explain the difference between IOCG and orogenic gold deposit models.",
]
# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------
# Runs at import time so the model is ready before the UI accepts requests.
print(f"Loading {MODEL_ID} ...")
# trust_remote_code is required for the Qwen custom tokenizer/model classes.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # full precision on CPU (free-tier Space, no GPU)
    device_map="cpu",
    trust_remote_code=True,
)
model.eval()  # inference mode: disables dropout and similar training behavior
print("Model ready.")
# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------
def respond(message, chat_history):
    """Stream a reply to *message*, extending *chat_history*.

    Parameters
    ----------
    message : str
        The user's question from the textbox.
    chat_history : list[list[str]]
        Gradio tuples-format history: ``[[user_msg, assistant_msg], ...]``.

    Yields
    ------
    tuple[str, list]
        ``("", updated_history)`` after each streamed chunk, so Gradio clears
        the textbox and re-renders the chatbot incrementally.
    """
    # Normalize once so the prompt and the stored history never carry
    # accidental leading/trailing whitespace (the original only checked
    # strip() but sent the raw text).
    message = message.strip()
    if not message:
        yield "", chat_history
        return
    # Rebuild the full conversation each call — the model is stateless.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in chat_history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Qwen: suppress <think> reasoning blocks
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True,
    )
    generate_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        temperature=0.6,
        top_p=0.95,
        do_sample=True,
        streamer=streamer,
        # Explicit pad token avoids the per-call "Setting pad_token_id"
        # warning from generate() on models without a pad token.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Daemon thread: if the client disconnects mid-stream and this generator
    # is abandoned before join(), the worker cannot keep the process alive.
    thread = threading.Thread(
        target=model.generate, kwargs=generate_kwargs, daemon=True,
    )
    thread.start()
    chat_history = chat_history + [[message, ""]]
    for new_text in streamer:
        chat_history[-1][1] += new_text
        yield "", chat_history
    thread.join()
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
theme = gr.themes.Soft(
    font=gr.themes.GoogleFont("Sora"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
)
with gr.Blocks(theme=theme, title="GeoLLM") as demo:
    # --- Header ---
    gr.Markdown(
        "# \u26cf\ufe0f GeoLLM — Mineral Exploration Geology Assistant"
    )
    # --- Intro + Map side by side ---
    with gr.Row():
        # Wider column: project blurb with model/code/dataset links.
        with gr.Column(scale=3):
            gr.Markdown(
                "\U0001f30f **Domain-adapted LLM for mineral exploration geology.**\n\n"
                "Ask questions about deposit models, geochemistry, geophysics, "
                "drilling strategies, and regional geology — the model answers "
                "like a knowledgeable colleague.\n\n"
                "\U0001f4da Trained on **479 expert QA pairs** from "
                "~300 Western Australian exploration reports "
                "([WAMEX](https://www.dmp.wa.gov.au/WAMEX-Minerals-Exploration-1476.aspx)).\n\n"
                "\U0001f9e0 **Model:** "
                "[GeoLLM-Qwen3.5-0.8B](https://huggingface.co/AshkanTaghipour/GeoLLM-Qwen3.5-0.8B) "
                "(0.8B params)  |\u00a0 "
                "\U0001f4bb **Code:** "
                "[GitHub](https://github.com/AshkanTaghipour/GeoLLM-Qwen3.5-FineTune)  |\u00a0 "
                "\U0001f4e6 **Dataset:** "
                "[HuggingFace](https://huggingface.co/datasets/AshkanTaghipour/mineral-exploration-geology-qa)\n\n"
                "\u23f3 *Running on free CPU — responses may take 30\u201360 seconds.*"
            )
        # Narrower column: static WA mining map (JPG for faster load).
        with gr.Column(scale=1, min_width=200):
            gr.Image(
                value="wa_mining_map.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                container=False,
                height=220,
            )
            gr.Markdown(
                "<center>\U0001f4cd <b>Focus: Western Australia</b><br>"
                "Yilgarn \u00b7 Pilbara \u00b7 Murchison \u00b7 Gascoyne</center>"
            )
    # --- Chat ---
    chatbot = gr.Chatbot(height=450)
    msg = gr.Textbox(
        placeholder="\u270d\ufe0f Ask a geology question...",
        show_label=False,
        container=False,
    )
    with gr.Row():
        submit_btn = gr.Button("\U0001f680 Submit", variant="primary")
        clear_btn = gr.Button("\U0001f5d1\ufe0f Clear")
    # --- Examples ---
    gr.Markdown("**\U0001f4a1 Example questions:**")
    with gr.Row():
        for ex in EXAMPLES:
            btn = gr.Button(ex, size="sm")
            # First fill the textbox with the example text (default arg e=ex
            # avoids the late-binding closure pitfall), then chain into the
            # normal respond flow reading the now-populated textbox.
            btn.click(
                lambda e=ex: e, inputs=[], outputs=[msg], api_name=False,
            ).then(
                respond, inputs=[msg, chatbot], outputs=[msg, chatbot], api_name=False,
            )
    # --- Footer ---
    gr.Markdown(
        "---\n"
        "Built by **[Ashkan Taghipour](https://github.com/AshkanTaghipour)** "
        "\u00b7 Powered by [Qwen3.5](https://huggingface.co/Qwen) + "
        "[LoRA fine-tuning](https://github.com/AshkanTaghipour/GeoLLM-Qwen3.5-FineTune)"
    )
    # --- Wire up actions (api_name=False avoids Gradio schema bug) ---
    msg.submit(respond, [msg, chatbot], [msg, chatbot], api_name=False)
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot], api_name=False)
    # Clear resets the chatbot to an empty history and blanks the textbox.
    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg], api_name=False)
# Launch the app only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()