# GeoLLM / app.py — HuggingFace Space entry point
# Commit 228915a (AshkanTaghipour): switch map from PNG (3.2MB) to JPG (521KB)
# for faster loading.
"""
GeoLLM Demo — Mineral Exploration Geology Assistant
Gradio chat interface for GeoLLM-Qwen3.5-0.8B running on CPU with transformers.
Designed for HuggingFace Spaces free tier (2 vCPUs, 16 GB RAM).
"""
import threading
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# HuggingFace Hub ID of the fine-tuned checkpoint served by this Space.
MODEL_ID = "AshkanTaghipour/GeoLLM-Qwen3.5-0.8B"
# Persona injected as the first message of every conversation; it fixes the
# model's domain focus (WA/QLD mineral exploration) and answer tone.
SYSTEM_PROMPT = (
    "You are a specialist geologist and exploration consultant with over "
    "10 years of experience in Western Australian and Queensland mineral "
    "exploration. You provide expert advice on geological interpretation, "
    "exploration methods, deposit models, geochemistry, geophysics, and "
    "drilling strategies. You answer like a knowledgeable colleague — concise, "
    "technically specific, and grounded in real geological data."
)
# Canned questions rendered as one-click buttons below the chat box.
EXAMPLES = [
    "What geophysical methods target komatiite-hosted nickel sulphides in the Eastern Goldfields?",
    "What are the key pathfinder elements for orogenic gold in the Yilgarn Craton?",
    "How would you design a soil geochemistry survey for lithium pegmatite exploration?",
    "What structural controls are important for VMS base metal deposits?",
    "Explain the difference between IOCG and orogenic gold deposit models.",
]
# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------
# Runs at import time so the model is ready before the UI accepts requests.
print(f"Loading {MODEL_ID} ...")
# trust_remote_code is required for the Qwen custom tokenizer/model classes.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # full precision on CPU (free-tier Space, no GPU)
    device_map="cpu",
    trust_remote_code=True,
)
model.eval()  # inference mode: disables dropout and similar training behavior
print("Model ready.")
# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------
def respond(message, chat_history):
    """Stream a reply to *message*, extending *chat_history*.

    Parameters
    ----------
    message : str
        The user's question from the textbox.
    chat_history : list[list[str]]
        Gradio tuples-format history: ``[[user_msg, assistant_msg], ...]``.

    Yields
    ------
    tuple[str, list]
        ``("", updated_history)`` after each streamed chunk, so Gradio clears
        the textbox and re-renders the chatbot incrementally.
    """
    # Normalize once so the prompt and the stored history never carry
    # accidental leading/trailing whitespace (the original only checked
    # strip() but sent the raw text).
    message = message.strip()
    if not message:
        yield "", chat_history
        return
    # Rebuild the full conversation each call — the model is stateless.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in chat_history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Qwen: suppress <think> reasoning blocks
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True,
    )
    generate_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        temperature=0.6,
        top_p=0.95,
        do_sample=True,
        streamer=streamer,
        # Explicit pad token avoids the per-call "Setting pad_token_id"
        # warning from generate() on models without a pad token.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Daemon thread: if the client disconnects mid-stream and this generator
    # is abandoned before join(), the worker cannot keep the process alive.
    thread = threading.Thread(
        target=model.generate, kwargs=generate_kwargs, daemon=True,
    )
    thread.start()
    chat_history = chat_history + [[message, ""]]
    for new_text in streamer:
        chat_history[-1][1] += new_text
        yield "", chat_history
    thread.join()
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
theme = gr.themes.Soft(
    font=gr.themes.GoogleFont("Sora"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
)
with gr.Blocks(theme=theme, title="GeoLLM") as demo:
    # --- Header ---
    gr.Markdown(
        "# \u26cf\ufe0f GeoLLM — Mineral Exploration Geology Assistant"
    )
    # --- Intro + Map side by side ---
    with gr.Row():
        # Wider column: project blurb with model/code/dataset links.
        with gr.Column(scale=3):
            gr.Markdown(
                "\U0001f30f **Domain-adapted LLM for mineral exploration geology.**\n\n"
                "Ask questions about deposit models, geochemistry, geophysics, "
                "drilling strategies, and regional geology — the model answers "
                "like a knowledgeable colleague.\n\n"
                "\U0001f4da Trained on **479 expert QA pairs** from "
                "~300 Western Australian exploration reports "
                "([WAMEX](https://www.dmp.wa.gov.au/WAMEX-Minerals-Exploration-1476.aspx)).\n\n"
                "\U0001f9e0 **Model:** "
                "[GeoLLM-Qwen3.5-0.8B](https://huggingface.co/AshkanTaghipour/GeoLLM-Qwen3.5-0.8B) "
                "(0.8B params)  |\u00a0 "
                "\U0001f4bb **Code:** "
                "[GitHub](https://github.com/AshkanTaghipour/GeoLLM-Qwen3.5-FineTune)  |\u00a0 "
                "\U0001f4e6 **Dataset:** "
                "[HuggingFace](https://huggingface.co/datasets/AshkanTaghipour/mineral-exploration-geology-qa)\n\n"
                "\u23f3 *Running on free CPU — responses may take 30\u201360 seconds.*"
            )
        # Narrower column: static WA mining map (JPG for faster load).
        with gr.Column(scale=1, min_width=200):
            gr.Image(
                value="wa_mining_map.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                container=False,
                height=220,
            )
            gr.Markdown(
                "<center>\U0001f4cd <b>Focus: Western Australia</b><br>"
                "Yilgarn \u00b7 Pilbara \u00b7 Murchison \u00b7 Gascoyne</center>"
            )
    # --- Chat ---
    chatbot = gr.Chatbot(height=450)
    msg = gr.Textbox(
        placeholder="\u270d\ufe0f Ask a geology question...",
        show_label=False,
        container=False,
    )
    with gr.Row():
        submit_btn = gr.Button("\U0001f680 Submit", variant="primary")
        clear_btn = gr.Button("\U0001f5d1\ufe0f Clear")
    # --- Examples ---
    gr.Markdown("**\U0001f4a1 Example questions:**")
    with gr.Row():
        for ex in EXAMPLES:
            btn = gr.Button(ex, size="sm")
            # First fill the textbox with the example text (default arg e=ex
            # avoids the late-binding closure pitfall), then chain into the
            # normal respond flow reading the now-populated textbox.
            btn.click(
                lambda e=ex: e, inputs=[], outputs=[msg], api_name=False,
            ).then(
                respond, inputs=[msg, chatbot], outputs=[msg, chatbot], api_name=False,
            )
    # --- Footer ---
    gr.Markdown(
        "---\n"
        "Built by **[Ashkan Taghipour](https://github.com/AshkanTaghipour)** "
        "\u00b7 Powered by [Qwen3.5](https://huggingface.co/Qwen) + "
        "[LoRA fine-tuning](https://github.com/AshkanTaghipour/GeoLLM-Qwen3.5-FineTune)"
    )
    # --- Wire up actions (api_name=False avoids Gradio schema bug) ---
    msg.submit(respond, [msg, chatbot], [msg, chatbot], api_name=False)
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot], api_name=False)
    # Clear resets the chatbot to an empty history and blanks the textbox.
    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg], api_name=False)
# Launch the app only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()