Spaces:

Shirjannn
/

Derma

Runtime error

App Files Files Community

Derma / app.py

Shirjannn

Update app.py

a483276 verified 5 months ago

raw

history blame contribute delete

3.56 kB

	# ===============================
	# Derma Space: Dataset + Gradio Chatbot
	# ===============================

	import json
	import random
	import os
	import gradio as gr
	from datasets import load_dataset
	from huggingface_hub import HfApi, login, upload_file

	# ------------------------
	# 1️⃣ ورود با Secret
	# ------------------------
	# Read the Hugging Face access token from the Space secret named HF_TOKEN.
	# A blank or whitespace-only value is as unusable as a missing one, and a
	# secret pasted with a trailing newline should not be sent as-is, so the
	# value is normalised before it is checked.
	hf_token = (os.environ.get("HF_TOKEN") or "").strip()
	if not hf_token:
	    # Fail fast with an actionable message instead of a later auth error.
	    raise ValueError("HF_TOKEN not found in Secrets. Please add it in Space settings.")
	login(token=hf_token)

	# ------------------------
	# 2️⃣ ساخت دیتاست ترکیبی امن
	# ------------------------
	def build_dataset(repo_id=None):
	    """Build the mixed chat dataset, save it as JSONL and upload it to the Hub.

	    Two hand-written general-conversation examples are combined with every
	    row of the Mreeb dermatology QA dataset that has both a ``prompt`` and a
	    ``response``. The shuffled result is written to ``derma_chat_mix.jsonl``
	    in the working directory and then uploaded to a dataset repository.

	    Args:
	        repo_id: Target dataset repository as ``"namespace/name"``. When
	            omitted, the namespace is taken from the account that owns the
	            current token and the repository is named ``Derma``.

	    Raises:
	        Any exception raised by ``load_dataset``, file I/O or the Hub API
	        calls is propagated unchanged.
	    """
	    print("Creating a small general dataset...")
	    general_examples = [
	        {"domain": "general", "context": "Hello, how are you?", "response": "I'm good, thank you!"},
	        {"domain": "general", "context": "What's your name?", "response": "I'm Derma ChatBot."},
	    ]

	    # ----- Dermatology QA (Mreeb)
	    print("Loading Dermatology QA (Mreeb)...")
	    derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
	    print("Columns in Mreeb dataset:", derma.column_names)

	    # Mreeb uses 'prompt' instead of 'question'; rows missing either field
	    # (or holding an empty string) are dropped.
	    derma_examples = [
	        {"domain": "dermatology", "context": item["prompt"], "response": item["response"]}
	        for item in derma
	        if item.get("prompt") and item.get("response")
	    ]

	    all_examples = general_examples + derma_examples
	    random.shuffle(all_examples)

	    # Save as JSONL: one JSON object per line, non-ASCII text kept readable.
	    output_file = "derma_chat_mix.jsonl"
	    with open(output_file, 'w', encoding='utf-8') as f:
	        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in all_examples)
	    print(f"✅ Dataset saved locally as {output_file} ({len(all_examples)} examples)")

	    # Upload to the Hugging Face Hub.
	    api = HfApi()
	    if repo_id is None:
	        # The original hard-coded "username/Derma" placeholder only works for
	        # an account literally called "username"; resolve the namespace from
	        # the authenticated account instead.
	        repo_id = f"{api.whoami()['name']}/Derma"
	    api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
	    upload_file(
	        path_or_fileobj=output_file,
	        path_in_repo=output_file,
	        repo_id=repo_id,
	        repo_type="dataset",
	        commit_message="Initial upload of text-based chat dataset",
	    )
	    print(f"✅ Dataset uploaded: https://huggingface.co/datasets/{repo_id}")

	# ------------------------
	# 3️⃣ چت ساده با Gradio
	# ------------------------
	def simple_chat(user_input):
	    """Return the stored response whose context shares the most words with the input.

	    The input and each example's ``context`` are lower-cased and split on
	    whitespace; the example with the largest number of shared tokens wins,
	    and the earliest example wins ties. Examples are read from
	    ``derma_chat_mix.jsonl`` in the working directory on every call.

	    Args:
	        user_input: The user's message. ``None`` or a blank string yields the
	            fallback reply.

	    Returns:
	        The best-matching ``response`` string, or a fixed apology when no
	        example shares a word with the input.

	    Raises:
	        FileNotFoundError: If the dataset file does not exist and the input
	            contains at least one word.
	    """
	    fallback = "Sorry, I don't have a good answer for that. Try another question!"

	    # Tokenise the query once instead of once per dataset row.
	    query_words = set((user_input or "").lower().split())
	    if not query_words:
	        # Nothing to match against, so no example can score above zero.
	        return fallback

	    best_match = None
	    max_overlap = 0
	    with open("derma_chat_mix.jsonl", 'r', encoding='utf-8') as f:
	        for line in f:
	            if not line.strip():
	                # Tolerate blank lines (e.g. a trailing newline) in the file.
	                continue
	            item = json.loads(line)
	            overlap = len(query_words & set(item['context'].lower().split()))
	            # Strict '>' keeps the first example among equally good matches.
	            if overlap > max_overlap:
	                max_overlap = overlap
	                best_match = item['response']

	    return best_match if best_match else fallback

	# ------------------------
	# 4️⃣ راه‌اندازی Gradio
	# ------------------------
	# Build the input and output text components first, then wire them together
	# with the chat function in a single Gradio Interface.
	_question_box = gr.Textbox(lines=2, placeholder="Ask about dermatology or chat casually...")
	_answer_box = gr.Textbox(label="Derma ChatBot")
	iface = gr.Interface(
	    fn=simple_chat,
	    inputs=_question_box,
	    outputs=_answer_box,
	    title="Derma ChatBot",
	    description="A simple English chatbot combining general conversation + dermatology QA.",
	)

	# ------------------------
	# 5️⃣ اجرای دیتاست + رابط
	# ------------------------
	if __name__ == "__main__":
	    # Only build (and upload) the dataset when no local copy exists yet,
	    # then start the web interface.
	    dataset_missing = not os.path.exists("derma_chat_mix.jsonl")
	    if dataset_missing:
	        build_dataset()
	    iface.launch()