Derma / app.py
Shirjannn's picture
Update app.py
a483276 verified
# ===============================
# Derma Space: Dataset + Gradio Chatbot
# ===============================
import json
import random
import os
import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi, login, upload_file
# ------------------------
# 1️⃣ Log in with the HF_TOKEN secret
# ------------------------
# The access token is read from the HF_TOKEN environment variable; without it
# the script stops immediately instead of failing later.
try:
    hf_token = os.environ["HF_TOKEN"]
except KeyError:
    raise ValueError("HF_TOKEN not found in Secrets. Please add it in Space settings.") from None
login(token=hf_token)
# ------------------------
# 2️⃣ Build the combined (safe) dataset
# ------------------------
def build_dataset(repo_id="username/Derma"):
    """Build the mixed chat dataset, save it as JSONL and upload it to the Hub.

    The dataset is two hand-written general-conversation examples plus every
    row of the Mreeb dermatology QA dataset that has both a non-empty
    ``prompt`` and a non-empty ``response``. The combined examples are
    shuffled, written to ``derma_chat_mix.jsonl`` in the current directory
    (one JSON object per line with the keys ``domain``, ``context`` and
    ``response``) and uploaded to a dataset repository on the Hub.

    Args:
        repo_id: Target dataset repository, ``"<user>/<name>"``. The default
            is a placeholder; pass your own namespace instead of editing the
            function body.

    Returns:
        None. Progress is reported with ``print``.
    """
    output_file = "derma_chat_mix.jsonl"

    print("Creating a small general dataset...")
    general_examples = [
        {"domain": "general", "context": "Hello, how are you?", "response": "I'm good, thank you!"},
        {"domain": "general", "context": "What's your name?", "response": "I'm Derma ChatBot."},
    ]

    # ----- Dermatology QA (Mreeb)
    print("Loading Dermatology QA (Mreeb)...")
    derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")["train"]
    print("Columns in Mreeb dataset:", derma.column_names)
    # Mreeb uses 'prompt' instead of 'question'; rows where either field is
    # missing or empty are dropped.
    derma_examples = [
        {"domain": "dermatology", "context": q, "response": a}
        for q, a in ((item.get("prompt"), item.get("response")) for item in derma)
        if q and a
    ]

    all_examples = general_examples + derma_examples
    random.shuffle(all_examples)

    # Save as JSONL (ensure_ascii=False keeps non-ASCII text readable).
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in all_examples)
    print(f"✅ Dataset saved locally as {output_file} ({len(all_examples)} examples)")

    # Upload to the Hugging Face Hub; exist_ok=True makes re-runs harmless.
    api = HfApi()
    api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
    upload_file(
        path_or_fileobj=output_file,
        path_in_repo=output_file,
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Initial upload of text-based chat dataset",
    )
    print(f"✅ Dataset uploaded: https://huggingface.co/datasets/{repo_id}")
# ------------------------
# 3️⃣ Simple chat with Gradio
# ------------------------
def simple_chat(user_input):
    """Return the stored response whose context shares the most words with the input.

    Both the input and each example's ``context`` are lower-cased and split
    on whitespace; the score is the number of distinct tokens they have in
    common. The first example with the highest non-zero score wins.

    Args:
        user_input: The user's message. ``None`` or an empty/whitespace-only
            string yields the fallback message.

    Returns:
        The best-matching ``response`` string, or a fixed apology when no
        example shares a word with the input (or the best response is empty).

    Raises:
        FileNotFoundError: If ``derma_chat_mix.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an example lacks ``context`` or a winning example lacks
            ``response``.
    """
    fallback = "Sorry, I don't have a good answer for that. Try another question!"

    # Tokenise the query once: it is identical for every dataset row.
    query_words = set((user_input or "").lower().split())
    if not query_words:
        # Nothing to match against, so skip reading the dataset.
        return fallback

    best_match = None
    max_overlap = 0
    # Stream the file instead of materialising every example in a list.
    with open("derma_chat_mix.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            item = json.loads(line)
            overlap = len(query_words & set(item["context"].lower().split()))
            # Strict '>' keeps the earliest example on ties.
            if overlap > max_overlap:
                max_overlap = overlap
                best_match = item["response"]

    return best_match if best_match else fallback
# ------------------------
# 4️⃣ Set up Gradio
# ------------------------
# One text box in, one text box out: the user's message is passed to
# simple_chat and the returned string is shown in the "Derma ChatBot" box.
iface = gr.Interface(
    title="Derma ChatBot",
    description="A simple English chatbot combining general conversation + dermatology QA.",
    fn=simple_chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask about dermatology or chat casually..."),
    outputs=gr.Textbox(label="Derma ChatBot"),
)
# ------------------------
# 5️⃣ Build the dataset (if needed) and launch the interface
# ------------------------
if __name__ == "__main__":
    # The dataset is built (and uploaded) only when the JSONL file is not
    # already present; later starts reuse the existing file.
    dataset_present = os.path.exists("derma_chat_mix.jsonl")
    if not dataset_present:
        build_dataset()
    iface.launch()