File size: 3,561 Bytes
387028e 88a2e7f 387028e a502e92 88a2e7f 1cbb046 387028e 2d218e5 88a2e7f a502e92 1cbb046 a502e92 88a2e7f a502e92 a483276 a502e92 a483276 1cbb046 88a2e7f 1cbb046 88a2e7f 1cbb046 387028e 1cbb046 387028e a502e92 387028e a502e92 387028e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# ===============================
# Derma Space: Dataset + Gradio Chatbot
# ===============================
import json
import random
import os
import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi, login, upload_file
# ------------------------
# 1️⃣ Log in with the Space secret
# ------------------------
# Read the Hugging Face access token from the environment; the error message
# below points at the Space settings ("Secrets") as the place to define it.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # An unset variable and an empty value are equally unusable, so both fail
    # here with the same actionable message instead of reaching login().
    raise ValueError("HF_TOKEN not found in Secrets. Please add it in Space settings.")
login(token=hf_token)
# ------------------------
# 2️⃣ Build the safe combined dataset
# ------------------------
def build_dataset(repo_id="username/Derma", output_file="derma_chat_mix.jsonl"):
    """Build the mixed chat dataset, save it as JSONL, and upload it to the Hub.

    The dataset is two hard-coded general-conversation examples plus every row
    of the Mreeb dermatology QA ``train`` split that has both a ``prompt`` and
    a ``response``. The combined list is shuffled, written one JSON object per
    line to ``output_file``, and uploaded to the dataset repo ``repo_id``
    (created if it does not exist yet).

    Args:
        repo_id: Target dataset repository. The default is a placeholder;
            change it to a repository under your own username.
        output_file: Local path of the JSONL file to write. Its base name is
            used as the file name inside the repository.
    """
    print("Creating a small general dataset...")
    general_examples = [
        {"domain": "general", "context": "Hello, how are you?", "response": "I'm good, thank you!"},
        {"domain": "general", "context": "What's your name?", "response": "I'm Derma ChatBot."},
    ]

    # ----- Dermatology QA (Mreeb)
    print("Loading Dermatology QA (Mreeb)...")
    derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")["train"]
    print("Columns in Mreeb dataset:", derma.column_names)

    derma_examples = []
    for item in derma:
        question = item.get("prompt")  # Mreeb uses 'prompt' instead of 'question'
        answer = item.get("response")
        if not (question and answer):
            continue  # skip rows missing either side of the QA pair
        derma_examples.append(
            {"domain": "dermatology", "context": question, "response": answer}
        )
    if not derma_examples:
        # Make a column-name mismatch visible instead of silently shipping a
        # dataset that only contains the general examples.
        print("⚠️ No rows with both 'prompt' and 'response' were found in the Mreeb dataset.")

    all_examples = general_examples + derma_examples
    random.shuffle(all_examples)

    # Save as JSONL (one JSON object per line; keep non-ASCII text readable).
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in all_examples)
    print(f"✅ Dataset saved locally as {output_file} ({len(all_examples)} examples)")

    # Upload to the Hugging Face Hub.
    api = HfApi()
    api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
    upload_file(
        path_or_fileobj=output_file,
        # Only the file name goes into the repo, even if output_file has directories.
        path_in_repo=os.path.basename(output_file),
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Initial upload of text-based chat dataset",
    )
    print(f"✅ Dataset uploaded: https://huggingface.co/datasets/{repo_id}")
# ------------------------
# 3️⃣ Simple chat with Gradio
# ------------------------
_DATASET_PATH = "derma_chat_mix.jsonl"
_NO_MATCH_REPLY = "Sorry, I don't have a good answer for that. Try another question!"
# Characters stripped from both ends of each word so that "name?" matches "name".
_EDGE_PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
# Parsed dataset plus the file signature it was parsed from (see _load_examples).
_dataset_cache = {"signature": None, "examples": []}


def _tokenize(text):
    """Return the set of lowercase words in *text* with edge punctuation removed."""
    stripped = (raw.strip(_EDGE_PUNCTUATION) for raw in text.lower().split())
    return {word for word in stripped if word}


def _load_examples(path=_DATASET_PATH):
    """Return ``(context_tokens, response)`` pairs parsed from the JSONL file.

    The parsed list is kept in memory and only rebuilt when the file's
    modification time or size changes, so the file is not re-read and
    re-tokenized on every chat request.
    """
    stat = os.stat(path)
    signature = (path, stat.st_mtime_ns, stat.st_size)
    if _dataset_cache["signature"] != signature:
        examples = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines instead of failing in json.loads
                item = json.loads(line)
                examples.append((_tokenize(item["context"]), item["response"]))
        _dataset_cache["examples"] = examples
        _dataset_cache["signature"] = signature
    return _dataset_cache["examples"]


def simple_chat(user_input):
    """Answer *user_input* with the response of the best-matching dataset entry.

    The match score is the number of distinct words shared between the input
    and an entry's ``context`` (case-insensitive, punctuation at word edges
    ignored). The first entry with the highest non-zero score wins.

    Args:
        user_input: The user's message; ``None`` or empty text is allowed.

    Returns:
        The matched entry's ``response``, or a fixed apology string when no
        entry shares a word with the input.

    Raises:
        FileNotFoundError: If ``derma_chat_mix.jsonl`` does not exist and the
            input contains at least one word.
    """
    query_tokens = _tokenize(user_input or "")
    if not query_tokens:
        return _NO_MATCH_REPLY

    best_match = None
    max_overlap = 0
    for context_tokens, response in _load_examples():
        overlap = len(query_tokens & context_tokens)
        if overlap > max_overlap:  # strict '>' keeps the earliest entry on ties
            max_overlap = overlap
            best_match = response

    return best_match if best_match else _NO_MATCH_REPLY
# ------------------------
# 4️⃣ Set up Gradio
# ------------------------
# Single-function UI: one two-line question box in, one labelled answer box
# out, with every submission routed through simple_chat.
iface = gr.Interface(
    title="Derma ChatBot",
    description="A simple English chatbot combining general conversation + dermatology QA.",
    fn=simple_chat,
    inputs=gr.Textbox(
        placeholder="Ask about dermatology or chat casually...",
        lines=2,
    ),
    outputs=gr.Textbox(label="Derma ChatBot"),
)
# ------------------------
# 5️⃣ Run the dataset build + interface
# ------------------------
if __name__ == "__main__":
    # Build (and upload) the dataset only on the first run; later runs reuse
    # the JSONL file that is already on disk.
    dataset_ready = os.path.exists("derma_chat_mix.jsonl")
    if not dataset_ready:
        build_dataset()

    # Start the Gradio app.
    iface.launch()
|