File size: 3,561 Bytes
387028e
 
 
 
 
 
 
 
 
 
 
88a2e7f
387028e
 
 
 
 
 
 
 
a502e92
88a2e7f
1cbb046
387028e
2d218e5
 
 
 
88a2e7f
a502e92
1cbb046
 
a502e92
88a2e7f
a502e92
 
a483276
 
a502e92
 
 
a483276
1cbb046
88a2e7f
1cbb046
 
 
 
 
 
88a2e7f
1cbb046
387028e
1cbb046
 
 
 
 
 
 
 
 
 
387028e
 
 
 
 
 
 
a502e92
387028e
 
 
 
 
 
 
a502e92
387028e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# ===============================
# Derma Space: Dataset + Gradio Chatbot
# ===============================

import json
import random
import os
import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi, login, upload_file

# ------------------------
# 1️⃣ Log in with the Space secret
# ------------------------
# Authenticate with the Hugging Face Hub using the token stored in the
# HF_TOKEN environment variable. Startup is aborted with a ValueError when
# the variable is not set at all.
if "HF_TOKEN" not in os.environ:
    raise ValueError("HF_TOKEN not found in Secrets. Please add it in Space settings.")
hf_token = os.environ["HF_TOKEN"]
login(token=hf_token)

# ------------------------
# 2️⃣ Build the safe combined dataset
# ------------------------
def build_dataset(output_file="derma_chat_mix.jsonl", repo_id="username/Derma"):
    """Build the mixed chat dataset, save it as JSONL and upload it to the Hub.

    Two hand-written general-conversation examples are combined with every
    row of the Mreeb dermatology QA dataset that has both a non-empty
    ``prompt`` and a non-empty ``response``. The combined list is shuffled,
    written one JSON object per line to ``output_file`` and then uploaded to
    the dataset repository ``repo_id``.

    Args:
        output_file: Local path of the JSONL file to write. Its base name is
            used as the file's path inside the Hub repository.
        repo_id: Target dataset repository in ``"owner/name"`` form. The
            default is a placeholder; pass (or change it to) a repository
            owned by your own account.

    Returns:
        None. Progress is reported with ``print``.
    """
    print("Creating a small general dataset...")
    general_examples = [
        {"domain":"general", "context":"Hello, how are you?", "response":"I'm good, thank you!"},
        {"domain":"general", "context":"What's your name?", "response":"I'm Derma ChatBot."}
    ]

    # ----- Dermatology QA (Mreeb)
    print("Loading Dermatology QA (Mreeb)...")
    derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
    print("Columns in Mreeb dataset:", derma.column_names)

    # Mreeb uses 'prompt' instead of 'question'; rows missing either field
    # (or holding an empty value) are skipped.
    derma_examples = [
        {"domain": "dermatology", "context": item["prompt"], "response": item["response"]}
        for item in derma
        if item.get("prompt") and item.get("response")
    ]

    all_examples = general_examples + derma_examples
    random.shuffle(all_examples)

    # Save as JSONL: one JSON object per line, non-ASCII text kept readable.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in all_examples)
    print(f"✅ Dataset saved locally as {output_file} ({len(all_examples)} examples)")

    # Upload to the Hugging Face Hub; exist_ok=True makes re-runs tolerate an
    # already existing repository.
    api = HfApi()
    api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
    upload_file(
        path_or_fileobj=output_file,
        # Store the file at the repo root even if output_file has directories.
        path_in_repo=os.path.basename(output_file),
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Initial upload of text-based chat dataset"
    )
    print(f"✅ Dataset uploaded: https://huggingface.co/datasets/{repo_id}")

# ------------------------
# 3️⃣ Simple chat with Gradio
# ------------------------
def simple_chat(user_input: str) -> str:
    """Return the stored response whose context shares the most words with the input.

    Both the input and each dataset ``context`` are lower-cased and split on
    whitespace; the score is the number of distinct tokens they have in
    common. The first entry with the highest non-zero score wins.

    Args:
        user_input: The user's message. ``None``, an empty string or a
            whitespace-only string yields the fallback message without
            reading the dataset file.

    Returns:
        The ``response`` of the best-matching entry, or a fixed apology
        message when no entry shares any word with the input.

    Raises:
        FileNotFoundError: If ``derma_chat_mix.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an entry lacks ``context`` or ``response``.
    """
    fallback = "Sorry, I don't have a good answer for that. Try another question!"

    # Tokenize the query once; it is invariant across dataset entries.
    query_words = set((user_input or "").lower().split())
    if not query_words:
        return fallback

    best_match = None
    max_overlap = 0
    # Stream the file line by line instead of materializing every record.
    with open("derma_chat_mix.jsonl", 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
            if not line.strip():
                continue
            item = json.loads(line)
            overlap = len(query_words & set(item['context'].lower().split()))
            # Strict '>' keeps the earliest entry on ties.
            if overlap > max_overlap:
                max_overlap = overlap
                best_match = item['response']

    return best_match if best_match else fallback

# ------------------------
# 4️⃣ Set up the Gradio interface
# ------------------------
# Gradio UI: a two-line free-text input wired to simple_chat and a single
# labelled text output for the bot's reply.
_question_box = gr.Textbox(lines=2, placeholder="Ask about dermatology or chat casually...")
_answer_box = gr.Textbox(label="Derma ChatBot")
iface = gr.Interface(
    fn=simple_chat,
    inputs=_question_box,
    outputs=_answer_box,
    title="Derma ChatBot",
    description="A simple English chatbot combining general conversation + dermatology QA.",
)

# ------------------------
# 5️⃣ Run the dataset build + interface
# ------------------------
if __name__ == "__main__":
    # Build (and upload) the dataset only when the JSONL file is not on disk
    # yet; otherwise go straight to serving the UI.
    dataset_ready = os.path.exists("derma_chat_mix.jsonl")
    if not dataset_ready:
        build_dataset()
    iface.launch()