Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ if hf_token is None:
|
|
| 18 |
login(token=hf_token)
|
| 19 |
|
| 20 |
# ------------------------
|
| 21 |
-
# 2️⃣ ساخت دیتاست ترکیبی
|
| 22 |
# ------------------------
|
| 23 |
def build_dataset():
|
| 24 |
print("Creating a small general dataset...")
|
|
@@ -27,15 +27,25 @@ def build_dataset():
|
|
| 27 |
{"domain":"general", "context":"What's your name?", "response":"I'm Derma ChatBot."}
|
| 28 |
]
|
| 29 |
|
|
|
|
| 30 |
print("Loading Dermatology QA (Mreeb)...")
|
| 31 |
derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
|
| 32 |
-
|
| 33 |
-
for item in derma]
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
print("Loading MedQuAD subset...")
|
| 36 |
medquad = load_dataset("pythonafroz/MedQuAD")['train']
|
| 37 |
derma_keywords = ["skin", "eczema", "psoriasis", "dermatitis", "melanoma", "acne", "rash"]
|
| 38 |
-
medquad_derma = [{"domain":"dermatology",
|
|
|
|
|
|
|
| 39 |
for item in medquad if any(k in item['question'].lower() for k in derma_keywords)]
|
| 40 |
random.shuffle(medquad_derma)
|
| 41 |
medquad_derma = medquad_derma[:500]
|
|
@@ -70,10 +80,9 @@ def build_dataset():
|
|
| 70 |
# 3️⃣ چت ساده با Gradio
|
| 71 |
# ------------------------
|
| 72 |
def simple_chat(user_input):
|
| 73 |
-
# جستجو در دیتاست برای پاسخ نزدیک (ساده)
|
| 74 |
with open("derma_chat_mix.jsonl", 'r', encoding='utf-8') as f:
|
| 75 |
data = [json.loads(line) for line in f]
|
| 76 |
-
|
| 77 |
best_match = None
|
| 78 |
max_overlap = 0
|
| 79 |
for item in data:
|
|
@@ -81,7 +90,7 @@ def simple_chat(user_input):
|
|
| 81 |
if overlap > max_overlap:
|
| 82 |
max_overlap = overlap
|
| 83 |
best_match = item['response']
|
| 84 |
-
|
| 85 |
if best_match:
|
| 86 |
return best_match
|
| 87 |
else:
|
|
|
|
| 18 |
login(token=hf_token)
|
| 19 |
|
| 20 |
# ------------------------
|
| 21 |
+
# 2️⃣ ساخت دیتاست ترکیبی امن
|
| 22 |
# ------------------------
|
| 23 |
def build_dataset():
|
| 24 |
print("Creating a small general dataset...")
|
|
|
|
| 27 |
{"domain":"general", "context":"What's your name?", "response":"I'm Derma ChatBot."}
|
| 28 |
]
|
| 29 |
|
| 30 |
+
# ----- Dermatology QA (Mreeb)
|
| 31 |
print("Loading Dermatology QA (Mreeb)...")
|
| 32 |
derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
|
| 33 |
+
print("Columns in Mreeb dataset:", derma.column_names)
|
|
|
|
| 34 |
|
| 35 |
+
derma_examples = []
|
| 36 |
+
for item in derma:
|
| 37 |
+
q = item.get('question') or item.get('Question') or item.get('Q')
|
| 38 |
+
a = item.get('answer') or item.get('Answer') or item.get('A')
|
| 39 |
+
if q and a:
|
| 40 |
+
derma_examples.append({"domain":"dermatology","context":q,"response":a})
|
| 41 |
+
|
| 42 |
+
# ----- MedQuAD subset
|
| 43 |
print("Loading MedQuAD subset...")
|
| 44 |
medquad = load_dataset("pythonafroz/MedQuAD")['train']
|
| 45 |
derma_keywords = ["skin", "eczema", "psoriasis", "dermatitis", "melanoma", "acne", "rash"]
|
| 46 |
+
medquad_derma = [{"domain":"dermatology",
|
| 47 |
+
"context":item['question'],
|
| 48 |
+
"response":item['answer']}
|
| 49 |
for item in medquad if any(k in item['question'].lower() for k in derma_keywords)]
|
| 50 |
random.shuffle(medquad_derma)
|
| 51 |
medquad_derma = medquad_derma[:500]
|
|
|
|
| 80 |
# 3️⃣ چت ساده با Gradio
|
| 81 |
# ------------------------
|
| 82 |
def simple_chat(user_input):
|
|
|
|
| 83 |
with open("derma_chat_mix.jsonl", 'r', encoding='utf-8') as f:
|
| 84 |
data = [json.loads(line) for line in f]
|
| 85 |
+
|
| 86 |
best_match = None
|
| 87 |
max_overlap = 0
|
| 88 |
for item in data:
|
|
|
|
| 90 |
if overlap > max_overlap:
|
| 91 |
max_overlap = overlap
|
| 92 |
best_match = item['response']
|
| 93 |
+
|
| 94 |
if best_match:
|
| 95 |
return best_match
|
| 96 |
else:
|