Shirjannn committed on
Commit
a502e92
·
verified ·
1 Parent(s): 387028e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -7
app.py CHANGED
@@ -18,7 +18,7 @@ if hf_token is None:
18
  login(token=hf_token)
19
 
20
  # ------------------------
21
- # 2️⃣ ساخت دیتاست ترکیبی
22
  # ------------------------
23
  def build_dataset():
24
  print("Creating a small general dataset...")
@@ -27,15 +27,25 @@ def build_dataset():
27
  {"domain":"general", "context":"What's your name?", "response":"I'm Derma ChatBot."}
28
  ]
29
 
 
30
  print("Loading Dermatology QA (Mreeb)...")
31
  derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
32
- derma_examples = [{"domain":"dermatology","context":item['question'],"response":item['answer']}
33
- for item in derma]
34
 
 
 
 
 
 
 
 
 
35
  print("Loading MedQuAD subset...")
36
  medquad = load_dataset("pythonafroz/MedQuAD")['train']
37
  derma_keywords = ["skin", "eczema", "psoriasis", "dermatitis", "melanoma", "acne", "rash"]
38
- medquad_derma = [{"domain":"dermatology","context":item['question'],"response":item['answer']}
 
 
39
  for item in medquad if any(k in item['question'].lower() for k in derma_keywords)]
40
  random.shuffle(medquad_derma)
41
  medquad_derma = medquad_derma[:500]
@@ -70,10 +80,9 @@ def build_dataset():
70
  # 3️⃣ چت ساده با Gradio
71
  # ------------------------
72
  def simple_chat(user_input):
73
- # جستجو در دیتاست برای پاسخ نزدیک (ساده)
74
  with open("derma_chat_mix.jsonl", 'r', encoding='utf-8') as f:
75
  data = [json.loads(line) for line in f]
76
-
77
  best_match = None
78
  max_overlap = 0
79
  for item in data:
@@ -81,7 +90,7 @@ def simple_chat(user_input):
81
  if overlap > max_overlap:
82
  max_overlap = overlap
83
  best_match = item['response']
84
-
85
  if best_match:
86
  return best_match
87
  else:
 
18
  login(token=hf_token)
19
 
20
  # ------------------------
21
+ # 2️⃣ ساخت دیتاست ترکیبی امن
22
  # ------------------------
23
  def build_dataset():
24
  print("Creating a small general dataset...")
 
27
  {"domain":"general", "context":"What's your name?", "response":"I'm Derma ChatBot."}
28
  ]
29
 
30
+ # ----- Dermatology QA (Mreeb)
31
  print("Loading Dermatology QA (Mreeb)...")
32
  derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
33
+ print("Columns in Mreeb dataset:", derma.column_names)
 
34
 
35
+ derma_examples = []
36
+ for item in derma:
37
+ q = item.get('question') or item.get('Question') or item.get('Q')
38
+ a = item.get('answer') or item.get('Answer') or item.get('A')
39
+ if q and a:
40
+ derma_examples.append({"domain":"dermatology","context":q,"response":a})
41
+
42
+ # ----- MedQuAD subset
43
  print("Loading MedQuAD subset...")
44
  medquad = load_dataset("pythonafroz/MedQuAD")['train']
45
  derma_keywords = ["skin", "eczema", "psoriasis", "dermatitis", "melanoma", "acne", "rash"]
46
+ medquad_derma = [{"domain":"dermatology",
47
+ "context":item['question'],
48
+ "response":item['answer']}
49
  for item in medquad if any(k in item['question'].lower() for k in derma_keywords)]
50
  random.shuffle(medquad_derma)
51
  medquad_derma = medquad_derma[:500]
 
80
  # 3️⃣ چت ساده با Gradio
81
  # ------------------------
82
  def simple_chat(user_input):
 
83
  with open("derma_chat_mix.jsonl", 'r', encoding='utf-8') as f:
84
  data = [json.loads(line) for line in f]
85
+
86
  best_match = None
87
  max_overlap = 0
88
  for item in data:
 
90
  if overlap > max_overlap:
91
  max_overlap = overlap
92
  best_match = item['response']
93
+
94
  if best_match:
95
  return best_match
96
  else: