Paul720810 committed on
Commit
f80782b
·
verified ·
1 Parent(s): 2251faa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -144,7 +144,7 @@ class TextToSQLSystem:
144
  try:
145
  dataset = load_dataset(DATASET_REPO_ID, data_files="training_data.jsonl", split="train")
146
  dataset = dataset.filter(lambda ex: isinstance(ex.get("messages"), list) and len(ex["messages"]) >= 2)
147
- corpus = [item['messages']['content'] for item in dataset]
148
  self._log(f"正在編碼 {len(corpus)} 個問題...")
149
  all_embeddings = torch.cat([self._encode_texts(corpus[i:i+32]) for i in range(0, len(corpus), 32)], dim=0).numpy()
150
  index = faiss.IndexFlatIP(all_embeddings.shape[1])
@@ -191,8 +191,8 @@ class TextToSQLSystem:
191
  if idx >= len(self.dataset): continue
192
  item = self.dataset[idx]
193
  if not (isinstance(item.get('messages'), list) and len(item['messages']) >= 2): continue
194
- q_content = (item['messages']['content'] or '').strip()
195
- a_content = (item['messages'].get('content') or '').strip()
196
  if not q_content or not a_content: continue
197
  clean_q = re.sub(r"以下是一個SQL查詢任務:\s*指令:\s*", "", q_content).strip()
198
  if clean_q in seen_questions: continue
 
144
  try:
145
  dataset = load_dataset(DATASET_REPO_ID, data_files="training_data.jsonl", split="train")
146
  dataset = dataset.filter(lambda ex: isinstance(ex.get("messages"), list) and len(ex["messages"]) >= 2)
147
+ corpus = [item['messages'][0]['content'] for item in dataset]
148
  self._log(f"正在編碼 {len(corpus)} 個問題...")
149
  all_embeddings = torch.cat([self._encode_texts(corpus[i:i+32]) for i in range(0, len(corpus), 32)], dim=0).numpy()
150
  index = faiss.IndexFlatIP(all_embeddings.shape[1])
 
191
  if idx >= len(self.dataset): continue
192
  item = self.dataset[idx]
193
  if not (isinstance(item.get('messages'), list) and len(item['messages']) >= 2): continue
194
+ q_content = (item['messages'][0].get('content') or '').strip()
195
+ a_content = (item['messages'][1].get('content') or '').strip()
196
  if not q_content or not a_content: continue
197
  clean_q = re.sub(r"以下是一個SQL查詢任務:\s*指令:\s*", "", q_content).strip()
198
  if clean_q in seen_questions: continue