Paul720810 committed on
Commit
04ae5ea
·
verified ·
1 Parent(s): 2b8ddf5

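Update app.py: index the `messages` list by position ([0] = question, [1] = answer) instead of accessing `content` on the list itself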

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -108,7 +108,7 @@ class TextToSQLSystem:
108
  try:
109
  dataset = load_dataset(DATASET_REPO_ID, data_files="training_data.jsonl", split="train")
110
  dataset = dataset.filter(lambda ex: isinstance(ex.get("messages"), list) and len(ex["messages"]) >= 2)
111
- corpus = [item['messages']['content'] for item in dataset]
112
  self._log(f"正在編碼 {len(corpus)} 個問題...")
113
  all_embeddings = torch.cat([self._encode_texts(corpus[i:i+32]) for i in range(0, len(corpus), 32)], dim=0).numpy()
114
  index = faiss.IndexFlatIP(all_embeddings.shape)
@@ -153,8 +153,8 @@ class TextToSQLSystem:
153
  if idx >= len(self.dataset): continue
154
  item = self.dataset[idx]
155
  if not (isinstance(item.get('messages'), list) and len(item['messages']) >= 2): continue
156
- q_content = (item['messages'].get('content') or '').strip()
157
- a_content = (item['messages'].get('content') or '').strip()
158
  if not q_content or not a_content: continue
159
  clean_q = re.sub(r"以下是一個SQL查詢任務:\s*指令:\s*", "", q_content).strip()
160
  if clean_q in seen_questions: continue
 
108
  try:
109
  dataset = load_dataset(DATASET_REPO_ID, data_files="training_data.jsonl", split="train")
110
  dataset = dataset.filter(lambda ex: isinstance(ex.get("messages"), list) and len(ex["messages"]) >= 2)
111
+ corpus = [item['messages'][0]['content'] for item in dataset]
112
  self._log(f"正在編碼 {len(corpus)} 個問題...")
113
  all_embeddings = torch.cat([self._encode_texts(corpus[i:i+32]) for i in range(0, len(corpus), 32)], dim=0).numpy()
114
  index = faiss.IndexFlatIP(all_embeddings.shape)
 
153
  if idx >= len(self.dataset): continue
154
  item = self.dataset[idx]
155
  if not (isinstance(item.get('messages'), list) and len(item['messages']) >= 2): continue
156
+ q_content = (item['messages'][0].get('content') or '').strip()
157
+ a_content = (item['messages'][1].get('content') or '').strip()
158
  if not q_content or not a_content: continue
159
  clean_q = re.sub(r"以下是一個SQL查詢任務:\s*指令:\s*", "", q_content).strip()
160
  if clean_q in seen_questions: continue