Zaious committed on
Commit
aa8349d
·
verified ·
1 Parent(s): 77eeda3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -139
app.py CHANGED
@@ -5,30 +5,16 @@ import pandas as pd
5
  import numpy as np
6
  import os
7
  from openai import OpenAI
8
- from typing import List, Dict
9
  import pickle
10
- import time
11
- from sklearn.metrics.pairwise import cosine_similarity
12
- from huggingface_hub import HfApi, hf_hub_download, upload_file
13
- from pathlib import Path
14
  import faiss
15
 
16
  # Initialize OpenAI client
17
  client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
18
 
19
- def get_embedding(text: str, model="text-embedding-ada-002"):
20
- """Get embeddings for a text using OpenAI's API"""
21
- try:
22
- text = text.replace("\n", " ")
23
- response = client.embeddings.create(
24
- input=[text],
25
- model=model
26
- )
27
- return response.data[0].embedding
28
- except Exception as e:
29
- print(f"Error getting embedding: {e}")
30
- return None
31
-
32
  def initialize_embeddings_from_pkl(pkl_path: str, faiss_path: str):
33
  """從 PKL 檔案載入嵌入,並加載 FAISS 索引"""
34
  if not os.path.exists(pkl_path):
@@ -47,21 +33,34 @@ def initialize_embeddings_from_pkl(pkl_path: str, faiss_path: str):
47
  print(f"FAISS index loaded with {index.ntotal} embeddings.")
48
  return index, embeddings
49
 
50
- # Load CSV data
51
- #df = pd.read_csv("item_new.csv", encoding='utf-8')
52
-
53
 
54
- # Load embeddings from FAISS
55
  print("Initializing embeddings...")
56
- faiss_path = "product_index.faiss" # Path to FAISS index file
57
- pkl_path = "product_embeddings.pkl" # Path to embeddings file
58
-
59
- # 初始化嵌入
60
  faiss_index, product_embeddings_array = initialize_embeddings_from_pkl(pkl_path, faiss_path)
 
61
  print(f"Loaded embeddings with shape: {len(product_embeddings_array)} x {len(product_embeddings_array[0])}")
62
  print("Embeddings initialized")
63
 
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def find_similar_products(query_embedding, top_k=8):
66
  """Find most similar products using FAISS index"""
67
  if faiss_index is None:
@@ -74,18 +73,19 @@ def find_similar_products(query_embedding, top_k=8):
74
  distances, indices = faiss_index.search(query_embedding, top_k)
75
 
76
  # Retrieve metadata for matching products
77
- matching_products = [product_metadata_list[idx] for idx in indices[0]]
78
- matching_distances = distances[0]
79
 
80
- return matching_products, matching_distances
81
 
82
- # Rest of the code remains the same...
 
83
  def analyze_query_and_find_products(query: str) -> str:
84
  if not query.strip():
85
  return "請輸入您的問題或搜尋需求"
86
 
87
  try:
88
- # First, analyze the query to understand intent
89
  analysis_messages = [
90
  {"role": "system", "content": f"""You are a knowledgeable shopping assistant.
91
  When given a query:
@@ -105,40 +105,15 @@ def analyze_query_and_find_products(query: str) -> str:
105
 
106
  analysis = analysis_response.choices[0].message.content
107
 
108
- print("現在要比對向量的是<analysis>:" + analysis)
109
 
110
- # Get embedding for the query
111
  query_embedding = get_embedding(query + " " + analysis)
112
 
113
  # Find similar products
114
- matching_products, similarities = find_similar_products(query_embedding)
115
  print(f"Found {len(matching_products)} matching products")
116
 
117
- # Get recommendations based on found products
118
- product_descriptions = "\n".join([
119
- f"- {row['item_id']} ({row['item_name']})"
120
- for _, row in matching_products.iterrows()
121
- ])
122
-
123
- recommendation_messages = [
124
- {"role": "system", "content": f"""Based on the query and available products,
125
- provide helpful recommendations and tips. Consider:
126
- 1. How the products can be used
127
- 2. What to look for when choosing
128
- 3. Alternative options if available
129
- Respond in Traditional Chinese."""},
130
- {"role": "user", "content": f"""Query: {query}
131
- Analysis: {analysis}
132
- Available products: {product_descriptions}"""}
133
- ]
134
-
135
- recommendation_response = client.chat.completions.create(
136
- model="gpt-4o",
137
- messages=recommendation_messages,
138
- temperature=0.7,
139
- max_tokens=250
140
- )
141
-
142
  # Format the response
143
  response_parts = [
144
  "🔍 需求分析:",
@@ -146,54 +121,40 @@ def analyze_query_and_find_products(query: str) -> str:
146
  "\n📦 相關商品推薦:\n"
147
  ]
148
 
149
- for (_, product), similarity in zip(matching_products.iterrows(), similarities):
150
- confidence = similarity * 100
151
  product_info = f"""
152
- • {product['item_name']}
153
- 分類: {product['tags']}
154
- 規格: {product['description']}
155
- 相關度: {confidence:.1f}%"""
 
156
  response_parts.append(product_info)
157
 
158
- response_parts.extend([
159
- "\n💡 購物建議:",
160
- recommendation_response.choices[0].message.content
161
- ])
162
-
163
  return "\n".join(response_parts)
164
 
165
  except Exception as e:
166
  print(f"Error in search: {str(e)}")
167
  return f"搜尋發生錯誤: {str(e)}"
168
 
169
- # Add system status message
 
170
  def get_system_status():
171
  """Get system initialization status"""
172
- # 判斷是否加載了 FAISS 索引
173
  embeddings_loaded = faiss_index is not None
174
  embedding_count = faiss_index.ntotal if embeddings_loaded else 0
175
-
176
- # 假設 count 是從 .pkl 文件中提取的筆數
177
- pkl_path = "product_embeddings.pkl"
178
- try:
179
- with open(pkl_path, "rb") as f:
180
- embeddings = pickle.load(f)
181
- if isinstance(embeddings, list):
182
- count = len(embeddings)
183
- elif isinstance(embeddings, np.ndarray):
184
- count = embeddings.shape[0]
185
- else:
186
- count = 0 # 資料格式不明時設為 0
187
- except FileNotFoundError:
188
- count = 0 # 如果文件不存在,設為 0
189
-
190
  return {
191
  "embeddings_loaded": embeddings_loaded,
192
  "embedding_count": embedding_count,
193
- "product_count": count # 新增 count
194
  }
195
 
196
- # Modified interface with status
 
197
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
198
  gr.Markdown(
199
  """
@@ -218,68 +179,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
218
 
219
  # Main interface
220
  with gr.Column():
221
- # Input area
222
  input_text = gr.Textbox(
223
  label="請輸入您的問題或需求",
224
- placeholder="您可以詢問任何商品相關的問題,例如:\n- 想找一些適合做便當的食材\n- 需要營養均衡的食材\n- 想買一些新鮮的海鮮\n- 有什麼適合老人家的食物",
225
  lines=3
226
  )
227
-
228
- # Buttons
229
- with gr.Row():
230
- submit_btn = gr.Button("搜尋", variant="primary")
231
- clear_btn = gr.Button("清除")
232
-
233
- # Output area
234
  output_text = gr.Textbox(
235
  label="分析結果與建議",
236
- lines=25
237
  )
 
238
 
239
- # Clear function
240
- def clear_inputs():
241
- return {"input_text": "", "output_text": ""}
242
-
243
- # Setup button actions
244
  submit_btn.click(
245
  fn=analyze_query_and_find_products,
246
  inputs=input_text,
247
- outputs=output_text,
248
- api_name="search" # This enables API access
249
- )
250
-
251
- clear_btn.click(
252
- fn=clear_inputs,
253
- inputs=[],
254
- outputs=[input_text, output_text],
255
- api_name="clear"
256
  )
257
 
258
- # Examples section
259
- gr.Markdown("### 搜尋範例")
260
- with gr.Row():
261
- examples = gr.Examples(
262
- examples=[
263
- ["想找一些適合做便當的食材"],
264
- ["需要一些營養均衡的食物"],
265
- ["有沒有適合老人家吃的食物?"],
266
- ["想買一些新鮮的海鮮,有什麼推薦?"],
267
- ["最近感冒了,有什麼食材可以幫助恢復?"],
268
- ],
269
- inputs=input_text,
270
- outputs=output_text,
271
- fn=analyze_query_and_find_products,
272
- cache_examples=True
273
- )
274
-
275
- # Footer
276
- gr.Markdown(
277
- """
278
- ---
279
- 系統使用AI語意分析技術,能更好地理解您的需求並提供相關建議。
280
- 如有任何問題或建議,歡迎反饋。
281
- """
282
- )
283
 
284
  # Launch the app
285
- demo.launch()
 
5
  import numpy as np
6
  import os
7
  from openai import OpenAI
 
8
  import pickle
 
 
 
 
9
  import faiss
10
 
11
  # Initialize OpenAI client
12
  client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
13
 
14
+ # Load Excel data
15
+ df = pd.read_excel("your_file.xlsx") # 修改為你的 xlsx 文件路徑
16
+
17
+ # Initialize FAISS Index
 
 
 
 
 
 
 
 
 
18
  def initialize_embeddings_from_pkl(pkl_path: str, faiss_path: str):
19
  """從 PKL 檔案載入嵌入,並加載 FAISS 索引"""
20
  if not os.path.exists(pkl_path):
 
33
  print(f"FAISS index loaded with {index.ntotal} embeddings.")
34
  return index, embeddings
35
 
36
+ # Paths to FAISS index and PKL file
37
+ faiss_path = "product_index.faiss" # 修改為你的 FAISS 索引文件路徑
38
+ pkl_path = "product_embeddings.pkl" # 修改為你的嵌入文件路徑
39
 
40
+ # Initialize embeddings
41
  print("Initializing embeddings...")
 
 
 
 
42
  faiss_index, product_embeddings_array = initialize_embeddings_from_pkl(pkl_path, faiss_path)
43
+ assert faiss_index.ntotal == len(df), "FAISS 索引與 xlsx 文件的行數不一致!"
44
  print(f"Loaded embeddings with shape: {len(product_embeddings_array)} x {len(product_embeddings_array[0])}")
45
  print("Embeddings initialized")
46
 
47
 
48
+ # Generate embeddings for query
49
+ def get_embedding(text: str, model="text-embedding-ada-002"):
50
+ """Get embeddings for a text using OpenAI's API"""
51
+ try:
52
+ text = text.replace("\n", " ")
53
+ response = client.embeddings.create(
54
+ input=[text],
55
+ model=model
56
+ )
57
+ return response.data[0].embedding
58
+ except Exception as e:
59
+ print(f"Error getting embedding: {e}")
60
+ return None
61
+
62
+
63
+ # Find similar products
64
  def find_similar_products(query_embedding, top_k=8):
65
  """Find most similar products using FAISS index"""
66
  if faiss_index is None:
 
73
  distances, indices = faiss_index.search(query_embedding, top_k)
74
 
75
  # Retrieve metadata for matching products
76
+ matching_products = df.iloc[indices[0]].copy() # df 中提取對應的行
77
+ matching_products["similarity"] = distances[0] # 加入相似度數據
78
 
79
+ return matching_products
80
 
81
+
82
+ # Analyze query and find products
83
  def analyze_query_and_find_products(query: str) -> str:
84
  if not query.strip():
85
  return "請輸入您的問題或搜尋需求"
86
 
87
  try:
88
+ # Analyze the query to understand intent
89
  analysis_messages = [
90
  {"role": "system", "content": f"""You are a knowledgeable shopping assistant.
91
  When given a query:
 
105
 
106
  analysis = analysis_response.choices[0].message.content
107
 
108
+ print("AAA:" + analysis)
109
 
110
+ # Generate embedding for the query
111
  query_embedding = get_embedding(query + " " + analysis)
112
 
113
  # Find similar products
114
+ matching_products = find_similar_products(query_embedding)
115
  print(f"Found {len(matching_products)} matching products")
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  # Format the response
118
  response_parts = [
119
  "🔍 需求分析:",
 
121
  "\n📦 相關商品推薦:\n"
122
  ]
123
 
124
+ for _, row in matching_products.iterrows():
 
125
  product_info = f"""
126
+ • {row['item_name']}
127
+ 描述: {row['description']}
128
+ 分類: {row['tags']}
129
+ 規格: {row['spec']}
130
+ 相似度: {row['similarity']:.2f}"""
131
  response_parts.append(product_info)
132
 
133
+ response_parts.append("\n💡 購物建議:")
134
+ response_parts.append("根據您的需求,以上是推薦的商品!")
135
+
 
 
136
  return "\n".join(response_parts)
137
 
138
  except Exception as e:
139
  print(f"Error in search: {str(e)}")
140
  return f"搜尋發生錯誤: {str(e)}"
141
 
142
+
143
+ # Get system status
144
  def get_system_status():
145
  """Get system initialization status"""
 
146
  embeddings_loaded = faiss_index is not None
147
  embedding_count = faiss_index.ntotal if embeddings_loaded else 0
148
+ product_count = len(df)
149
+
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  return {
151
  "embeddings_loaded": embeddings_loaded,
152
  "embedding_count": embedding_count,
153
+ "product_count": product_count
154
  }
155
 
156
+
157
+ # Gradio Interface
158
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
159
  gr.Markdown(
160
  """
 
179
 
180
  # Main interface
181
  with gr.Column():
 
182
  input_text = gr.Textbox(
183
  label="請輸入您的問題或需求",
184
+ placeholder="例如:需要適合便當的食材",
185
  lines=3
186
  )
 
 
 
 
 
 
 
187
  output_text = gr.Textbox(
188
  label="分析結果與建議",
189
+ lines=20
190
  )
191
+ submit_btn = gr.Button("搜尋")
192
 
 
 
 
 
 
193
  submit_btn.click(
194
  fn=analyze_query_and_find_products,
195
  inputs=input_text,
196
+ outputs=output_text
 
 
 
 
 
 
 
 
197
  )
198
 
199
+ gr.Markdown("--- 系統使用 AI 分析需求並推薦商品。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  # Launch the app
202
+ demo.launch()