File size: 6,798 Bytes
e870574
 
7435910
 
 
 
 
 
 
 
 
 
 
cd108c3
7435910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import logging
# NOTE: enables DEBUG-level logging on the root logger for the whole process,
# including third-party libraries — very verbose; intended for development.
logging.basicConfig(level=logging.DEBUG)
import gradio as gr
import pandas as pd
import numpy as np
import os
from openai import OpenAI
from typing import List, Dict
import pickle
import time
from sklearn.metrics.pairwise import cosine_similarity

# Initialize OpenAI client
# Reads the key from the OPENAI_API_KEY environment variable; if unset,
# API calls will fail at request time rather than here.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Load CSV data
# Product catalog ("500 grocery/ingredient items"). Downstream code expects
# columns: item_desc, item_class1_desc..item_class3_desc, brand, spec,
# sales_amt, unit.
df = pd.read_csv("500筆食材商品.csv", encoding='utf-8')

def create_product_text(row):
    """Build a single space-separated text representation of a product row.

    Concatenates the item description, the three class-level descriptions,
    and the stringified brand and spec fields, in that order.
    """
    parts = (
        row['item_desc'],
        row['item_class1_desc'],
        row['item_class2_desc'],
        row['item_class3_desc'],
        str(row['brand']),
        str(row['spec']),
    )
    return "{} {} {} {} {} {}".format(*parts)

def get_embedding(text: str, model="text-embedding-3-small"):
    """Fetch an embedding vector for *text* from the OpenAI embeddings API.

    Newlines are replaced with spaces before the request. Returns the
    embedding (list of floats) on success, or None if anything fails —
    the failure is printed, not raised.
    """
    try:
        cleaned = text.replace("\n", " ")
        result = client.embeddings.create(
            input=[cleaned],
            model=model,
        )
    except Exception as exc:
        print(f"Error getting embedding: {exc}")
        return None
    return result.data[0].embedding

def initialize_embeddings():
    """Return one embedding per row of the global ``df``, cached on disk.

    If 'product_embeddings.pkl' exists it is unpickled and returned as-is.
    Otherwise embeddings are fetched from the OpenAI API row by row (with a
    short delay between calls), pickled to that file, and returned. Rows
    whose API call fails get a 1536-dimensional zero vector so list indices
    stay aligned with df rows.
    """
    cache_path = 'product_embeddings.pkl'

    if os.path.exists(cache_path):
        print("Loading existing embeddings...")
        with open(cache_path, 'rb') as fh:
            return pickle.load(fh)

    print("Creating new embeddings...")
    vectors = []
    for _, record in df.iterrows():
        vec = get_embedding(create_product_text(record))
        # Zero-vector fallback keeps positional alignment with df.
        vectors.append(vec if vec else [0] * 1536)
        time.sleep(0.1)  # crude rate limiting between API calls

    with open(cache_path, 'wb') as fh:
        pickle.dump(vectors, fh)

    return vectors

# Load or create embeddings
# Materialized once at import time; the ndarray form is what the similarity
# search operates on (one row per product, aligned with df).
product_embeddings = initialize_embeddings()
product_embeddings_array = np.array(product_embeddings)

def find_similar_products(query_embedding, top_k=8):
    """Return the top_k catalog rows most similar to *query_embedding*.

    Computes cosine similarity between the query vector and every product
    embedding, then returns (rows of df, similarity scores), both ordered
    from most to least similar.
    """
    scores = cosine_similarity([query_embedding], product_embeddings_array)[0]
    ranked = scores.argsort()[::-1][:top_k]
    return df.iloc[ranked], scores[ranked]

def analyze_query_and_find_products(query: str) -> str:
    """End-to-end handler behind the Gradio UI.

    Pipeline:
      1. Ask the chat model to analyze the user's intent.
      2. Embed query + analysis and retrieve the most similar products.
      3. Ask the chat model for shopping recommendations over those products.
      4. Format analysis, product list, and recommendations into one string.

    Returns a user-facing message (Traditional Chinese). All failures are
    caught and returned as text so the UI never sees a raw exception.
    """
    if not query.strip():
        return "請輸入您的問題或搜尋需求"

    try:
        # Step 1: analyze the query to understand intent.
        analysis_messages = [
            {"role": "system", "content": f"""You are a knowledgeable shopping assistant. 
             When given a query:
             1. Analyze what the user is looking for
             2. Consider practical needs and context
             3. Think about related use cases
             
             Provide your analysis in Traditional Chinese, focusing on understanding user needs."""},
            {"role": "user", "content": f"Analyze this query and explain what the user needs: {query}"}
        ]

        analysis_response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=analysis_messages,
            temperature=0.7,
            max_tokens=500
        )

        analysis = analysis_response.choices[0].message.content

        # Step 2: embed the query enriched with the intent analysis.
        query_embedding = get_embedding(query + " " + analysis)
        if query_embedding is None:
            # get_embedding swallows API errors and returns None; without
            # this guard, None would reach cosine_similarity and raise a
            # cryptic TypeError. Fail with a readable message instead.
            return "搜尋發生錯誤: 無法取得查詢向量,請稍後再試"

        # Step 3: retrieve similar products.
        matching_products, similarities = find_similar_products(query_embedding)
        print(f"Found {len(matching_products)} matching products")

        # Step 4: get recommendations based on the found products.
        product_descriptions = "\n".join([
            f"- {row['item_desc']} ({row['item_class1_desc']})"
            for _, row in matching_products.iterrows()
        ])

        recommendation_messages = [
            {"role": "system", "content": f"""Based on the query and available products,
             provide helpful recommendations and tips. Consider:
             1. How the products can be used
             2. What to look for when choosing
             3. Alternative options if available
             Respond in Traditional Chinese."""},
            {"role": "user", "content": f"""Query: {query}
            Analysis: {analysis}
            Available products: {product_descriptions}"""}
        ]

        recommendation_response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=recommendation_messages,
            temperature=0.7,
            max_tokens=250
        )

        # Step 5: assemble the user-facing response.
        response_parts = [
            "🔍 需求分析:",
            analysis,
            "\n📦 相關商品推薦:\n"
        ]

        for (_, product), similarity in zip(matching_products.iterrows(), similarities):
            # Cosine similarity rendered as a percentage "relevance" score.
            confidence = similarity * 100
            product_info = f"""
{product['item_desc']}
  分類: {product['item_class1_desc']} > {product['item_class2_desc']}
  規格: {product['spec']}
  價格: NT$ {float(product['sales_amt']):,.0f} / {product['unit']}
  相關度: {confidence:.1f}%"""
            response_parts.append(product_info)

        response_parts.extend([
            "\n💡 購物建議:",
            recommendation_response.choices[0].message.content
        ])

        return "\n".join(response_parts)

    except Exception as e:
        # Last-resort catch so the Gradio textbox shows an error string
        # instead of a stack trace.
        print(f"Error in search: {str(e)}")
        return f"搜尋發生錯誤: {str(e)}"

# Create the Gradio interface
# Single-textbox in / single-textbox out UI over the full pipeline.
# NOTE: cache_examples=True runs every example once at startup (making real
# OpenAI API calls) and serves the cached results afterwards.
demo = gr.Interface(
    fn=analyze_query_and_find_products,
    inputs=gr.Textbox(
        label="請輸入您的問題或需求",
        placeholder="您可以詢問任何商品相關的問題,例如:\n- 想找一些適合做便當的食材\n- 需要營養均衡的食材\n- 想買一些新鮮的海鮮\n- 有什麼適合老人家的食物",
        lines=3
    ),
    outputs=gr.Textbox(
        label="分析結果與建議",
        lines=25
    ),
    title="🛒 智慧商品推薦系統",
    description="""
    輸入您的問題或需求,系統會:
    1. 分析您的需求
    2. 推薦相關商品
    3. 提供實用建議
    
    系統使用AI語意分析技術,能更好地理解您的需求並提供相關建議。
    """,
    examples=[
        ["想找一些適合做便當的食材"],
        ["需要一些營養均衡的食物"],
        ["有沒有適合老人家吃的食物?"],
        ["想買一些新鮮的海鮮,有什麼推薦?"],
        ["最近感冒了,有什麼食材可以幫助恢復?"],
    ],
    cache_examples=True
)

# Launch the app
# Starts the local Gradio web server; this call blocks until shutdown.
demo.launch()