Product_Screener_Embedding

Sleeping

App Files Files Community

Basshole committed on Nov 21, 2024

Commit

88d4bd7

verified ·

1 Parent(s): 2f66663

Upload app.py

Browse files

Files changed (1) hide show

app.py +308 -0

app.py ADDED Viewed

	@@ -0,0 +1,308 @@

+import logging
+logging.basicConfig(level=logging.DEBUG)
+import gradio as gr
+import pandas as pd
+import numpy as np
+import os
+from openai import OpenAI
+from typing import List, Dict
+import pickle
+import time
+from sklearn.metrics.pairwise import cosine_similarity
+from huggingface_hub import HfApi, hf_hub_download, upload_file
+from pathlib import Path
+# Initialize the OpenAI client; the API key is read from the OPENAI_API_KEY environment variable
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+# Hugging Face configuration, read from environment variables (each is None when unset)
+HF_TOKEN = os.environ.get("HF_TOKEN")
+REPO_ID = os.environ.get("REPO_ID")  # format: "username/space-name"
+EMBEDDING_FILE = "product_embeddings.pkl"  # file name used for both download and upload in the repo
+# Initialize the Hugging Face API client (used by upload_embeddings)
+hf_api = HfApi(token=HF_TOKEN)
+# Load the product CSV; its rows are embedded in order and later looked up by position
+df = pd.read_csv("item_new.csv", encoding='utf-8')
+def create_product_text(row):
+    """Create a comprehensive text representation of a product"""
+    #return f"{row['item_desc']} {row['item_class1_desc']} {row['item_class2_desc']} {row['item_class3_desc']} {str(row['brand'])} {str(row['spec'])}"
+    return f"{row['item_desc']}"
+def get_embedding(text: str, model="text-embedding-3-small"):
+    """Get embeddings for a text using OpenAI's API"""
+    try:
+        text = text.replace("\n", " ")
+        response = client.embeddings.create(
+            input=[text],
+            model=model
+        )
+        return response.data[0].embedding
+    except Exception as e:
+        print(f"Error getting embedding: {e}")
+        return None
+def download_embeddings():
+    """Try to download embeddings from Hugging Face"""
+    try:
+        local_path = hf_hub_download(
+            repo_id=REPO_ID,
+            filename=EMBEDDING_FILE,
+            token=HF_TOKEN
+        )
+        with open(local_path, 'rb') as f:
+            return pickle.load(f)
+    except Exception as e:
+        print(f"Error downloading embeddings: {e}")
+        return None
+def upload_embeddings(embeddings):
+    """Upload embeddings to Hugging Face"""
+    try:
+        # Save embeddings locally first
+        temp_path = "temp_embeddings.pkl"
+        with open(temp_path, 'wb') as f:
+            pickle.dump(embeddings, f)
+        # Upload to Hugging Face
+        hf_api.upload_file(
+            path_or_fileobj=temp_path,
+            path_in_repo=EMBEDDING_FILE,
+            repo_id=REPO_ID,
+            token=HF_TOKEN
+        )
+        # Clean up temp file
+        os.remove(temp_path)
+        print("Successfully uploaded embeddings")
+    except Exception as e:
+        print(f"Error uploading embeddings: {e}")
+def initialize_embeddings():
+    """Initialize or load product embeddings"""
+    print("Checking for existing embeddings...")
+    embeddings = download_embeddings()
+    if embeddings is not None:
+        print("Loaded existing embeddings")
+        return embeddings
+    print("Creating new embeddings...")
+    embeddings = []
+    for idx, row in df.iterrows():
+        product_text = create_product_text(row)
+        embedding = get_embedding(product_text)
+        if embedding:
+            embeddings.append(embedding)
+        else:
+            embeddings.append([0] * 1536)  # Default embedding dimension
+        time.sleep(0.1)  # Rate limiting for API calls
+    # Upload new embeddings
+    upload_embeddings(embeddings)
+    return embeddings
+# Load or create embeddings when the module is imported, then convert them to a NumPy array
+print("Initializing embeddings...")
+product_embeddings = initialize_embeddings()
+product_embeddings_array = np.array(product_embeddings)  # array form passed to cosine_similarity in find_similar_products
+print("Embeddings initialized")
+def find_similar_products(query_embedding, top_k=8):
+    """Find most similar products using cosine similarity"""
+    similarities = cosine_similarity(
+        [query_embedding],
+        product_embeddings_array
+    )[0]
+    top_indices = similarities.argsort()[-top_k:][::-1]
+    return df.iloc[top_indices], similarities[top_indices]
+# Query handling: intent analysis, product lookup, and recommendation formatting
+def analyze_query_and_find_products(query: str) -> str:
+    if not query.strip():
+        return "請輸入您的問題或搜尋需求"
+    try:
+        # First, analyze the query to understand intent
+        analysis_messages = [
+            {"role": "system", "content": f"""You are a knowledgeable shopping assistant.
+             When given a query:
+             1. Analyze what the user is looking for
+             2. Predict what user will need in a supermarket
+             Provide your analysis in Traditional Chinese, focusing on understanding user needs."""},
+            {"role": "user", "content": f"Analyze this query and explain what the user needs: {query}"}
+        ]
+        analysis_response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=analysis_messages,
+            temperature=0.7,
+            max_tokens=500
+        )
+        analysis = analysis_response.choices[0].message.content
+        # Get embedding for the query
+        query_embedding = get_embedding(query + " " + analysis)
+        # Find similar products
+        matching_products, similarities = find_similar_products(query_embedding)
+        print(f"Found {len(matching_products)} matching products")
+        # Get recommendations based on found products
+        product_descriptions = "\n".join([
+            f"- {row['item_desc']} ({row['item_class1_desc']})"
+            for _, row in matching_products.iterrows()
+        ])
+        recommendation_messages = [
+            {"role": "system", "content": f"""Based on the query and available products,
+             provide helpful recommendations and tips. Consider:
+             1. How the products can be used
+             2. What to look for when choosing
+             3. Alternative options if available
+             Respond in Traditional Chinese."""},
+            {"role": "user", "content": f"""Query: {query}
+            Analysis: {analysis}
+            Available products: {product_descriptions}"""}
+        ]
+        recommendation_response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=recommendation_messages,
+            temperature=0.7,
+            max_tokens=250
+        )
+        # Format the response
+        response_parts = [
+            "🔍 需求分析：",
+            analysis,
+            "\n📦 相關商品推薦：\n"
+        ]
+        for (_, product), similarity in zip(matching_products.iterrows(), similarities):
+            confidence = similarity * 100
+            product_info = f"""
+• {product['item_desc']}
+  分類: {product['item_class1_desc']} > {product['item_class2_desc']}
+  規格: {product['spec']}
+  價格: NT$ {float(product['sales_amt']):,.0f} / {product['unit']}
+  相關度: {confidence:.1f}%"""
+            response_parts.append(product_info)
+        response_parts.extend([
+            "\n💡 購物建議：",
+            recommendation_response.choices[0].message.content
+        ])
+        return "\n".join(response_parts)
+    except Exception as e:
+        print(f"Error in search: {str(e)}")
+        return f"搜尋發生錯誤: {str(e)}"
+# Add system status message
+def get_system_status():
+    """Get system initialization status"""
+    return {
+        "embeddings_loaded": product_embeddings is not None,
+        "embedding_count": len(product_embeddings) if product_embeddings else 0,
+        "product_count": len(df)
+    }
+# Modified interface with status
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # 🛒 智慧商品推薦系統
+        輸入您的問題或需求，系統會：
+        1. 分析您的需求
+        2. 推薦相關商品
+        3. 提供實用建議
+        """
+    )
+    # System status
+    with gr.Row():
+        status = get_system_status()
+        status_md = f"""
+        ### 系統狀態：
+        - 資料庫商品數：{status['product_count']}
+        - 向量嵌入狀態：{'✅ 已載入' if status['embeddings_loaded'] else '❌ 未載入'}
+        """
+        gr.Markdown(status_md)
+    # Main interface
+    with gr.Column():
+        # Input area
+        input_text = gr.Textbox(
+            label="請輸入您的問題或需求",
+            placeholder="您可以詢問任何商品相關的問題，例如：\n- 想找一些適合做便當的食材\n- 需要營養均衡的食材\n- 想買一些新鮮的海鮮\n- 有什麼適合老人家的食物",
+            lines=3
+        )
+        # Buttons
+        with gr.Row():
+            submit_btn = gr.Button("搜尋", variant="primary")
+            clear_btn = gr.Button("清除")
+        # Output area
+        output_text = gr.Textbox(
+            label="分析結果與建議",
+            lines=25
+        )
+        # Clear function
+        def clear_inputs():
+            return {"input_text": "", "output_text": ""}
+        # Setup button actions
+        submit_btn.click(
+            fn=analyze_query_and_find_products,
+            inputs=input_text,
+            outputs=output_text,
+            api_name="search"  # This enables API access
+        )
+        clear_btn.click(
+            fn=clear_inputs,
+            inputs=[],
+            outputs=[input_text, output_text],
+            api_name="clear"
+        )
+    # Examples section
+    gr.Markdown("### 搜尋範例")
+    with gr.Row():
+        examples = gr.Examples(
+            examples=[
+                ["想找一些適合做便當的食材"],
+                ["需要一些營養均衡的食物"],
+                ["有沒有適合老人家吃的食物？"],
+                ["想買一些新鮮的海鮮，有什麼推薦？"],
+                ["最近感冒了，有什麼食材可以幫助恢復？"],
+            ],
+            inputs=input_text,
+            outputs=output_text,
+            fn=analyze_query_and_find_products,
+            cache_examples=True
+        )
+    # Footer
+    gr.Markdown(
+        """
+        ---
+        系統使用AI語意分析技術，能更好地理解您的需求並提供相關建議。
+        如有任何問題或建議，歡迎反饋。
+        """
+    )
+# Launch the app
+demo.launch()