Spaces:

prernajeet01
/

ABB

Sleeping

App Files Files Community

prernajeet01 committed on Mar 10, 2025

Commit

e448a08

verified ·

1 Parent(s): 9f66f7c

Create app.py

Browse files

Files changed (1) hide show

app.py +937 -0

app.py ADDED Viewed

	@@ -0,0 +1,937 @@

+import os
+import gradio as gr
+import google.generativeai as genai
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import boto3
+import PyPDF2
+import io
+import uuid
+import json
+import re
+import time
+import numpy as np
+import fitz  # PyMuPDF for PDF image extraction
+from dotenv import load_dotenv
+from cassandra.cluster import Cluster
+from cassandra.auth import PlainTextAuthProvider
+from cassandra.query import SimpleStatement
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Cassandra
+from langchain_community.embeddings import VertexAIEmbeddings
+from google.oauth2 import service_account
+# Load environment variables
load_dotenv()
# Mutable module-level state shared by the chat handler and the analytics charts.
messages = []
product_images = []
current_product = ""
# One counter per product category; every category starts at zero.
query_counts = dict.fromkeys(
    ("circuit breaker", "motor starter", "contactor", "switch", "relay", "other"),
    0,
)
# Mock data for chart
daily_queries = [
    0, 0, 0, 0, 0, 6, 8,
    10, 7, 9, 12, 15, 11, 14,
]
+# Initialize Gemini API with service account credentials
def init_gemini_api():
    """Configure the Gemini client, preferring service-account credentials.

    The service-account JSON path is read from GOOGLE_APPLICATION_CREDENTIALS.
    If that variable is unset or loading the file fails, the function falls
    back to the GEMINI_API_KEY environment variable.

    Returns:
        True when the client was configured with usable credentials,
        False when neither credential source is available or both fail.
    """
    credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if credentials_path:
        try:
            credentials = service_account.Credentials.from_service_account_file(
                credentials_path,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )
            genai.configure(credentials=credentials)
            print("Gemini API initialized with service account credentials")
            return True
        except Exception as e:
            print(f"Error initializing Gemini API: {e}")
    else:
        print("Error initializing Gemini API: GOOGLE_APPLICATION_CREDENTIALS is not set")

    # Fallback to API key method if service account fails.
    api_key = os.getenv("GEMINI_API_KEY", "")
    if not api_key:
        # Configuring with an empty key would not raise here and would only
        # surface later as failed requests, so report the failure now.
        print("Fallback to API key also failed: GEMINI_API_KEY is not set")
        return False
    try:
        genai.configure(api_key=api_key)
        print("Gemini API initialized with API key")
        return True
    except Exception as e2:
        print(f"Fallback to API key also failed: {e2}")
        return False
+# Initialize Astra DB connection
def init_astra_db():
    """Connect to Astra DB and make sure the application tables exist.

    Reads ASTRA_DB_ID, ASTRA_DB_KEYSPACE and ASTRA_DB_APPLICATION_TOKEN from
    the environment, connects using the secure-connect bundle named
    ``secure-connect-<ASTRA_DB_ID>.zip``, and creates the
    ``product_embeddings``, ``query_analytics`` and ``product_images`` tables
    if they are missing.

    Returns:
        ``(session, keyspace)`` on success, or ``(None, None)`` on any failure
        so the app can keep running without a database.
    """
    astra_db_id = os.getenv("ASTRA_DB_ID")
    astra_db_keyspace = os.getenv("ASTRA_DB_KEYSPACE")
    astra_db_application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")

    required = (
        ("ASTRA_DB_ID", astra_db_id),
        ("ASTRA_DB_KEYSPACE", astra_db_keyspace),
        ("ASTRA_DB_APPLICATION_TOKEN", astra_db_application_token),
    )
    missing = [name for name, value in required if not value]
    if missing:
        print(f"Error connecting to Astra DB: missing environment variables: {', '.join(missing)}")
        return None, None

    # The keyspace name is interpolated into CQL below (identifiers cannot be
    # bound as parameters), so only accept a plain unquoted identifier.
    if not re.fullmatch(r"[A-Za-z][A-Za-z0-9_]*", astra_db_keyspace):
        print(f"Error connecting to Astra DB: invalid keyspace name {astra_db_keyspace!r}")
        return None, None

    cluster = None
    try:
        cloud_config = {
            'secure_connect_bundle': f'secure-connect-{astra_db_id}.zip'
        }
        auth_provider = PlainTextAuthProvider('token', astra_db_application_token)
        cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
        session = cluster.connect()

        # Best effort: a managed service may reject CREATE KEYSPACE over CQL
        # (NOTE(review): confirm for Astra). If the keyspace really is missing,
        # the CREATE TABLE statements below fail and are reported as usual.
        try:
            session.execute(f"""
                CREATE KEYSPACE IF NOT EXISTS {astra_db_keyspace}
                WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '3'}}
            """)
        except Exception as e:
            print(f"Keyspace creation skipped: {e}")

        # Create table for vector embeddings if it doesn't exist
        session.execute(f"""
            CREATE TABLE IF NOT EXISTS {astra_db_keyspace}.product_embeddings (
                id text PRIMARY KEY,
                product_type text,
                content text,
                embedding_vector list<float>,
                metadata text
            )
        """)
        # Create table for query analytics
        session.execute(f"""
            CREATE TABLE IF NOT EXISTS {astra_db_keyspace}.query_analytics (
                id text PRIMARY KEY,
                query text,
                product_type text,
                timestamp timestamp,
                response_time float
            )
        """)
        # Create table for product images
        session.execute(f"""
            CREATE TABLE IF NOT EXISTS {astra_db_keyspace}.product_images (
                id text PRIMARY KEY,
                product_type text,
                image_data blob,
                page_number int,
                image_index int,
                metadata text
            )
        """)
        print("Astra DB connection established")
        return session, astra_db_keyspace
    except Exception as e:
        print(f"Error connecting to Astra DB: {e}")
        # Release driver resources if the failure happened after the cluster
        # object was created.
        if cluster is not None:
            try:
                cluster.shutdown()
            except Exception as shutdown_error:
                print(f"Error shutting down Astra DB cluster: {shutdown_error}")
        # Return None values to allow the app to run without DB connection
        return None, None
+# Initialize AWS S3 client for accessing product catalogs
def init_s3_client():
    """Create the boto3 S3 client used to read product catalogs.

    Credentials and region come from AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
    and AWS_REGION. Returns the client, or None if construction raises.
    """
    try:
        client_options = {
            "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
            "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "region_name": os.getenv("AWS_REGION"),
        }
        return boto3.client('s3', **client_options)
    except Exception as e:
        print(f"Error initializing S3 client: {e}")
        return None
+# Initialize embedding model
def get_embeddings_model():
    """Create the VertexAI embeddings model used for vector generation.

    Project and location come from GOOGLE_CLOUD_PROJECT and
    GOOGLE_CLOUD_LOCATION. Returns the model, or None if construction raises.
    """
    try:
        project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
        region = os.getenv("GOOGLE_CLOUD_LOCATION")
        return VertexAIEmbeddings(project=project_id, location=region)
    except Exception as e:
        print(f"Error initializing embeddings model: {e}")
        return None
+# Extract images from PDFs and store in Astra DB
def extract_images_from_pdf(pdf_content, product_type):
    """Extract embedded images from a PDF and store them in Astra DB.

    Args:
        pdf_content: Raw bytes of the PDF document.
        product_type: Product category recorded alongside every image.

    Returns:
        The number of image rows actually inserted. Returns 0 when no database
        session is available. If an error interrupts processing, the count of
        rows inserted before the error is returned, because those rows remain
        in the database.
    """
    if not astra_session:
        return 0

    images_stored = 0
    pdf_document = None
    try:
        # Statement text does not change between images, so build it once.
        insert_cql = f"""
            INSERT INTO {astra_keyspace}.product_images
            (id, product_type, image_data, page_number, image_index, metadata)
            VALUES (%s, %s, %s, %s, %s, %s)
        """
        # Open PDF from bytes
        pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
        for page_num, page in enumerate(pdf_document):
            for img_index, img_info in enumerate(page.get_images(full=True)):
                xref = img_info[0]
                base_image = pdf_document.extract_image(xref)
                image_bytes = base_image["image"]
                # Skip very small images (likely icons or decorative elements)
                if len(image_bytes) < 5000:  # Skip images smaller than ~5KB
                    continue
                image_id = str(uuid.uuid4())
                metadata = json.dumps({
                    "product_type": product_type,
                    "page_number": page_num,
                    "image_index": img_index,
                    "timestamp": time.time(),
                    "image_size": len(image_bytes),
                    # Holds the file extension reported by the extractor;
                    # serve_product_image uses it as the temp file suffix.
                    "mime_type": base_image["ext"],
                })
                astra_session.execute(
                    insert_cql,
                    (image_id, product_type, bytearray(image_bytes), page_num, img_index, metadata),
                )
                images_stored += 1
    except Exception as e:
        print(f"Error extracting images from PDF: {e}")
    finally:
        # Always release the document handle, including on the error path.
        if pdf_document is not None:
            pdf_document.close()
    return images_stored
+# Function to download and process PDFs from S3
def process_pdf_catalogs():
    """Download PDF catalogs from S3 and ingest their text and images.

    Lists every object under the ``catalogs/`` prefix of the bucket named by
    S3_BUCKET_NAME, and for each PDF: extracts the text, splits it into
    overlapping chunks, stores the chunks via ``store_chunks_in_db`` and the
    embedded images via ``extract_images_from_pdf``.

    Returns:
        A dict with ``status`` set to ``"success"`` plus file/chunk/image
        counts, or ``status`` set to ``"error"`` plus a ``message``.
    """
    if not s3_client:
        print("S3 client not initialized, skipping PDF processing")
        return {"status": "error", "message": "S3 client not initialized"}

    bucket_name = os.getenv("S3_BUCKET_NAME")
    if not bucket_name:
        print("Error processing PDF catalogs: S3_BUCKET_NAME is not set")
        return {"status": "error", "message": "S3_BUCKET_NAME is not set"}

    try:
        # A single list_objects_v2 call returns at most one page of keys, so
        # walk every page to avoid silently skipping catalogs.
        paginator = s3_client.get_paginator("list_objects_v2")
        pdf_files = [
            obj["Key"]
            for page in paginator.paginate(Bucket=bucket_name, Prefix="catalogs/")
            for obj in page.get("Contents", [])
            # Case-insensitive, matching the product-type detection below.
            if obj["Key"].lower().endswith(".pdf")
        ]

        # The splitter configuration is identical for every file.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )

        processed_chunks = 0
        processed_images = 0
        for pdf_file in pdf_files:
            product_type = _catalog_product_type(pdf_file)

            # Download PDF from S3
            s3_object = s3_client.get_object(Bucket=bucket_name, Key=pdf_file)
            pdf_content = s3_object["Body"].read()

            # Pages without extractable text may yield None; treat as empty.
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_content))
            text_content = "".join(
                (page.extract_text() or "") + "\n\n" for page in pdf_reader.pages
            )

            chunks = text_splitter.split_text(text_content)
            store_chunks_in_db(chunks, product_type)
            images_count = extract_images_from_pdf(pdf_content, product_type)

            processed_images += images_count
            processed_chunks += len(chunks)
            print(f"Processed {pdf_file}: {len(chunks)} text chunks and {images_count} images extracted")

        print(f"PDF processing complete: {len(pdf_files)} files, {processed_chunks} chunks, {processed_images} images")
        return {
            "status": "success",
            "files_processed": len(pdf_files),
            "chunks_processed": processed_chunks,
            "images_processed": processed_images,
        }
    except Exception as e:
        print(f"Error processing PDF catalogs: {e}")
        return {"status": "error", "message": str(e)}


def _catalog_product_type(key):
    """Return the product category whose slug appears in the S3 key, else "other"."""
    lowered = key.lower()
    for slug in ("circuit_breaker", "motor_starter", "contactor", "switch", "relay"):
        if slug in lowered:
            return slug.replace("_", " ")
    return "other"
+# Function to store text chunks in Astra DB with embeddings
def store_chunks_in_db(chunks, product_type):
    """Embed each text chunk and insert it into the product_embeddings table.

    Does nothing when either the database session or the embeddings model is
    unavailable. Any error is printed and stops processing of the remaining
    chunks; nothing is returned in either case.
    """
    if not (astra_session and embeddings_model):
        # Nothing to write to, or nothing to embed with.
        return
    try:
        for text in chunks:
            vector = embeddings_model.embed_query(text)
            row_id = str(uuid.uuid4())
            details = {
                "product_type": product_type,
                "timestamp": time.time(),
                "char_count": len(text),
            }
            row = (row_id, product_type, text, vector, json.dumps(details))
            astra_session.execute(
                f"""
                INSERT INTO {astra_keyspace}.product_embeddings
                (id, product_type, content, embedding_vector, metadata)
                VALUES (%s, %s, %s, %s, %s)
                """,
                row
            )
    except Exception as e:
        print(f"Error storing chunks in database: {e}")
+# Function to search for relevant product information in the vector database
def search_vector_db(query, product_type=None, limit=5):
    """Return the stored chunks most similar to ``query``.

    The query is embedded, every candidate row is fetched from
    ``product_embeddings`` (optionally restricted to ``product_type``), and
    rows are ranked by cosine similarity computed client-side.

    Args:
        query: Free-text search query.
        product_type: Optional category filter; falsy means no filter.
        limit: Maximum number of results to return.

    Returns:
        Up to ``limit`` dicts with ``id``, ``product_type``, ``content`` and
        ``similarity`` (a float), best match first. Returns an empty list when
        the database or embeddings model is unavailable, or on any error.
    """
    if not astra_session or not embeddings_model:
        # Return empty results if DB isn't available
        return []
    try:
        query_vec = np.asarray(embeddings_model.embed_query(query), dtype=float)
        query_norm = np.linalg.norm(query_vec)
        if query_norm == 0:
            # Cosine similarity is undefined for a zero query vector.
            return []

        cql_query = f"""
            SELECT id, product_type, content, embedding_vector
            FROM {astra_keyspace}.product_embeddings
        """
        params = None
        if product_type:
            # Bind the value instead of interpolating it into the statement.
            # product_type is not part of the primary key, so the restriction
            # needs ALLOW FILTERING to be accepted.
            cql_query += " WHERE product_type = %s ALLOW FILTERING"
            params = (product_type,)
        rows = astra_session.execute(cql_query, params)

        results = []
        for row in rows:
            if row.embedding_vector is None:
                continue
            db_vec = np.asarray(row.embedding_vector, dtype=float)
            # Skip rows that cannot be compared rather than failing the search.
            if db_vec.shape != query_vec.shape:
                continue
            db_norm = np.linalg.norm(db_vec)
            if db_norm == 0:
                continue
            similarity = float(np.dot(query_vec, db_vec) / (query_norm * db_norm))
            results.append({
                "id": row.id,
                "product_type": row.product_type,
                "content": row.content,
                "similarity": similarity,
            })
        # Sort by similarity (highest first) and limit results
        results.sort(key=lambda item: item["similarity"], reverse=True)
        return results[:limit]
    except Exception as e:
        print(f"Error searching vector database: {e}")
        return []
def log_query_analytics(query, product_type, response_time):
    """Record one handled query in the query_analytics table.

    Args:
        query: The user's query text.
        product_type: Product category detected for the query.
        response_time: Seconds taken to produce the response.

    Does nothing when no database session is available. Errors are printed
    and swallowed so analytics can never break the chat flow.
    """
    if not astra_session:
        return
    # Local import keeps this block self-contained.
    from datetime import datetime, timezone
    try:
        query_id = str(uuid.uuid4())
        # The column is a CQL timestamp, so bind a datetime rather than a
        # float of epoch seconds.
        logged_at = datetime.now(timezone.utc)
        astra_session.execute(
            f"""
            INSERT INTO {astra_keyspace}.query_analytics
            (id, query, product_type, timestamp, response_time)
            VALUES (%s, %s, %s, %s, %s)
            """,
            (query_id, query, product_type, logged_at, response_time)
        )
    except Exception as e:
        print(f"Error logging query analytics: {e}")
+# Get product images from Astra DB
def get_product_images(product):
    """Return short display identifiers for up to four stored product images.

    NOTE: a later definition of ``get_product_images`` in this same file
    rebinds the name, so this version is not the one callers end up using.

    Returns ``image-<first 8 id chars>`` labels for matching rows, two
    placeholder URLs when no rows match, and an empty list when there is no
    database session or the lookup fails.
    """
    global product_images
    if not astra_session:
        return []
    try:
        lookup = f"""
            SELECT id, product_type, image_data, metadata
            FROM {astra_keyspace}.product_images
            WHERE product_type = %s
            LIMIT 4
        """
        matches = astra_session.execute(lookup, (product,))
        # Only the id prefix is exposed here; no image bytes are served.
        labels = [f"image-{match.id[:8]}" for match in matches]
        if labels:
            return labels
        return [
            f"https://placeholder.com/abb-{product.lower().replace(' ', '-')}-1",
            f"https://placeholder.com/abb-{product.lower().replace(' ', '-')}-2"
        ]
    except Exception as e:
        print(f"Error retrieving product images: {e}")
        return []
+# Analyze product image with Gemini Vision
def analyze_product_image_with_vision(image_data, query, mime_type="image/jpeg"):
    """Ask the Gemini vision model a question about a product image.

    Args:
        image_data: Raw image bytes; falsy input short-circuits with a message.
        query: The question to answer about the image.
        mime_type: MIME type of ``image_data``. Defaults to ``"image/jpeg"``,
            the value this function previously hard-coded.

    Returns:
        The model's text answer, or a fixed message when there is no image
        data or the request fails.
    """
    if not image_data:
        return "No image data available for analysis"
    try:
        # Use Gemini 1.0 Pro Vision model
        model_name = "gemini-1.0-pro-vision-001"
        model = genai.GenerativeModel(model_name)
        # The google.generativeai SDK takes inline binary parts as a dict with
        # "mime_type" and "data"; it has no genai.types.Part.from_data helper.
        image_part = {"mime_type": mime_type, "data": bytes(image_data)}
        response = model.generate_content([
            "Analyze this ABB product image and answer the following question:",
            query,
            image_part,
        ])
        return response.text
    except Exception as e:
        print(f"Error analyzing image with Gemini Vision: {e}")
        return "Error analyzing image. Please try a different query."
def get_gemini_response(query, context_chunks=None):
    """Answer a query with Gemini, grounding the prompt in retrieved chunks.

    The product category is the first known keyword found in the lower-cased
    query ("other" if none). When ``context_chunks`` is empty or None the
    vector DB is searched, filtered by the detected category when there is one.
    Query counters are updated and the request is logged after a successful
    generation.

    Returns:
        ``(response_text, detected_product)``; on any error a fixed apology
        message paired with ``"other"``.
    """
    started = time.time()
    try:
        model = genai.GenerativeModel("gemini-2.0-flash-001")

        # First keyword (in this order) present in the query wins.
        lowered = query.lower()
        known_products = ("circuit breaker", "motor starter", "contactor", "switch", "relay")
        detected_product = next((name for name in known_products if name in lowered), "other")

        if not context_chunks:
            type_filter = None if detected_product == "other" else detected_product
            context_chunks = search_vector_db(query, product_type=type_filter)

        if context_chunks:
            context_text = "\n\n".join(piece["content"] for piece in context_chunks)
        else:
            context_text = ""

        prompt = f"""
        You are an assistant specialized in ABB products and solutions. Answer the following query about ABB products with accurate and helpful information.
        Use the following product information to inform your response:
        {context_text}
        If the information above doesn't contain relevant details, use your general knowledge about industrial electrical equipment, but be clear about what information comes from the ABB catalog versus general knowledge.
        User query: {query}
        """
        response = model.generate_content(prompt)

        # Tally under the detected category, falling back to "other".
        bucket = detected_product if detected_product in query_counts else "other"
        query_counts[bucket] += 1

        elapsed = time.time() - started
        log_query_analytics(query, detected_product, elapsed)
        return response.text, detected_product
    except Exception as e:
        print(f"Error processing chat request: {e}")
        return "Sorry, I encountered an error processing your request. Please try again.", "other"
def chat_response(query, history):
    """Handle one chat turn and return the updated history.

    A blank query returns ``history`` unchanged. Otherwise context is
    retrieved, a response is generated, a ``(query, response)`` pair is
    appended to a copy of ``history``, the module-level product image list is
    refreshed, and the last entry of ``daily_queries`` is incremented.
    """
    global messages, product_images, current_product
    if not query.strip():
        return history

    retrieved = search_vector_db(query)
    answer, product = get_gemini_response(query, retrieved)

    updated = history.copy()
    updated.append((query, answer))

    if product == "other":
        # No recognised product: clear any images from a previous turn.
        product_images = []
    else:
        current_product = product
        product_images = get_product_images(product)

    # Update daily query data for analytics (in a real app, this would be in a database)
    daily_queries[-1] += 1
    return updated
def render_images():
    """Build an HTML grid with one placeholder card per entry in product_images.

    Returns an empty string when the module-level ``product_images`` list is
    empty. Each card shows a generic picture icon with the entry's text below.
    """
    if not product_images:
        return ""
    parts = ["<div style='margin-top: 12px; display: grid; grid-template-columns: 1fr 1fr; gap: 8px;'>"]
    for url in product_images:
        parts.append(f"""
        <div style='background: #f3f4f6; border-radius: 6px; padding: 8px; text-align: center;'>
            <div style='height: 100px; display: flex; align-items: center; justify-content: center; background: rgba(0,0,0,0.05); border-radius: 4px;'>
                <svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="18" height="18" x="3" y="3" rx="2" ry="2"/><circle cx="9" cy="9" r="2"/><path d="m21 15-3.086-3.086a2 2 0 0 0-2.828 0L6 21"/></svg>
            </div>
            <p style='margin-top: 4px; font-size: 12px;'>{url}</p>
        </div>
        """)
    parts.append("</div>")
    return "".join(parts)
def render_product_distribution_chart():
    """Return a Plotly donut chart of query counts per product category.

    Labels and values are taken from the module-level ``query_counts`` dict.
    """
    donut = go.Pie(
        labels=list(query_counts.keys()),
        values=list(query_counts.values()),
        hole=.3,
        marker_colors=['#3b82f6', '#60a5fa', '#93c5fd', '#bfdbfe', '#dbeafe', '#f1f5f9'],
    )
    figure = go.Figure(data=[donut])
    figure.update_layout(
        title="Product Query Distribution",
        margin={"t": 40, "b": 20, "l": 20, "r": 20},
        height=300,
        legend={"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01, "orientation": "h"},
    )
    return figure
def render_query_volume_chart():
    """Return a Plotly line chart of the module-level ``daily_queries`` series.

    The x axis numbers the entries from 1 to ``len(daily_queries)``.
    """
    day_numbers = list(range(1, len(daily_queries) + 1))
    trace = go.Scatter(
        x=day_numbers,
        y=daily_queries,
        mode='lines+markers',
        name='Queries',
        line={"color": '#3b82f6', "width": 2},
        marker={"color": '#3b82f6', "size": 8},
    )
    figure = go.Figure()
    figure.add_trace(trace)
    figure.update_layout(
        title="Daily Query Volume",
        xaxis_title="Day",
        yaxis_title="Number of Queries",
        margin={"t": 40, "b": 20, "l": 20, "r": 20},
        height=300,
    )
    return figure
def render_metrics():
    """Return the static HTML for the "System Metrics" panel.

    The markup is a three-column grid (Document Processing, Vector Database,
    LLM Model). All figures shown are fixed text in the markup; nothing is
    read from the database or from module state.
    """
    return """
    <div style='padding: 16px;'>
        <h3 style='margin-bottom: 16px; font-size: 18px;'>System Metrics</h3>
        <div style='display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 16px; margin-bottom: 24px;'>
            <div style='background: #f3f4f6; border-radius: 8px; padding: 16px;'>
                <h4 style='font-size: 16px; margin-bottom: 8px; display: flex; align-items: center;'>
                    <svg style='margin-right: 8px;' xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><path d="M14 2v6h6"/><path d="M16 13H8"/><path d="M16 17H8"/><path d="M10 9H8"/></svg>
                    Document Processing
                </h4>
                <p style='font-size: 14px; color: #6b7280;'>4 PDF catalogs processed</p>
                <p style='font-size: 14px; color: #6b7280;'>1,248 text chunks extracted</p>
                <p style='font-size: 14px; color: #6b7280;'>136 images extracted</p>
            </div>
            <div style='background: #f3f4f6; border-radius: 8px; padding: 16px;'>
                <h4 style='font-size: 16px; margin-bottom: 8px; display: flex; align-items: center;'>
                    <svg style='margin-right: 8px;' xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 18V6M7 10l5-4 5 4M7 14l5 4 5-4"/></svg>
                    Vector Database
                </h4>
                <p style='font-size: 14px; color: #6b7280;'>Astra DB connected</p>
                <p style='font-size: 14px; color: #6b7280;'>1,248 text vectors stored</p>
                <p style='font-size: 14px; color: #6b7280;'>136 product images stored</p>
            </div>
            <div style='background: #f3f4f6; border-radius: 8px; padding: 16px;'>
                <h4 style='font-size: 16px; margin-bottom: 8px; display: flex; align-items: center;'>
                    <svg style='margin-right: 8px;' xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect width="16" height="12" x="4" y="8" rx="2"/><path d="M2 14h2"/><path d="M20 14h2"/><path d="M15 13v2"/><path d="M9 13v2"/></svg>
                    LLM Model
                </h4>
                <p style='font-size: 14px; color: #6b7280;'>Using: Gemini 2.0 Flash</p>
                <p style='font-size: 14px; color: #6b7280;'>Vision: Gemini 1.0 Pro Vision</p>
                <p style='font-size: 14px; color: #6b7280;'>Embeddings: VertexAI Embeddings</p>
                <p style='font-size: 14px; color: #6b7280;'>Using Service Account Auth</p>
            </div>
        </div>
    </div>
    """
def render_advanced_pdf_ingestion():
    """Return the static HTML for the "PDF Catalog Ingestion" panel.

    The markup contains a status summary, a table of catalog files and a
    "Process All Catalogs" button. Every value shown is fixed text in the
    markup, and the button has no handler attached in this markup.
    """
    return """
    <div style='padding: 16px;'>
        <h3 style='margin-bottom: 16px; font-size: 18px;'>PDF Catalog Ingestion</h3>
        <p style='margin-bottom: 16px; color: #6b7280;'>
            Upload ABB product catalogs to S3 and process them for the knowledge base.
        </p>
        <div style='background: #f3f4f6; border-radius: 8px; padding: 16px; margin-bottom: 16px;'>
            <h4 style='font-size: 16px; margin-bottom: 8px;'>Current Status</h4>
            <ul style='list-style: disc; margin-left: 24px;'>
                <li style='margin-bottom: 4px;'>Connected to S3 bucket: <span style='font-weight: 500;'>abb-product-catalogs</span></li>
                <li style='margin-bottom: 4px;'>4 catalogs processed</li>
                <li style='margin-bottom: 4px;'>1,248 text chunks extracted and stored</li>
                <li style='margin-bottom: 4px;'>136 product images extracted and stored</li>
                <li style='margin-bottom: 4px;'>Last processed: March 8, 2025</li>
            </ul>
        </div>
        <div style='display: grid; grid-template-columns: 1fr 1fr; gap: 16px;'>
            <div style='background: #f3f4f6; border-radius: 8px; padding: 16px;'>
                <h4 style='font-size: 16px; margin-bottom: 8px;'>Available Catalogs</h4>
                <table style='width: 100%; border-collapse: collapse;'>
                    <thead>
                        <tr style='border-bottom: 1px solid #d1d5db;'>
                            <th style='text-align: left; padding: 8px 4px;'>Filename</th>
                            <th style='text-align: left; padding: 8px 4px;'>Size</th>
                            <th style='text-align: left; padding: 8px 4px;'>Status</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr style='border-bottom: 1px solid #d1d5db;'>
                            <td style='padding: 8px 4px;'>circuit_breaker_catalog.pdf</td>
                            <td style='padding: 8px 4px;'>4.2 MB</td>
                            <td style='padding: 8px 4px;'><span style='color: #059669;'>Processed</span></td>
                        </tr>
                        <tr style='border-bottom: 1px solid #d1d5db;'>
                            <td style='padding: 8px 4px;'>motor_starter_catalog.pdf</td>
                            <td style='padding: 8px 4px;'>3.8 MB</td>
                            <td style='padding: 8px 4px;'><span style='color: #059669;'>Processed</span></td>
                        </tr>
                        <tr style='border-bottom: 1px solid #d1d5db;'>
                            <td style='padding: 8px 4px;'>contactor_catalog.pdf</td>
                            <td style='padding: 8px 4px;'>2.7 MB</td>
                            <td style='padding: 8px 4px;'><span style='color: #059669;'>Processed</span></td>
                        </tr>
                        <tr style='border-bottom: 1px solid #d1d5db;'>
                            <td style='padding: 8px 4px;'>relay_catalog.pdf</td>
                            <td style='padding: 8px 4px;'>1.9 MB</td>
                            <td style='padding: 8px 4px;'><span style='color: #059669;'>Processed</span></td>
                        </tr>
                        <tr>
                            <td style='padding: 8px 4px;'>switch_catalog_2024.pdf</td>
                            <td style='padding: 8px 4px;'>3.1 MB</td>
                            <td style='padding: 8px 4px;'><span style='color: #dc2626;'>Not Processed</span></td>
                        </tr>
                    </tbody>
                </table>
            </div>
            <div style='background: #f3f4f6; border-radius: 8px; padding: 16px;'>
                <h4 style='font-size: 16px; margin-bottom: 16px;'>Process Catalogs</h4>
                <button id="process-btn" style='background: #3b82f6; color: white; padding: 8px 16px; border: none; border-radius: 4px; cursor: pointer; font-weight: 500;'>
                    Process All Catalogs
                </button>
                <p style='margin-top: 16px; color: #6b7280; font-size: 14px;'>
                    This will process all PDF catalogs in the S3 bucket, extract text and images,
                    generate embeddings, and store them in the vector database.
                </p>
            </div>
        </div>
    </div>
    """
+# For the image extraction and serving part, we need to add a function to temporarily store and serve images
def serve_product_image(image_id):
    """Write one stored image to ``./temp_images`` and return the file path.

    Looks the image up by id, then writes its bytes to
    ``<cwd>/temp_images/<image_id>.<ext>`` where ``<ext>`` is the
    ``mime_type`` entry of the row's JSON metadata ("jpg" if absent).

    Returns the path written, or None when there is no database session, no
    matching row, or an error occurs.
    """
    if not astra_session:
        return None
    try:
        lookup = f"""
            SELECT image_data, metadata
            FROM {astra_keyspace}.product_images
            WHERE id = %s
        """
        result = astra_session.execute(lookup, (image_id,))
        # Only the first matching row is used.
        record = next(iter(result), None)
        if record is None:
            return None

        payload = record.image_data
        details = json.loads(record.metadata)

        target_dir = os.path.join(os.getcwd(), "temp_images")
        os.makedirs(target_dir, exist_ok=True)

        extension = details.get("mime_type", "jpg")
        target_path = os.path.join(target_dir, f"{image_id}.{extension}")
        with open(target_path, "wb") as handle:
            handle.write(payload)
        return target_path
    except Exception as e:
        print(f"Error serving product image: {e}")
        return None
+# Update the get_product_images function to use the temporary file paths
def get_product_images(product):
    """Return relative file paths of up to four images for ``product``.

    Each matching row id is passed to ``serve_product_image``; ids for which
    that returns a path contribute that path, relative to the working
    directory. When no path was collected, two placeholder JPEGs are created
    on demand under ``./placeholder_images`` and their relative paths are
    returned instead.

    Returns an empty list when there is no database session or on any error.
    """
    global product_images
    if not astra_session:
        return []
    try:
        lookup = f"""
            SELECT id, product_type, metadata
            FROM {astra_keyspace}.product_images
            WHERE product_type = %s
            LIMIT 4
        """
        matches = astra_session.execute(lookup, (product,))

        collected = []
        for match in matches:
            served = serve_product_image(match.id)
            if not served:
                continue
            # Use relative path for serving in the UI
            collected.append(os.path.relpath(served, os.getcwd()))

        if collected:
            return collected

        # Nothing stored for this product: fall back to generated placeholders.
        fallback_dir = os.path.join(os.getcwd(), "placeholder_images")
        os.makedirs(fallback_dir, exist_ok=True)
        for number in (1, 2):
            fallback_path = os.path.join(
                fallback_dir,
                f"placeholder-{product.lower().replace(' ', '-')}-{number}.jpg"
            )
            if not os.path.exists(fallback_path):
                # Imported lazily so Pillow is only needed when a placeholder
                # actually has to be drawn.
                from PIL import Image, ImageDraw, ImageFont
                canvas = Image.new('RGB', (400, 300), color=(240, 240, 240))
                pen = ImageDraw.Draw(canvas)
                pen.rectangle([(0, 0), (400, 300)], outline=(200, 200, 200))
                try:
                    label_font = ImageFont.truetype("arial.ttf", 20)
                except IOError:
                    label_font = ImageFont.load_default()
                pen.text((120, 120), f"ABB {product}", fill=(100, 100, 100), font=label_font)
                canvas.save(fallback_path)
            collected.append(os.path.relpath(fallback_path, os.getcwd()))
        return collected
    except Exception as e:
        print(f"Error retrieving product images: {e}")
        return []
+# Render the current product images as an HTML grid
+def render_images():
+    """Render product images as HTML (if available)"""
+    if not product_images:
+        return ""
+    html = "<div style='margin-top: 12px; display: grid; grid-template-columns: 1fr 1fr; gap: 8px;'>"
+    for i, image_path in enumerate(product_images):
+        # Convert backslashes to forward slashes for URLs
+        url_path = image_path.replace("\\", "/")
+        html += f"""
+        <div style='background: #f3f4f6; border-radius: 6px; padding: 8px; text-align: center;'>
+            <div style='height: 180px; display: flex; align-items: center; justify-content: center; background: rgba(0,0,0,0.05); border-radius: 4px; overflow: hidden;'>
+                <img src="/{url_path}" alt="Product Image {i+1}" style="max-width: 100%; max-height: 160px; object-fit: contain;">
+            </div>
+            <p style='margin-top: 4px; font-size: 12px; text-overflow: ellipsis; overflow: hidden; white-space: nowrap;'>{os.path.basename(image_path)}</p>
+        </div>
+        """
+    html += "</div>"
+    return html
+# Setup cleanup function to remove temporary image files
+def cleanup_temp_files():
+    """Clean up temporary image files that are older than 1 hour"""
+    try:
+        temp_dirs = ["temp_images", "placeholder_images"]
+        current_time = time.time()
+        for dir_name in temp_dirs:
+            if os.path.exists(dir_name):
+                for filename in os.listdir(dir_name):
+                    file_path = os.path.join(dir_name, filename)
+                    # Check if the file is older than 1 hour
+                    if os.path.isfile(file_path) and (current_time - os.path.getmtime(file_path) > 3600):
+                        os.remove(file_path)
+    except Exception as e:
+        print(f"Error cleaning up temporary files: {e}")
+# Schedule periodic cleanup of temporary files
+def schedule_cleanup():
+    """Schedule periodic cleanup of temporary files"""
+    import threading
+    # Run cleanup
+    cleanup_temp_files()
+    # Schedule next cleanup in 30 minutes
+    threading.Timer(1800, schedule_cleanup).start()
+# Initialize Gemini API, Astra DB, S3 client, and embedding model
+# These calls run at module level, i.e. whenever this module is loaded, not
+# only when it is executed as a script.
+gemini_initialized = init_gemini_api()
+# astra_session and astra_keyspace are read as globals by
+# serve_product_image and get_product_images; a falsy astra_session makes
+# both of them return early.
+astra_session, astra_keyspace = init_astra_db()
+s3_client = init_s3_client()
+embeddings_model = get_embeddings_model()
+# Initialize main UI
+def create_ui():
+    """Create the main Gradio UI with tabs for chat, analytics, and admin"""
+    with gr.Blocks(title="ABB Product Assistant", css="") as demo:
+        gr.Markdown("# ABB Product Assistant")
+        with gr.Tabs() as tabs:
+            # Chat tab
+            with gr.TabItem("Chat"):
+                chatbot = gr.Chatbot(value=[], elem_id="chatbot")
+                with gr.Row():
+                    msg = gr.Textbox(placeholder="Ask about ABB products...", scale=4)
+                    submit = gr.Button("Send", scale=1)
+                gr.HTML(render_images, elem_id="product-images")
+                # Set up chat functionality
+                submit.click(
+                    chat_response,
+                    [msg, chatbot],
+                    [chatbot],
+                    queue=False
+                ).then(
+                    lambda: "",
+                    None,
+                    [msg],
+                    queue=False
+                )
+                msg.submit(
+                    chat_response,
+                    [msg, chatbot],
+                    [chatbot],
+                    queue=False
+                ).then(
+                    lambda: "",
+                    None,
+                    [msg],
+                    queue=False
+                )
+            # Analytics tab
+            with gr.TabItem("Analytics"):
+                gr.HTML(render_metrics)
+                with gr.Row():
+                    with gr.Column():
+                        gr.Plot(render_product_distribution_chart)
+                    with gr.Column():
+                        gr.Plot(render_query_volume_chart)
+            # Admin tab
+            with gr.TabItem("Admin"):
+                gr.HTML(render_advanced_pdf_ingestion)
+    return demo
+# Start the application
+if __name__ == "__main__":
+    # Schedule cleanup of temporary files
+    # (runs one cleanup immediately, then re-arms itself every 30 minutes)
+    schedule_cleanup()
+    # Create and launch the UI
+    demo = create_ui()
+    # share=True asks Gradio to also create a public share link
+    demo.launch(share=True)