# Hugging Face Space: MuneebAbro/ecommerce-ner-api (status: Sleeping)
# File size: 4,112 Bytes; revision 4734d1a

import gradio as gr
from transformers import pipeline
import re

# Load the fine-tuned e-commerce NER model. If that fails for any ordinary
# reason, fall back to a general-purpose CoNLL-03 NER model so the app can
# still start. `model_loaded` records which of the two pipelines is active.
try:
    ner_pipeline = pipeline(
        "ner",
        model="MuneebAbro/ecommerce-ner-model",
        aggregation_strategy="simple",
    )
    model_loaded = True
except Exception as exc:
    # `except Exception` (not a bare `except:`) so that Ctrl-C / SystemExit
    # during the first load still stops the process instead of silently
    # triggering the second load below.
    print(f"Could not load MuneebAbro/ecommerce-ner-model ({exc}); using fallback NER model")
    ner_pipeline = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple",
    )
    model_loaded = False

# Patterns are compiled once at import time instead of on every request.
_BRAND_RE = re.compile(
    r'\b(Samsung|Apple|iPhone|Google|OnePlus|Xiaomi|Huawei|Sony|LG|Dell|HP|Lenovo|Microsoft|Nintendo)\b',
    re.IGNORECASE,
)

# Product-line names accepted by _BRAND_RE that are not brands themselves,
# mapped (lower-cased) to the brand that should be reported.
_BRAND_ALIASES = {"iphone": "Apple"}

# Tried in order; the last one is a generic "<word> <number> [<word>]" fallback.
_PRODUCT_RES = tuple(
    re.compile(pattern)
    for pattern in (
        r'(Galaxy\s+\w+(?:\s+\w+)?)',
        r'(iPhone\s+\d+(?:\s+\w+)?)',
        r'(Pixel\s+\d+(?:\s+\w+)?)',
        r'(\w+\s+\d+(?:\s+\w+)?)',
    )
)

# An explicitly labelled "Price: $..." wins over the first bare "$..." amount.
_PRICE_RES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'Price\s*:?\s*\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
        r'\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
    )
)

_QUANTITY_RE = re.compile(r'(\d+(?:GB|TB|MB|RAM|Storage))', re.IGNORECASE)

# Entities scored at or below this value are left out of the NER section.
_MIN_NER_SCORE = 0.3


def _first_group(patterns, text):
    """Return group 1 of the first pattern that matches *text*, else ""."""
    for pattern in patterns:
        match = pattern.search(text)
        if match:
            return match.group(1)
    return ""


def _ner_section(text):
    """Run the module-level NER pipeline on *text* and format its entities.

    The pipeline is invoked whether it is the fine-tuned model or the
    fallback; only the header line differs. Any failure is reported inside
    the returned section rather than raised, so the regex-based fields are
    still shown to the user.
    """
    try:
        if model_loaded:
            header = "\n🤖 **NER Model Results:**\n"
        else:
            header = "\n🤖 **NER Model:** Using fallback model\n"
        entities = ner_pipeline(text)
        lines = [
            f"- {entity['word']} → {entity['entity_group']} ({entity['score']:.2f})\n"
            for entity in entities
            if entity.get('score', 0) > _MIN_NER_SCORE
        ]
    except Exception as exc:
        return f"\n🤖 **NER Model:** Unavailable ({exc})\n"
    return header + "".join(lines)


def extract_product_info(text):
    """Extract brand, product name, price and quantities from a description.

    Brand, product, price and quantities come from regular expressions; the
    NER pipeline output is appended as an extra section.

    Args:
        text: Free-form product description. ``None``, empty and
            whitespace-only values are treated as "no input".

    Returns:
        A Markdown-formatted report string, the prompt
        ``"Please enter some text!"`` when there is no input, or an
        ``"❌ Error: ..."`` string if extraction fails unexpectedly.
    """
    # `not text` first: a None value has no .strip().
    if not text or not text.strip():
        return "Please enter some text!"

    try:
        brand_match = _BRAND_RE.search(text)
        brand = ""
        if brand_match:
            found = brand_match.group(1)
            # Report the manufacturer when a product line (e.g. "iPhone") matched.
            brand = _BRAND_ALIASES.get(found.lower(), found)

        product_name = _first_group(_PRODUCT_RES, text).strip()

        amount = _first_group(_PRICE_RES, text)
        price = f"${amount}" if amount else ""

        quantities = [q for q in _QUANTITY_RE.findall(text) if q]

        ner_info = _ner_section(text)

        # Format output
        output = f"""
🏷️  **Brand:** {brand or 'Not found'}
📱 **Product:** {product_name or 'Not found'}
💰 **Price:** {price or 'Not found'}
📊 **Quantities:** {', '.join(quantities) or 'Not found'}

{ner_info}
"""

        return output

    except Exception as e:
        # UI boundary: show the problem to the user instead of a stack trace.
        return f"❌ Error: {str(e)}"

# Assemble the Gradio UI: a single text input, a single text output, and a
# handful of clickable sample descriptions.
sample_descriptions = [
    ["Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199."],
    ["iPhone 14 Pro Max 256GB $1299 Apple"],
    ["Google Pixel 7 Pro 12GB RAM $899"],
    ["Sony WH-1000XM4 wireless headphones $349"],
    ["Dell XPS 13 laptop 16GB DDR4 $1299"],
]

# Input box, pre-filled with a sample description.
description_input = gr.Textbox(
    label="Product Description",
    placeholder="Enter product description here...",
    lines=3,
    value="Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199.",
)

# Output box that displays the formatted extraction report.
extraction_output = gr.Textbox(label="Extracted Information", lines=12)

demo = gr.Interface(
    fn=extract_product_info,
    inputs=description_input,
    outputs=extraction_output,
    title="🛒 E-commerce Product Information Extractor",
    description="Extract product names, brands, prices, and quantities from product descriptions using fine-tuned NER model.",
    examples=sample_descriptions,
    theme="default",
    allow_flagging="never",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()