File size: 4,112 Bytes
4734d1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
from transformers import pipeline
import re

# Load the fine-tuned e-commerce NER model; fall back to a general-purpose
# CoNLL-03 NER model when it cannot be loaded. `model_loaded` records which
# of the two ended up in `ner_pipeline`.
try:
    ner_pipeline = pipeline("ner", 
                           model="MuneebAbro/ecommerce-ner-model",
                           aggregation_strategy="simple")
    model_loaded = True
except Exception as load_error:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit raised during the load still stop the
    # program instead of silently triggering the fallback load.
    print(f"Could not load fine-tuned NER model, using fallback: {load_error}")
    ner_pipeline = pipeline("ner", 
                           model="dbmdz/bert-large-cased-finetuned-conll03-english",
                           aggregation_strategy="simple")
    model_loaded = False

def _first_match(patterns, text, flags=0):
    """Return group 1 of the first pattern in *patterns* that matches *text*.

    Patterns are tried in order with ``re.search``; an empty string is
    returned when none of them match.
    """
    for pattern in patterns:
        match = re.search(pattern, text, flags)
        if match:
            return match.group(1)
    return ""


def extract_product_info(text):
    """Extract product information from *text* and format it as a report.

    Brand, product name, price and quantity tokens are pulled out with
    regular expressions. When the module-level ``model_loaded`` flag is true,
    the entities returned by ``ner_pipeline`` with a score above 0.3 are
    appended to the report.

    Args:
        text: Free-form product description.

    Returns:
        A multi-line report string. A prompt message is returned when *text*
        is missing, not a string, or only whitespace; an error message is
        returned when extraction raises.
    """
    # Guard against None / non-string input as well as blank strings; calling
    # .strip() on such values would otherwise raise before the try block.
    if not isinstance(text, str) or not text.strip():
        return "Please enter some text!"

    try:
        # Brand: first known brand keyword found in the text (case-insensitive).
        brand = _first_match(
            [
                r'\b(Samsung|Apple|iPhone|Google|OnePlus|Xiaomi|Huawei|Sony|LG|Dell|HP|Lenovo|Microsoft|Nintendo)\b'
            ],
            text,
            re.IGNORECASE,
        )

        # Product name: specific product lines first, then a generic
        # "<word> <number> [<word>]" shape as the last resort.
        product_name = _first_match(
            [
                r'(Galaxy\s+\w+(?:\s+\w+)?)',
                r'(iPhone\s+\d+(?:\s+\w+)?)',
                r'(Pixel\s+\d+(?:\s+\w+)?)',
                r'(\w+\s+\d+(?:\s+\w+)?)'
            ],
            text,
        ).strip()

        # Price: prefer an explicit "Price: $..." label over any bare "$...".
        price_digits = _first_match(
            [
                r'Price\s*:?\s*\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
                r'\$(\d+(?:,\d{3})*(?:\.\d{2})?)'
            ],
            text,
            re.IGNORECASE,
        )
        price = f"${price_digits}" if price_digits else ""

        # Quantities: digits immediately followed by a unit keyword.
        quantities = re.findall(r'(\d+(?:GB|TB|MB|RAM|Storage))', text, re.IGNORECASE)

        # Try NER model too
        if model_loaded:
            ner_results = ner_pipeline(text)
            entity_lines = "".join(
                f"- {entity['word']} → {entity['entity_group']} ({entity['score']:.2f})\n"
                for entity in ner_results
                if entity.get('score', 0) > 0.3
            )
            ner_info = "\n🤖 **NER Model Results:**\n" + entity_lines
        else:
            # The fallback pipeline is not invoked here; only a notice is shown.
            ner_info = "\n🤖 **NER Model:** Using fallback model\n"

        # Format output
        output = f"""
🏷️  **Brand:** {brand or 'Not found'}
📱 **Product:** {product_name or 'Not found'}
💰 **Price:** {price or 'Not found'}
📊 **Quantities:** {', '.join(quantities) or 'Not found'}

{ner_info}
"""

        return output

    except Exception as e:
        return f"❌ Error: {str(e)}"

# Create interface
# Single-textbox Gradio UI: the entered description is passed to
# extract_product_info and the returned report is shown in the output textbox.
demo = gr.Interface(
    fn=extract_product_info,
    inputs=gr.Textbox(
        label="Product Description",
        placeholder="Enter product description here...",
        lines=3,
        value="Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199."
    ),
    outputs=gr.Textbox(
        label="Extracted Information", 
        lines=12
    ),
    # Title emoji restored from its mis-decoded byte sequence.
    title="🛒 E-commerce Product Information Extractor",
    description="Extract product names, brands, prices, and quantities from product descriptions using fine-tuned NER model.",
    examples=[
        ["Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199."],
        ["iPhone 14 Pro Max 256GB $1299 Apple"],
        ["Google Pixel 7 Pro 12GB RAM $899"],
        ["Sony WH-1000XM4 wireless headphones $349"],
        ["Dell XPS 13 laptop 16GB DDR4 $1299"]
    ],
    theme="default",
    allow_flagging="never"
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()