Spaces:
Sleeping
Sleeping
File size: 4,112 Bytes
4734d1a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import gradio as gr
from transformers import pipeline
import re
# Load the fine-tuned e-commerce NER model. If that fails for any reason,
# fall back to a general-purpose CoNLL-03 English NER model so the app can
# still start. `model_loaded` records which of the two ended up in use.
try:
    ner_pipeline = pipeline(
        "ner",
        model="MuneebAbro/ecommerce-ner-model",
        aggregation_strategy="simple",
    )
    model_loaded = True
except Exception as exc:
    # `except Exception` (not a bare `except:`) so that KeyboardInterrupt and
    # SystemExit still propagate; the reason is printed so it shows in the logs.
    print(f"Could not load fine-tuned NER model ({exc!r}); using fallback model.")
    ner_pipeline = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple",
    )
    model_loaded = False
# Brands recognised when named explicitly. "iPhone" is intentionally absent:
# it is a product line, and matching it as a brand hid an explicit "Apple"
# appearing later in the same description.
_BRAND_RE = re.compile(
    r'\b(Samsung|Apple|Google|OnePlus|Xiaomi|Huawei|Sony|LG|Dell|HP|Lenovo'
    r'|Microsoft|Nintendo)\b',
    re.IGNORECASE,
)

# Product-line keywords that imply a brand when no brand is named explicitly.
_IMPLIED_BRAND_RES = (
    (re.compile(r'\biPhone\b', re.IGNORECASE), "Apple"),
)

# Tried in order: specific product lines first, generic "<word> <number>" last.
# These are case-sensitive, unlike the brand/price/quantity patterns.
_PRODUCT_RES = tuple(
    re.compile(pattern)
    for pattern in (
        r'(Galaxy\s+\w+(?:\s+\w+)?)',
        r'(iPhone\s+\d+(?:\s+\w+)?)',
        r'(Pixel\s+\d+(?:\s+\w+)?)',
        r'(\w+\s+\d+(?:\s+\w+)?)',
    )
)

# A price labelled "Price:" wins over the first bare "$<amount>".
_PRICE_RES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'Price\s*:?\s*\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
        r'\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
    )
)

# Digits immediately followed by a unit/label, e.g. "12GB" or "256gb".
_QUANTITY_RE = re.compile(r'(\d+(?:GB|TB|MB|RAM|Storage))', re.IGNORECASE)


def _first_match(patterns, text):
    """Return group 1 of the first pattern that matches *text*, else ''."""
    for pattern in patterns:
        match = pattern.search(text)
        if match:
            return match.group(1)
    return ""


def _extract_brand(text):
    """Return the explicitly named brand, else one implied by a product line."""
    match = _BRAND_RE.search(text)
    if match:
        return match.group(1)
    for pattern, brand in _IMPLIED_BRAND_RES:
        if pattern.search(text):
            return brand
    return ""


def _ner_section(text):
    """Format the NER part of the report (runs the model only if fine-tuned)."""
    if not model_loaded:
        return "\n🤖 **NER Model:** Using fallback model\n"
    parts = ["\n🤖 **NER Model Results:**\n"]
    parts.extend(
        f"- {entity['word']} → {entity['entity_group']} ({entity['score']:.2f})\n"
        for entity in ner_pipeline(text)
        if entity.get('score', 0) > 0.3  # drop low-confidence entities
    )
    return "".join(parts)


def extract_product_info(text: str) -> str:
    """Extract product information and format the results as a text report.

    Brand, product name, price and quantities are pulled out with regular
    expressions. When the fine-tuned NER model is loaded, its entities with a
    score above 0.3 are appended to the report.

    Args:
        text: Free-form product description. ``None``, empty and
            whitespace-only input are all treated as "no input".

    Returns:
        A multi-line report string; ``"Please enter some text!"`` for empty
        input; or an error message string if extraction fails unexpectedly.
        The function does not raise.
    """
    if not text or not text.strip():
        return "Please enter some text!"

    try:
        brand = _extract_brand(text)
        product_name = _first_match(_PRODUCT_RES, text).strip()
        amount = _first_match(_PRICE_RES, text)
        price = f"${amount}" if amount else ""
        quantities = _QUANTITY_RE.findall(text)

        # The NER output is supplementary: a model failure must not throw
        # away the regex results that were already extracted.
        try:
            ner_info = _ner_section(text)
        except Exception as exc:
            ner_info = f"\n🤖 **NER Model:** Unavailable ({exc})\n"

        # Leading and trailing "" keep the report wrapped in newlines.
        return "\n".join([
            "",
            f"🏷️ **Brand:** {brand or 'Not found'}",
            f"📱 **Product:** {product_name or 'Not found'}",
            f"💰 **Price:** {price or 'Not found'}",
            f"📊 **Quantities:** {', '.join(quantities) or 'Not found'}",
            ner_info,
            "",
        ])
    except Exception as e:
        return f"❌ Error: {str(e)}"
# Create interface: a single text box in, a single text box out, wired to
# extract_product_info. The examples are shown as clickable sample inputs.
demo = gr.Interface(
    fn=extract_product_info,
    inputs=gr.Textbox(
        label="Product Description",
        placeholder="Enter product description here...",
        lines=3,
        value="Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199.",
    ),
    outputs=gr.Textbox(
        label="Extracted Information",
        lines=12,
    ),
    # NOTE(review): the leading emoji was mis-encoded in the source; restored
    # as a shopping cart -- confirm against the intended title.
    title="🛒 E-commerce Product Information Extractor",
    description="Extract product names, brands, prices, and quantities from product descriptions using fine-tuned NER model.",
    examples=[
        ["Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199."],
        ["iPhone 14 Pro Max 256GB $1299 Apple"],
        ["Google Pixel 7 Pro 12GB RAM $899"],
        ["Sony WH-1000XM4 wireless headphones $349"],
        ["Dell XPS 13 laptop 16GB DDR4 $1299"],
    ],
    theme="default",
    allow_flagging="never",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|