"""Gradio demo that pulls brand, product, price and capacity out of product text.

Field extraction is regex-based; a Hugging Face token-classification pipeline
is run on the same text and its entities are appended to the report.
"""

import logging
import re

import gradio as gr
from transformers import pipeline

logger = logging.getLogger(__name__)

# Hugging Face model ids. The fine-tuned model is tried first; the general
# CoNLL-03 model is the stand-in when the first one cannot be loaded.
PRIMARY_MODEL = "MuneebAbro/ecommerce-ner-model"
FALLBACK_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"

# Entities whose score is not strictly above this are left out of the report.
NER_MIN_SCORE = 0.3

_BRANDS = (
    "Samsung", "Apple", "Google", "OnePlus", "Xiaomi", "Huawei", "Sony",
    "LG", "Dell", "HP", "Lenovo", "Microsoft", "Nintendo",
)
# Product-line names that should be reported as their maker, so that
# "iPhone 14 ... Apple" yields the brand "Apple" rather than "iPhone".
_BRAND_ALIASES = {"iphone": "Apple"}
# Lower-cased spelling -> canonical spelling shown to the user.
_CANONICAL_BRAND = {
    **{name.lower(): name for name in _BRANDS},
    **_BRAND_ALIASES,
}
_BRAND_RE = re.compile(
    r"\b(" + "|".join(re.escape(key) for key in _CANONICAL_BRAND) + r")\b",
    re.IGNORECASE,
)

# Tried in order; the first pattern that matches wins. The last one is a
# generic "<word> <number> [<word>]" catch-all. These are case-sensitive.
_PRODUCT_RES = tuple(
    re.compile(pattern)
    for pattern in (
        r'(Galaxy\s+\w+(?:\s+\w+)?)',
        r'(iPhone\s+\d+(?:\s+\w+)?)',
        r'(Pixel\s+\d+(?:\s+\w+)?)',
        r'(\w+\s+\d+(?:\s+\w+)?)',
    )
)

# An explicit "Price: $..." label is preferred over the first bare "$..." amount.
_PRICE_RES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'Price\s*:?\s*\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
        r'\$(\d+(?:,\d{3})*(?:\.\d{2})?)',
    )
)

_QUANTITY_RE = re.compile(r'(\d+(?:GB|TB|MB|RAM|Storage))', re.IGNORECASE)


def _load_ner_pipeline():
    """Load the NER pipeline, preferring the fine-tuned model.

    Returns:
        A ``(pipeline_or_None, is_primary)`` tuple. ``is_primary`` is True
        only when ``PRIMARY_MODEL`` loaded. The first element is None when
        neither model could be loaded.
    """
    try:
        return pipeline("ner", model=PRIMARY_MODEL, aggregation_strategy="simple"), True
    except Exception:
        # Narrower than a bare ``except`` so Ctrl-C / SystemExit still propagate.
        logger.warning(
            "Could not load %s; falling back to %s",
            PRIMARY_MODEL, FALLBACK_MODEL, exc_info=True,
        )
    try:
        return pipeline("ner", model=FALLBACK_MODEL, aggregation_strategy="simple"), False
    except Exception:
        # The regex extraction does not need a model, so keep the app usable.
        logger.warning(
            "Could not load %s; NER output is disabled",
            FALLBACK_MODEL, exc_info=True,
        )
        return None, False


ner_pipeline, model_loaded = _load_ner_pipeline()


def _first_match(compiled_patterns, text):
    """Return the first match object produced by any pattern, in order, or None."""
    for compiled in compiled_patterns:
        found = compiled.search(text)
        if found:
            return found
    return None


def _extract_fields(text):
    """Run the regex extractors over *text*.

    Returns:
        A dict with keys ``product_name``, ``brand`` and ``price`` (each an
        empty string when nothing matched) and ``quantities`` (a list of the
        matched capacity tokens, possibly empty).
    """
    fields = {"product_name": "", "brand": "", "price": "", "quantities": []}

    brand = _BRAND_RE.search(text)
    if brand:
        fields["brand"] = _CANONICAL_BRAND[brand.group(1).lower()]

    product = _first_match(_PRODUCT_RES, text)
    if product:
        fields["product_name"] = product.group(1).strip()

    price = _first_match(_PRICE_RES, text)
    if price:
        fields["price"] = f"${price.group(1)}"

    fields["quantities"] = _QUANTITY_RE.findall(text)
    return fields


def _format_ner_section(text):
    """Return the NER part of the report for *text*.

    Runs whichever pipeline was loaded; the header line says whether it is the
    fallback model. Only entities scoring above ``NER_MIN_SCORE`` are listed.
    """
    if ner_pipeline is None:
        return "\n🤖 **NER Model:** Unavailable\n"

    if model_loaded:
        header = "\n🤖 **NER Model Results:**\n"
    else:
        header = "\n🤖 **NER Model:** Using fallback model\n"

    entity_lines = [
        f"- {entity['word']} → {entity['entity_group']} ({entity['score']:.2f})\n"
        for entity in ner_pipeline(text)
        if entity.get('score', 0) > NER_MIN_SCORE
    ]
    return header + "".join(entity_lines)


def extract_product_info(text):
    """Extract product information from *text* and format it as a report.

    Args:
        text: Free-form product description. ``None``, empty, or
            whitespace-only input yields a prompt message instead of a report.

    Returns:
        A multi-line string with brand, product, price and quantities
        (``Not found`` for anything missing) followed by the NER section,
        or an ``❌ Error: ...`` string if extraction raised.
    """
    if not text or not text.strip():
        return "Please enter some text!"

    try:
        fields = _extract_fields(text)
        ner_info = _format_ner_section(text)
        quantities = ', '.join(fields['quantities'])
        return (
            "\n"
            f"🏷️ **Brand:** {fields['brand'] or 'Not found'}\n"
            f"📱 **Product:** {fields['product_name'] or 'Not found'}\n"
            f"💰 **Price:** {fields['price'] or 'Not found'}\n"
            f"📊 **Quantities:** {quantities or 'Not found'}\n"
            f"{ner_info}\n"
        )
    except Exception as e:
        # UI boundary: show the failure in the output box instead of raising.
        return f"❌ Error: {e}"


# Create interface
demo = gr.Interface(
    fn=extract_product_info,
    inputs=gr.Textbox(
        label="Product Description",
        placeholder="Enter product description here...",
        lines=3,
        value="Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199.",
    ),
    outputs=gr.Textbox(
        label="Extracted Information",
        lines=12,
    ),
    title="🛒 E-commerce Product Information Extractor",
    description="Extract product names, brands, prices, and quantities from product descriptions using fine-tuned NER model.",
    examples=[
        ["Samsung Galaxy S23 Ultra, 12GB RAM, 256GB Storage, Price: $1199."],
        ["iPhone 14 Pro Max 256GB $1299 Apple"],
        ["Google Pixel 7 Pro 12GB RAM $899"],
        ["Sony WH-1000XM4 wireless headphones $349"],
        ["Dell XPS 13 laptop 16GB DDR4 $1299"],
    ],
    theme="default",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()