Upload 9 files
Browse files- README.md +86 -12
- app.py +23 -0
- app/frontend.py +212 -0
- app/main.py +211 -0
- init_db.py +35 -0
- models/database.py +199 -0
- requirements.txt +15 -0
- utils/health_score.py +218 -0
- utils/scraper.py +178 -0
README.md
CHANGED
|
@@ -1,12 +1,86 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IndiScan: Indian Product Health Analyzer 🔍
|
| 2 |
+
|
| 3 |
+
IndiScan is a comprehensive health analysis tool that helps users make informed decisions about food and cosmetic products by analyzing ingredients, providing health scores, and comparing prices across Indian e-commerce platforms.
|
| 4 |
+
|
| 5 |
+
## Features 🌟
|
| 6 |
+
|
| 7 |
+
- **Smart Product Analysis**
|
| 8 |
+
- Barcode scanning
|
| 9 |
+
- Image-based ingredient extraction
|
| 10 |
+
- Manual ingredient entry
|
| 11 |
+
- Health score calculation (0-1000)
|
| 12 |
+
- Ingredient risk assessment
|
| 13 |
+
- Nutrition information analysis
|
| 14 |
+
|
| 15 |
+
- **Price Comparison**
|
| 16 |
+
- Real-time price tracking across:
|
| 17 |
+
- Amazon India
|
| 18 |
+
- Blinkit
|
| 19 |
+
- Zepto
|
| 20 |
+
- Swiggy Instamart
|
| 21 |
+
|
| 22 |
+
- **Admin Controls**
|
| 23 |
+
- Product database management
|
| 24 |
+
- CSV import/export
|
| 25 |
+
- 60-day auto-refresh system
|
| 26 |
+
|
| 27 |
+
## Setup 🛠️
|
| 28 |
+
|
| 29 |
+
1. Install dependencies:
|
| 30 |
+
```bash
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
2. Run the application:
|
| 35 |
+
```bash
|
| 36 |
+
python app.py
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
The application will start both the backend API (port 8000) and the Streamlit frontend.
|
| 40 |
+
|
| 41 |
+
## Usage 📱
|
| 42 |
+
|
| 43 |
+
1. **Scan Products**
|
| 44 |
+
- Enter a barcode number
|
| 45 |
+
- Upload a product image
|
| 46 |
+
- Manually enter ingredients
|
| 47 |
+
|
| 48 |
+
2. **View Analysis**
|
| 49 |
+
- Health score and explanation
|
| 50 |
+
- Ingredient breakdown
|
| 51 |
+
- Risk categories
|
| 52 |
+
- Nutrition information
|
| 53 |
+
- Price comparison
|
| 54 |
+
|
| 55 |
+
3. **Admin Features**
|
| 56 |
+
- Login with admin credentials
|
| 57 |
+
- Add/update product information
|
| 58 |
+
- Export/import database
|
| 59 |
+
- Monitor data freshness
|
| 60 |
+
|
| 61 |
+
## Technology Stack 💻
|
| 62 |
+
|
| 63 |
+
- **Backend**: FastAPI
|
| 64 |
+
- **Frontend**: Streamlit
|
| 65 |
+
- **Database**: SQLite
|
| 66 |
+
- **Image Processing**: EasyOCR
|
| 67 |
+
- **Data Analysis**: Pandas, Plotly
|
| 68 |
+
- **Web Scraping**: aiohttp, BeautifulSoup4
|
| 69 |
+
|
| 70 |
+
## Contributing 🤝
|
| 71 |
+
|
| 72 |
+
Feel free to contribute to this project by:
|
| 73 |
+
1. Forking the repository
|
| 74 |
+
2. Creating a feature branch
|
| 75 |
+
3. Committing your changes
|
| 76 |
+
4. Opening a pull request
|
| 77 |
+
|
| 78 |
+
## License 📄
|
| 79 |
+
|
| 80 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 81 |
+
|
| 82 |
+
## Acknowledgments 🙏
|
| 83 |
+
|
| 84 |
+
- Inspired by the Yuka app
|
| 85 |
+
- Uses OpenFoodFacts data
|
| 86 |
+
- Built with ❤️ for Indian consumers
|
app.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import subprocess
import sys
from multiprocessing import Process

def run_backend():
    """Start the FastAPI backend with uvicorn on port 8000 (blocking)."""
    from app.main import app
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

def run_frontend():
    """Run the Streamlit frontend script in a child process (blocking).

    NOTE(review): this launches the script with plain Python; a Streamlit app
    is normally started via ``streamlit run app/frontend.py`` -- confirm this
    is intended.
    """
    subprocess.run([sys.executable, "app/frontend.py"])

if __name__ == "__main__":
    # Start backend in a separate process so the frontend can own the main one.
    backend_process = Process(target=run_backend)
    backend_process.start()

    try:
        # Run frontend in the main process; blocks until the UI exits.
        run_frontend()
    finally:
        # Fix: previously the backend child was only joined, so the program
        # never exited after the frontend closed. Stop the backend explicitly.
        backend_process.terminate()
        backend_process.join()
|
app/frontend.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import io
|
| 6 |
+
import base64
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
|
| 11 |
+
# Configure the app
|
| 12 |
+
st.set_page_config(
|
| 13 |
+
page_title="IndiScan - Product Health Analyzer",
|
| 14 |
+
page_icon="🔍",
|
| 15 |
+
layout="wide"
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# API endpoint
|
| 19 |
+
API_URL = "http://localhost:8000" # Change this when deploying
|
| 20 |
+
|
| 21 |
+
def main():
    """Render the Streamlit UI: sidebar scan-method picker, admin login,
    and the page for the chosen scan method.

    NOTE: Streamlit renders top-to-bottom, so statement order here is the
    on-screen layout order.
    """
    # Sidebar: choose how the user wants to identify the product.
    st.sidebar.title("IndiScan 🔍")
    scan_option = st.sidebar.radio(
        "Choose scan method:",
        ["Barcode", "Image Upload", "Manual Entry"]
    )

    # Main content header.
    st.title("IndiScan - Product Health Analyzer")
    st.markdown("""
    Analyze food and cosmetic products for health risks and get detailed insights.
    Upload an image, enter a barcode, or manually input product details.
    """)

    # Admin section in sidebar. Login is verified by probing the admin-only
    # /export endpoint with HTTP Basic credentials.
    # NOTE(review): a *successful* login therefore triggers a real CSV export
    # on the server as a side effect -- consider a dedicated auth-check route.
    with st.sidebar.expander("Admin Controls 🔐"):
        admin_username = st.text_input("Username")
        admin_password = st.text_input("Password", type="password")
        if st.button("Login"):
            try:
                auth = (admin_username, admin_password)
                response = requests.get(f"{API_URL}/export", auth=auth)
                if response.status_code == 200:
                    st.sidebar.success("Logged in as admin")
                    # Keep credentials for later admin calls in this session.
                    st.session_state['admin_auth'] = auth
                else:
                    st.sidebar.error("Invalid credentials")
            except Exception as e:
                st.sidebar.error(f"Login failed: {str(e)}")

    # Main content based on selected option.
    if scan_option == "Barcode":
        barcode_scanner()
    elif scan_option == "Image Upload":
        image_scanner()
    else:
        manual_entry()
| 59 |
+
|
| 60 |
+
def barcode_scanner():
    """Barcode lookup page: send the entered barcode to the API and render
    the analysis, or an error message."""
    st.header("Barcode Scanner 📱")
    entered = st.text_input("Enter barcode number:")
    if not entered:
        return
    try:
        resp = requests.post(f"{API_URL}/scan/barcode", params={"barcode": entered})
        if resp.status_code != 200:
            st.error("Product not found")
        else:
            display_results(resp.json())
    except Exception as exc:
        st.error(f"Error: {str(exc)}")
| 73 |
+
|
| 74 |
+
def image_scanner():
    """Image page: preview the uploaded photo and send it to the OCR endpoint."""
    st.header("Image Scanner 📸")
    upload = st.file_uploader("Upload product image", type=["jpg", "jpeg", "png"])
    if not upload:
        return
    try:
        # Show the photo back to the user, then forward the raw upload
        # to the backend for OCR-based ingredient extraction.
        st.image(Image.open(upload), caption="Uploaded Image", use_column_width=True)
        resp = requests.post(f"{API_URL}/scan/image", files={"file": upload})
        if resp.status_code == 200:
            display_results(resp.json())
        else:
            st.error("Failed to process image")
    except Exception as exc:
        st.error(f"Error: {str(exc)}")
| 94 |
+
|
| 95 |
+
def manual_entry():
    """Manual page: ingredients text in the left column, analysis on the right."""
    st.header("Manual Entry ✍️")

    left, right = st.columns(2)

    with left:
        chosen_type = st.selectbox("Product Type", ["Food", "Cosmetic"])
        raw_text = st.text_area("Enter ingredients list (comma-separated or as shown on package):")

    with right:
        if not raw_text:
            return
        payload = {
            "text": raw_text,
            "product_type": chosen_type.lower()
        }
        try:
            resp = requests.post(f"{API_URL}/analyze/text", data=payload)
            if resp.status_code == 200:
                display_results(resp.json())
            else:
                st.error("Failed to analyze ingredients")
        except Exception as exc:
            st.error(f"Error: {str(exc)}")
| 121 |
+
|
| 122 |
+
def display_results(data):
    """Render a full analysis for one product response from the API.

    Expects the JSON shape produced by the /scan/* and /analyze/text
    endpoints: optional keys 'health_score' (with 'score', 'explanation',
    'risks'), 'ingredients', 'nutrition_info', 'nutrition_analysis' and
    'prices'.
    """
    # Create three columns for different aspects of the analysis
    col1, col2, col3 = st.columns([2, 2, 1])

    with col1:
        st.subheader("Health Score")
        # Score is on a 0-1000 scale; missing data renders as 0.
        score = data.get('health_score', {}).get('score', 0)

        # Donut chart used as a makeshift gauge: green above 600, red otherwise.
        fig = px.pie(
            values=[score, 1000-score],
            names=['Score', 'Remaining'],
            hole=0.7,
            color_discrete_sequence=['#00ff00' if score > 600 else '#ff0000', '#eee']
        )
        fig.update_layout(
            annotations=[dict(text=f"{score}/1000", x=0.5, y=0.5, font_size=20, showarrow=False)],
            showlegend=False,
            width=300,
            height=300
        )
        st.plotly_chart(fig)

        # Display the human-readable explanation when the API provided one.
        if 'explanation' in data.get('health_score', {}):
            st.markdown(data['health_score']['explanation'])

    with col2:
        st.subheader("Ingredients Analysis")
        if 'ingredients' in data:
            ingredients = data['ingredients']
            st.write(f"Found {len(ingredients)} ingredients:")
            for i, ingredient in enumerate(ingredients, 1):
                st.write(f"{i}. {ingredient}")

        # Display risks if available
        if 'risks' in data.get('health_score', {}):
            st.subheader("Risk Categories")
            risks = data['health_score']['risks']
            for category, risk_data in risks.items():
                # Category keys are snake_case; prettify for the expander label.
                with st.expander(f"{category.replace('_', ' ').title()}"):
                    st.write(f"Found in: {', '.join(risk_data['ingredients'])}")

    with col3:
        if 'nutrition_info' in data:
            st.subheader("Nutrition Info")
            nutrition = data['nutrition_info']
            # NOTE(review): values are rendered with a fixed "g" suffix --
            # confirm all nutrients really are reported in grams.
            for nutrient, value in nutrition.items():
                st.metric(nutrient.title(), f"{value}g")

        # NOTE(review): unlike the guarded lookups above, 'concerns',
        # 'positives' and 'recommendations' are accessed directly; this
        # assumes the API always includes them when 'nutrition_analysis'
        # is present -- confirm against health_calculator.analyze_nutrition.
        if 'nutrition_analysis' in data:
            analysis = data['nutrition_analysis']

            if analysis['concerns']:
                st.subheader("⚠️ Concerns")
                for concern in analysis['concerns']:
                    st.write(f"- {concern}")

            if analysis['positives']:
                st.subheader("✅ Positives")
                for positive in analysis['positives']:
                    st.write(f"- {positive}")

            if analysis['recommendations']:
                st.subheader("💡 Recommendations")
                for rec in analysis['recommendations']:
                    st.write(f"- {rec}")

    # Price comparison if available (rendered full-width below the columns).
    if 'prices' in data:
        st.subheader("Price Comparison")
        prices_df = pd.DataFrame(data['prices'])
        fig = px.bar(
            prices_df,
            x='platform',
            y='price',
            title="Price Comparison Across Platforms",
            color='platform'
        )
        st.plotly_chart(fig)

        # Display price table with clickable product links.
        st.dataframe(
            prices_df[['platform', 'price', 'title', 'url']],
            column_config={
                "url": st.column_config.LinkColumn("Link")
            }
        )
| 210 |
+
|
| 211 |
+
# Script entry point.
if __name__ == "__main__":
    main()
|
app/main.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Form
|
| 2 |
+
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
import easyocr
|
| 5 |
+
import numpy as np
|
| 6 |
+
from PIL import Image
|
| 7 |
+
import io
|
| 8 |
+
import asyncio
|
| 9 |
+
from typing import Optional, List, Dict
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
|
| 14 |
+
# Import our utilities
|
| 15 |
+
from utils.scraper import Scraper
|
| 16 |
+
from utils.health_score import HealthScoreCalculator
|
| 17 |
+
from models.database import Database
|
| 18 |
+
|
| 19 |
+
# Application singletons shared by all request handlers.
app = FastAPI(title="IndiScan API")
security = HTTPBasic()  # HTTP Basic auth scheme for the admin-only endpoints
db = Database()  # SQLite-backed product store
health_calculator = HealthScoreCalculator()

# Configure CORS
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; tighten before exposing this API publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# Initialize OCR reader at import time (loads the English model, which can
# be slow on first run).
reader = easyocr.Reader(['en'])
|
| 35 |
+
|
| 36 |
+
def verify_admin(credentials: HTTPBasicCredentials = Depends(security)):
    """FastAPI dependency: authenticate HTTP Basic credentials as an admin.

    Returns the username on success; raises HTTP 401 otherwise.

    NOTE(review): the raw password is forwarded to Database.verify_admin,
    which compares it directly against the stored ``password_hash`` column;
    init_db stores a pbkdf2 hash there, so a plaintext password can never
    match. One side must hash/verify consistently -- confirm and fix.
    """
    is_admin = db.verify_admin(credentials.username, credentials.password)
    if not is_admin:
        raise HTTPException(
            status_code=401,
            detail="Invalid credentials",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username
|
| 45 |
+
|
| 46 |
+
@app.post("/scan/barcode")
async def scan_barcode(barcode: str):
    """Scan a product by barcode.

    Returns the cached product when present; otherwise scrapes prices from
    the supported platforms, stores the product, and returns it.

    Raises:
        HTTPException 404: no platform returned a result for the barcode.
    """
    # First check our database cache.
    product = db.get_product(barcode)
    if product:
        return product

    # If not found, scrape from the supported e-commerce platforms.
    async with Scraper() as scraper:
        prices = await scraper.get_all_prices(barcode)
        if not prices:
            raise HTTPException(status_code=404, detail="Product not found")

        # Use the first result for the product title.
        first_result = prices[0]
        product_data = {
            'barcode': barcode,
            'name': first_result['title'],
            'prices': prices,
            'last_updated': datetime.now().isoformat()
        }

        # Fix: the products table has no 'prices' column, so inserting the
        # full dict failed with an SQL error. Persist the product row without
        # prices, and record prices in the price_tracking table instead.
        record = {k: v for k, v in product_data.items() if k != 'prices'}
        product_id = db.add_product(record)
        if product_id:
            db.update_prices(product_id, prices)
        return product_data
|
| 72 |
+
|
| 73 |
+
@app.post("/scan/image")
async def scan_image(file: UploadFile = File(...)):
    """Scan product image for ingredients"""
    # Decode the upload into a numpy array for EasyOCR.
    raw = await file.read()
    pixels = np.array(Image.open(io.BytesIO(raw)))

    # Run OCR and join the recognized text fragments (index 1 of each result).
    ocr_results = reader.readtext(pixels)
    text = ' '.join(fragment[1] for fragment in ocr_results)

    # Parse the recognized text into structured fields.
    async with Scraper() as scraper:
        ingredients = scraper.extract_ingredients(text)
        nutrition_info = scraper.extract_nutrition_info(text)

    # Score the parsed ingredients and nutrition data.
    health_score = health_calculator.calculate_score(ingredients)
    nutrition_analysis = health_calculator.analyze_nutrition(nutrition_info)

    return {
        'ingredients': ingredients,
        'nutrition_info': nutrition_info,
        'health_score': health_score,
        'nutrition_analysis': nutrition_analysis
    }
|
| 101 |
+
|
| 102 |
+
@app.post("/analyze/text")
async def analyze_text(text: str = Form(...), product_type: str = Form("food")):
    """Analyze product from text description"""
    # Reuse the scraper's text parsers on the submitted form text.
    async with Scraper() as scraper:
        ingredients = scraper.extract_ingredients(text)
        nutrition_info = scraper.extract_nutrition_info(text)

    # Score with the product type taken into account.
    health_score = health_calculator.calculate_score(ingredients, product_type)
    nutrition_analysis = health_calculator.analyze_nutrition(nutrition_info)

    return {
        'ingredients': ingredients,
        'nutrition_info': nutrition_info,
        'health_score': health_score,
        'nutrition_analysis': nutrition_analysis
    }
|
| 118 |
+
|
| 119 |
+
@app.get("/products/{barcode}")
async def get_product(barcode: str):
    """Get product information by barcode"""
    record = db.get_product(barcode)
    if not record:
        # Unknown barcode: nothing cached for it.
        raise HTTPException(status_code=404, detail="Product not found")
    return record
|
| 126 |
+
|
| 127 |
+
@app.post("/products/add")
async def add_product(
    barcode: str = Form(...),
    name: str = Form(...),
    ingredients: str = Form(...),
    product_type: str = Form("food"),
    admin_user: str = Depends(verify_admin)
):
    """Add or update product information (admin only)"""
    # 'ingredients' may arrive as a JSON array or as a plain
    # comma-separated string; accept both.
    try:
        ingredients_list = json.loads(ingredients)
    except json.JSONDecodeError:
        ingredients_list = [i.strip() for i in ingredients.split(',')]

    db.add_product({
        'barcode': barcode,
        'name': name,
        'ingredients': ingredients_list,
        'product_type': product_type,
        'added_by': admin_user,
        'is_verified': True,
        'last_updated': datetime.now().isoformat()
    })
    return {"message": "Product added successfully"}
|
| 153 |
+
|
| 154 |
+
@app.get("/products/update")
async def update_products(admin_user: str = Depends(verify_admin)):
    """Update products that haven't been updated in 60 days (admin only).

    Best-effort: a failure for one barcode is logged and the loop continues.
    """
    products_to_update = db.get_products_for_update()

    async with Scraper() as scraper:
        for barcode in products_to_update:
            try:
                prices = await scraper.get_all_prices(barcode)
                if not prices:
                    continue
                first_result = prices[0]
                # Fix: the products table has no 'prices' column, so the old
                # insert of {'prices': ...} failed. Persist the row without
                # prices and record them via the price_tracking table.
                product_id = db.add_product({
                    'barcode': barcode,
                    'name': first_result['title'],
                    'last_updated': datetime.now().isoformat()
                })
                if product_id:
                    db.update_prices(product_id, prices)
            except Exception as e:
                # Keep refreshing the remaining products on any failure.
                print(f"Error updating product {barcode}: {str(e)}")
                continue

    return {"message": f"Updated {len(products_to_update)} products"}
|
| 177 |
+
|
| 178 |
+
@app.get("/export")
async def export_data(admin_user: str = Depends(verify_admin)):
    """Export database to CSV (admin only).

    Writes products.csv / ingredients.csv / price_tracking.csv into a fresh
    timestamped directory under data/exports and reports its path.
    """
    try:
        export_dir = "data/exports"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        export_path = f"{export_dir}/export_{timestamp}"
        # Fix: Database.export_to_csv writes files *inside* export_path, so
        # the timestamped directory itself must exist (previously only
        # export_dir was created, and the export always failed).
        os.makedirs(export_path, exist_ok=True)
        db.export_to_csv(export_path)
        return {"message": f"Data exported to {export_path}"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}")
|
| 190 |
+
|
| 191 |
+
@app.post("/import")
async def import_data(file: UploadFile = File(...), admin_user: str = Depends(verify_admin)):
    """Import data from CSV (admin only).

    NOTE(review): the upload is saved as a single *file* at import_path, but
    Database.import_from_csv reads ``{path}/products.csv`` etc., i.e. it
    expects a *directory* containing three CSVs. As written this endpoint
    cannot succeed -- confirm the intended upload format (archive? single
    products CSV?) and align the two sides.
    """
    try:
        contents = await file.read()
        import_dir = "data/imports"
        os.makedirs(import_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        import_path = f"{import_dir}/import_{timestamp}"

        # Persist the raw upload to disk before importing.
        with open(import_path, 'wb') as f:
            f.write(contents)

        db.import_from_csv(import_path)
        return {"message": "Data imported successfully"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")
|
| 208 |
+
|
| 209 |
+
# Allow running the API directly, without the app.py launcher.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
init_db.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from models.database import Database
|
| 2 |
+
from passlib.hash import pbkdf2_sha256
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
def init_database():
    """Create the data directory, the SQLite schema, and a default admin user.

    The admin password can be overridden with the ADMIN_PASSWORD environment
    variable; it defaults to "admin123" for local development.
    """
    # Create data directory if it doesn't exist.
    os.makedirs("data", exist_ok=True)

    # Instantiating Database creates all tables (see Database.init_db).
    db = Database()

    # Create admin user; password is stored as a pbkdf2 hash.
    admin_username = "admin"
    admin_password = os.environ.get("ADMIN_PASSWORD", "admin123")
    password_hash = pbkdf2_sha256.hash(admin_password)

    # Fix: the original called db._get_connection(), a method Database did
    # not define, so this script crashed with AttributeError. Open the
    # connection against the database file directly.
    import sqlite3
    conn = sqlite3.connect(db.db_path)
    c = conn.cursor()

    try:
        c.execute("""
            INSERT INTO users (username, password_hash, is_admin)
            VALUES (?, ?, 1)
        """, (admin_username, password_hash))
        conn.commit()
        print(f"Created admin user: {admin_username}")
        print("Default password: admin123 (please change this in production)")
    except Exception as e:
        # Most likely a UNIQUE violation when the admin user already exists.
        print(f"Error creating admin user: {str(e)}")
    finally:
        conn.close()

if __name__ == "__main__":
    init_database()
|
models/database.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import json
|
| 3 |
+
from datetime import datetime, timedelta
|
| 4 |
+
from typing import List, Optional, Dict
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
class Database:
|
| 8 |
+
def __init__(self, db_path: str = "data/indiscan.db"):
|
| 9 |
+
self.db_path = db_path
|
| 10 |
+
self.init_db()
|
| 11 |
+
|
| 12 |
+
def init_db(self):
|
| 13 |
+
conn = sqlite3.connect(self.db_path)
|
| 14 |
+
c = conn.cursor()
|
| 15 |
+
|
| 16 |
+
# Create products table
|
| 17 |
+
c.execute('''
|
| 18 |
+
CREATE TABLE IF NOT EXISTS products (
|
| 19 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 20 |
+
barcode TEXT UNIQUE,
|
| 21 |
+
name TEXT,
|
| 22 |
+
brand TEXT,
|
| 23 |
+
category TEXT,
|
| 24 |
+
ingredients TEXT,
|
| 25 |
+
nutrition_info TEXT,
|
| 26 |
+
health_score INTEGER,
|
| 27 |
+
last_updated TIMESTAMP,
|
| 28 |
+
image_url TEXT,
|
| 29 |
+
product_type TEXT,
|
| 30 |
+
added_by TEXT,
|
| 31 |
+
is_verified BOOLEAN DEFAULT 0
|
| 32 |
+
)
|
| 33 |
+
''')
|
| 34 |
+
|
| 35 |
+
# Create ingredients table
|
| 36 |
+
c.execute('''
|
| 37 |
+
CREATE TABLE IF NOT EXISTS ingredients (
|
| 38 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 39 |
+
name TEXT UNIQUE,
|
| 40 |
+
risk_score INTEGER,
|
| 41 |
+
description TEXT,
|
| 42 |
+
category TEXT,
|
| 43 |
+
concerns TEXT
|
| 44 |
+
)
|
| 45 |
+
''')
|
| 46 |
+
|
| 47 |
+
# Create users table for admin control
|
| 48 |
+
c.execute('''
|
| 49 |
+
CREATE TABLE IF NOT EXISTS users (
|
| 50 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 51 |
+
username TEXT UNIQUE,
|
| 52 |
+
password_hash TEXT,
|
| 53 |
+
is_admin BOOLEAN DEFAULT 0
|
| 54 |
+
)
|
| 55 |
+
''')
|
| 56 |
+
|
| 57 |
+
# Create price tracking table
|
| 58 |
+
c.execute('''
|
| 59 |
+
CREATE TABLE IF NOT EXISTS price_tracking (
|
| 60 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 61 |
+
product_id INTEGER,
|
| 62 |
+
platform TEXT,
|
| 63 |
+
price REAL,
|
| 64 |
+
timestamp TIMESTAMP,
|
| 65 |
+
url TEXT,
|
| 66 |
+
FOREIGN KEY (product_id) REFERENCES products (id)
|
| 67 |
+
)
|
| 68 |
+
''')
|
| 69 |
+
|
| 70 |
+
conn.commit()
|
| 71 |
+
conn.close()
|
| 72 |
+
|
| 73 |
+
def add_product(self, product_data: Dict) -> int:
|
| 74 |
+
conn = sqlite3.connect(self.db_path)
|
| 75 |
+
c = conn.cursor()
|
| 76 |
+
|
| 77 |
+
product_data['last_updated'] = datetime.now().isoformat()
|
| 78 |
+
if 'ingredients' in product_data and isinstance(product_data['ingredients'], list):
|
| 79 |
+
product_data['ingredients'] = json.dumps(product_data['ingredients'])
|
| 80 |
+
if 'nutrition_info' in product_data and isinstance(product_data['nutrition_info'], dict):
|
| 81 |
+
product_data['nutrition_info'] = json.dumps(product_data['nutrition_info'])
|
| 82 |
+
|
| 83 |
+
columns = ', '.join(product_data.keys())
|
| 84 |
+
placeholders = ', '.join(['?' for _ in product_data])
|
| 85 |
+
values = tuple(product_data.values())
|
| 86 |
+
|
| 87 |
+
try:
|
| 88 |
+
c.execute(f"INSERT INTO products ({columns}) VALUES ({placeholders})", values)
|
| 89 |
+
product_id = c.lastrowid
|
| 90 |
+
conn.commit()
|
| 91 |
+
return product_id
|
| 92 |
+
except sqlite3.IntegrityError:
|
| 93 |
+
# Update existing product
|
| 94 |
+
update_cols = ', '.join([f"{k}=?" for k in product_data.keys()])
|
| 95 |
+
c.execute(f"UPDATE products SET {update_cols} WHERE barcode=?",
|
| 96 |
+
(*values, product_data['barcode']))
|
| 97 |
+
conn.commit()
|
| 98 |
+
return c.lastrowid
|
| 99 |
+
finally:
|
| 100 |
+
conn.close()
|
| 101 |
+
|
| 102 |
+
def get_product(self, barcode: str) -> Optional[Dict]:
|
| 103 |
+
conn = sqlite3.connect(self.db_path)
|
| 104 |
+
c = conn.cursor()
|
| 105 |
+
|
| 106 |
+
c.execute("SELECT * FROM products WHERE barcode=?", (barcode,))
|
| 107 |
+
result = c.fetchone()
|
| 108 |
+
|
| 109 |
+
if result:
|
| 110 |
+
columns = [description[0] for description in c.description]
|
| 111 |
+
product = dict(zip(columns, result))
|
| 112 |
+
|
| 113 |
+
# Parse JSON strings back to Python objects
|
| 114 |
+
if product['ingredients']:
|
| 115 |
+
product['ingredients'] = json.loads(product['ingredients'])
|
| 116 |
+
if product['nutrition_info']:
|
| 117 |
+
product['nutrition_info'] = json.loads(product['nutrition_info'])
|
| 118 |
+
|
| 119 |
+
conn.close()
|
| 120 |
+
return product
|
| 121 |
+
|
| 122 |
+
conn.close()
|
| 123 |
+
return None
|
| 124 |
+
|
| 125 |
+
def update_prices(self, product_id: int, prices: List[Dict]):
|
| 126 |
+
conn = sqlite3.connect(self.db_path)
|
| 127 |
+
c = conn.cursor()
|
| 128 |
+
|
| 129 |
+
timestamp = datetime.now().isoformat()
|
| 130 |
+
|
| 131 |
+
for price_data in prices:
|
| 132 |
+
c.execute("""
|
| 133 |
+
INSERT INTO price_tracking (product_id, platform, price, timestamp, url)
|
| 134 |
+
VALUES (?, ?, ?, ?, ?)
|
| 135 |
+
""", (product_id, price_data['platform'], price_data['price'], timestamp, price_data['url']))
|
| 136 |
+
|
| 137 |
+
conn.commit()
|
| 138 |
+
conn.close()
|
| 139 |
+
|
| 140 |
+
def get_products_for_update(self) -> List[str]:
|
| 141 |
+
"""Get products that haven't been updated in 60 days"""
|
| 142 |
+
conn = sqlite3.connect(self.db_path)
|
| 143 |
+
c = conn.cursor()
|
| 144 |
+
|
| 145 |
+
sixty_days_ago = (datetime.now() - timedelta(days=60)).isoformat()
|
| 146 |
+
|
| 147 |
+
c.execute("""
|
| 148 |
+
SELECT barcode FROM products
|
| 149 |
+
WHERE last_updated < ? OR last_updated IS NULL
|
| 150 |
+
""", (sixty_days_ago,))
|
| 151 |
+
|
| 152 |
+
barcodes = [row[0] for row in c.fetchall()]
|
| 153 |
+
conn.close()
|
| 154 |
+
return barcodes
|
| 155 |
+
|
| 156 |
+
def export_to_csv(self, filepath: str):
|
| 157 |
+
"""Export the database to CSV files"""
|
| 158 |
+
conn = sqlite3.connect(self.db_path)
|
| 159 |
+
|
| 160 |
+
# Export products
|
| 161 |
+
pd.read_sql_query("SELECT * FROM products", conn).to_csv(f"{filepath}/products.csv", index=False)
|
| 162 |
+
|
| 163 |
+
# Export ingredients
|
| 164 |
+
pd.read_sql_query("SELECT * FROM ingredients", conn).to_csv(f"{filepath}/ingredients.csv", index=False)
|
| 165 |
+
|
| 166 |
+
# Export price tracking
|
| 167 |
+
pd.read_sql_query("SELECT * FROM price_tracking", conn).to_csv(f"{filepath}/price_tracking.csv", index=False)
|
| 168 |
+
|
| 169 |
+
conn.close()
|
| 170 |
+
|
| 171 |
+
def import_from_csv(self, filepath: str):
    """Import data from CSV files.

    Reads products.csv, ingredients.csv and price_tracking.csv from the
    *filepath* directory and appends their rows to the matching tables.
    """
    conn = sqlite3.connect(self.db_path)
    # Each CSV is named after its destination table.
    for table in ('products', 'ingredients', 'price_tracking'):
        frame = pd.read_csv(f"{filepath}/{table}.csv")
        frame.to_sql(table, conn, if_exists='append', index=False)
    conn.commit()
    conn.close()
|
| 189 |
+
|
| 190 |
+
def verify_admin(self, username: str, password_hash: str) -> bool:
    """Check whether the given credentials belong to an admin account.

    Returns True only when a user row matches both username and
    password_hash AND its is_admin flag is truthy; unknown users and
    non-admin users both yield False.
    """
    db = sqlite3.connect(self.db_path)
    try:
        row = db.execute(
            "SELECT is_admin FROM users WHERE username=? AND password_hash=?",
            (username, password_hash),
        ).fetchone()
    finally:
        db.close()
    if row is None:
        return False
    return bool(row[0])
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
|
| 2 |
+
uvicorn==0.27.0
|
| 3 |
+
streamlit==1.31.0
|
| 4 |
+
pandas==2.2.0
|
| 5 |
+
plotly==5.18.0
|
| 6 |
+
pillow==10.2.0
|
| 7 |
+
easyocr==1.7.1
|
| 8 |
+
beautifulsoup4==4.12.3
|
| 9 |
+
aiohttp==3.9.1
|
| 10 |
+
python-multipart==0.0.6
|
| 11 |
+
python-jose==3.3.0
|
| 12 |
+
passlib==1.7.4
|
| 13 |
+
python-dotenv==1.0.0
|
| 14 |
+
requests==2.31.0
|
| 15 |
+
numpy==1.26.3
|
utils/health_score.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Optional
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
class HealthScoreCalculator:
    """Compute a 0-1000 health score for food and cosmetic products.

    1000 means no recognized risk factors; 0 means maximum risk. Risky
    ingredients are grouped into weighted categories, and ingredients that
    appear earlier in the list (larger share of the product) count more.

    Bug fix: risk-ingredient matching is now case-insensitive on BOTH
    sides. Previously the candidate ingredient was lowercased but list
    entries such as 'BHA' and 'BHT' were not, so those could never match.
    """

    def __init__(self):
        # Risk categories shared by food and cosmetics; 'weight' scales how
        # strongly a match in that category lowers the score.
        self.risk_categories = {
            'preservatives': {
                'weight': 2.0,
                'ingredients': [
                    'sodium benzoate', 'potassium sorbate', 'sulfites', 'nitrites',
                    'BHA', 'BHT', 'propylene glycol', 'sodium nitrate'
                ]
            },
            'artificial_colors': {
                'weight': 1.5,
                'ingredients': [
                    'red 40', 'yellow 5', 'yellow 6', 'blue 1', 'blue 2',
                    'green 3', 'tartrazine', 'brilliant blue'
                ]
            },
            'artificial_sweeteners': {
                'weight': 1.8,
                'ingredients': [
                    'aspartame', 'sucralose', 'saccharin', 'acesulfame k',
                    'neotame', 'advantame'
                ]
            },
            'harmful_chemicals': {
                'weight': 2.5,
                'ingredients': [
                    'parabens', 'phthalates', 'formaldehyde', 'toluene',
                    'triclosan', 'lead acetate', 'petroleum'
                ]
            }
        }

        # Cosmetic-specific categories (applied when product_type != 'food').
        self.cosmetic_categories = {
            'irritants': {
                'weight': 1.7,
                'ingredients': [
                    'sodium lauryl sulfate', 'alcohol denat', 'isopropyl alcohol',
                    'fragrance', 'essential oils'
                ]
            },
            'comedogenic': {
                'weight': 1.3,
                'ingredients': [
                    'coconut oil', 'cocoa butter', 'isopropyl myristate',
                    'sodium chloride', 'laureth-4'
                ]
            }
        }

        # Food-specific categories (applied when product_type == 'food').
        self.food_categories = {
            'trans_fats': {
                'weight': 2.2,
                'ingredients': [
                    'partially hydrogenated', 'hydrogenated oil',
                    'shortening', 'margarine'
                ]
            },
            'added_sugars': {
                'weight': 1.6,
                'ingredients': [
                    'high fructose corn syrup', 'corn syrup', 'dextrose',
                    'maltose', 'sucrose', 'cane sugar', 'brown sugar'
                ]
            }
        }

    def calculate_ingredient_position_weight(self, position: int, total_ingredients: int) -> float:
        """Calculate weight based on ingredient position.

        Earlier ingredients (position 0) get weight ~2.0, later ones
        approach 1.0, reflecting that lists are ordered by quantity.
        Returns 1.0 when the list is empty to avoid division by zero.
        """
        if total_ingredients == 0:
            return 1.0
        return 1.0 + (1.0 - (position / total_ingredients))

    def identify_risks(self, ingredient: str) -> List[str]:
        """Identify all risk categories an ingredient belongs to.

        Matching is a case-insensitive substring test against every
        category's ingredient list (food, cosmetic and shared categories).
        """
        risks = []
        ingredient = ingredient.lower()

        # Check all categories regardless of product type; calculate_score
        # filters by type afterwards.
        all_categories = {
            **self.risk_categories,
            **self.cosmetic_categories,
            **self.food_categories
        }

        for category, data in all_categories.items():
            # Lowercase both sides so entries like 'BHA' still match.
            if any(risk_ingredient.lower() in ingredient
                   for risk_ingredient in data['ingredients']):
                risks.append(category)

        return risks

    def calculate_score(self, ingredients: List[str], product_type: str = 'food') -> Dict:
        """Calculate a health score and a per-category risk breakdown.

        Returns a dict with keys 'score' (int, 0-1000), 'risks'
        (category -> {'ingredients', 'total_risk'}) and 'explanation'
        (human-readable summary). An empty ingredient list yields the
        neutral score 500.
        """
        if not ingredients:
            return {
                'score': 500,  # Neutral score if no ingredients
                'risks': {},
                'explanation': "No ingredients provided for analysis"
            }

        total_ingredients = len(ingredients)
        risk_points = 0
        risk_breakdown = {}

        # Select relevant categories based on product type.
        categories = {**self.risk_categories}
        if product_type.lower() == 'food':
            categories.update(self.food_categories)
        else:
            categories.update(self.cosmetic_categories)

        # Accumulate weighted risk for every flagged ingredient.
        for position, ingredient in enumerate(ingredients):
            position_weight = self.calculate_ingredient_position_weight(position, total_ingredients)
            risks = self.identify_risks(ingredient)

            for risk in risks:
                # identify_risks checks every category; only count those
                # relevant to this product type.
                if risk in categories:
                    risk_weight = categories[risk]['weight']
                    risk_value = risk_weight * position_weight
                    risk_points += risk_value

                    if risk not in risk_breakdown:
                        risk_breakdown[risk] = {
                            'ingredients': [],
                            'total_risk': 0
                        }
                    risk_breakdown[risk]['ingredients'].append(ingredient)
                    risk_breakdown[risk]['total_risk'] += risk_value

        # Final score: 1000 = perfectly healthy, 0 = maximum risk.
        base_score = 1000
        risk_multiplier = 100  # Controls how quickly the score decreases
        final_score = max(0, min(1000, base_score - (risk_points * risk_multiplier)))

        explanation = self._generate_explanation(risk_breakdown, final_score)

        return {
            'score': int(final_score),
            'risks': risk_breakdown,
            'explanation': explanation
        }

    def _generate_explanation(self, risk_breakdown: Dict, score: float) -> str:
        """Generate a human-readable explanation of the health score."""
        if score > 800:
            status = "very healthy"
        elif score > 600:
            status = "moderately healthy"
        elif score > 400:
            status = "moderate risk"
        elif score > 200:
            status = "high risk"
        else:
            status = "very high risk"

        explanation = f"This product is considered {status} with a score of {int(score)}."

        if risk_breakdown:
            explanation += "\n\nKey concerns:"
            for risk, data in risk_breakdown.items():
                risk_name = risk.replace('_', ' ').title()
                ingredients = ', '.join(data['ingredients'])
                explanation += f"\n- {risk_name}: Found {len(data['ingredients'])} concerning ingredient(s): {ingredients}"
        else:
            explanation += "\n\nNo specific risk factors identified in the ingredients list."

        return explanation

    def analyze_nutrition(self, nutrition_info: Dict) -> Dict:
        """Analyze nutrition information and provide recommendations.

        Expects per-serving numeric values keyed by 'calories', 'protein',
        'fat', 'sugar', 'fiber' (any subset). Returns a dict with
        'concerns', 'positives' and 'recommendations' message lists.
        Thresholds are fixed heuristics, not regulatory limits.
        """
        analysis = {
            'concerns': [],
            'positives': [],
            'recommendations': []
        }

        # Calories: flag very dense or very light products.
        if 'calories' in nutrition_info:
            if nutrition_info['calories'] > 400:
                analysis['concerns'].append("High calorie content")
            elif nutrition_info['calories'] < 50:
                analysis['positives'].append("Low calorie content")

        # Protein.
        if 'protein' in nutrition_info:
            if nutrition_info['protein'] > 15:
                analysis['positives'].append("Good source of protein")
            elif nutrition_info['protein'] < 5:
                analysis['recommendations'].append("Consider options with more protein")

        # Fat.
        if 'fat' in nutrition_info:
            if nutrition_info['fat'] > 15:
                analysis['concerns'].append("High fat content")

        # Sugar.
        if 'sugar' in nutrition_info:
            if nutrition_info['sugar'] > 10:
                analysis['concerns'].append("High sugar content")
                analysis['recommendations'].append("Look for options with less sugar")

        # Fiber.
        if 'fiber' in nutrition_info:
            if nutrition_info['fiber'] > 5:
                analysis['positives'].append("Good source of fiber")
            elif nutrition_info['fiber'] < 2:
                analysis['recommendations'].append("Consider options with more fiber")

        return analysis
|
utils/scraper.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import aiohttp
|
| 2 |
+
import asyncio
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, List, Optional
|
| 6 |
+
import re
|
| 7 |
+
from urllib.parse import quote_plus
|
| 8 |
+
|
| 9 |
+
class Scraper:
    """Asynchronous price scraper for Indian e-commerce platforms.

    Use as an async context manager so that one aiohttp session is shared
    by all platform searches:

        async with Scraper() as s:
            prices = await s.get_all_prices("oats")
    """

    def __init__(self):
        # Desktop browser UA to avoid trivial bot blocking.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(headers=self.headers)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def _json_search(self, url, platform, pick_items, price_key, make_url):
        """Shared driver for the JSON-API platforms (Blinkit/Zepto/Swiggy).

        Fetches *url*, extracts up to five entries via *pick_items* and
        normalizes each into a {platform, title, price, url} dict. Any
        non-200 status or parsing problem yields an empty list.
        """
        async with self.session.get(url) as response:
            if response.status != 200:
                return []
            try:
                payload = await response.json()
                found = []
                for entry in pick_items(payload)[:5]:
                    found.append({
                        'platform': platform,
                        'title': entry.get('name', ''),
                        'price': float(entry.get(price_key, 0)),
                        'url': make_url(entry),
                    })
                return found
            except Exception:
                return []

    async def search_amazon(self, query: str) -> List[Dict]:
        """Search Amazon India for products"""
        search_url = f"https://www.amazon.in/s?k={quote_plus(query)}"
        async with self.session.get(search_url) as response:
            if response.status != 200:
                return []
            page = BeautifulSoup(await response.text(), 'html.parser')
            found = []
            for card in page.select('.s-result-item[data-asin]'):
                try:
                    name_node = card.select_one('.a-text-normal')
                    price_node = card.select_one('.a-price-whole')
                    link_node = card.select_one('a.a-link-normal')
                    if not (name_node and price_node and link_node):
                        continue
                    found.append({
                        'platform': 'amazon',
                        'title': name_node.text.strip(),
                        'price': float(price_node.text.replace(',', '')),
                        'url': 'https://www.amazon.in' + link_node['href'],
                    })
                except Exception:
                    # Malformed result card — skip it.
                    continue
            return found[:5]  # Return top 5 results

    async def search_blinkit(self, query: str) -> List[Dict]:
        """Search Blinkit for products"""
        return await self._json_search(
            f"https://blinkit.com/v2/search?q={quote_plus(query)}",
            'blinkit',
            lambda payload: payload.get('products', []),
            'price',
            lambda entry: f"https://blinkit.com/products/{entry.get('slug', '')}",
        )

    async def search_zepto(self, query: str) -> List[Dict]:
        """Search Zepto for products"""
        return await self._json_search(
            f"https://www.zeptonow.com/api/search?q={quote_plus(query)}",
            'zepto',
            lambda payload: payload.get('products', []),
            'mrp',
            lambda entry: f"https://www.zeptonow.com/product/{entry.get('slug', '')}",
        )

    async def search_swiggy_instamart(self, query: str) -> List[Dict]:
        """Search Swiggy Instamart for products"""
        return await self._json_search(
            f"https://www.swiggy.com/api/instamart/search?q={quote_plus(query)}",
            'swiggy_instamart',
            lambda payload: payload.get('data', {}).get('products', []),
            'price',
            lambda entry: f"https://www.swiggy.com/instamart/product/{entry.get('id', '')}",
        )

    def extract_ingredients(self, text: str) -> List[str]:
        """Extract ingredients from product description text"""
        # Lead-in phrases that introduce an ingredient list; capture runs
        # lazily up to the first period or end of text.
        lead_ins = (
            r"ingredients?[:|\s]+(.*?)(?=\.|$)",
            r"contains?[:|\s]+(.*?)(?=\.|$)",
            r"composition?[:|\s]+(.*?)(?=\.|$)",
        )

        for pattern in lead_ins:
            found = re.search(pattern, text, re.IGNORECASE)
            if not found:
                continue
            # Items are separated by commas, semicolons or the word "and".
            pieces = re.split(r'[,;]|\sand\s', found.group(1))
            return [piece.strip() for piece in pieces if piece.strip()]

        return []

    def extract_nutrition_info(self, text: str) -> Dict:
        """Extract nutrition information from product description text"""
        # nutrient name -> regex with one numeric capture group.
        patterns = {
            'calories': r'(\d+)\s*(?:kcal|calories)',
            'protein': r'protein\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'carbohydrates': r'carbohydrates?\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'fat': r'fat\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'sugar': r'sugar\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'fiber': r'fiber\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g'
        }

        facts = {}
        for nutrient, pattern in patterns.items():
            hit = re.search(pattern, text, re.IGNORECASE)
            if hit is None:
                continue
            try:
                facts[nutrient] = float(hit.group(1))
            except ValueError:
                pass

        return facts

    async def get_all_prices(self, query: str) -> List[Dict]:
        """Get prices from all supported platforms"""
        searches = (
            self.search_amazon(query),
            self.search_blinkit(query),
            self.search_zepto(query),
            self.search_swiggy_instamart(query),
        )

        # Failed platforms surface as exception objects; keep real results.
        outcomes = await asyncio.gather(*searches, return_exceptions=True)
        combined = []
        for outcome in outcomes:
            if isinstance(outcome, list):
                combined.extend(outcome)

        return combined
|