Upload 9 files
Browse files- README.md +86 -12
- app.py +23 -0
- app/frontend.py +212 -0
- app/main.py +211 -0
- init_db.py +35 -0
- models/database.py +199 -0
- requirements.txt +15 -0
- utils/health_score.py +218 -0
- utils/scraper.py +178 -0
README.md
CHANGED
|
@@ -1,12 +1,86 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IndiScan: Indian Product Health Analyzer 🔍
|
| 2 |
+
|
| 3 |
+
IndiScan is a comprehensive health analysis tool that helps users make informed decisions about food and cosmetic products by analyzing ingredients, providing health scores, and comparing prices across Indian e-commerce platforms.
|
| 4 |
+
|
| 5 |
+
## Features 🌟
|
| 6 |
+
|
| 7 |
+
- **Smart Product Analysis**
|
| 8 |
+
- Barcode scanning
|
| 9 |
+
- Image-based ingredient extraction
|
| 10 |
+
- Manual ingredient entry
|
| 11 |
+
- Health score calculation (0-1000)
|
| 12 |
+
- Ingredient risk assessment
|
| 13 |
+
- Nutrition information analysis
|
| 14 |
+
|
| 15 |
+
- **Price Comparison**
|
| 16 |
+
- Real-time price tracking across:
|
| 17 |
+
- Amazon India
|
| 18 |
+
- Blinkit
|
| 19 |
+
- Zepto
|
| 20 |
+
- Swiggy Instamart
|
| 21 |
+
|
| 22 |
+
- **Admin Controls**
|
| 23 |
+
- Product database management
|
| 24 |
+
- CSV import/export
|
| 25 |
+
- 60-day auto-refresh system
|
| 26 |
+
|
| 27 |
+
## Setup 🛠️
|
| 28 |
+
|
| 29 |
+
1. Install dependencies:
|
| 30 |
+
```bash
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
2. Run the application:
|
| 35 |
+
```bash
|
| 36 |
+
python app.py
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
The application will start both the backend API (port 8000) and the Streamlit frontend.
|
| 40 |
+
|
| 41 |
+
## Usage 📱
|
| 42 |
+
|
| 43 |
+
1. **Scan Products**
|
| 44 |
+
- Enter a barcode number
|
| 45 |
+
- Upload a product image
|
| 46 |
+
- Manually enter ingredients
|
| 47 |
+
|
| 48 |
+
2. **View Analysis**
|
| 49 |
+
- Health score and explanation
|
| 50 |
+
- Ingredient breakdown
|
| 51 |
+
- Risk categories
|
| 52 |
+
- Nutrition information
|
| 53 |
+
- Price comparison
|
| 54 |
+
|
| 55 |
+
3. **Admin Features**
|
| 56 |
+
- Login with admin credentials
|
| 57 |
+
- Add/update product information
|
| 58 |
+
- Export/import database
|
| 59 |
+
- Monitor data freshness
|
| 60 |
+
|
| 61 |
+
## Technology Stack 💻
|
| 62 |
+
|
| 63 |
+
- **Backend**: FastAPI
|
| 64 |
+
- **Frontend**: Streamlit
|
| 65 |
+
- **Database**: SQLite
|
| 66 |
+
- **Image Processing**: EasyOCR
|
| 67 |
+
- **Data Analysis**: Pandas, Plotly
|
| 68 |
+
- **Web Scraping**: aiohttp, BeautifulSoup4
|
| 69 |
+
|
| 70 |
+
## Contributing 🤝
|
| 71 |
+
|
| 72 |
+
Feel free to contribute to this project by:
|
| 73 |
+
1. Forking the repository
|
| 74 |
+
2. Creating a feature branch
|
| 75 |
+
3. Committing your changes
|
| 76 |
+
4. Opening a pull request
|
| 77 |
+
|
| 78 |
+
## License 📄
|
| 79 |
+
|
| 80 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 81 |
+
|
| 82 |
+
## Acknowledgments 🙏
|
| 83 |
+
|
| 84 |
+
- Inspired by the Yuka app
|
| 85 |
+
- Uses OpenFoodFacts data
|
| 86 |
+
- Built with ❤️ for Indian consumers
|
app.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import subprocess
import sys
from multiprocessing import Process

def run_backend():
    """Start the FastAPI backend with uvicorn on port 8000 (blocking)."""
    from app.main import app
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

def run_frontend():
    """Run the Streamlit frontend script in a child process (blocking).

    NOTE(review): this launches the script with plain Python; a Streamlit app
    is normally started via ``streamlit run app/frontend.py`` -- confirm this
    is intended.
    """
    subprocess.run([sys.executable, "app/frontend.py"])

if __name__ == "__main__":
    # Start backend in a separate process so the frontend can own the main one.
    backend_process = Process(target=run_backend)
    backend_process.start()

    try:
        # Run frontend in the main process; blocks until the UI exits.
        run_frontend()
    finally:
        # Fix: previously the backend child was only joined, so the program
        # never exited after the frontend closed. Stop the backend explicitly.
        backend_process.terminate()
        backend_process.join()
|
app/frontend.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import io
|
| 6 |
+
import base64
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
|
| 11 |
+
# Configure the app
|
| 12 |
+
st.set_page_config(
|
| 13 |
+
page_title="IndiScan - Product Health Analyzer",
|
| 14 |
+
page_icon="🔍",
|
| 15 |
+
layout="wide"
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# API endpoint
|
| 19 |
+
API_URL = "http://localhost:8000" # Change this when deploying
|
| 20 |
+
|
| 21 |
+
def main():
    """Render the Streamlit UI: sidebar scan-method picker, admin login,
    and the page for the chosen scan method.

    NOTE: Streamlit renders top-to-bottom, so statement order here is the
    on-screen layout order.
    """
    # Sidebar: choose how the user wants to identify the product.
    st.sidebar.title("IndiScan 🔍")
    scan_option = st.sidebar.radio(
        "Choose scan method:",
        ["Barcode", "Image Upload", "Manual Entry"]
    )

    # Main content header.
    st.title("IndiScan - Product Health Analyzer")
    st.markdown("""
    Analyze food and cosmetic products for health risks and get detailed insights.
    Upload an image, enter a barcode, or manually input product details.
    """)

    # Admin section in sidebar. Login is verified by probing the admin-only
    # /export endpoint with HTTP Basic credentials.
    # NOTE(review): a *successful* login therefore triggers a real CSV export
    # on the server as a side effect -- consider a dedicated auth-check route.
    with st.sidebar.expander("Admin Controls 🔐"):
        admin_username = st.text_input("Username")
        admin_password = st.text_input("Password", type="password")
        if st.button("Login"):
            try:
                auth = (admin_username, admin_password)
                response = requests.get(f"{API_URL}/export", auth=auth)
                if response.status_code == 200:
                    st.sidebar.success("Logged in as admin")
                    # Keep credentials for later admin calls in this session.
                    st.session_state['admin_auth'] = auth
                else:
                    st.sidebar.error("Invalid credentials")
            except Exception as e:
                st.sidebar.error(f"Login failed: {str(e)}")

    # Main content based on selected option.
    if scan_option == "Barcode":
        barcode_scanner()
    elif scan_option == "Image Upload":
        image_scanner()
    else:
        manual_entry()
| 59 |
+
|
| 60 |
+
def barcode_scanner():
    """Barcode lookup page: send the entered barcode to the API and render
    the analysis, or an error message."""
    st.header("Barcode Scanner 📱")
    entered = st.text_input("Enter barcode number:")
    if not entered:
        return
    try:
        resp = requests.post(f"{API_URL}/scan/barcode", params={"barcode": entered})
        if resp.status_code != 200:
            st.error("Product not found")
        else:
            display_results(resp.json())
    except Exception as exc:
        st.error(f"Error: {str(exc)}")
| 73 |
+
|
| 74 |
+
def image_scanner():
    """Image page: preview the uploaded photo and send it to the OCR endpoint."""
    st.header("Image Scanner 📸")
    upload = st.file_uploader("Upload product image", type=["jpg", "jpeg", "png"])
    if not upload:
        return
    try:
        # Show the photo back to the user, then forward the raw upload
        # to the backend for OCR-based ingredient extraction.
        st.image(Image.open(upload), caption="Uploaded Image", use_column_width=True)
        resp = requests.post(f"{API_URL}/scan/image", files={"file": upload})
        if resp.status_code == 200:
            display_results(resp.json())
        else:
            st.error("Failed to process image")
    except Exception as exc:
        st.error(f"Error: {str(exc)}")
| 94 |
+
|
| 95 |
+
def manual_entry():
    """Manual page: ingredients text in the left column, analysis on the right."""
    st.header("Manual Entry ✍️")

    left, right = st.columns(2)

    with left:
        chosen_type = st.selectbox("Product Type", ["Food", "Cosmetic"])
        raw_text = st.text_area("Enter ingredients list (comma-separated or as shown on package):")

    with right:
        if not raw_text:
            return
        payload = {
            "text": raw_text,
            "product_type": chosen_type.lower()
        }
        try:
            resp = requests.post(f"{API_URL}/analyze/text", data=payload)
            if resp.status_code == 200:
                display_results(resp.json())
            else:
                st.error("Failed to analyze ingredients")
        except Exception as exc:
            st.error(f"Error: {str(exc)}")
| 121 |
+
|
| 122 |
+
def display_results(data):
    """Render a full analysis for one product response from the API.

    Expects the JSON shape produced by the /scan/* and /analyze/text
    endpoints: optional keys 'health_score' (with 'score', 'explanation',
    'risks'), 'ingredients', 'nutrition_info', 'nutrition_analysis' and
    'prices'.
    """
    # Create three columns for different aspects of the analysis
    col1, col2, col3 = st.columns([2, 2, 1])

    with col1:
        st.subheader("Health Score")
        # Score is on a 0-1000 scale; missing data renders as 0.
        score = data.get('health_score', {}).get('score', 0)

        # Donut chart used as a makeshift gauge: green above 600, red otherwise.
        fig = px.pie(
            values=[score, 1000-score],
            names=['Score', 'Remaining'],
            hole=0.7,
            color_discrete_sequence=['#00ff00' if score > 600 else '#ff0000', '#eee']
        )
        fig.update_layout(
            annotations=[dict(text=f"{score}/1000", x=0.5, y=0.5, font_size=20, showarrow=False)],
            showlegend=False,
            width=300,
            height=300
        )
        st.plotly_chart(fig)

        # Display the human-readable explanation when the API provided one.
        if 'explanation' in data.get('health_score', {}):
            st.markdown(data['health_score']['explanation'])

    with col2:
        st.subheader("Ingredients Analysis")
        if 'ingredients' in data:
            ingredients = data['ingredients']
            st.write(f"Found {len(ingredients)} ingredients:")
            for i, ingredient in enumerate(ingredients, 1):
                st.write(f"{i}. {ingredient}")

        # Display risks if available
        if 'risks' in data.get('health_score', {}):
            st.subheader("Risk Categories")
            risks = data['health_score']['risks']
            for category, risk_data in risks.items():
                # Category keys are snake_case; prettify for the expander label.
                with st.expander(f"{category.replace('_', ' ').title()}"):
                    st.write(f"Found in: {', '.join(risk_data['ingredients'])}")

    with col3:
        if 'nutrition_info' in data:
            st.subheader("Nutrition Info")
            nutrition = data['nutrition_info']
            # NOTE(review): values are rendered with a fixed "g" suffix --
            # confirm all nutrients really are reported in grams.
            for nutrient, value in nutrition.items():
                st.metric(nutrient.title(), f"{value}g")

        # NOTE(review): unlike the guarded lookups above, 'concerns',
        # 'positives' and 'recommendations' are accessed directly; this
        # assumes the API always includes them when 'nutrition_analysis'
        # is present -- confirm against health_calculator.analyze_nutrition.
        if 'nutrition_analysis' in data:
            analysis = data['nutrition_analysis']

            if analysis['concerns']:
                st.subheader("⚠️ Concerns")
                for concern in analysis['concerns']:
                    st.write(f"- {concern}")

            if analysis['positives']:
                st.subheader("✅ Positives")
                for positive in analysis['positives']:
                    st.write(f"- {positive}")

            if analysis['recommendations']:
                st.subheader("💡 Recommendations")
                for rec in analysis['recommendations']:
                    st.write(f"- {rec}")

    # Price comparison if available (rendered full-width below the columns).
    if 'prices' in data:
        st.subheader("Price Comparison")
        prices_df = pd.DataFrame(data['prices'])
        fig = px.bar(
            prices_df,
            x='platform',
            y='price',
            title="Price Comparison Across Platforms",
            color='platform'
        )
        st.plotly_chart(fig)

        # Display price table with clickable product links.
        st.dataframe(
            prices_df[['platform', 'price', 'title', 'url']],
            column_config={
                "url": st.column_config.LinkColumn("Link")
            }
        )
| 210 |
+
|
| 211 |
+
# Script entry point.
if __name__ == "__main__":
    main()
|
app/main.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Form
|
| 2 |
+
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
import easyocr
|
| 5 |
+
import numpy as np
|
| 6 |
+
from PIL import Image
|
| 7 |
+
import io
|
| 8 |
+
import asyncio
|
| 9 |
+
from typing import Optional, List, Dict
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
|
| 14 |
+
# Import our utilities
|
| 15 |
+
from utils.scraper import Scraper
|
| 16 |
+
from utils.health_score import HealthScoreCalculator
|
| 17 |
+
from models.database import Database
|
| 18 |
+
|
| 19 |
+
# Application singletons shared by all request handlers.
app = FastAPI(title="IndiScan API")
security = HTTPBasic()  # HTTP Basic auth scheme for the admin-only endpoints
db = Database()  # SQLite-backed product store
health_calculator = HealthScoreCalculator()

# Configure CORS
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; tighten before exposing this API publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# Initialize OCR reader at import time (loads the English model, which can
# be slow on first run).
reader = easyocr.Reader(['en'])
|
| 35 |
+
|
| 36 |
+
def verify_admin(credentials: HTTPBasicCredentials = Depends(security)):
    """FastAPI dependency: authenticate HTTP Basic credentials as an admin.

    Returns the username on success; raises HTTP 401 otherwise.

    NOTE(review): the raw password is forwarded to Database.verify_admin,
    which compares it directly against the stored ``password_hash`` column;
    init_db stores a pbkdf2 hash there, so a plaintext password can never
    match. One side must hash/verify consistently -- confirm and fix.
    """
    is_admin = db.verify_admin(credentials.username, credentials.password)
    if not is_admin:
        raise HTTPException(
            status_code=401,
            detail="Invalid credentials",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username
|
| 45 |
+
|
| 46 |
+
@app.post("/scan/barcode")
async def scan_barcode(barcode: str):
    """Scan a product by barcode.

    Returns the cached product when present; otherwise scrapes prices from
    the supported platforms, stores the product, and returns it.

    Raises:
        HTTPException 404: no platform returned a result for the barcode.
    """
    # First check our database cache.
    product = db.get_product(barcode)
    if product:
        return product

    # If not found, scrape from the supported e-commerce platforms.
    async with Scraper() as scraper:
        prices = await scraper.get_all_prices(barcode)
        if not prices:
            raise HTTPException(status_code=404, detail="Product not found")

        # Use the first result for the product title.
        first_result = prices[0]
        product_data = {
            'barcode': barcode,
            'name': first_result['title'],
            'prices': prices,
            'last_updated': datetime.now().isoformat()
        }

        # Fix: the products table has no 'prices' column, so inserting the
        # full dict failed with an SQL error. Persist the product row without
        # prices, and record prices in the price_tracking table instead.
        record = {k: v for k, v in product_data.items() if k != 'prices'}
        product_id = db.add_product(record)
        if product_id:
            db.update_prices(product_id, prices)
        return product_data
|
| 72 |
+
|
| 73 |
+
@app.post("/scan/image")
async def scan_image(file: UploadFile = File(...)):
    """Scan product image for ingredients"""
    # Decode the upload into a numpy array for EasyOCR.
    raw = await file.read()
    pixels = np.array(Image.open(io.BytesIO(raw)))

    # Run OCR and join the recognized text fragments (index 1 of each result).
    ocr_results = reader.readtext(pixels)
    text = ' '.join(fragment[1] for fragment in ocr_results)

    # Parse the recognized text into structured fields.
    async with Scraper() as scraper:
        ingredients = scraper.extract_ingredients(text)
        nutrition_info = scraper.extract_nutrition_info(text)

    # Score the parsed ingredients and nutrition data.
    health_score = health_calculator.calculate_score(ingredients)
    nutrition_analysis = health_calculator.analyze_nutrition(nutrition_info)

    return {
        'ingredients': ingredients,
        'nutrition_info': nutrition_info,
        'health_score': health_score,
        'nutrition_analysis': nutrition_analysis
    }
|
| 101 |
+
|
| 102 |
+
@app.post("/analyze/text")
async def analyze_text(text: str = Form(...), product_type: str = Form("food")):
    """Analyze product from text description"""
    # Reuse the scraper's text parsers on the submitted form text.
    async with Scraper() as scraper:
        ingredients = scraper.extract_ingredients(text)
        nutrition_info = scraper.extract_nutrition_info(text)

    # Score with the product type taken into account.
    health_score = health_calculator.calculate_score(ingredients, product_type)
    nutrition_analysis = health_calculator.analyze_nutrition(nutrition_info)

    return {
        'ingredients': ingredients,
        'nutrition_info': nutrition_info,
        'health_score': health_score,
        'nutrition_analysis': nutrition_analysis
    }
|
| 118 |
+
|
| 119 |
+
@app.get("/products/{barcode}")
async def get_product(barcode: str):
    """Get product information by barcode"""
    record = db.get_product(barcode)
    if not record:
        # Unknown barcode: nothing cached for it.
        raise HTTPException(status_code=404, detail="Product not found")
    return record
|
| 126 |
+
|
| 127 |
+
@app.post("/products/add")
async def add_product(
    barcode: str = Form(...),
    name: str = Form(...),
    ingredients: str = Form(...),
    product_type: str = Form("food"),
    admin_user: str = Depends(verify_admin)
):
    """Add or update product information (admin only)"""
    # 'ingredients' may arrive as a JSON array or as a plain
    # comma-separated string; accept both.
    try:
        ingredients_list = json.loads(ingredients)
    except json.JSONDecodeError:
        ingredients_list = [i.strip() for i in ingredients.split(',')]

    db.add_product({
        'barcode': barcode,
        'name': name,
        'ingredients': ingredients_list,
        'product_type': product_type,
        'added_by': admin_user,
        'is_verified': True,
        'last_updated': datetime.now().isoformat()
    })
    return {"message": "Product added successfully"}
|
| 153 |
+
|
| 154 |
+
@app.get("/products/update")
async def update_products(admin_user: str = Depends(verify_admin)):
    """Update products that haven't been updated in 60 days (admin only).

    Best-effort: a failure for one barcode is logged and the loop continues.
    """
    products_to_update = db.get_products_for_update()

    async with Scraper() as scraper:
        for barcode in products_to_update:
            try:
                prices = await scraper.get_all_prices(barcode)
                if not prices:
                    continue
                first_result = prices[0]
                # Fix: the products table has no 'prices' column, so the old
                # insert of {'prices': ...} failed. Persist the row without
                # prices and record them via the price_tracking table.
                product_id = db.add_product({
                    'barcode': barcode,
                    'name': first_result['title'],
                    'last_updated': datetime.now().isoformat()
                })
                if product_id:
                    db.update_prices(product_id, prices)
            except Exception as e:
                # Keep refreshing the remaining products on any failure.
                print(f"Error updating product {barcode}: {str(e)}")
                continue

    return {"message": f"Updated {len(products_to_update)} products"}
|
| 177 |
+
|
| 178 |
+
@app.get("/export")
async def export_data(admin_user: str = Depends(verify_admin)):
    """Export database to CSV (admin only).

    Writes products.csv / ingredients.csv / price_tracking.csv into a fresh
    timestamped directory under data/exports and reports its path.
    """
    try:
        export_dir = "data/exports"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        export_path = f"{export_dir}/export_{timestamp}"
        # Fix: Database.export_to_csv writes files *inside* export_path, so
        # the timestamped directory itself must exist (previously only
        # export_dir was created, and the export always failed).
        os.makedirs(export_path, exist_ok=True)
        db.export_to_csv(export_path)
        return {"message": f"Data exported to {export_path}"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}")
|
| 190 |
+
|
| 191 |
+
@app.post("/import")
async def import_data(file: UploadFile = File(...), admin_user: str = Depends(verify_admin)):
    """Import data from CSV (admin only).

    NOTE(review): the upload is saved as a single *file* at import_path, but
    Database.import_from_csv reads ``{path}/products.csv`` etc., i.e. it
    expects a *directory* containing three CSVs. As written this endpoint
    cannot succeed -- confirm the intended upload format (archive? single
    products CSV?) and align the two sides.
    """
    try:
        contents = await file.read()
        import_dir = "data/imports"
        os.makedirs(import_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        import_path = f"{import_dir}/import_{timestamp}"

        # Persist the raw upload to disk before importing.
        with open(import_path, 'wb') as f:
            f.write(contents)

        db.import_from_csv(import_path)
        return {"message": "Data imported successfully"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")
|
| 208 |
+
|
| 209 |
+
# Allow running the API directly, without the app.py launcher.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
init_db.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from models.database import Database
|
| 2 |
+
from passlib.hash import pbkdf2_sha256
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
def init_database():
    """Create the data directory, the SQLite schema, and a default admin user.

    The admin password can be overridden with the ADMIN_PASSWORD environment
    variable; it defaults to "admin123" for local development.
    """
    # Create data directory if it doesn't exist.
    os.makedirs("data", exist_ok=True)

    # Instantiating Database creates all tables (see Database.init_db).
    db = Database()

    # Create admin user; password is stored as a pbkdf2 hash.
    admin_username = "admin"
    admin_password = os.environ.get("ADMIN_PASSWORD", "admin123")
    password_hash = pbkdf2_sha256.hash(admin_password)

    # Fix: the original called db._get_connection(), a method Database did
    # not define, so this script crashed with AttributeError. Open the
    # connection against the database file directly.
    import sqlite3
    conn = sqlite3.connect(db.db_path)
    c = conn.cursor()

    try:
        c.execute("""
            INSERT INTO users (username, password_hash, is_admin)
            VALUES (?, ?, 1)
        """, (admin_username, password_hash))
        conn.commit()
        print(f"Created admin user: {admin_username}")
        print("Default password: admin123 (please change this in production)")
    except Exception as e:
        # Most likely a UNIQUE violation when the admin user already exists.
        print(f"Error creating admin user: {str(e)}")
    finally:
        conn.close()

if __name__ == "__main__":
    init_database()
|
models/database.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import json
|
| 3 |
+
from datetime import datetime, timedelta
|
| 4 |
+
from typing import List, Optional, Dict
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
class Database:
|
| 8 |
+
def __init__(self, db_path: str = "data/indiscan.db"):
|
| 9 |
+
self.db_path = db_path
|
| 10 |
+
self.init_db()
|
| 11 |
+
|
| 12 |
+
def init_db(self):
|
| 13 |
+
conn = sqlite3.connect(self.db_path)
|
| 14 |
+
c = conn.cursor()
|
| 15 |
+
|
| 16 |
+
# Create products table
|
| 17 |
+
c.execute('''
|
| 18 |
+
CREATE TABLE IF NOT EXISTS products (
|
| 19 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 20 |
+
barcode TEXT UNIQUE,
|
| 21 |
+
name TEXT,
|
| 22 |
+
brand TEXT,
|
| 23 |
+
category TEXT,
|
| 24 |
+
ingredients TEXT,
|
| 25 |
+
nutrition_info TEXT,
|
| 26 |
+
health_score INTEGER,
|
| 27 |
+
last_updated TIMESTAMP,
|
| 28 |
+
image_url TEXT,
|
| 29 |
+
product_type TEXT,
|
| 30 |
+
added_by TEXT,
|
| 31 |
+
is_verified BOOLEAN DEFAULT 0
|
| 32 |
+
)
|
| 33 |
+
''')
|
| 34 |
+
|
| 35 |
+
# Create ingredients table
|
| 36 |
+
c.execute('''
|
| 37 |
+
CREATE TABLE IF NOT EXISTS ingredients (
|
| 38 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 39 |
+
name TEXT UNIQUE,
|
| 40 |
+
risk_score INTEGER,
|
| 41 |
+
description TEXT,
|
| 42 |
+
category TEXT,
|
| 43 |
+
concerns TEXT
|
| 44 |
+
)
|
| 45 |
+
''')
|
| 46 |
+
|
| 47 |
+
# Create users table for admin control
|
| 48 |
+
c.execute('''
|
| 49 |
+
CREATE TABLE IF NOT EXISTS users (
|
| 50 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 51 |
+
username TEXT UNIQUE,
|
| 52 |
+
password_hash TEXT,
|
| 53 |
+
is_admin BOOLEAN DEFAULT 0
|
| 54 |
+
)
|
| 55 |
+
''')
|
| 56 |
+
|
| 57 |
+
# Create price tracking table
|
| 58 |
+
c.execute('''
|
| 59 |
+
CREATE TABLE IF NOT EXISTS price_tracking (
|
| 60 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 61 |
+
product_id INTEGER,
|
| 62 |
+
platform TEXT,
|
| 63 |
+
price REAL,
|
| 64 |
+
timestamp TIMESTAMP,
|
| 65 |
+
url TEXT,
|
| 66 |
+
FOREIGN KEY (product_id) REFERENCES products (id)
|
| 67 |
+
)
|
| 68 |
+
''')
|
| 69 |
+
|
| 70 |
+
conn.commit()
|
| 71 |
+
conn.close()
|
| 72 |
+
|
| 73 |
+
def add_product(self, product_data: Dict) -> int:
|
| 74 |
+
conn = sqlite3.connect(self.db_path)
|
| 75 |
+
c = conn.cursor()
|
| 76 |
+
|
| 77 |
+
product_data['last_updated'] = datetime.now().isoformat()
|
| 78 |
+
if 'ingredients' in product_data and isinstance(product_data['ingredients'], list):
|
| 79 |
+
product_data['ingredients'] = json.dumps(product_data['ingredients'])
|
| 80 |
+
if 'nutrition_info' in product_data and isinstance(product_data['nutrition_info'], dict):
|
| 81 |
+
product_data['nutrition_info'] = json.dumps(product_data['nutrition_info'])
|
| 82 |
+
|
| 83 |
+
columns = ', '.join(product_data.keys())
|
| 84 |
+
placeholders = ', '.join(['?' for _ in product_data])
|
| 85 |
+
values = tuple(product_data.values())
|
| 86 |
+
|
| 87 |
+
try:
|
| 88 |
+
c.execute(f"INSERT INTO products ({columns}) VALUES ({placeholders})", values)
|
| 89 |
+
product_id = c.lastrowid
|
| 90 |
+
conn.commit()
|
| 91 |
+
return product_id
|
| 92 |
+
except sqlite3.IntegrityError:
|
| 93 |
+
# Update existing product
|
| 94 |
+
update_cols = ', '.join([f"{k}=?" for k in product_data.keys()])
|
| 95 |
+
c.execute(f"UPDATE products SET {update_cols} WHERE barcode=?",
|
| 96 |
+
(*values, product_data['barcode']))
|
| 97 |
+
conn.commit()
|
| 98 |
+
return c.lastrowid
|
| 99 |
+
finally:
|
| 100 |
+
conn.close()
|
| 101 |
+
|
| 102 |
+
def get_product(self, barcode: str) -> Optional[Dict]:
|
| 103 |
+
conn = sqlite3.connect(self.db_path)
|
| 104 |
+
c = conn.cursor()
|
| 105 |
+
|
| 106 |
+
c.execute("SELECT * FROM products WHERE barcode=?", (barcode,))
|
| 107 |
+
result = c.fetchone()
|
| 108 |
+
|
| 109 |
+
if result:
|
| 110 |
+
columns = [description[0] for description in c.description]
|
| 111 |
+
product = dict(zip(columns, result))
|
| 112 |
+
|
| 113 |
+
# Parse JSON strings back to Python objects
|
| 114 |
+
if product['ingredients']:
|
| 115 |
+
product['ingredients'] = json.loads(product['ingredients'])
|
| 116 |
+
if product['nutrition_info']:
|
| 117 |
+
product['nutrition_info'] = json.loads(product['nutrition_info'])
|
| 118 |
+
|
| 119 |
+
conn.close()
|
| 120 |
+
return product
|
| 121 |
+
|
| 122 |
+
conn.close()
|
| 123 |
+
return None
|
| 124 |
+
|
| 125 |
+
def update_prices(self, product_id: int, prices: List[Dict]):
|
| 126 |
+
conn = sqlite3.connect(self.db_path)
|
| 127 |
+
c = conn.cursor()
|
| 128 |
+
|
| 129 |
+
timestamp = datetime.now().isoformat()
|
| 130 |
+
|
| 131 |
+
for price_data in prices:
|
| 132 |
+
c.execute("""
|
| 133 |
+
INSERT INTO price_tracking (product_id, platform, price, timestamp, url)
|
| 134 |
+
VALUES (?, ?, ?, ?, ?)
|
| 135 |
+
""", (product_id, price_data['platform'], price_data['price'], timestamp, price_data['url']))
|
| 136 |
+
|
| 137 |
+
conn.commit()
|
| 138 |
+
conn.close()
|
| 139 |
+
|
| 140 |
+
def get_products_for_update(self) -> List[str]:
|
| 141 |
+
"""Get products that haven't been updated in 60 days"""
|
| 142 |
+
conn = sqlite3.connect(self.db_path)
|
| 143 |
+
c = conn.cursor()
|
| 144 |
+
|
| 145 |
+
sixty_days_ago = (datetime.now() - timedelta(days=60)).isoformat()
|
| 146 |
+
|
| 147 |
+
c.execute("""
|
| 148 |
+
SELECT barcode FROM products
|
| 149 |
+
WHERE last_updated < ? OR last_updated IS NULL
|
| 150 |
+
""", (sixty_days_ago,))
|
| 151 |
+
|
| 152 |
+
barcodes = [row[0] for row in c.fetchall()]
|
| 153 |
+
conn.close()
|
| 154 |
+
return barcodes
|
| 155 |
+
|
| 156 |
+
def export_to_csv(self, filepath: str):
|
| 157 |
+
"""Export the database to CSV files"""
|
| 158 |
+
conn = sqlite3.connect(self.db_path)
|
| 159 |
+
|
| 160 |
+
# Export products
|
| 161 |
+
pd.read_sql_query("SELECT * FROM products", conn).to_csv(f"{filepath}/products.csv", index=False)
|
| 162 |
+
|
| 163 |
+
# Export ingredients
|
| 164 |
+
pd.read_sql_query("SELECT * FROM ingredients", conn).to_csv(f"{filepath}/ingredients.csv", index=False)
|
| 165 |
+
|
| 166 |
+
# Export price tracking
|
| 167 |
+
pd.read_sql_query("SELECT * FROM price_tracking", conn).to_csv(f"{filepath}/price_tracking.csv", index=False)
|
| 168 |
+
|
| 169 |
+
conn.close()
|
| 170 |
+
|
| 171 |
+
def import_from_csv(self, filepath: str):
    """Import data from CSV files.

    Reads products.csv, ingredients.csv and price_tracking.csv from the
    *filepath* directory and appends their rows to the matching tables.
    """
    conn = sqlite3.connect(self.db_path)
    # Each CSV is named after its destination table.
    for table in ('products', 'ingredients', 'price_tracking'):
        frame = pd.read_csv(f"{filepath}/{table}.csv")
        frame.to_sql(table, conn, if_exists='append', index=False)
    conn.commit()
    conn.close()
|
| 189 |
+
|
| 190 |
+
def verify_admin(self, username: str, password_hash: str) -> bool:
    """Check whether the given credentials belong to an admin account.

    Returns True only when a user row matches both username and
    password_hash AND its is_admin flag is truthy; unknown users and
    non-admin users both yield False.
    """
    db = sqlite3.connect(self.db_path)
    try:
        row = db.execute(
            "SELECT is_admin FROM users WHERE username=? AND password_hash=?",
            (username, password_hash),
        ).fetchone()
    finally:
        db.close()
    if row is None:
        return False
    return bool(row[0])
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
|
| 2 |
+
uvicorn==0.27.0
|
| 3 |
+
streamlit==1.31.0
|
| 4 |
+
pandas==2.2.0
|
| 5 |
+
plotly==5.18.0
|
| 6 |
+
pillow==10.2.0
|
| 7 |
+
easyocr==1.7.1
|
| 8 |
+
beautifulsoup4==4.12.3
|
| 9 |
+
aiohttp==3.9.1
|
| 10 |
+
python-multipart==0.0.6
|
| 11 |
+
python-jose==3.3.0
|
| 12 |
+
passlib==1.7.4
|
| 13 |
+
python-dotenv==1.0.0
|
| 14 |
+
requests==2.31.0
|
| 15 |
+
numpy==1.26.3
|
utils/health_score.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Optional
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
class HealthScoreCalculator:
    """Compute a 0-1000 health score for food and cosmetic products.

    1000 means no recognized risk factors; 0 means maximum risk. Risky
    ingredients are grouped into weighted categories, and ingredients that
    appear earlier in the list (larger share of the product) count more.

    Bug fix: risk-ingredient matching is now case-insensitive on BOTH
    sides. Previously the candidate ingredient was lowercased but list
    entries such as 'BHA' and 'BHT' were not, so those could never match.
    """

    def __init__(self):
        # Risk categories shared by food and cosmetics; 'weight' scales how
        # strongly a match in that category lowers the score.
        self.risk_categories = {
            'preservatives': {
                'weight': 2.0,
                'ingredients': [
                    'sodium benzoate', 'potassium sorbate', 'sulfites', 'nitrites',
                    'BHA', 'BHT', 'propylene glycol', 'sodium nitrate'
                ]
            },
            'artificial_colors': {
                'weight': 1.5,
                'ingredients': [
                    'red 40', 'yellow 5', 'yellow 6', 'blue 1', 'blue 2',
                    'green 3', 'tartrazine', 'brilliant blue'
                ]
            },
            'artificial_sweeteners': {
                'weight': 1.8,
                'ingredients': [
                    'aspartame', 'sucralose', 'saccharin', 'acesulfame k',
                    'neotame', 'advantame'
                ]
            },
            'harmful_chemicals': {
                'weight': 2.5,
                'ingredients': [
                    'parabens', 'phthalates', 'formaldehyde', 'toluene',
                    'triclosan', 'lead acetate', 'petroleum'
                ]
            }
        }

        # Cosmetic-specific categories (applied when product_type != 'food').
        self.cosmetic_categories = {
            'irritants': {
                'weight': 1.7,
                'ingredients': [
                    'sodium lauryl sulfate', 'alcohol denat', 'isopropyl alcohol',
                    'fragrance', 'essential oils'
                ]
            },
            'comedogenic': {
                'weight': 1.3,
                'ingredients': [
                    'coconut oil', 'cocoa butter', 'isopropyl myristate',
                    'sodium chloride', 'laureth-4'
                ]
            }
        }

        # Food-specific categories (applied when product_type == 'food').
        self.food_categories = {
            'trans_fats': {
                'weight': 2.2,
                'ingredients': [
                    'partially hydrogenated', 'hydrogenated oil',
                    'shortening', 'margarine'
                ]
            },
            'added_sugars': {
                'weight': 1.6,
                'ingredients': [
                    'high fructose corn syrup', 'corn syrup', 'dextrose',
                    'maltose', 'sucrose', 'cane sugar', 'brown sugar'
                ]
            }
        }

    def calculate_ingredient_position_weight(self, position: int, total_ingredients: int) -> float:
        """Calculate weight based on ingredient position.

        Earlier ingredients (position 0) get weight ~2.0, later ones
        approach 1.0, reflecting that lists are ordered by quantity.
        Returns 1.0 when the list is empty to avoid division by zero.
        """
        if total_ingredients == 0:
            return 1.0
        return 1.0 + (1.0 - (position / total_ingredients))

    def identify_risks(self, ingredient: str) -> List[str]:
        """Identify all risk categories an ingredient belongs to.

        Matching is a case-insensitive substring test against every
        category's ingredient list (food, cosmetic and shared categories).
        """
        risks = []
        ingredient = ingredient.lower()

        # Check all categories regardless of product type; calculate_score
        # filters by type afterwards.
        all_categories = {
            **self.risk_categories,
            **self.cosmetic_categories,
            **self.food_categories
        }

        for category, data in all_categories.items():
            # Lowercase both sides so entries like 'BHA' still match.
            if any(risk_ingredient.lower() in ingredient
                   for risk_ingredient in data['ingredients']):
                risks.append(category)

        return risks

    def calculate_score(self, ingredients: List[str], product_type: str = 'food') -> Dict:
        """Calculate a health score and a per-category risk breakdown.

        Returns a dict with keys 'score' (int, 0-1000), 'risks'
        (category -> {'ingredients', 'total_risk'}) and 'explanation'
        (human-readable summary). An empty ingredient list yields the
        neutral score 500.
        """
        if not ingredients:
            return {
                'score': 500,  # Neutral score if no ingredients
                'risks': {},
                'explanation': "No ingredients provided for analysis"
            }

        total_ingredients = len(ingredients)
        risk_points = 0
        risk_breakdown = {}

        # Select relevant categories based on product type.
        categories = {**self.risk_categories}
        if product_type.lower() == 'food':
            categories.update(self.food_categories)
        else:
            categories.update(self.cosmetic_categories)

        # Accumulate weighted risk for every flagged ingredient.
        for position, ingredient in enumerate(ingredients):
            position_weight = self.calculate_ingredient_position_weight(position, total_ingredients)
            risks = self.identify_risks(ingredient)

            for risk in risks:
                # identify_risks checks every category; only count those
                # relevant to this product type.
                if risk in categories:
                    risk_weight = categories[risk]['weight']
                    risk_value = risk_weight * position_weight
                    risk_points += risk_value

                    if risk not in risk_breakdown:
                        risk_breakdown[risk] = {
                            'ingredients': [],
                            'total_risk': 0
                        }
                    risk_breakdown[risk]['ingredients'].append(ingredient)
                    risk_breakdown[risk]['total_risk'] += risk_value

        # Final score: 1000 = perfectly healthy, 0 = maximum risk.
        base_score = 1000
        risk_multiplier = 100  # Controls how quickly the score decreases
        final_score = max(0, min(1000, base_score - (risk_points * risk_multiplier)))

        explanation = self._generate_explanation(risk_breakdown, final_score)

        return {
            'score': int(final_score),
            'risks': risk_breakdown,
            'explanation': explanation
        }

    def _generate_explanation(self, risk_breakdown: Dict, score: float) -> str:
        """Generate a human-readable explanation of the health score."""
        if score > 800:
            status = "very healthy"
        elif score > 600:
            status = "moderately healthy"
        elif score > 400:
            status = "moderate risk"
        elif score > 200:
            status = "high risk"
        else:
            status = "very high risk"

        explanation = f"This product is considered {status} with a score of {int(score)}."

        if risk_breakdown:
            explanation += "\n\nKey concerns:"
            for risk, data in risk_breakdown.items():
                risk_name = risk.replace('_', ' ').title()
                ingredients = ', '.join(data['ingredients'])
                explanation += f"\n- {risk_name}: Found {len(data['ingredients'])} concerning ingredient(s): {ingredients}"
        else:
            explanation += "\n\nNo specific risk factors identified in the ingredients list."

        return explanation

    def analyze_nutrition(self, nutrition_info: Dict) -> Dict:
        """Analyze nutrition information and provide recommendations.

        Expects per-serving numeric values keyed by 'calories', 'protein',
        'fat', 'sugar', 'fiber' (any subset). Returns a dict with
        'concerns', 'positives' and 'recommendations' message lists.
        Thresholds are fixed heuristics, not regulatory limits.
        """
        analysis = {
            'concerns': [],
            'positives': [],
            'recommendations': []
        }

        # Calories: flag very dense or very light products.
        if 'calories' in nutrition_info:
            if nutrition_info['calories'] > 400:
                analysis['concerns'].append("High calorie content")
            elif nutrition_info['calories'] < 50:
                analysis['positives'].append("Low calorie content")

        # Protein.
        if 'protein' in nutrition_info:
            if nutrition_info['protein'] > 15:
                analysis['positives'].append("Good source of protein")
            elif nutrition_info['protein'] < 5:
                analysis['recommendations'].append("Consider options with more protein")

        # Fat.
        if 'fat' in nutrition_info:
            if nutrition_info['fat'] > 15:
                analysis['concerns'].append("High fat content")

        # Sugar.
        if 'sugar' in nutrition_info:
            if nutrition_info['sugar'] > 10:
                analysis['concerns'].append("High sugar content")
                analysis['recommendations'].append("Look for options with less sugar")

        # Fiber.
        if 'fiber' in nutrition_info:
            if nutrition_info['fiber'] > 5:
                analysis['positives'].append("Good source of fiber")
            elif nutrition_info['fiber'] < 2:
                analysis['recommendations'].append("Consider options with more fiber")

        return analysis
|
utils/scraper.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import aiohttp
|
| 2 |
+
import asyncio
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, List, Optional
|
| 6 |
+
import re
|
| 7 |
+
from urllib.parse import quote_plus
|
| 8 |
+
|
| 9 |
+
class Scraper:
    """Asynchronous price scraper for Indian e-commerce platforms.

    Use as an async context manager so that one aiohttp session is shared
    by all platform searches:

        async with Scraper() as s:
            prices = await s.get_all_prices("oats")
    """

    def __init__(self):
        # Desktop browser UA to avoid trivial bot blocking.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(headers=self.headers)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def _json_search(self, url, platform, pick_items, price_key, make_url):
        """Shared driver for the JSON-API platforms (Blinkit/Zepto/Swiggy).

        Fetches *url*, extracts up to five entries via *pick_items* and
        normalizes each into a {platform, title, price, url} dict. Any
        non-200 status or parsing problem yields an empty list.
        """
        async with self.session.get(url) as response:
            if response.status != 200:
                return []
            try:
                payload = await response.json()
                found = []
                for entry in pick_items(payload)[:5]:
                    found.append({
                        'platform': platform,
                        'title': entry.get('name', ''),
                        'price': float(entry.get(price_key, 0)),
                        'url': make_url(entry),
                    })
                return found
            except Exception:
                return []

    async def search_amazon(self, query: str) -> List[Dict]:
        """Search Amazon India for products"""
        search_url = f"https://www.amazon.in/s?k={quote_plus(query)}"
        async with self.session.get(search_url) as response:
            if response.status != 200:
                return []
            page = BeautifulSoup(await response.text(), 'html.parser')
            found = []
            for card in page.select('.s-result-item[data-asin]'):
                try:
                    name_node = card.select_one('.a-text-normal')
                    price_node = card.select_one('.a-price-whole')
                    link_node = card.select_one('a.a-link-normal')
                    if not (name_node and price_node and link_node):
                        continue
                    found.append({
                        'platform': 'amazon',
                        'title': name_node.text.strip(),
                        'price': float(price_node.text.replace(',', '')),
                        'url': 'https://www.amazon.in' + link_node['href'],
                    })
                except Exception:
                    # Malformed result card — skip it.
                    continue
            return found[:5]  # Return top 5 results

    async def search_blinkit(self, query: str) -> List[Dict]:
        """Search Blinkit for products"""
        return await self._json_search(
            f"https://blinkit.com/v2/search?q={quote_plus(query)}",
            'blinkit',
            lambda payload: payload.get('products', []),
            'price',
            lambda entry: f"https://blinkit.com/products/{entry.get('slug', '')}",
        )

    async def search_zepto(self, query: str) -> List[Dict]:
        """Search Zepto for products"""
        return await self._json_search(
            f"https://www.zeptonow.com/api/search?q={quote_plus(query)}",
            'zepto',
            lambda payload: payload.get('products', []),
            'mrp',
            lambda entry: f"https://www.zeptonow.com/product/{entry.get('slug', '')}",
        )

    async def search_swiggy_instamart(self, query: str) -> List[Dict]:
        """Search Swiggy Instamart for products"""
        return await self._json_search(
            f"https://www.swiggy.com/api/instamart/search?q={quote_plus(query)}",
            'swiggy_instamart',
            lambda payload: payload.get('data', {}).get('products', []),
            'price',
            lambda entry: f"https://www.swiggy.com/instamart/product/{entry.get('id', '')}",
        )

    def extract_ingredients(self, text: str) -> List[str]:
        """Extract ingredients from product description text"""
        # Lead-in phrases that introduce an ingredient list; capture runs
        # lazily up to the first period or end of text.
        lead_ins = (
            r"ingredients?[:|\s]+(.*?)(?=\.|$)",
            r"contains?[:|\s]+(.*?)(?=\.|$)",
            r"composition?[:|\s]+(.*?)(?=\.|$)",
        )

        for pattern in lead_ins:
            found = re.search(pattern, text, re.IGNORECASE)
            if not found:
                continue
            # Items are separated by commas, semicolons or the word "and".
            pieces = re.split(r'[,;]|\sand\s', found.group(1))
            return [piece.strip() for piece in pieces if piece.strip()]

        return []

    def extract_nutrition_info(self, text: str) -> Dict:
        """Extract nutrition information from product description text"""
        # nutrient name -> regex with one numeric capture group.
        patterns = {
            'calories': r'(\d+)\s*(?:kcal|calories)',
            'protein': r'protein\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'carbohydrates': r'carbohydrates?\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'fat': r'fat\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'sugar': r'sugar\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g',
            'fiber': r'fiber\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g'
        }

        facts = {}
        for nutrient, pattern in patterns.items():
            hit = re.search(pattern, text, re.IGNORECASE)
            if hit is None:
                continue
            try:
                facts[nutrient] = float(hit.group(1))
            except ValueError:
                pass

        return facts

    async def get_all_prices(self, query: str) -> List[Dict]:
        """Get prices from all supported platforms"""
        searches = (
            self.search_amazon(query),
            self.search_blinkit(query),
            self.search_zepto(query),
            self.search_swiggy_instamart(query),
        )

        # Failed platforms surface as exception objects; keep real results.
        outcomes = await asyncio.gather(*searches, return_exceptions=True)
        combined = []
        for outcome in outcomes:
            if isinstance(outcome, list):
                combined.extend(outcome)

        return combined
|