Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- main.py +54 -0
- requirements.txt +8 -1
main.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import asyncio
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import os
|
| 5 |
+
from src.scraping.scraper import AmazonScraper
|
| 6 |
+
from src.analysis.data_processor import DataProcessor
|
| 7 |
+
from src.analysis.nlp_models import NLPModels
|
| 8 |
+
from src.analysis.scoring_engine import ScoringEngine
|
| 9 |
+
from src.dashboard.app import demo # Import the Gradio app
|
| 10 |
+
|
| 11 |
+
async def run_analysis(url, data_dir="data"):
    """Scrape an Amazon listing, score the products with NLP, and launch the dashboard.

    Pipeline: scrape -> clean/structure -> sentiment + keyphrase extraction ->
    virality scoring -> save CSV -> launch the Gradio app (blocks until stopped).

    Args:
        url: Amazon search/listing URL to scrape.
        data_dir: Directory for intermediate and final data files. Defaults to
            "data" so existing callers keep their exact previous behavior.
    """
    # Ensure the output directory exists before the scraper writes into it.
    os.makedirs(data_dir, exist_ok=True)

    print("Starting scraping...")
    scraper = AmazonScraper()
    products_data = await scraper.scrape_product_listing(url, output_dir=data_dir)

    # Bail out early when nothing was scraped -- the downstream steps would
    # otherwise fail on an empty or missing JSON file.
    if not products_data:
        print("No products scraped. Exiting.")
        return

    print("Processing data...")
    processor = DataProcessor()
    # NOTE(review): assumes the scraper wrote product_data.json into data_dir --
    # confirm against AmazonScraper.scrape_product_listing.
    df = processor.load_raw_data(os.path.join(data_dir, "product_data.json"))
    processed_df = processor.clean_and_structure(df)
    processor.save_processed_data(
        processed_df, os.path.join(data_dir, "processed_product_data.csv")
    )

    print("Applying NLP analysis and scoring...")
    nlp_models = NLPModels()
    scoring_engine = ScoringEngine(nlp_models)

    # Apply NLP to each product title for sentiment and keyphrases.
    # Titles scraped from the web can be missing (NaN); normalize to strings
    # first so the NLP models never receive a float and raise a TypeError.
    titles = processed_df["title"].fillna("").astype(str)
    processed_df["sentiment_score"] = titles.apply(
        lambda t: nlp_models.get_sentiment(t)["score"]
    )
    processed_df["keyphrases"] = titles.apply(nlp_models.extract_keyphrases)

    # Score on a copy so processed_df itself is not mutated by the engine.
    scored_df = scoring_engine.calculate_virality_score(processed_df.copy())

    # Persist the final scored data for the dashboard to pick up.
    final_path = os.path.join(data_dir, "final_product_insights.csv")
    processor.save_processed_data(scored_df, final_path)
    print(f"Analysis complete. Data saved to {final_path}")

    print("Launching dashboard...")
    # The Gradio app loads its data from the CSVs written above; launch()
    # blocks the event loop until the server is shut down.
    demo.launch()
|
| 50 |
+
|
| 51 |
+
if __name__ == "__main__":
    # Hard-coded example listing URL for demonstration purposes; a real
    # application would take this as dynamic user input instead.
    target_url = (
        "https://www.amazon.com/s?bbn=12035955011&i=fashion-novelty&oq=Solid%20colors%3A%20100%25%2BCotton%3B%20Heather%20Grey%3A%2090%25%2BCotton%2C%2010%25%2BPolyester%3B%20All%20Other%20Heathers%3A%2050%25%2BCotton%2C%2050%25%2BPolyester%20Lightweight%2C%20Classic%20fit%2C%20Double-needle%20sleeve%20and%20bottom%20hem%20Machine%20wash%20cold%20with%20like%20colors%2C%20dry%20low%20heat%20-long%20-premium%20-sweatshirt%20-v-neck%20-tank%2010%20x%208%20x%201%20inches%3B%204.8%20Ounces&qid=1699392328&ref=glow_cls&refresh=1&rh=p_6%3AATVPDKIKX0DER&s=date-desc-rank"
    )
    # Drive the async pipeline to completion from the synchronous entry point.
    asyncio.run(run_analysis(target_url))
|
requirements.txt
CHANGED
|
@@ -1 +1,8 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
playwright
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
scikit-learn
|
| 5 |
+
transformers
|
| 6 |
+
torch
|
| 7 |
+
gradio
|
| 8 |
+
beautifulsoup4
|