Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- main.py +54 -0
- requirements.txt +8 -1
main.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import asyncio
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import os
|
| 5 |
+
from src.scraping.scraper import AmazonScraper
|
| 6 |
+
from src.analysis.data_processor import DataProcessor
|
| 7 |
+
from src.analysis.nlp_models import NLPModels
|
| 8 |
+
from src.analysis.scoring_engine import ScoringEngine
|
| 9 |
+
from src.dashboard.app import demo # Import the Gradio app
|
| 10 |
+
|
| 11 |
+
async def run_analysis(url, data_dir="data"):
    """Scrape an Amazon listing, score the products with NLP, and launch the dashboard.

    Pipeline: scrape -> clean/structure -> sentiment + keyphrase extraction ->
    virality scoring -> save CSV -> launch the Gradio app (blocks until stopped).

    Args:
        url: Amazon search/listing URL to scrape.
        data_dir: Directory for intermediate and final data files. Defaults to
            "data" so existing callers keep their exact previous behavior.
    """
    # Ensure the output directory exists before the scraper writes into it.
    os.makedirs(data_dir, exist_ok=True)

    print("Starting scraping...")
    scraper = AmazonScraper()
    products_data = await scraper.scrape_product_listing(url, output_dir=data_dir)

    # Bail out early when nothing was scraped -- the downstream steps would
    # otherwise fail on an empty or missing JSON file.
    if not products_data:
        print("No products scraped. Exiting.")
        return

    print("Processing data...")
    processor = DataProcessor()
    # NOTE(review): assumes the scraper wrote product_data.json into data_dir --
    # confirm against AmazonScraper.scrape_product_listing.
    df = processor.load_raw_data(os.path.join(data_dir, "product_data.json"))
    processed_df = processor.clean_and_structure(df)
    processor.save_processed_data(
        processed_df, os.path.join(data_dir, "processed_product_data.csv")
    )

    print("Applying NLP analysis and scoring...")
    nlp_models = NLPModels()
    scoring_engine = ScoringEngine(nlp_models)

    # Apply NLP to each product title for sentiment and keyphrases.
    # Titles scraped from the web can be missing (NaN); normalize to strings
    # first so the NLP models never receive a float and raise a TypeError.
    titles = processed_df["title"].fillna("").astype(str)
    processed_df["sentiment_score"] = titles.apply(
        lambda t: nlp_models.get_sentiment(t)["score"]
    )
    processed_df["keyphrases"] = titles.apply(nlp_models.extract_keyphrases)

    # Score on a copy so processed_df itself is not mutated by the engine.
    scored_df = scoring_engine.calculate_virality_score(processed_df.copy())

    # Persist the final scored data for the dashboard to pick up.
    final_path = os.path.join(data_dir, "final_product_insights.csv")
    processor.save_processed_data(scored_df, final_path)
    print(f"Analysis complete. Data saved to {final_path}")

    print("Launching dashboard...")
    # The Gradio app loads its data from the CSVs written above; launch()
    # blocks the event loop until the server is shut down.
    demo.launch()
|
| 50 |
+
|
| 51 |
+
if __name__ == "__main__":
    # Hard-coded example listing URL for demonstration purposes; a real
    # application would take this as dynamic user input instead.
    target_url = (
        "https://www.amazon.com/s?bbn=12035955011&i=fashion-novelty&oq=Solid%20colors%3A%20100%25%2BCotton%3B%20Heather%20Grey%3A%2090%25%2BCotton%2C%2010%25%2BPolyester%3B%20All%20Other%20Heathers%3A%2050%25%2BCotton%2C%2050%25%2BPolyester%20Lightweight%2C%20Classic%20fit%2C%20Double-needle%20sleeve%20and%20bottom%20hem%20Machine%20wash%20cold%20with%20like%20colors%2C%20dry%20low%20heat%20-long%20-premium%20-sweatshirt%20-v-neck%20-tank%2010%20x%208%20x%201%20inches%3B%204.8%20Ounces&qid=1699392328&ref=glow_cls&refresh=1&rh=p_6%3AATVPDKIKX0DER&s=date-desc-rank"
    )
    # Drive the async pipeline to completion from the synchronous entry point.
    asyncio.run(run_analysis(target_url))
|
requirements.txt
CHANGED
|
@@ -1 +1,8 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
playwright
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
scikit-learn
|
| 5 |
+
transformers
|
| 6 |
+
torch
|
| 7 |
+
gradio
|
| 8 |
+
beautifulsoup4
|