briskwave committed on
Commit
85873eb
·
verified ·
1 Parent(s): 7c60505

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +54 -0
  2. requirements.txt +8 -1
main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import asyncio
3
+ import pandas as pd
4
+ import os
5
+ from src.scraping.scraper import AmazonScraper
6
+ from src.analysis.data_processor import DataProcessor
7
+ from src.analysis.nlp_models import NLPModels
8
+ from src.analysis.scoring_engine import ScoringEngine
9
+ from src.dashboard.app import demo # Import the Gradio app
10
+
11
async def run_analysis(url, data_dir="data"):
    """Scrape an Amazon listing, enrich it with NLP scores, and launch the dashboard.

    Pipeline stages:
      1. Scrape product listings from *url* into ``data_dir``.
      2. Clean/structure the raw JSON into a DataFrame and persist it as CSV.
      3. Apply sentiment and keyphrase extraction to product titles.
      4. Compute a virality score and persist the final insights CSV.
      5. Launch the Gradio dashboard.

    Args:
        url: Amazon listing/search URL to scrape.
        data_dir: Directory for all intermediate and final artifacts
            (default ``"data"``, matching the original hard-coded paths).

    Returns:
        None. Exits early (after printing a message) if scraping yields no data.
    """
    # Ensure the artifact directory exists before any stage writes to it.
    os.makedirs(data_dir, exist_ok=True)

    # Build every artifact path once so the directory is configurable in one place.
    raw_json_path = os.path.join(data_dir, "product_data.json")
    processed_csv_path = os.path.join(data_dir, "processed_product_data.csv")
    insights_csv_path = os.path.join(data_dir, "final_product_insights.csv")

    print("Starting scraping...")
    scraper = AmazonScraper()
    # The scraper both returns the data and writes product_data.json into data_dir.
    products_data = await scraper.scrape_product_listing(url, output_dir=data_dir)

    # Guard clause: nothing scraped means nothing to process.
    if not products_data:
        print("No products scraped. Exiting.")
        return

    print("Processing data...")
    processor = DataProcessor()
    df = processor.load_raw_data(raw_json_path)
    processed_df = processor.clean_and_structure(df)
    processor.save_processed_data(processed_df, processed_csv_path)

    print("Applying NLP analysis and scoring...")
    nlp_models = NLPModels()
    scoring_engine = ScoringEngine(nlp_models)

    # Apply NLP to each product title for sentiment and keyphrases.
    # This is a simplified example; a full pipeline would analyze descriptions
    # and reviews as well.
    # NOTE(review): assumes every "title" value is a non-null string — confirm
    # clean_and_structure guarantees this, else .apply will fail on NaN.
    processed_df["sentiment_score"] = processed_df["title"].apply(lambda x: nlp_models.get_sentiment(x)["score"])
    processed_df["keyphrases"] = processed_df["title"].apply(lambda x: nlp_models.extract_keyphrases(x))

    # Calculate virality score (using dummy values for now, as actual
    # NLP-derived metrics are complex). Work on a copy so processed_df
    # is not mutated by the scoring engine.
    scored_df = scoring_engine.calculate_virality_score(processed_df.copy())

    # Save the final scored data.
    processor.save_processed_data(scored_df, insights_csv_path)
    print(f"Analysis complete. Data saved to {insights_csv_path}")

    print("Launching dashboard...")
    # The Gradio app loads data from the processed/insights CSVs; here we
    # simply launch the imported demo. NOTE(review): demo.launch() blocks,
    # so nothing after this line would run inside the event loop.
    demo.launch()
50
+
51
if __name__ == "__main__":
    # Example Amazon search URL; in a real application this would come from
    # user input (CLI argument, form field, etc.) rather than a constant.
    target_url = (
        "https://www.amazon.com/s?bbn=12035955011&i=fashion-novelty&oq=Solid%20colors%3A%20100%25%2BCotton%3B%20Heather%20Grey%3A%2090%25%2BCotton%2C%2010%25%2BPolyester%3B%20All%20Other%20Heathers%3A%2050%25%2BCotton%2C%2050%25%2BPolyester%20Lightweight%2C%20Classic%20fit%2C%20Double-needle%20sleeve%20and%20bottom%20hem%20Machine%20wash%20cold%20with%20like%20colors%2C%20dry%20low%20heat%20-long%20-premium%20-sweatshirt%20-v-neck%20-tank%2010%20x%208%20x%201%20inches%3B%204.8%20Ounces&qid=1699392328&ref=glow_cls&refresh=1&rh=p_6%3AATVPDKIKX0DER&s=date-desc-rank"
    )
    # Drive the async pipeline to completion on a fresh event loop.
    asyncio.run(run_analysis(target_url))
requirements.txt CHANGED
@@ -1 +1,8 @@
1
- trackio<1.0
 
 
 
 
 
 
 
 
1
+ playwright
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ transformers
6
+ torch
7
+ gradio
8
+ beautifulsoup4