honey234 committed on
Commit
e1ce9ca
·
1 Parent(s): 3d00f61

fixed bugs and added session apis

Browse files
apis/reddit_apis.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import Annotated
2
  from fastapi import Depends, HTTPException, APIRouter
 
3
  from databases.firebase_db import get_firebase_user_from_token
4
  from databases.supabase_db import create_user_session, save_competitor_analysis, save_pain_point_analysis, update_user_session
5
  from models.competitor_analysis_model import CompetitorAnalysisModel
@@ -130,7 +131,7 @@ async def analyzeData(inputData:InputInfoModel,user_session:dict):
130
 
131
  reddit_data_result = await getRedditData(user_query=keywords['query'], search_keywords=keywords['top_3_combinations'])
132
 
133
- services_result = await getServices(
134
  user_id=user_session['id'],
135
  field_inputs=inputData.field_inputs,
136
  user_query=keywords['query'],
@@ -142,39 +143,61 @@ async def analyzeData(inputData:InputInfoModel,user_session:dict):
142
  'reddit_data': reddit_data_result,
143
  'services_result': services_result
144
  }
145
- update_user_session(user_session=user_session,process_info=process_info)
146
  except Exception as e:
147
  print("Failed to run analyzeData ", e)
148
  raise HTTPException(status_code=500, detail=str(f"Failed to run analyzeData : {e}"))
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  async def getServices( user_id:int, field_inputs:dict, user_query=None, fileName=None, uniqueFileId=None):
151
  final_result= {}
 
152
  if "Reddit" in field_inputs:
153
  analysis_list= field_inputs['Reddit']
 
154
  # Pain point analysis only
155
  if reddit_services_names[0] in analysis_list:
156
  pain_point_analysis_result=pain_point_analysis(user_query=user_query,fileName=fileName,uniqueFileId=uniqueFileId)
157
  final_result['Reddit'] = {'pain_point_analysis':pain_point_analysis_result[2]}
158
  if "details" not in pain_point_analysis_result[0].keys():
159
- save_pain_point_analysis(data=PainPointAnalysisModel(
160
  result=pain_point_analysis_result[0],
161
  platform="Reddit",
162
  query=user_query,
163
  user_id=user_id
164
  ))
 
 
165
  # Competitor analysis only
166
  if reddit_services_names[1] in analysis_list:
167
- competitor_analysis_result =await getCompetitorAnalysisData(user_query=user_query,fileName=fileName)
 
168
  print("competitor_analysis_result",competitor_analysis_result)
169
- temp=competitor_analysis_result
170
- print("temp",temp)
171
  final_result['Reddit'] = {'competitor_analysis':{"competitors_data": len(competitor_analysis_result['competitors_data']),
172
  'e_time': competitor_analysis_result['e_time']}}
173
- save_competitor_analysis(data=CompetitorAnalysisModel(
174
- result=competitor_analysis_result['competitors_data'],
175
  platform="Reddit",
176
  query=user_query,
177
  user_id=user_id,
178
  all_competitors=competitor_analysis_result['all_competitor_data']
179
  ))
180
- return final_result
 
 
 
1
  from typing import Annotated
2
  from fastapi import Depends, HTTPException, APIRouter
3
+ import requests
4
  from databases.firebase_db import get_firebase_user_from_token
5
  from databases.supabase_db import create_user_session, save_competitor_analysis, save_pain_point_analysis, update_user_session
6
  from models.competitor_analysis_model import CompetitorAnalysisModel
 
131
 
132
  reddit_data_result = await getRedditData(user_query=keywords['query'], search_keywords=keywords['top_3_combinations'])
133
 
134
+ services_result,session_info_result = await getServices(
135
  user_id=user_session['id'],
136
  field_inputs=inputData.field_inputs,
137
  user_query=keywords['query'],
 
143
  'reddit_data': reddit_data_result,
144
  'services_result': services_result
145
  }
146
+ update_user_session(user_session=user_session,session_info=session_info_result,process_info=process_info)
147
  except Exception as e:
148
  print("Failed to run analyzeData ", e)
149
  raise HTTPException(status_code=500, detail=str(f"Failed to run analyzeData : {e}"))
150
 
151
def call_get_competitor_analysis(user_query, fileName):
    """Call the competitor-analysis HTTP endpoint and return its JSON payload.

    Args:
        user_query: Search query forwarded to the analysis service.
        fileName: Name of the uploaded file to analyze (may be None).

    Returns:
        dict: The JSON body of a successful response. Callers (getServices)
        index into this dict (e.g. ``result['competitors_data']``), so the
        response must be returned rather than discarded.

    Raises:
        HTTPException: 502 when the downstream service responds with a
        non-200 status.
    """
    # NOTE(review): 127.0.0.31 looks like a typo for 127.0.0.1 — confirm.
    url = "http://127.0.0.31:7860/getCompetitorAnalysis"  # Replace with your actual API URL

    params = {
        "user_query": user_query,
        "fileName": fileName,
        "isSolo": True  # or False, depending on your needs
    }

    # Timeout prevents the worker from hanging forever on a dead service.
    response = requests.get(url, params=params, timeout=300)

    if response.status_code == 200:
        result = response.json()
        print("Response:", result)
        # Bug fix: the original printed the JSON but returned None, so the
        # caller crashed subscripting the result.
        return result

    print(f"Failed to call API. Status code: {response.status_code}, Response: {response.text}")
    # Fail loudly instead of letting the caller subscript None.
    raise HTTPException(status_code=502,
                        detail=f"Competitor analysis service failed with status {response.status_code}")
166
+
167
+
168
async def getServices(user_id: int, field_inputs: dict, user_query=None, fileName=None, uniqueFileId=None):
    """Run the Reddit analyses requested in ``field_inputs`` and persist results.

    Args:
        user_id: Database id of the requesting user (used for saved rows).
        field_inputs: Mapping of platform name -> list of requested analyses;
            only the "Reddit" key is handled here.
        user_query: Free-text query driving the analyses.
        fileName: Optional uploaded-file name passed through to the analyzers.
        uniqueFileId: Optional unique id of the uploaded file.

    Returns:
        tuple[dict, dict]: ``(final_result, session_info_result)`` where
        ``final_result['Reddit']`` holds per-analysis summaries and
        ``session_info_result['Reddit']`` lists the ids of saved DB rows.
    """
    final_result = {}
    session_info_result = {}
    if "Reddit" in field_inputs:
        analysis_list = field_inputs['Reddit']
        session_info_result['Reddit'] = []

        # Pain point analysis only
        if reddit_services_names[0] in analysis_list:
            pain_point_analysis_result = pain_point_analysis(user_query=user_query, fileName=fileName, uniqueFileId=uniqueFileId)
            # Bug fix: merge into the 'Reddit' dict instead of reassigning it,
            # so a later competitor analysis does not clobber this entry.
            final_result.setdefault('Reddit', {})['pain_point_analysis'] = pain_point_analysis_result[2]
            # A "details" key in the first element signals an error payload —
            # only persist successful analyses.
            if "details" not in pain_point_analysis_result[0].keys():
                p_session = save_pain_point_analysis(data=PainPointAnalysisModel(
                    result=pain_point_analysis_result[0],
                    platform="Reddit",
                    query=user_query,
                    user_id=user_id
                ))
                session_info_result['Reddit'].append({'Pain point analysis': p_session['id']})

        # Competitor analysis only
        if reddit_services_names[1] in analysis_list:
            # competitor_analysis_result = await getCompetitorAnalysisData(user_query=user_query, fileName=fileName)
            competitor_analysis_result = call_get_competitor_analysis(user_query=user_query, fileName=fileName)
            print("competitor_analysis_result", competitor_analysis_result)
            # Bug fix: same merge-not-replace as above, so selecting both
            # analyses keeps both results in final_result['Reddit'].
            final_result.setdefault('Reddit', {})['competitor_analysis'] = {
                "competitors_data": len(competitor_analysis_result['competitors_data']),
                'e_time': competitor_analysis_result['e_time'],
            }
            c_session = save_competitor_analysis(data=CompetitorAnalysisModel(
                # The model expects a list; wrap a bare dict defensively.
                result=competitor_analysis_result['competitors_data'] if isinstance(competitor_analysis_result['competitors_data'], list) else [competitor_analysis_result['competitors_data']],
                platform="Reddit",
                query=user_query,
                user_id=user_id,
                all_competitors=competitor_analysis_result['all_competitor_data']
            ))
            session_info_result['Reddit'].append({'Competitor analysis': c_session['id']})

    return final_result, session_info_result
apis/user.py CHANGED
@@ -13,7 +13,7 @@ supabase_client = get_db_client()
13
 
14
  @router.get("/users/profile",response_model=UserProfileResponseModel)
15
  @time_execution
16
- async def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
17
  """
18
  Retrieve the profile information of the authenticated user.
19
 
@@ -42,4 +42,38 @@ async def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_toke
42
  multidomain_cache.update("user",user_db["id"],user_data)
43
  user_profile = UserProfileResponse(**user_data)
44
  return UserProfileResponseModel(msg="user profile",data=user_profile)
45
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  @router.get("/users/profile",response_model=UserProfileResponseModel)
15
  @time_execution
16
+ def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
17
  """
18
  Retrieve the profile information of the authenticated user.
19
 
 
42
  multidomain_cache.update("user",user_db["id"],user_data)
43
  user_profile = UserProfileResponse(**user_data)
44
  return UserProfileResponseModel(msg="user profile",data=user_profile)
45
+
46
+
47
+
48
@router.get("/users/sessions")
@time_execution
def get_user_sessions(user_db: Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
    """Return all non-deleted analysis sessions for the authenticated user.

    Bug fix: the previous body was a copy of the profile endpoint and
    returned the user profile instead of any session data.

    Raises:
        HTTPException: 500 when the sessions query fails.
    """
    try:
        sessions = (
            supabase_client
            .table("sessions")
            .select("*")
            .eq("user_id", user_db["id"])
            .eq("is_deleted", False)
            .execute()
            .data
        )
    except Exception as e:
        print("Failed to fetch user sessions:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user sessions")
    return {"msg": "user sessions", "data": sessions}
62
+
63
+
64
+
65
+
66
@router.get("/users/session/{session_id}")
@time_execution
def get_user_session_by_id(session_id: int, user_db: Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
    """Return a single non-deleted session owned by the authenticated user.

    Bug fix: the route declares a ``{session_id}`` path parameter but the
    previous function never accepted or used it, and its body was a copy of
    the profile endpoint. The user_id filter prevents reading other users'
    sessions.

    Raises:
        HTTPException: 404 when no matching session exists; 500 on query failure.
    """
    try:
        session = (
            supabase_client
            .table("sessions")
            .select("*")
            .eq("id", session_id)
            .eq("user_id", user_db["id"])
            .eq("is_deleted", False)
            .execute()
            .data
        )
    except Exception as e:
        print("Failed to fetch user session:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user session")
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    return {"msg": "user session", "data": session[0]}
competitor_analysis_report_1734026339341401.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"competitor_analysis": {"competitor_name": "ClickUp", "overview": {"date_range": "2022-09-20 to 2024-12-02", "total_posts_analyzed": 7, "total_comments_analyzed": 112}, "market_sentiment": {"overall": {"positive": "30", "neutral": "20", "negative": "50"}, "trend_over_time": {"2022-09": {"positive": "25", "neutral": "30", "negative": "45"}, "2023-06": {"positive": "35", "neutral": "15", "negative": "50"}, "2023-07": {"positive": "20", "neutral": "30", "negative": "50"}, "2024-05": {"positive": "40", "neutral": "20", "negative": "40"}, "2024-10": {"positive": "20", "neutral": "20", "negative": "60"}, "2024-12": {"positive": "30", "neutral": "30", "negative": "40"}}}, "pain_points": {"key_insights": ["Performance issues are a major concern for users.", "Complexity and overwhelming features are frequently criticized.", "Inconsistent user experience across different features and updates is reported.", "Inadequate handling of permissions and privacy settings is mentioned."], "pain_points": [{"category": "Performance Issues", "pain_point": "Slow loading times and performance problems", "frequency": "10", "sentiment_analysis": {"positive": "10", "neutral": "5", "negative": "85"}, "related_features": ["General performance", "Loading times", "Email Functionality"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "If ClickUp worked properly, it would be a great choice. 
It doesn\u2019t, to the point that it because unusable for my team due to performance issues.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Investigate and address performance bottlenecks.", "Optimize database queries and server-side processing.", "Implement caching strategies to reduce load times."]}, {"category": "Complexity and User Experience", "pain_point": "Overly complex interface and feature set", "frequency": "10", "sentiment_analysis": {"positive": "20", "neutral": "20", "negative": "60"}, "related_features": ["User interface", "Feature set", "Onboarding process"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "ClickUp has way too many features. It feels like the team is struggling to keep everything running smoothly and bug-free, while also making sure the UX/UI stays clean and user-friendly.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Simplify the user interface.", "Provide better onboarding and tutorials.", "Prioritize core features and gradually introduce new ones."]}, {"category": "Permissions and Privacy", "pain_point": "Issues with permissions and accidental sharing of private information", "frequency": "5", "sentiment_analysis": {"positive": "10", "neutral": "15", "negative": "75"}, "related_features": ["Permissions", "Access control", "Data privacy"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "Plus, clickup is feature rich, and we really liked it even if it comes with complexity. The only issue we had with the complexity is regarding privacy and share configurations. 
Team members accidentally shared private information.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Improve permissions management.", "Enhance privacy settings.", "Provide better user education on privacy controls."]}, {"category": "Agile and Scrum Support", "pain_point": "Inadequate support for Agile methodologies", "frequency": "5", "sentiment_analysis": {"positive": "20", "neutral": "25", "negative": "55"}, "related_features": ["Sprint planning", "Agile workflows", "Scrum boards"], "examples": [{"post_title": "ClickUp for Software project management - would you recommend it?", "comment": "I would not use it for sprint planning. We have been using it for a while and it doesn't work very well. It falls short with organization and sorting. Which makes prioritizing difficult.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/xjiuph/clickup_for_software_project_management_would_you/"}], "recommended_actions": ["Enhance Agile features.", "Improve integration with Agile tools.", "Provide more training and resources on Agile workflows."]}], "overall_insights": {"top_pain_points": ["Performance Issues", "Complexity and User Experience", "Permissions and Privacy"], "user_segments_most_affected": ["Software development teams", "Marketing teams", "Agencies"], "impact_on_product_development": ["Focus on performance optimization", "Prioritize user experience improvements", "Improve permissions and data privacy controls"]}}, "features_and_differentiators": [{"feature": "Task Management", "sentiment": "mixed", "mentions": "50", "related_comments": [{"comment": "We've been using Clickup for at least 4 years now and honestly it's by far one of the best project management tools.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "Clickup is great when it comes to management of tasks and breakdown each one to 
make life easier however I HATE the new update.", "upvotes": "2", "post_url": "https://www.reddit.com/r/clickup/comments/1h5144v/which_is_the_best_project_management_tool_trello/"}]}, {"feature": "Project Management", "sentiment": "mixed", "mentions": "40", "related_comments": [{"comment": "I like notion for data storage and notes. I like clickup for project management.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "If you take advantage of emails, create your own project templates, record your screen and have a lot of client assets you manage, its excellent. I run a small agency and Clickup is awesome.", "upvotes": "8", "post_url": "https://www.reddit.com/r/clickup/comments/14pm40m/does_clickup_make_sense_for/"}]}, {"feature": "Integrations", "sentiment": "positive", "mentions": "20", "related_comments": [{"comment": "The GitHub integration is very strong for us and it connects into pull requests, branches and similar with ease", "upvotes": "3", "post_url": "https://www.reddit.com/r/clickup/comments/xjiuph/clickup_for_software_project_management_would_you/"}]}], "sentiment_by_feature": {"Task Management": {"positive": "40", "neutral": "30", "negative": "30"}, "Project Management": {"positive": "30", "neutral": "40", "negative": "30"}, "Integrations": {"positive": "60", "neutral": "30", "negative": "10"}}, "audience_analysis": {"popular_subreddits": ["r/clickup", "r/projectmanagement", "r/selfhosted"], "user_segments": ["Freelancers", "Solopreneurs", "Small agencies", "Software development teams"]}, "pricing_feedback": {"value_perception": {"positive": "30", "neutral": "40", "negative": "30"}, "related_comments": [{"comment": "For the price and vast amount of tools, Clickup has definitely surpassed all my expectations.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "Pricing is awesome.", "upvotes": "2", "post_url": 
"https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}]}, "competitor_strengths": ["Extensive feature set", "Wide range of integrations", "Free plan available"], "competitor_weaknesses": ["Performance issues", "Complexity", "User experience inconsistencies"], "user_recommendations": ["Improve performance", "Simplify user interface", "Enhance Agile support"], "competitive_strategy": {"pricing_strategy": "Competitive pricing with a freemium model", "feature_improvement": "Focus on improving core features and user experience", "marketing_strategy": "Target specific user segments with tailored marketing campaigns"}}}
competitor_analysis_report_1734026339341403.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"competitor_analysis": {"competitor_name": "Notion", "overview": {"date_range": "2019-11-18 to 2024-11-19", "total_posts_analyzed": 16, "total_comments_analyzed": 264}, "market_sentiment": {"overall": {"positive": "45", "neutral": "30", "negative": "25"}, "trend_over_time": {"2021-03": {"positive": "40", "neutral": "35", "negative": "25"}, "2021-06": {"positive": "50", "neutral": "30", "negative": "20"}, "2021-09": {"positive": "42", "neutral": "33", "negative": "25"}, "2021-10": {"positive": "48", "neutral": "28", "negative": "24"}, "2021-12": {"positive": "45", "neutral": "35", "negative": "20"}, "2022-10": {"positive": "40", "neutral": "40", "negative": "20"}, "2023-02": {"positive": "30", "neutral": "40", "negative": "30"}, "2023-03": {"positive": "50", "neutral": "30", "negative": "20"}, "2023-06": {"positive": "60", "neutral": "25", "negative": "15"}, "2023-09": {"positive": "55", "neutral": "30", "negative": "15"}, "2023-12": {"positive": "40", "neutral": "35", "negative": "25"}, "2024-01": {"positive": "60", "neutral": "30", "negative": "10"}, "2024-03": {"positive": "50", "neutral": "40", "negative": "10"}, "2024-07": {"positive": "70", "neutral": "20", "negative": "10"}, "2024-09": {"positive": "60", "neutral": "30", "negative": "10"}, "2024-10": {"positive": "50", "neutral": "35", "negative": "15"}, "2024-11": {"positive": "40", "neutral": "40", "negative": "20"}}}, "pain_points": [{"category": "Offline Access", "pain_point": "Lack of offline access is a major drawback for many users.", "frequency": "8", "sentiment_analysis": {"positive": "1", "neutral": "2", "negative": "5"}, "related_features": ["offline_editing", "sync_speed"], "examples": [{"post_title": "You can now try Microsoft Loop, a Notion competitor with futuristic Office documents", "comment": "\"No offline access kills it for me. 
Full stop.\"", "upvotes": "216", "post_url": "https://www.reddit.com/r/Notion/comments/11zuxhi/you_can_now_try_microsoft_loop_a_notion/"}], "recommended_actions": ["Implement offline capabilities", "Improve sync speed and reliability"]}, {"category": "Collaboration Features", "pain_point": "Some users find the collaboration features lacking or difficult to use.", "frequency": "6", "sentiment_analysis": {"positive": "2", "neutral": "2", "negative": "2"}, "related_features": ["real-time_collaboration", "co-editing", "commenting"], "examples": [{"post_title": "You can now try Microsoft Loop, a Notion competitor with futuristic Office documents", "comment": "\"I don't care for collaboration functions.\"", "upvotes": "216", "post_url": "https://www.reddit.com/r/Notion/comments/11zuxhi/you_can_now_try_microsoft_loop_a_notion/"}], "recommended_actions": ["Improve the user interface for collaboration features", "Add more advanced collaboration features"]}], "features_and_differentiators": [{"feature": "Databases", "sentiment": "positive", "mentions": "15", "related_comments": [{"comment": "\"Notion specializes in relational databases using blocks.\"", "upvotes": null, "post_url": null}]}, {"feature": "Templates", "sentiment": "positive", "mentions": "10", "related_comments": [{"comment": "\"Notion is still leagues ahead in terms of cross-document search, templates, databases, etc.\"", "upvotes": null, "post_url": null}]}, {"feature": "UI", "sentiment": "mixed", "mentions": "12", "related_comments": [{"comment": "\"If MS didn\u2019t copy Notions UI, the parallels between the two products would be minimal.\"", "upvotes": null, "post_url": null}]}], "sentiment_by_feature": {"Databases": {"positive": "70", "neutral": "20", "negative": "10"}, "Templates": {"positive": "80", "neutral": "15", "negative": "5"}, "UI": {"positive": "50", "neutral": "30", "negative": "20"}}, "audience_analysis": {"popular_subreddits": ["r/Notion", "r/selfhosted", "r/UI_Design", "r/todoist", 
"r/learnpython"], "user_segments": ["students", "small business owners", "knowledge workers", "designers", "developers"]}, "pricing_feedback": {"value_perception": {"positive": "60", "neutral": "30", "negative": "10"}, "related_comments": [{"comment": "\"Google is already free, while Notion charges per person and it's costly.\"", "upvotes": null, "post_url": null}]}, "competitor_strengths": ["intuitive interface", "powerful databases", "versatile features", "large user community", "extensive integrations"], "competitor_weaknesses": ["lack of offline access", "pricing", "some collaboration features need improvement", "not ideal for public-facing documentation"], "user_recommendations": ["improve offline functionality", "add version control", "enhance collaboration features", "consider a free tier"], "competitive_strategy": {"pricing_strategy": "freemium model", "feature_improvement": "continuously adding new features and improving existing ones"}}}
databases/supabase_db.py CHANGED
@@ -30,7 +30,7 @@ def create_user_with_id(external_id,email):
30
  return user
31
 
32
  # create user session
33
- def create_user_session(user_id: int, input_info: InputInfoModel) -> None:
34
  """
35
  Creates a new user session in the database.
36
 
@@ -89,6 +89,7 @@ def update_user_session(user_session: dict,session_info:dict=None, process_info:
89
  def save_pain_point_analysis(data:PainPointAnalysisModel)->None:
90
  try:
91
  data= db_client.table("pain_point_analysis").insert(data.model_dump()).execute().data
 
92
  except Exception as e:
93
  print("Failed to save pain point analysis:", e)
94
  raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
@@ -96,6 +97,22 @@ def save_pain_point_analysis(data:PainPointAnalysisModel)->None:
96
  def save_competitor_analysis(data:CompetitorAnalysisModel)->None:
97
  try:
98
  data= db_client.table("competitor_analysis").insert(data.model_dump()).execute().data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  except Exception as e:
100
  print("Failed to save pain point analysis:", e)
101
  raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
 
30
  return user
31
 
32
  # create user session
33
+ def create_user_session(user_id: int, input_info: InputInfoModel):
34
  """
35
  Creates a new user session in the database.
36
 
 
89
def save_pain_point_analysis(data: PainPointAnalysisModel) -> dict:
    """Insert a pain-point analysis row and return the created record.

    Returns the inserted row (dict) so callers can reference its ``id``
    (the ``-> None`` annotation was stale once the return was added).

    Raises:
        HTTPException: 500 when the insert fails.
    """
    try:
        # Avoid shadowing the `data` parameter with the query result.
        rows = db_client.table("pain_point_analysis").insert(data.model_dump()).execute().data
        return rows[0]
    except Exception as e:
        print("Failed to save pain point analysis:", e)
        raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
 
97
  def save_competitor_analysis(data:CompetitorAnalysisModel)->None:
98
  try:
99
  data= db_client.table("competitor_analysis").insert(data.model_dump()).execute().data
100
+ return data[0]
101
+ except Exception as e:
102
+ print("Failed to save pain point analysis:", e)
103
+ raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
104
+
105
def get_user_sessions(user_id: int) -> list:
    """Fetch all non-deleted sessions belonging to ``user_id``.

    Bug fixes: the previous body never returned the fetched rows, contained a
    stray double assignment (``data = tasks = ...``), and its error messages
    were copy-pasted from the pain-point saver.

    Returns:
        list: Raw session rows (dicts) from the ``sessions`` table. The rows
        are not validated against UserSessionModel here — callers that need
        model instances must construct them.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        sessions = (
            db_client
            .table('sessions')
            .select("*")
            .eq('user_id', user_id)
            .eq('is_deleted', False)
            .execute()
            .data
        )
        return sessions
    except Exception as e:
        print("Failed to fetch user sessions:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user sessions")
models/session_model.py CHANGED
@@ -31,4 +31,4 @@ class UserSessionModel(BaseModel):
31
  session_info: dict
32
  process_info: Optional[dict] = None
33
  session_completed: Optional[bool] = False
34
-
 
31
  session_info: dict
32
  process_info: Optional[dict] = None
33
  session_completed: Optional[bool] = False
34
+ is_deleted: Optional[bool] = False
new_pain_point_report.json ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "report_title": "Pain Point Analysis Report for Startups",
3
+ "date_generated": "2024-12-07",
4
+ "target_audience": {
5
+ "industry": "Startups",
6
+ "primary_subreddits": [
7
+ "startups",
8
+ "Entrepreneur",
9
+ "Startup_Ideas",
10
+ "BlockchainStartups",
11
+ "ecommerce",
12
+ "smallbusiness",
13
+ "EntrepreneurRideAlong",
14
+ "StartUpIndia",
15
+ "SaaSToolbox",
16
+ "marketing",
17
+ "DACXI",
18
+ "biotech"
19
+ ],
20
+ "audience_demographics": {
21
+ "age_range": "25-45",
22
+ "interests": [
23
+ "entrepreneurship",
24
+ "business",
25
+ "technology",
26
+ "marketing",
27
+ "sales",
28
+ "funding",
29
+ "team building",
30
+ "AI"
31
+ ],
32
+ "regions": [
33
+ "Global"
34
+ ]
35
+ }
36
+ },
37
+ "analysis_summary": {
38
+ "total_posts_analyzed": 12,
39
+ "total_comments_analyzed": 598,
40
+ "time_period": "2023-12-17 - 2024-12-01",
41
+ "key_findings": [
42
+ "Recurring frustration with securing funding, particularly for early-stage startups.",
43
+ "High demand for effective marketing and sales strategies, especially for reaching target audiences.",
44
+ "Negative sentiment about the challenges of team building and talent acquisition."
45
+ ]
46
+ },
47
+ "pain_points": [
48
+ {
49
+ "pain_point_id": 1,
50
+ "description": "Difficulty securing funding, especially for early-stage startups.",
51
+ "examples_from_reddit": [
52
+ {
53
+ "post_id": "1b9g0l1",
54
+ "post_url": "https://www.reddit.com/r/Entrepreneur/comments/1b9g0l1/what_are_the_common_struggles_of_a_small_business/",
55
+ "text_snippet": "Money"
56
+ },
57
+ {
58
+ "post_id": "1h46s6w",
59
+ "post_url": "https://www.reddit.com/r/StartUpIndia/comments/1h46s6w/starting_a_startup_in_india_key_ventures_and/",
60
+ "text_snippet": "Funding Gaps: Tap into angel investors, crowdfunding platforms, and state-backed funds."
61
+ }
62
+ ],
63
+ "impact_analysis": {
64
+ "frequency": "High",
65
+ "audience_size_affected": "Large",
66
+ "sentiment_trend": "Negative"
67
+ },
68
+ "actionable_recommendations": [
69
+ "Explore alternative funding options like crowdfunding (e.g., Kickstarter, Indiegogo), angel investors (e.g., AngelList), or government grants (e.g., SBIR, STTR).",
70
+ "Develop a compelling business plan and pitch deck to attract investors. Utilize resources like the Sequoia Capital Pitch Deck template or the Y Combinator application advice."
71
+ ],
72
+ "tools_and_technologies": [
73
+ "Crunchbase",
74
+ "Pitchbook",
75
+ "Gust",
76
+ "DocSend"
77
+ ],
78
+ "methods": [
79
+ "Lean Startup methodology",
80
+ "Value Proposition Design"
81
+ ],
82
+ "case_studies": [
83
+ "Mailchimp's bootstrapping success",
84
+ "Airbnb's early crowdfunding campaign"
85
+ ]
86
+ },
87
+ {
88
+ "pain_point_id": 2,
89
+ "description": "Intense competition, requiring strong differentiation and effective marketing.",
90
+ "examples_from_reddit": [
91
+ {
92
+ "post_id": "1dwtb7l",
93
+ "post_url": "https://www.reddit.com/r/Entrepreneur/comments/1dwtb7l/whats_the_worse_business_to_start_in_2024/",
94
+ "text_snippet": "Restaurants gotta be no 1. No matter the circumstances. It’s most likely to fail."
95
+ },
96
+ {
97
+ "post_id": "1fdig1p",
98
+ "post_url": "https://www.reddit.com/r/marketing/comments/1fdig1p/what_are_some_marketing_challenges_that_startups/",
99
+ "text_snippet": "The startup market was the new gold rush 10-15 years ago, but nowadays, I think a lot of newer startups are dealing with the oversaturation problem."
100
+ }
101
+ ],
102
+ "impact_analysis": {
103
+ "frequency": "High",
104
+ "audience_size_affected": "Large",
105
+ "sentiment_trend": "Negative"
106
+ },
107
+ "actionable_recommendations": [
108
+ "Conduct thorough market research to identify unmet needs and differentiate from competitors. Use tools like SWOT analysis and Porter's Five Forces.",
109
+ "Develop a strong value proposition that resonates with the target audience. Consider using the Value Proposition Canvas."
110
+ ],
111
+ "tools_and_technologies": [
112
+ "SEMrush",
113
+ "Ahrefs",
114
+ "SimilarWeb",
115
+ "Brand24"
116
+ ],
117
+ "methods": [
118
+ "Blue Ocean Strategy",
119
+ "Competitive Analysis Framework"
120
+ ],
121
+ "case_studies": [
122
+ "Dollar Shave Club's disruption of the razor market",
123
+ "Tesla's creation of the electric vehicle market"
124
+ ]
125
+ },
126
+ {
127
+ "pain_point_id": 3,
128
+ "description": "Difficulty with marketing and sales, and reaching the target audience due to lack of expertise and resources.",
129
+ "examples_from_reddit": [
130
+ {
131
+ "post_id": "1c63gqn",
132
+ "post_url": "https://www.reddit.com/r/Startup_Ideas/comments/1c63gqn/what_are_your_problems_as_a_startup/",
133
+ "text_snippet": "…hiring the right marketing expert is often difficult and expensive…managing the workflow…can also be overwhelming"
134
+ },
135
+ {
136
+ "post_id": "18ktmtr",
137
+ "post_url": "https://www.reddit.com/r/startups/comments/18ktmtr/solo_technical_founders_when_it_came_to_sales/",
138
+ "text_snippet": "I like coding and building new stuff but I don't enjoy as much the marketing and sales side."
139
+ }
140
+ ],
141
+ "impact_analysis": {
142
+ "frequency": "Very High",
143
+ "audience_size_affected": "Large",
144
+ "sentiment_trend": "Negative"
145
+ },
146
+ "actionable_recommendations": [
147
+ "Develop a clear marketing strategy with measurable goals and KPIs. Utilize frameworks like the AIDA model or the marketing mix (4Ps).",
148
+ "Explore cost-effective advertising channels like social media ads (e.g., Facebook Ads, Twitter Ads), influencer marketing, content marketing, or search engine optimization (SEO)."
149
+ ],
150
+ "tools_and_technologies": [
151
+ "HubSpot",
152
+ "Mailchimp",
153
+ "Buffer",
154
+ "Google Analytics",
155
+ "Canva"
156
+ ],
157
+ "methods": [
158
+ "Inbound marketing",
159
+ "Growth hacking",
160
+ "Sales funnels"
161
+ ],
162
+ "case_studies": [
163
+ "HubSpot's inbound marketing success",
164
+ "Dropbox's referral program"
165
+ ]
166
+ },
167
+ {
168
+ "pain_point_id": 4,
169
+ "description": "Difficulty building a strong team and acquiring talent.",
170
+ "examples_from_reddit": [
171
+ {
172
+ "post_id": "1dr44rv",
173
+ "post_url": "https://www.reddit.com/r/BlockchainStartups/comments/1dr44rv/advice_needed_launching_a_blockchain_startup_in/",
174
+ "text_snippet": "Building a Tech Team: Look for developers with experience in blockchain technologies…"
175
+ },
176
+ {
177
+ "post_id": "1c33eoz",
178
+ "post_url": "https://www.reddit.com/r/startups/comments/1c33eoz/what_has_been_your_biggest_challenge_when_growing/",
179
+ "text_snippet": "Getting a good team, a C level team... Still not able to and have been trying for 3 years now"
180
+ }
181
+ ],
182
+ "impact_analysis": {
183
+ "frequency": "High",
184
+ "audience_size_affected": "Large",
185
+ "sentiment_trend": "Very Negative"
186
+ },
187
+ "actionable_recommendations": [
188
+ "Develop a clear hiring strategy and define roles and responsibilities carefully. Use tools like job scorecards and structured interviews.",
189
+ "Offer competitive compensation and benefits. Consider equity options and flexible work arrangements."
190
+ ],
191
+ "tools_and_technologies": [
192
+ "LinkedIn",
193
+ "Indeed",
194
+ "Glassdoor",
195
+ "BambooHR"
196
+ ],
197
+ "methods": [
198
+ "Employer branding",
199
+ "Employee referral programs",
200
+ "Culture fit assessments"
201
+ ],
202
+ "case_studies": [
203
+ "Netflix's culture deck",
204
+ "Zappos' emphasis on company culture"
205
+ ]
206
+ }
207
+ ],
208
+ "opportunities_identified": [
209
+ "Focus on niche markets to reduce competition.",
210
+ "Highlight unique value proposition and early traction to attract investors."
211
+ ],
212
+ "conclusion": {
213
+ "summary_of_key_findings": [
214
+ "Funding, competition, marketing & sales, and team building are the top challenges for startups.",
215
+ "Early-stage and bootstrapped startups are particularly affected."
216
+ ],
217
+ "next_steps": [
218
+ "Prioritize product development based on identified pain points.",
219
+ "Develop targeted marketing strategies.",
220
+ "Continuously monitor Reddit and other relevant platforms for emerging trends."
221
+ ]
222
+ }
223
+ }
reddit/load_env.py CHANGED
@@ -36,3 +36,4 @@ reddit_username = os.getenv('REDDIT_USERNAME')
36
  # ScraperANT
37
  scraper_ant_api_key = os.getenv('SCRAPERANT_APIKEY')
38
  scraper_ant_api_key2 = os.getenv('SCRAPERANT_APIKEY2')
 
 
36
  # ScraperANT
37
  scraper_ant_api_key = os.getenv('SCRAPERANT_APIKEY')
38
  scraper_ant_api_key2 = os.getenv('SCRAPERANT_APIKEY2')
39
+ scraper_ant_api_key3 = os.getenv('SCRAPERANT_APIKEY3')
reddit/prompts.py CHANGED
@@ -119,7 +119,25 @@ Return the response in the **JSON format** provided below, and include data for
119
  Here is the required JSON format:
120
  {{
121
  "pain_point_analysis": {{
122
- "key_insights": ["insight1", "insight2",...],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "pain_points": [
124
  {{
125
  "category": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
@@ -142,12 +160,30 @@ Here is the required JSON format:
142
  .
143
  .
144
  ],
145
- "recommended_actions": [
146
- "Recommended solution/action 1",
147
- "Recommended solution/action 2",
148
- .
149
- .
150
- .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  ]
152
  }},
153
  .
@@ -155,16 +191,28 @@ Here is the required JSON format:
155
  .
156
  similarly, for all remaining categories
157
  ],
158
- "overall_insights": {{
159
- "top_pain_points": ["pain_point1", "pain_point2",...],
160
- "user_segments_most_affected": ["segment1", "segment2",...],
161
- "impact_on_product_development": [
162
- "Insight for development 1",
163
- "Insight for development 2",
164
  .
165
  .
166
  .
167
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  }}
169
  }}
170
  }}
 
119
  Here is the required JSON format:
120
  {{
121
  "pain_point_analysis": {{
122
+ "target_audience": {{
123
+ "industry": "Industry Name",
124
+ "primary_subreddits": [
125
+ "Subreddit 1",
126
+ "Subreddit 2",
127
+ .
128
+ .
129
+ .
130
+ ],
131
+ }},
132
+ "analysis_summary": {{
133
+ "key_findings": [
134
+ "Key finding 1",
135
+ "Key finding 2",
136
+ .
137
+ .
138
+ .
139
+ ]
140
+ }},
141
  "pain_points": [
142
  {{
143
  "category": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
 
160
  .
161
  .
162
  ],
163
+ "impact_analysis": {{
164
+ "frequency": "High/Medium/Low",
165
+ "audience_size_affected": "Large/Medium/Small",
166
+ }},
167
+ "actionable_recommendations": [
168
+ "Recommendation 1",
169
+ "Recommendation 2",
170
+ .
171
+ .
172
+ .
173
+ ],
174
+ "methods": [
175
+ "Method 1",
176
+ "Method 2",
177
+ .
178
+ .
179
+ .
180
+ ],
181
+ "case_studies": [
182
+ "Case study 1",
183
+ "Case study 2",
184
+ .
185
+ .
186
+ .
187
  ]
188
  }},
189
  .
 
191
  .
192
  similarly, for all remaining categories
193
  ],
194
+ "opportunities_identified": [
195
+ "Opportunity 1",
196
+ "Opportunity 2",
 
 
 
197
  .
198
  .
199
  .
200
+ ],
201
+ "conclusion": {{
202
+ "summary_of_key_findings": [
203
+ "Summary of Key Findings 1",
204
+ "Summary of Key Findings 2",
205
+ .
206
+ .
207
+ .
208
+ ],
209
+ "next_steps": [
210
+ "Next Step 1",
211
+ "Next Step 2",
212
+ .
213
+ .
214
+ .
215
+ ]
216
  }}
217
  }}
218
  }}
reddit/reddit_competitor_analysis.py CHANGED
@@ -127,8 +127,11 @@ async def getPostDataofCompetitor(fileName,user_query):
127
  actual_list.append(index)
128
  print("Fetched data for competitors")
129
  fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
130
- result=await preprocessingCompetitorsData(user_query=user_query,fileNames=fileNames,fileUniqueIds=actual_list)
131
- return result
 
 
 
132
 
133
 
134
  async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
@@ -141,14 +144,14 @@ async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
141
  await getPostComments(file_name=fileNames[i],is_for_competitor_analysis=True)
142
  json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
143
  c+=1
144
- # if json_data does contain "details" field, then skip this file
145
- if "details" in json_data.keys():
146
- continue
147
- # save json_data to json file
148
- with open(f"competitor_analysis_report_{fileUniqueIds[i]}.json", "w") as outfile:
149
- json.dump(json_data, outfile)
150
- print("Competitor Analysis Report",f"competitor_analysis_report_{fileUniqueIds[i]}.json")
151
- competitors_json_data.append(json_data)
152
 
153
  for file_path in fileNames:
154
  # Check if the file exists before attempting to delete
 
127
  actual_list.append(index)
128
  print("Fetched data for competitors")
129
  fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
130
+ if len(fileNames)!=0:
131
+ result=await preprocessingCompetitorsData(user_query=user_query,fileNames=fileNames,fileUniqueIds=actual_list)
132
+ return result
133
+ else:
134
+ return {'details':'No data found'}
135
 
136
 
137
  async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
 
144
  await getPostComments(file_name=fileNames[i],is_for_competitor_analysis=True)
145
  json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
146
  c+=1
147
+ # if json_data does not contain "details" field, then only save the json
148
+ if "details" not in json_data.keys():
149
+ # save json_data to json file
150
+ with open(f"competitor_analysis_report_{fileUniqueIds[i]}.json", "w") as outfile:
151
+ json.dump(json_data, outfile)
152
+ print("Competitor Analysis Report",f"competitor_analysis_report_{fileUniqueIds[i]}.json")
153
+ competitors_json_data.append(json_data)
154
+
155
 
156
  for file_path in fileNames:
157
  # Check if the file exists before attempting to delete
reddit/reddit_functions.py CHANGED
@@ -27,8 +27,9 @@ async def getRedditData(user_query, search_keywords):
27
  # Step 3: Get final data
28
  try:
29
  print("fileNames", fileNames)
30
- getFinalData(user_query=user_query, filesNames=fileNames)
31
- successful_steps.append(('getFinalData',)) # Mark this step as successful
 
32
  except Exception as e:
33
  print(f"Failed at getFinalData: {e}")
34
 
 
27
  # Step 3: Get final data
28
  try:
29
  print("fileNames", fileNames)
30
+ res=getFinalData(user_query=user_query, filesNames=fileNames)
31
+ if res is True:
32
+ successful_steps.append(('getFinalData')) # Mark this step as successful
33
  except Exception as e:
34
  print(f"Failed at getFinalData: {e}")
35
 
reddit/reddit_pain_point_analysis.py CHANGED
@@ -17,7 +17,7 @@ def pain_point_analysis(user_query, fileName, uniqueFileId):
17
  pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
18
  generation_config = genai.GenerationConfig(response_mime_type="application/json") # Request JSON response
19
  model = genai.GenerativeModel(
20
- model_name="gemini-1.5-pro-002" if environment=="PRODUCTION" else "gemini-1.5-flash",
21
  generation_config=generation_config,
22
  )
23
 
 
17
  pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
18
  generation_config = genai.GenerationConfig(response_mime_type="application/json") # Request JSON response
19
  model = genai.GenerativeModel(
20
+ model_name="gemini-1.5-pro-002" if environment!="PRODUCTION" else "gemini-1.5-flash",
21
  generation_config=generation_config,
22
  )
23
 
reddit/reddit_scraper.ipynb CHANGED
@@ -7379,7 +7379,9 @@
7379
  "text": [
7380
  "post_elements 29\n",
7381
  "another_post_elements 20\n",
7382
- "49\n"
 
 
7383
  ]
7384
  }
7385
  ],
@@ -7415,13 +7417,15 @@
7415
  " \n",
7416
  " # Extract post title\n",
7417
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7418
- " \n",
 
 
7419
  " # Extract votes count\n",
7420
- " votes_element = post.find('faceplate-number', {'pretty': True})\n",
7421
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7422
  " \n",
7423
  " # Extract comments count\n",
7424
- " comments_element = post.find_all('faceplate-number', {'pretty': True})\n",
7425
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7426
  " \n",
7427
  " # Append data to the list\n",
@@ -7447,12 +7451,14 @@
7447
  " # Extract post title\n",
7448
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7449
  " \n",
 
 
7450
  " # Extract votes count\n",
7451
- " votes_element = post.find('faceplate-number', {'pretty': True})\n",
7452
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7453
  " \n",
7454
  " # Extract comments count\n",
7455
- " comments_element = post.find_all('faceplate-number', {'pretty': True})\n",
7456
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7457
  " \n",
7458
  " # Append data to the list\n",
@@ -7470,10 +7476,13 @@
7470
  "import pandas as pd \n",
7471
  "\n",
7472
  "df = pd.DataFrame(post_data_list)\n",
7473
- "df.to_csv(\"posts_data78.csv\",index=False)\n",
 
7474
  "# Print the list of posts data\n",
7475
  "# for idx, post_data in enumerate(post_data_list, 1):\n",
7476
- "# print(f\"Post {idx}: {post_data}\")\n"
 
 
7477
  ]
7478
  },
7479
  {
 
7379
  "text": [
7380
  "post_elements 29\n",
7381
  "another_post_elements 20\n",
7382
+ "49\n",
7383
+ "49\n",
7384
+ "len 49\n"
7385
  ]
7386
  }
7387
  ],
 
7417
  " \n",
7418
  " # Extract post title\n",
7419
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7420
+ "\n",
7421
+ " bottom_element = post.find('div', {'data-testid': 'search-counter-row'})\n",
7422
+ "\n",
7423
  " # Extract votes count\n",
7424
+ " votes_element = bottom_element.find('faceplate-number', {'pretty': True})\n",
7425
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7426
  " \n",
7427
  " # Extract comments count\n",
7428
+ " comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})\n",
7429
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7430
  " \n",
7431
  " # Append data to the list\n",
 
7451
  " # Extract post title\n",
7452
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7453
  " \n",
7454
+ " bottom_element = post.find('div', {'data-testid': 'search-counter-row'})\n",
7455
+ "\n",
7456
  " # Extract votes count\n",
7457
+ " votes_element = bottom_element.find('faceplate-number', {'pretty': True})\n",
7458
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7459
  " \n",
7460
  " # Extract comments count\n",
7461
+ " comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})\n",
7462
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7463
  " \n",
7464
  " # Append data to the list\n",
 
7476
  "import pandas as pd \n",
7477
  "\n",
7478
  "df = pd.DataFrame(post_data_list)\n",
7479
+ "print(len(df))\n",
7480
+ "# df.to_csv(\"posts_data78.csv\",index=False)\n",
7481
  "# Print the list of posts data\n",
7482
  "# for idx, post_data in enumerate(post_data_list, 1):\n",
7483
+ "# print(f\"Post {idx}: {post_data}\")\n",
7484
+ "df=df[df[\"comment_count\"]!=0]\n",
7485
+ "print(\"len\",len(df))"
7486
  ]
7487
  },
7488
  {
reddit/reddit_search_scrapper.py CHANGED
@@ -19,18 +19,21 @@ def preProcessPostData(filesNames):
19
  df.to_csv(i, index=False)
20
 
21
  def getFinalData(user_query,filesNames):
22
- preProcessPostData(filesNames=filesNames)
23
- # files_name=["posts_data_0.csv","posts_data_1.csv","posts_data_2.csv"]
24
- final_df = topic_sort(path1=filesNames[0],path2= filesNames[1],path3= filesNames[2],query= user_query,)
25
- for file_path in filesNames:
26
- # Check if the file exists before attempting to delete
27
- if os.path.exists(file_path):
28
- os.remove(file_path)
29
- print("File deleted successfully")
30
- else:
31
- print("File does not exist")
32
- final_df.to_csv(filesNames[0], index=False)
33
-
34
- print("Data saved to ",filesNames[0])
35
-
 
 
 
36
 
 
19
  df.to_csv(i, index=False)
20
 
21
  def getFinalData(user_query,filesNames):
22
+ try:
23
+ preProcessPostData(filesNames=filesNames)
24
+ # files_name=["posts_data_0.csv","posts_data_1.csv","posts_data_2.csv"]
25
+ final_df = topic_sort(path1=filesNames[0],path2= filesNames[1],path3= filesNames[2],query= user_query,)
26
+ for file_path in filesNames:
27
+ # Check if the file exists before attempting to delete
28
+ if os.path.exists(file_path):
29
+ os.remove(file_path)
30
+ print("File deleted successfully")
31
+ else:
32
+ print("File does not exist")
33
+ final_df.to_csv(filesNames[0], index=False)
34
+
35
+ print("Data saved to ",filesNames[0])
36
+ return True
37
+ except:
38
+ return False
39
 
reddit/scraping.py CHANGED
@@ -2,6 +2,8 @@
2
  Only Scraping related code.
3
  '''
4
  import asyncio
 
 
5
  import asyncpraw
6
  import json
7
  import time
@@ -10,7 +12,7 @@ import base64
10
  import re
11
  from asyncpraw.models import Comment
12
  from reddit.reddit_utils import topic_sort
13
- from reddit.load_env import reddit_client_id, reddit_client_secret,reddit_password,reddit_user_agent,reddit_username,scraper_ant_api_key,scraper_ant_api_key2
14
  import http.client
15
  from bs4 import BeautifulSoup
16
 
@@ -47,12 +49,13 @@ def getDFofSearchPostData(htmlContent):
47
  # Extract post title
48
  post_title = post_title_element.text.strip() if post_title_element else None
49
 
 
50
  # Extract votes count
51
- votes_element = post.find('faceplate-number', {'pretty': True})
52
  votes_count = votes_element.text.strip() if votes_element else None
53
 
54
  # Extract comments count
55
- comments_element = post.find_all('faceplate-number', {'pretty': True})
56
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
57
 
58
  # Append data to the list
@@ -77,13 +80,14 @@ def getDFofSearchPostData(htmlContent):
77
 
78
  # Extract post title
79
  post_title = post_title_element.text.strip() if post_title_element else None
80
-
 
81
  # Extract votes count
82
- votes_element = post.find('faceplate-number', {'pretty': True})
83
  votes_count = votes_element.text.strip() if votes_element else None
84
 
85
  # Extract comments count
86
- comments_element = post.find_all('faceplate-number', {'pretty': True})
87
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
88
 
89
  # Append data to the list
@@ -116,10 +120,13 @@ def getHtmlContent(search_keyword,forCompetitorAnalysis=False,even=False):
116
  '''
117
  try:
118
  base64_snippet = base64.b64encode(js_snippet.encode()).decode()
119
- conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key if even else scraper_ant_api_key2}&js_snippet={base64_snippet}")
120
  except:
121
- base64_snippet = base64.b64encode(js_snippet.encode()).decode()
122
- conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key if even==True else scraper_ant_api_key2}&js_snippet={base64_snippet}")
 
 
 
123
  else:
124
  js_snippet = '''
125
  window.scrollTo(0,document.body.scrollHeight);
@@ -155,11 +162,13 @@ def retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=False,tries=2,ev
155
  # 1. Get Search Post Data
156
  async def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False,even=False):
157
  htmlContent = getHtmlContent(search_keyword,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
158
- print("htmlcontent",htmlContent[:100])
159
- htmlContent = retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
160
  if htmlContent is None:
161
- return None
162
- print("htmlcontent",htmlContent[:100])
 
 
 
163
  time.sleep(1)
164
  print("reached this step")
165
  df = getDFofSearchPostData(htmlContent)
@@ -210,52 +219,70 @@ async def process_comment(comment, reply_limit):
210
 
211
  return comment_data
212
 
213
-
214
- async def fetch_submission_comments(url, reddit,is_for_competitor_analysis):
215
-
216
-
217
  """
218
- Fetch comments from a single Reddit submission given its URL.
 
 
 
 
 
 
 
 
 
219
  """
220
- try:
221
- # Use asyncio.wait_for to add a timeout for loading the submission
222
- submission = await asyncio.wait_for(reddit.submission(url=url), timeout=30)
223
 
224
- # Load additional submission data
225
- await submission.load()
 
 
226
 
227
- # Expand comments up to the specified limit
228
- await submission.comments.replace_more(limit=2)
229
 
230
- # Initialize variables for comment processing
231
- comments_data = []
232
- comment_queue = list(submission.comments)
233
- comment_count = 0
234
- threshold = 20 if is_for_competitor_analysis else 40
235
 
236
- while comment_queue:
237
- if comment_count >= threshold:
238
- break
239
- comment = comment_queue.pop(0)
240
- if isinstance(comment, Comment):
241
- comment_data = await process_comment(
242
- comment, reply_limit=2 if is_for_competitor_analysis else 3
243
- )
244
- comments_data.append(comment_data)
245
- comment_count += 1
246
 
247
- # Return processed comments
248
- return {"comments": comments_data,"description":submission.selftext if submission.selftext else ""}
 
 
 
 
 
 
 
 
249
 
250
- except asyncio.TimeoutError:
251
- print(f"Skipping due to timeout: {url}")
252
- except Exception as e:
253
- print(f"Skipping due to error: {url} - {e}")
 
254
 
255
- # Return None if an error occurs
256
- return None
 
 
257
 
 
 
 
 
 
258
 
 
 
 
259
  async def getPostComments(file_name, is_for_competitor_analysis=False):
260
  """
261
  Fetch comments for posts listed in the CSV file and save the processed data.
 
2
  Only Scraping related code.
3
  '''
4
  import asyncio
5
+ import logging
6
+ import random
7
  import asyncpraw
8
  import json
9
  import time
 
12
  import re
13
  from asyncpraw.models import Comment
14
  from reddit.reddit_utils import topic_sort
15
+ from reddit.load_env import reddit_client_id, reddit_client_secret,reddit_password,reddit_user_agent,reddit_username,scraper_ant_api_key,scraper_ant_api_key2,scraper_ant_api_key3
16
  import http.client
17
  from bs4 import BeautifulSoup
18
 
 
49
  # Extract post title
50
  post_title = post_title_element.text.strip() if post_title_element else None
51
 
52
+ bottom_element = post.find('div', {'data-testid': 'search-counter-row'})
53
  # Extract votes count
54
+ votes_element = bottom_element.find('faceplate-number', {'pretty': True})
55
  votes_count = votes_element.text.strip() if votes_element else None
56
 
57
  # Extract comments count
58
+ comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})
59
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
60
 
61
  # Append data to the list
 
80
 
81
  # Extract post title
82
  post_title = post_title_element.text.strip() if post_title_element else None
83
+
84
+ bottom_element = post.find('div', {'data-testid': 'search-counter-row'})
85
  # Extract votes count
86
+ votes_element = bottom_element.find('faceplate-number', {'pretty': True})
87
  votes_count = votes_element.text.strip() if votes_element else None
88
 
89
  # Extract comments count
90
+ comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})
91
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
92
 
93
  # Append data to the list
 
120
  '''
121
  try:
122
  base64_snippet = base64.b64encode(js_snippet.encode()).decode()
123
+ conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key3}&js_snippet={base64_snippet}")
124
  except:
125
+ try:
126
+ base64_snippet = base64.b64encode(js_snippet.encode()).decode()
127
+ conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key3}&js_snippet={base64_snippet}")
128
+ except:
129
+ return ''
130
  else:
131
  js_snippet = '''
132
  window.scrollTo(0,document.body.scrollHeight);
 
162
  # 1. Get Search Post Data
163
  async def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False,even=False):
164
  htmlContent = getHtmlContent(search_keyword,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
165
+ print("htmlcontentBefore",htmlContent[:100])
 
166
  if htmlContent is None:
167
+ htmlContent = retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
168
+
169
+ if htmlContent is None:
170
+ return None
171
+ print("htmlcontentAfter",htmlContent[:100])
172
  time.sleep(1)
173
  print("reached this step")
174
  df = getDFofSearchPostData(htmlContent)
 
219
 
220
  return comment_data
221
 
222
+ async def fetch_submission_comments(url, reddit, is_for_competitor_analysis, max_retries=3):
 
 
 
223
  """
224
+ Fetch comments from a single Reddit submission given its URL with retry mechanism.
225
+
226
+ Args:
227
+ url (str): The URL of the Reddit submission
228
+ reddit (Reddit): Authenticated Reddit instance
229
+ is_for_competitor_analysis (bool): Flag to modify comment fetching behavior
230
+ max_retries (int, optional): Maximum number of retry attempts. Defaults to 3.
231
+
232
+ Returns:
233
+ dict or None: Processed comments and submission description, or None if failed
234
  """
235
+ # Configure logging
236
+ logger = logging.getLogger(__name__)
 
237
 
238
+ for attempt in range(max_retries):
239
+ try:
240
+ # Use asyncio.wait_for to add a timeout for loading the submission
241
+ submission = await asyncio.wait_for(reddit.submission(url=url), timeout=30)
242
 
243
+ # Load additional submission data
244
+ await submission.load()
245
 
246
+ # Expand comments up to the specified limit
247
+ await submission.comments.replace_more(limit=2)
 
 
 
248
 
249
+ # Initialize variables for comment processing
250
+ comments_data = []
251
+ comment_queue = list(submission.comments)
252
+ comment_count = 0
253
+ threshold = 20 if is_for_competitor_analysis else 40
 
 
 
 
 
254
 
255
+ while comment_queue:
256
+ if comment_count >= threshold:
257
+ break
258
+ comment = comment_queue.pop(0)
259
+ if isinstance(comment, Comment):
260
+ comment_data = await process_comment(
261
+ comment, reply_limit=2 if is_for_competitor_analysis else 3
262
+ )
263
+ comments_data.append(comment_data)
264
+ comment_count += 1
265
 
266
+ # Return processed comments
267
+ return {
268
+ "comments": comments_data,
269
+ "description": submission.selftext if submission.selftext else ""
270
+ }
271
 
272
+ except asyncio.TimeoutError:
273
+ logger.warning(f"Timeout on attempt {attempt + 1} for URL: {url}")
274
+ except Exception as e:
275
+ logger.error(f"Error on attempt {attempt + 1} for URL {url}: {e}")
276
 
277
+ # Implement exponential backoff with jitter
278
+ if attempt < max_retries - 1:
279
+ wait_time = (2 ** attempt)+1
280
+ logger.info(f"Waiting {wait_time:.2f} seconds before retry")
281
+ await asyncio.sleep(wait_time)
282
 
283
+ # Log final failure if all retries are exhausted
284
+ logger.error(f"Failed to fetch comments for URL after {max_retries} attempts: {url}")
285
+ return None
286
  async def getPostComments(file_name, is_for_competitor_analysis=False):
287
  """
288
  Fetch comments for posts listed in the CSV file and save the processed data.
test.py CHANGED
@@ -1,7 +1,11 @@
1
 
2
  # from databases.firebase_db import get_firebase_user_from_token
 
3
 
4
 
 
 
 
5
  # get_firebase_user_from_token(token="eyJhbGciOiJSUzI1NiIsImtpZCI6IjNmZDA3MmRmYTM4MDU2NzlmMTZmZTQxNzM4YzJhM2FkM2Y5MGIyMTQiLCJ0eXAiOiJKV1QifQ.eyJuYW1lIjoiaG9uZXkgYmFuc2FsIiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hL0FDZzhvY0tfUWpyTmtyeWhPbVd1eVkzTHZvTDN6YjcyNGstQzlaNGZnbjI1M21FdU1ndWFXbEE9czk2LWMiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vbmV4dGFuYWx5dGljcy0xM2JmYiIsImF1ZCI6Im5leHRhbmFseXRpY3MtMTNiZmIiLCJhdXRoX3RpbWUiOjE3MzMzOTY1MTQsInVzZXJfaWQiOiJIb3VvWjJOSWR5YkZZM05WbmtXRFozYlRBWjAzIiwic3ViIjoiSG91b1oyTklkeWJGWTNOVm5rV0RaM2JUQVowMyIsImlhdCI6MTczMzM5NjUxNCwiZXhwIjoxNzMzNDAwMTE0LCJlbWFpbCI6ImhvbmV5YmFuc2FsMjk2OEBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZmlyZWJhc2UiOnsiaWRlbnRpdGllcyI6eyJnb29nbGUuY29tIjpbIjExNTE0MDc4NTM4OTUzMTUxNDIyMSJdLCJlbWFpbCI6WyJob25leWJhbnNhbDI5NjhAZ21haWwuY29tIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ29vZ2xlLmNvbSJ9fQ.j15QwCVrfrF05m3Oq_Nr9WCGI4JNNtK9LTg2TkkjdQ592sDR78WyizKE-GDug1pxYEE36uPt2lARmMIid1xsH4ITwYLeCU7BoTEyHkxB8HknnvQC6VKLefxy9xFopqFwjdE90tPL2GkcwSFLw-_R5BwZ2QUOiK_8Sq48MfY08AiSwOmHgv1c1TRt4_XL0M-BvhxOGIqVappsm-x4iu75-81oiWA5eaY_HqzvruohYOMoKitVAN4NGnaxLecCE8GguByMIQ9mlc1lypqg6qGy16gYQotPEVfABCmk2bYY60OjdDXCGVUwSWO4BNSOLdSbcbiE_qRydBoSezpH262z2A")
6
 
7
  # from reddit.reddit_competitor_analysis import getCompetitorAnalysisReport
 
1
 
2
  # from databases.firebase_db import get_firebase_user_from_token
3
+ from apis.reddit_apis import call_get_competitor_analysis
4
 
5
 
6
+ competitor_analysis_result =call_get_competitor_analysis(user_query='significant challenges facing startups in 2024',fileName='posts_data_1734025420988523.csv')
7
+ print("competitor_analysis_result",competitor_analysis_result)
8
+
9
  # get_firebase_user_from_token(token="eyJhbGciOiJSUzI1NiIsImtpZCI6IjNmZDA3MmRmYTM4MDU2NzlmMTZmZTQxNzM4YzJhM2FkM2Y5MGIyMTQiLCJ0eXAiOiJKV1QifQ.eyJuYW1lIjoiaG9uZXkgYmFuc2FsIiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hL0FDZzhvY0tfUWpyTmtyeWhPbVd1eVkzTHZvTDN6YjcyNGstQzlaNGZnbjI1M21FdU1ndWFXbEE9czk2LWMiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vbmV4dGFuYWx5dGljcy0xM2JmYiIsImF1ZCI6Im5leHRhbmFseXRpY3MtMTNiZmIiLCJhdXRoX3RpbWUiOjE3MzMzOTY1MTQsInVzZXJfaWQiOiJIb3VvWjJOSWR5YkZZM05WbmtXRFozYlRBWjAzIiwic3ViIjoiSG91b1oyTklkeWJGWTNOVm5rV0RaM2JUQVowMyIsImlhdCI6MTczMzM5NjUxNCwiZXhwIjoxNzMzNDAwMTE0LCJlbWFpbCI6ImhvbmV5YmFuc2FsMjk2OEBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZmlyZWJhc2UiOnsiaWRlbnRpdGllcyI6eyJnb29nbGUuY29tIjpbIjExNTE0MDc4NTM4OTUzMTUxNDIyMSJdLCJlbWFpbCI6WyJob25leWJhbnNhbDI5NjhAZ21haWwuY29tIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ29vZ2xlLmNvbSJ9fQ.j15QwCVrfrF05m3Oq_Nr9WCGI4JNNtK9LTg2TkkjdQ592sDR78WyizKE-GDug1pxYEE36uPt2lARmMIid1xsH4ITwYLeCU7BoTEyHkxB8HknnvQC6VKLefxy9xFopqFwjdE90tPL2GkcwSFLw-_R5BwZ2QUOiK_8Sq48MfY08AiSwOmHgv1c1TRt4_XL0M-BvhxOGIqVappsm-x4iu75-81oiWA5eaY_HqzvruohYOMoKitVAN4NGnaxLecCE8GguByMIQ9mlc1lypqg6qGy16gYQotPEVfABCmk2bYY60OjdDXCGVUwSWO4BNSOLdSbcbiE_qRydBoSezpH262z2A")
10
 
11
  # from reddit.reddit_competitor_analysis import getCompetitorAnalysisReport