Spaces:
Runtime error
Runtime error
fixed bugs and added session apis
Browse files- apis/reddit_apis.py +32 -9
- apis/user.py +36 -2
- competitor_analysis_report_1734026339341401.json +1 -0
- competitor_analysis_report_1734026339341403.json +1 -0
- databases/supabase_db.py +18 -1
- models/session_model.py +1 -1
- new_pain_point_report.json +223 -0
- reddit/load_env.py +1 -0
- reddit/prompts.py +62 -14
- reddit/reddit_competitor_analysis.py +13 -10
- reddit/reddit_functions.py +3 -2
- reddit/reddit_pain_point_analysis.py +1 -1
- reddit/reddit_scraper.ipynb +17 -8
- reddit/reddit_search_scrapper.py +17 -14
- reddit/scraping.py +75 -48
- test.py +4 -0
apis/reddit_apis.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from typing import Annotated
|
| 2 |
from fastapi import Depends, HTTPException, APIRouter
|
|
|
|
| 3 |
from databases.firebase_db import get_firebase_user_from_token
|
| 4 |
from databases.supabase_db import create_user_session, save_competitor_analysis, save_pain_point_analysis, update_user_session
|
| 5 |
from models.competitor_analysis_model import CompetitorAnalysisModel
|
|
@@ -130,7 +131,7 @@ async def analyzeData(inputData:InputInfoModel,user_session:dict):
|
|
| 130 |
|
| 131 |
reddit_data_result = await getRedditData(user_query=keywords['query'], search_keywords=keywords['top_3_combinations'])
|
| 132 |
|
| 133 |
-
services_result = await getServices(
|
| 134 |
user_id=user_session['id'],
|
| 135 |
field_inputs=inputData.field_inputs,
|
| 136 |
user_query=keywords['query'],
|
|
@@ -142,39 +143,61 @@ async def analyzeData(inputData:InputInfoModel,user_session:dict):
|
|
| 142 |
'reddit_data': reddit_data_result,
|
| 143 |
'services_result': services_result
|
| 144 |
}
|
| 145 |
-
update_user_session(user_session=user_session,process_info=process_info)
|
| 146 |
except Exception as e:
|
| 147 |
print("Failed to run analyzeData ", e)
|
| 148 |
raise HTTPException(status_code=500, detail=str(f"Failed to run analyzeData : {e}"))
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
async def getServices( user_id:int, field_inputs:dict, user_query=None, fileName=None, uniqueFileId=None):
|
| 151 |
final_result= {}
|
|
|
|
| 152 |
if "Reddit" in field_inputs:
|
| 153 |
analysis_list= field_inputs['Reddit']
|
|
|
|
| 154 |
# Pain point analysis only
|
| 155 |
if reddit_services_names[0] in analysis_list:
|
| 156 |
pain_point_analysis_result=pain_point_analysis(user_query=user_query,fileName=fileName,uniqueFileId=uniqueFileId)
|
| 157 |
final_result['Reddit'] = {'pain_point_analysis':pain_point_analysis_result[2]}
|
| 158 |
if "details" not in pain_point_analysis_result[0].keys():
|
| 159 |
-
save_pain_point_analysis(data=PainPointAnalysisModel(
|
| 160 |
result=pain_point_analysis_result[0],
|
| 161 |
platform="Reddit",
|
| 162 |
query=user_query,
|
| 163 |
user_id=user_id
|
| 164 |
))
|
|
|
|
|
|
|
| 165 |
# Competitor analysis only
|
| 166 |
if reddit_services_names[1] in analysis_list:
|
| 167 |
-
competitor_analysis_result =await getCompetitorAnalysisData(user_query=user_query,fileName=fileName)
|
|
|
|
| 168 |
print("competitor_analysis_result",competitor_analysis_result)
|
| 169 |
-
temp=competitor_analysis_result
|
| 170 |
-
print("temp",temp)
|
| 171 |
final_result['Reddit'] = {'competitor_analysis':{"competitors_data": len(competitor_analysis_result['competitors_data']),
|
| 172 |
'e_time': competitor_analysis_result['e_time']}}
|
| 173 |
-
save_competitor_analysis(data=CompetitorAnalysisModel(
|
| 174 |
-
result=competitor_analysis_result['competitors_data'],
|
| 175 |
platform="Reddit",
|
| 176 |
query=user_query,
|
| 177 |
user_id=user_id,
|
| 178 |
all_competitors=competitor_analysis_result['all_competitor_data']
|
| 179 |
))
|
| 180 |
-
|
|
|
|
|
|
|
|
|
| 1 |
from typing import Annotated
|
| 2 |
from fastapi import Depends, HTTPException, APIRouter
|
| 3 |
+
import requests
|
| 4 |
from databases.firebase_db import get_firebase_user_from_token
|
| 5 |
from databases.supabase_db import create_user_session, save_competitor_analysis, save_pain_point_analysis, update_user_session
|
| 6 |
from models.competitor_analysis_model import CompetitorAnalysisModel
|
|
|
|
| 131 |
|
| 132 |
reddit_data_result = await getRedditData(user_query=keywords['query'], search_keywords=keywords['top_3_combinations'])
|
| 133 |
|
| 134 |
+
services_result,session_info_result = await getServices(
|
| 135 |
user_id=user_session['id'],
|
| 136 |
field_inputs=inputData.field_inputs,
|
| 137 |
user_query=keywords['query'],
|
|
|
|
| 143 |
'reddit_data': reddit_data_result,
|
| 144 |
'services_result': services_result
|
| 145 |
}
|
| 146 |
+
update_user_session(user_session=user_session,session_info=session_info_result,process_info=process_info)
|
| 147 |
except Exception as e:
|
| 148 |
print("Failed to run analyzeData ", e)
|
| 149 |
raise HTTPException(status_code=500, detail=str(f"Failed to run analyzeData : {e}"))
|
| 150 |
|
| 151 |
+
def call_get_competitor_analysis(user_query, fileName, timeout=None):
    """
    Call the competitor-analysis HTTP endpoint and return its JSON payload.

    Args:
        user_query: Value forwarded as the ``user_query`` query parameter.
        fileName: Value forwarded as the ``fileName`` query parameter.
        timeout: Optional timeout in seconds handed to ``requests.get``.
            ``None`` (the default) keeps the previous wait-forever behaviour.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        HTTPException: 502 when the endpoint answers with a non-200 status.
        requests.RequestException: Propagated unchanged on transport errors.
    """
    # NOTE(review): the host is 127.0.0.31, not 127.0.0.1 - confirm this is intended.
    url = "http://127.0.0.31:7860/getCompetitorAnalysis"  # Replace with your actual API URL

    params = {
        "user_query": user_query,
        "fileName": fileName,
        "isSolo": True,  # or False, depending on your needs
    }

    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to call API. Status code: {response.status_code}, Response: {response.text}")
        # Fail loudly: the caller indexes the result, so returning None here
        # would only surface later as an opaque TypeError.
        raise HTTPException(
            status_code=502,
            detail=f"Competitor analysis API returned status {response.status_code}",
        )

    result = response.json()
    print("Response:", result)
    # The payload was previously only printed; hand it back to the caller.
    return result
|
| 166 |
+
|
| 167 |
+
|
| 168 |
async def getServices( user_id:int, field_inputs:dict, user_query=None, fileName=None, uniqueFileId=None):
    """
    Run the Reddit analyses requested in ``field_inputs`` and persist their results.

    Args:
        user_id: Id stored on every saved analysis record.
        field_inputs: Mapping of platform name to the list of requested
            analyses; only the ``"Reddit"`` entry is handled here.
        user_query: Query forwarded to the analyses and stored with the results.
        fileName: File name forwarded to the analyses.
        uniqueFileId: Identifier forwarded to the pain point analysis.

    Returns:
        A ``(final_result, session_info_result)`` tuple. ``final_result`` holds
        per-platform summaries keyed by analysis name. ``session_info_result``
        maps each platform to a list of ``{analysis label: saved record id}``.

    Raises:
        HTTPException: 502 when the competitor analysis call yields no payload.
    """
    import asyncio  # local import so the block does not rely on a file-level import

    final_result = {}
    session_info_result = {}
    if "Reddit" not in field_inputs:
        return final_result, session_info_result

    analysis_list = field_inputs['Reddit']
    session_info_result['Reddit'] = []

    # Pain point analysis only
    if reddit_services_names[0] in analysis_list:
        pain_point_analysis_result = pain_point_analysis(
            user_query=user_query, fileName=fileName, uniqueFileId=uniqueFileId
        )
        # setdefault keeps both summaries when both analyses are requested;
        # plain assignment let the later branch overwrite this one.
        final_result.setdefault('Reddit', {})['pain_point_analysis'] = pain_point_analysis_result[2]
        if "details" not in pain_point_analysis_result[0]:
            p_session = save_pain_point_analysis(data=PainPointAnalysisModel(
                result=pain_point_analysis_result[0],
                platform="Reddit",
                query=user_query,
                user_id=user_id
            ))
            # Recorded only when a row was actually saved, so p_session is
            # always bound at this point.
            session_info_result['Reddit'].append({'Pain point analysis': p_session['id']})

    # Competitor analysis only
    if reddit_services_names[1] in analysis_list:
        # The helper performs a blocking HTTP request; run it in a worker
        # thread so the event loop stays free while waiting.
        competitor_analysis_result = await asyncio.to_thread(
            call_get_competitor_analysis, user_query=user_query, fileName=fileName
        )
        print("competitor_analysis_result", competitor_analysis_result)
        if not isinstance(competitor_analysis_result, dict):
            raise HTTPException(
                status_code=502,
                detail="Competitor analysis service returned no data",
            )

        competitors_data = competitor_analysis_result['competitors_data']
        final_result.setdefault('Reddit', {})['competitor_analysis'] = {
            "competitors_data": len(competitors_data),
            'e_time': competitor_analysis_result['e_time'],
        }
        c_session = save_competitor_analysis(data=CompetitorAnalysisModel(
            # The model is given a list; wrap a single report.
            result=competitors_data if isinstance(competitors_data, list) else [competitors_data],
            platform="Reddit",
            query=user_query,
            user_id=user_id,
            all_competitors=competitor_analysis_result['all_competitor_data']
        ))
        session_info_result['Reddit'].append({'Competitor analysis': c_session['id']})

    return final_result, session_info_result
|
apis/user.py
CHANGED
|
@@ -13,7 +13,7 @@ supabase_client = get_db_client()
|
|
| 13 |
|
| 14 |
@router.get("/users/profile",response_model=UserProfileResponseModel)
|
| 15 |
@time_execution
|
| 16 |
-
|
| 17 |
"""
|
| 18 |
Retrieve the profile information of the authenticated user.
|
| 19 |
|
|
@@ -42,4 +42,38 @@ async def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_toke
|
|
| 42 |
multidomain_cache.update("user",user_db["id"],user_data)
|
| 43 |
user_profile = UserProfileResponse(**user_data)
|
| 44 |
return UserProfileResponseModel(msg="user profile",data=user_profile)
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
@router.get("/users/profile",response_model=UserProfileResponseModel)
|
| 15 |
@time_execution
|
| 16 |
+
def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
|
| 17 |
"""
|
| 18 |
Retrieve the profile information of the authenticated user.
|
| 19 |
|
|
|
|
| 42 |
multidomain_cache.update("user",user_db["id"],user_data)
|
| 43 |
user_profile = UserProfileResponse(**user_data)
|
| 44 |
return UserProfileResponseModel(msg="user profile",data=user_profile)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@router.get("/users/sessions")
@time_execution
def get_user_sessions(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
    """
    Handle GET /users/sessions for the authenticated user.

    NOTE(review): despite the route name, this body loads the user's profile
    row from ``user_info`` and returns it as a profile response. It never
    reads any session data - confirm the intended behaviour.

    Args:
        user_db: Authenticated user record provided by the Firebase token dependency.

    Returns:
        A ``UserProfileResponseModel`` wrapping the user's profile.

    Raises:
        HTTPException: 404 when no non-deleted ``user_info`` row matches the user id.
    """
    status, user_data = multidomain_cache.get("user", user_db["id"])
    if not status:
        # Cache miss: read the row from the database.
        rows = (
            supabase_client.table("user_info")
            .select("*")
            .eq("id", user_db["id"])
            .eq('is_deleted', False)
            .execute()
            .data
        )
        if not rows:
            raise HTTPException(status_code=404, detail="User not found")

        user_data = rows[0]
        # add to multidomain_cache
        multidomain_cache.update("user", user_db["id"], user_data)

    user_profile = UserProfileResponse(**user_data)
    return UserProfileResponseModel(msg="user profile", data=user_profile)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@router.get("/users/session/{session_id}",response_model=UserProfileResponseModel)
@time_execution
def get_user_session_by_id(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
    """
    Handle GET /users/session/{session_id} for the authenticated user.

    NOTE(review): the ``session_id`` path parameter is not accepted by this
    function, and the body returns the user's profile rather than a session.
    Confirm the intended behaviour and response model.

    Args:
        user_db: Authenticated user record provided by the Firebase token dependency.

    Returns:
        A ``UserProfileResponseModel`` wrapping the user's profile.

    Raises:
        HTTPException: 404 when no non-deleted ``user_info`` row matches the user id.
    """
    status, user_data = multidomain_cache.get("user", user_db["id"])
    if not status:
        # Cache miss: read the row from the database.
        rows = (
            supabase_client.table("user_info")
            .select("*")
            .eq("id", user_db["id"])
            .eq('is_deleted', False)
            .execute()
            .data
        )
        if not rows:
            raise HTTPException(status_code=404, detail="User not found")

        user_data = rows[0]
        # add to multidomain_cache
        multidomain_cache.update("user", user_db["id"], user_data)

    user_profile = UserProfileResponse(**user_data)
    return UserProfileResponseModel(msg="user profile", data=user_profile)
|
competitor_analysis_report_1734026339341401.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"competitor_analysis": {"competitor_name": "ClickUp", "overview": {"date_range": "2022-09-20 to 2024-12-02", "total_posts_analyzed": 7, "total_comments_analyzed": 112}, "market_sentiment": {"overall": {"positive": "30", "neutral": "20", "negative": "50"}, "trend_over_time": {"2022-09": {"positive": "25", "neutral": "30", "negative": "45"}, "2023-06": {"positive": "35", "neutral": "15", "negative": "50"}, "2023-07": {"positive": "20", "neutral": "30", "negative": "50"}, "2024-05": {"positive": "40", "neutral": "20", "negative": "40"}, "2024-10": {"positive": "20", "neutral": "20", "negative": "60"}, "2024-12": {"positive": "30", "neutral": "30", "negative": "40"}}}, "pain_points": {"key_insights": ["Performance issues are a major concern for users.", "Complexity and overwhelming features are frequently criticized.", "Inconsistent user experience across different features and updates is reported.", "Inadequate handling of permissions and privacy settings is mentioned."], "pain_points": [{"category": "Performance Issues", "pain_point": "Slow loading times and performance problems", "frequency": "10", "sentiment_analysis": {"positive": "10", "neutral": "5", "negative": "85"}, "related_features": ["General performance", "Loading times", "Email Functionality"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "If ClickUp worked properly, it would be a great choice. 
It doesn\u2019t, to the point that it because unusable for my team due to performance issues.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Investigate and address performance bottlenecks.", "Optimize database queries and server-side processing.", "Implement caching strategies to reduce load times."]}, {"category": "Complexity and User Experience", "pain_point": "Overly complex interface and feature set", "frequency": "10", "sentiment_analysis": {"positive": "20", "neutral": "20", "negative": "60"}, "related_features": ["User interface", "Feature set", "Onboarding process"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "ClickUp has way too many features. It feels like the team is struggling to keep everything running smoothly and bug-free, while also making sure the UX/UI stays clean and user-friendly.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Simplify the user interface.", "Provide better onboarding and tutorials.", "Prioritize core features and gradually introduce new ones."]}, {"category": "Permissions and Privacy", "pain_point": "Issues with permissions and accidental sharing of private information", "frequency": "5", "sentiment_analysis": {"positive": "10", "neutral": "15", "negative": "75"}, "related_features": ["Permissions", "Access control", "Data privacy"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "Plus, clickup is feature rich, and we really liked it even if it comes with complexity. The only issue we had with the complexity is regarding privacy and share configurations. 
Team members accidentally shared private information.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Improve permissions management.", "Enhance privacy settings.", "Provide better user education on privacy controls."]}, {"category": "Agile and Scrum Support", "pain_point": "Inadequate support for Agile methodologies", "frequency": "5", "sentiment_analysis": {"positive": "20", "neutral": "25", "negative": "55"}, "related_features": ["Sprint planning", "Agile workflows", "Scrum boards"], "examples": [{"post_title": "ClickUp for Software project management - would you recommend it?", "comment": "I would not use it for sprint planning. We have been using it for a while and it doesn't work very well. It falls short with organization and sorting. Which makes prioritizing difficult.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/xjiuph/clickup_for_software_project_management_would_you/"}], "recommended_actions": ["Enhance Agile features.", "Improve integration with Agile tools.", "Provide more training and resources on Agile workflows."]}], "overall_insights": {"top_pain_points": ["Performance Issues", "Complexity and User Experience", "Permissions and Privacy"], "user_segments_most_affected": ["Software development teams", "Marketing teams", "Agencies"], "impact_on_product_development": ["Focus on performance optimization", "Prioritize user experience improvements", "Improve permissions and data privacy controls"]}}, "features_and_differentiators": [{"feature": "Task Management", "sentiment": "mixed", "mentions": "50", "related_comments": [{"comment": "We've been using Clickup for at least 4 years now and honestly it's by far one of the best project management tools.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "Clickup is great when it comes to management of tasks and breakdown each one to 
make life easier however I HATE the new update.", "upvotes": "2", "post_url": "https://www.reddit.com/r/clickup/comments/1h5144v/which_is_the_best_project_management_tool_trello/"}]}, {"feature": "Project Management", "sentiment": "mixed", "mentions": "40", "related_comments": [{"comment": "I like notion for data storage and notes. I like clickup for project management.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "If you take advantage of emails, create your own project templates, record your screen and have a lot of client assets you manage, its excellent. I run a small agency and Clickup is awesome.", "upvotes": "8", "post_url": "https://www.reddit.com/r/clickup/comments/14pm40m/does_clickup_make_sense_for/"}]}, {"feature": "Integrations", "sentiment": "positive", "mentions": "20", "related_comments": [{"comment": "The GitHub integration is very strong for us and it connects into pull requests, branches and similar with ease", "upvotes": "3", "post_url": "https://www.reddit.com/r/clickup/comments/xjiuph/clickup_for_software_project_management_would_you/"}]}], "sentiment_by_feature": {"Task Management": {"positive": "40", "neutral": "30", "negative": "30"}, "Project Management": {"positive": "30", "neutral": "40", "negative": "30"}, "Integrations": {"positive": "60", "neutral": "30", "negative": "10"}}, "audience_analysis": {"popular_subreddits": ["r/clickup", "r/projectmanagement", "r/selfhosted"], "user_segments": ["Freelancers", "Solopreneurs", "Small agencies", "Software development teams"]}, "pricing_feedback": {"value_perception": {"positive": "30", "neutral": "40", "negative": "30"}, "related_comments": [{"comment": "For the price and vast amount of tools, Clickup has definitely surpassed all my expectations.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "Pricing is awesome.", "upvotes": "2", "post_url": 
"https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}]}, "competitor_strengths": ["Extensive feature set", "Wide range of integrations", "Free plan available"], "competitor_weaknesses": ["Performance issues", "Complexity", "User experience inconsistencies"], "user_recommendations": ["Improve performance", "Simplify user interface", "Enhance Agile support"], "competitive_strategy": {"pricing_strategy": "Competitive pricing with a freemium model", "feature_improvement": "Focus on improving core features and user experience", "marketing_strategy": "Target specific user segments with tailored marketing campaigns"}}}
|
competitor_analysis_report_1734026339341403.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"competitor_analysis": {"competitor_name": "Notion", "overview": {"date_range": "2019-11-18 to 2024-11-19", "total_posts_analyzed": 16, "total_comments_analyzed": 264}, "market_sentiment": {"overall": {"positive": "45", "neutral": "30", "negative": "25"}, "trend_over_time": {"2021-03": {"positive": "40", "neutral": "35", "negative": "25"}, "2021-06": {"positive": "50", "neutral": "30", "negative": "20"}, "2021-09": {"positive": "42", "neutral": "33", "negative": "25"}, "2021-10": {"positive": "48", "neutral": "28", "negative": "24"}, "2021-12": {"positive": "45", "neutral": "35", "negative": "20"}, "2022-10": {"positive": "40", "neutral": "40", "negative": "20"}, "2023-02": {"positive": "30", "neutral": "40", "negative": "30"}, "2023-03": {"positive": "50", "neutral": "30", "negative": "20"}, "2023-06": {"positive": "60", "neutral": "25", "negative": "15"}, "2023-09": {"positive": "55", "neutral": "30", "negative": "15"}, "2023-12": {"positive": "40", "neutral": "35", "negative": "25"}, "2024-01": {"positive": "60", "neutral": "30", "negative": "10"}, "2024-03": {"positive": "50", "neutral": "40", "negative": "10"}, "2024-07": {"positive": "70", "neutral": "20", "negative": "10"}, "2024-09": {"positive": "60", "neutral": "30", "negative": "10"}, "2024-10": {"positive": "50", "neutral": "35", "negative": "15"}, "2024-11": {"positive": "40", "neutral": "40", "negative": "20"}}}, "pain_points": [{"category": "Offline Access", "pain_point": "Lack of offline access is a major drawback for many users.", "frequency": "8", "sentiment_analysis": {"positive": "1", "neutral": "2", "negative": "5"}, "related_features": ["offline_editing", "sync_speed"], "examples": [{"post_title": "You can now try Microsoft Loop, a Notion competitor with futuristic Office documents", "comment": "\"No offline access kills it for me. 
Full stop.\"", "upvotes": "216", "post_url": "https://www.reddit.com/r/Notion/comments/11zuxhi/you_can_now_try_microsoft_loop_a_notion/"}], "recommended_actions": ["Implement offline capabilities", "Improve sync speed and reliability"]}, {"category": "Collaboration Features", "pain_point": "Some users find the collaboration features lacking or difficult to use.", "frequency": "6", "sentiment_analysis": {"positive": "2", "neutral": "2", "negative": "2"}, "related_features": ["real-time_collaboration", "co-editing", "commenting"], "examples": [{"post_title": "You can now try Microsoft Loop, a Notion competitor with futuristic Office documents", "comment": "\"I don't care for collaboration functions.\"", "upvotes": "216", "post_url": "https://www.reddit.com/r/Notion/comments/11zuxhi/you_can_now_try_microsoft_loop_a_notion/"}], "recommended_actions": ["Improve the user interface for collaboration features", "Add more advanced collaboration features"]}], "features_and_differentiators": [{"feature": "Databases", "sentiment": "positive", "mentions": "15", "related_comments": [{"comment": "\"Notion specializes in relational databases using blocks.\"", "upvotes": null, "post_url": null}]}, {"feature": "Templates", "sentiment": "positive", "mentions": "10", "related_comments": [{"comment": "\"Notion is still leagues ahead in terms of cross-document search, templates, databases, etc.\"", "upvotes": null, "post_url": null}]}, {"feature": "UI", "sentiment": "mixed", "mentions": "12", "related_comments": [{"comment": "\"If MS didn\u2019t copy Notions UI, the parallels between the two products would be minimal.\"", "upvotes": null, "post_url": null}]}], "sentiment_by_feature": {"Databases": {"positive": "70", "neutral": "20", "negative": "10"}, "Templates": {"positive": "80", "neutral": "15", "negative": "5"}, "UI": {"positive": "50", "neutral": "30", "negative": "20"}}, "audience_analysis": {"popular_subreddits": ["r/Notion", "r/selfhosted", "r/UI_Design", "r/todoist", 
"r/learnpython"], "user_segments": ["students", "small business owners", "knowledge workers", "designers", "developers"]}, "pricing_feedback": {"value_perception": {"positive": "60", "neutral": "30", "negative": "10"}, "related_comments": [{"comment": "\"Google is already free, while Notion charges per person and it's costly.\"", "upvotes": null, "post_url": null}]}, "competitor_strengths": ["intuitive interface", "powerful databases", "versatile features", "large user community", "extensive integrations"], "competitor_weaknesses": ["lack of offline access", "pricing", "some collaboration features need improvement", "not ideal for public-facing documentation"], "user_recommendations": ["improve offline functionality", "add version control", "enhance collaboration features", "consider a free tier"], "competitive_strategy": {"pricing_strategy": "freemium model", "feature_improvement": "continuously adding new features and improving existing ones"}}}
|
databases/supabase_db.py
CHANGED
|
@@ -30,7 +30,7 @@ def create_user_with_id(external_id,email):
|
|
| 30 |
return user
|
| 31 |
|
| 32 |
# create user session
|
| 33 |
-
def create_user_session(user_id: int, input_info: InputInfoModel)
|
| 34 |
"""
|
| 35 |
Creates a new user session in the database.
|
| 36 |
|
|
@@ -89,6 +89,7 @@ def update_user_session(user_session: dict,session_info:dict=None, process_info:
|
|
| 89 |
def save_pain_point_analysis(data:PainPointAnalysisModel)->None:
|
| 90 |
try:
|
| 91 |
data= db_client.table("pain_point_analysis").insert(data.model_dump()).execute().data
|
|
|
|
| 92 |
except Exception as e:
|
| 93 |
print("Failed to save pain point analysis:", e)
|
| 94 |
raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
|
|
@@ -96,6 +97,22 @@ def save_pain_point_analysis(data:PainPointAnalysisModel)->None:
|
|
| 96 |
def save_competitor_analysis(data:CompetitorAnalysisModel)->None:
|
| 97 |
try:
|
| 98 |
data= db_client.table("competitor_analysis").insert(data.model_dump()).execute().data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
print("Failed to save pain point analysis:", e)
|
| 101 |
raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
|
|
|
|
| 30 |
return user
|
| 31 |
|
| 32 |
# create user session
|
| 33 |
+
def create_user_session(user_id: int, input_info: InputInfoModel):
|
| 34 |
"""
|
| 35 |
Creates a new user session in the database.
|
| 36 |
|
|
|
|
| 89 |
def save_pain_point_analysis(data:PainPointAnalysisModel)->dict:
    """
    Insert a pain point analysis record and return the stored row.

    Args:
        data: Analysis to persist; its ``model_dump()`` output is inserted
            into the ``pain_point_analysis`` table.

    Returns:
        The first row reported back by the insert.

    Raises:
        HTTPException: 500 when the insert fails or reports no rows.
    """
    try:
        rows = db_client.table("pain_point_analysis").insert(data.model_dump()).execute().data
        return rows[0]
    except Exception as e:
        print("Failed to save pain point analysis:", e)
        # Chain the cause so the original failure stays visible in tracebacks.
        raise HTTPException(status_code=500, detail="Failed to save pain point analysis") from e
|
|
|
|
| 97 |
def save_competitor_analysis(data:CompetitorAnalysisModel)->dict:
    """
    Insert a competitor analysis record and return the stored row.

    Args:
        data: Analysis to persist; its ``model_dump()`` output is inserted
            into the ``competitor_analysis`` table.

    Returns:
        The first row reported back by the insert.

    Raises:
        HTTPException: 500 when the insert fails or reports no rows.
    """
    try:
        rows = db_client.table("competitor_analysis").insert(data.model_dump()).execute().data
        return rows[0]
    except Exception as e:
        # The messages previously named the pain point analysis (copy-paste),
        # which pointed debugging at the wrong table.
        print("Failed to save competitor analysis:", e)
        raise HTTPException(status_code=500, detail="Failed to save competitor analysis") from e
|
| 104 |
+
|
| 105 |
+
def get_user_sessions(user_id: int)->list[dict]:
    """
    Fetch all non-deleted session rows belonging to a user.

    Args:
        user_id: Value matched against the ``user_id`` column of ``sessions``.

    Returns:
        The rows returned by the query, as delivered by the database client.
        NOTE(review): presumably each row matches ``UserSessionModel`` -
        confirm before validating rows into that model.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        rows = (
            db_client
            .table('sessions')
            .select("*")
            .eq('user_id', user_id)
            .eq('is_deleted', False)
            .execute()
            .data
        )
        # The visible lines fetched the rows but never returned them.
        return rows
    except Exception as e:
        print("Failed to fetch user sessions:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user sessions") from e
|
models/session_model.py
CHANGED
|
@@ -31,4 +31,4 @@ class UserSessionModel(BaseModel):
|
|
| 31 |
session_info: dict
|
| 32 |
process_info: Optional[dict] = None
|
| 33 |
session_completed: Optional[bool] = False
|
| 34 |
-
|
|
|
|
| 31 |
session_info: dict
|
| 32 |
process_info: Optional[dict] = None
|
| 33 |
session_completed: Optional[bool] = False
|
| 34 |
+
is_deleted: Optional[bool] = False
|
new_pain_point_report.json
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"report_title": "Pain Point Analysis Report for Startups",
|
| 3 |
+
"date_generated": "2024-12-07",
|
| 4 |
+
"target_audience": {
|
| 5 |
+
"industry": "Startups",
|
| 6 |
+
"primary_subreddits": [
|
| 7 |
+
"startups",
|
| 8 |
+
"Entrepreneur",
|
| 9 |
+
"Startup_Ideas",
|
| 10 |
+
"BlockchainStartups",
|
| 11 |
+
"ecommerce",
|
| 12 |
+
"smallbusiness",
|
| 13 |
+
"EntrepreneurRideAlong",
|
| 14 |
+
"StartUpIndia",
|
| 15 |
+
"SaaSToolbox",
|
| 16 |
+
"marketing",
|
| 17 |
+
"DACXI",
|
| 18 |
+
"biotech"
|
| 19 |
+
],
|
| 20 |
+
"audience_demographics": {
|
| 21 |
+
"age_range": "25-45",
|
| 22 |
+
"interests": [
|
| 23 |
+
"entrepreneurship",
|
| 24 |
+
"business",
|
| 25 |
+
"technology",
|
| 26 |
+
"marketing",
|
| 27 |
+
"sales",
|
| 28 |
+
"funding",
|
| 29 |
+
"team building",
|
| 30 |
+
"AI"
|
| 31 |
+
],
|
| 32 |
+
"regions": [
|
| 33 |
+
"Global"
|
| 34 |
+
]
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"analysis_summary": {
|
| 38 |
+
"total_posts_analyzed": 12,
|
| 39 |
+
"total_comments_analyzed": 598,
|
| 40 |
+
"time_period": "2023-12-17 - 2024-12-01",
|
| 41 |
+
"key_findings": [
|
| 42 |
+
"Recurring frustration with securing funding, particularly for early-stage startups.",
|
| 43 |
+
"High demand for effective marketing and sales strategies, especially for reaching target audiences.",
|
| 44 |
+
"Negative sentiment about the challenges of team building and talent acquisition."
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
"pain_points": [
|
| 48 |
+
{
|
| 49 |
+
"pain_point_id": 1,
|
| 50 |
+
"description": "Difficulty securing funding, especially for early-stage startups.",
|
| 51 |
+
"examples_from_reddit": [
|
| 52 |
+
{
|
| 53 |
+
"post_id": "1b9g0l1",
|
| 54 |
+
"post_url": "https://www.reddit.com/r/Entrepreneur/comments/1b9g0l1/what_are_the_common_struggles_of_a_small_business/",
|
| 55 |
+
"text_snippet": "Money"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"post_id": "1h46s6w",
|
| 59 |
+
"post_url": "https://www.reddit.com/r/StartUpIndia/comments/1h46s6w/starting_a_startup_in_india_key_ventures_and/",
|
| 60 |
+
"text_snippet": "Funding Gaps: Tap into angel investors, crowdfunding platforms, and state-backed funds."
|
| 61 |
+
}
|
| 62 |
+
],
|
| 63 |
+
"impact_analysis": {
|
| 64 |
+
"frequency": "High",
|
| 65 |
+
"audience_size_affected": "Large",
|
| 66 |
+
"sentiment_trend": "Negative"
|
| 67 |
+
},
|
| 68 |
+
"actionable_recommendations": [
|
| 69 |
+
"Explore alternative funding options like crowdfunding (e.g., Kickstarter, Indiegogo), angel investors (e.g., AngelList), or government grants (e.g., SBIR, STTR).",
|
| 70 |
+
"Develop a compelling business plan and pitch deck to attract investors. Utilize resources like the Sequoia Capital Pitch Deck template or the Y Combinator application advice."
|
| 71 |
+
],
|
| 72 |
+
"tools_and_technologies": [
|
| 73 |
+
"Crunchbase",
|
| 74 |
+
"Pitchbook",
|
| 75 |
+
"Gust",
|
| 76 |
+
"DocSend"
|
| 77 |
+
],
|
| 78 |
+
"methods": [
|
| 79 |
+
"Lean Startup methodology",
|
| 80 |
+
"Value Proposition Design"
|
| 81 |
+
],
|
| 82 |
+
"case_studies": [
|
| 83 |
+
"Mailchimp's bootstrapping success",
|
| 84 |
+
"Airbnb's early crowdfunding campaign"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"pain_point_id": 2,
|
| 89 |
+
"description": "Intense competition, requiring strong differentiation and effective marketing.",
|
| 90 |
+
"examples_from_reddit": [
|
| 91 |
+
{
|
| 92 |
+
"post_id": "1dwtb7l",
|
| 93 |
+
"post_url": "https://www.reddit.com/r/Entrepreneur/comments/1dwtb7l/whats_the_worse_business_to_start_in_2024/",
|
| 94 |
+
"text_snippet": "Restaurants gotta be no 1. No matter the circumstances. It’s most likely to fail."
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"post_id": "1fdig1p",
|
| 98 |
+
"post_url": "https://www.reddit.com/r/marketing/comments/1fdig1p/what_are_some_marketing_challenges_that_startups/",
|
| 99 |
+
"text_snippet": "The startup market was the new gold rush 10-15 years ago, but nowadays, I think a lot of newer startups are dealing with the oversaturation problem."
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"impact_analysis": {
|
| 103 |
+
"frequency": "High",
|
| 104 |
+
"audience_size_affected": "Large",
|
| 105 |
+
"sentiment_trend": "Negative"
|
| 106 |
+
},
|
| 107 |
+
"actionable_recommendations": [
|
| 108 |
+
"Conduct thorough market research to identify unmet needs and differentiate from competitors. Use tools like SWOT analysis and Porter's Five Forces.",
|
| 109 |
+
"Develop a strong value proposition that resonates with the target audience. Consider using the Value Proposition Canvas."
|
| 110 |
+
],
|
| 111 |
+
"tools_and_technologies": [
|
| 112 |
+
"SEMrush",
|
| 113 |
+
"Ahrefs",
|
| 114 |
+
"SimilarWeb",
|
| 115 |
+
"Brand24"
|
| 116 |
+
],
|
| 117 |
+
"methods": [
|
| 118 |
+
"Blue Ocean Strategy",
|
| 119 |
+
"Competitive Analysis Framework"
|
| 120 |
+
],
|
| 121 |
+
"case_studies": [
|
| 122 |
+
"Dollar Shave Club's disruption of the razor market",
|
| 123 |
+
"Tesla's creation of the electric vehicle market"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"pain_point_id": 3,
|
| 128 |
+
"description": "Difficulty with marketing and sales, and reaching the target audience due to lack of expertise and resources.",
|
| 129 |
+
"examples_from_reddit": [
|
| 130 |
+
{
|
| 131 |
+
"post_id": "1c63gqn",
|
| 132 |
+
"post_url": "https://www.reddit.com/r/Startup_Ideas/comments/1c63gqn/what_are_your_problems_as_a_startup/",
|
| 133 |
+
"text_snippet": "…hiring the right marketing expert is often difficult and expensive…managing the workflow…can also be overwhelming"
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"post_id": "18ktmtr",
|
| 137 |
+
"post_url": "https://www.reddit.com/r/startups/comments/18ktmtr/solo_technical_founders_when_it_came_to_sales/",
|
| 138 |
+
"text_snippet": "I like coding and building new stuff but I don't enjoy as much the marketing and sales side."
|
| 139 |
+
}
|
| 140 |
+
],
|
| 141 |
+
"impact_analysis": {
|
| 142 |
+
"frequency": "Very High",
|
| 143 |
+
"audience_size_affected": "Large",
|
| 144 |
+
"sentiment_trend": "Negative"
|
| 145 |
+
},
|
| 146 |
+
"actionable_recommendations": [
|
| 147 |
+
"Develop a clear marketing strategy with measurable goals and KPIs. Utilize frameworks like the AIDA model or the marketing mix (4Ps).",
|
| 148 |
+
"Explore cost-effective advertising channels like social media ads (e.g., Facebook Ads, Twitter Ads), influencer marketing, content marketing, or search engine optimization (SEO)."
|
| 149 |
+
],
|
| 150 |
+
"tools_and_technologies": [
|
| 151 |
+
"HubSpot",
|
| 152 |
+
"Mailchimp",
|
| 153 |
+
"Buffer",
|
| 154 |
+
"Google Analytics",
|
| 155 |
+
"Canva"
|
| 156 |
+
],
|
| 157 |
+
"methods": [
|
| 158 |
+
"Inbound marketing",
|
| 159 |
+
"Growth hacking",
|
| 160 |
+
"Sales funnels"
|
| 161 |
+
],
|
| 162 |
+
"case_studies": [
|
| 163 |
+
"HubSpot's inbound marketing success",
|
| 164 |
+
"Dropbox's referral program"
|
| 165 |
+
]
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"pain_point_id": 4,
|
| 169 |
+
"description": "Difficulty building a strong team and acquiring talent.",
|
| 170 |
+
"examples_from_reddit": [
|
| 171 |
+
{
|
| 172 |
+
"post_id": "1dr44rv",
|
| 173 |
+
"post_url": "https://www.reddit.com/r/BlockchainStartups/comments/1dr44rv/advice_needed_launching_a_blockchain_startup_in/",
|
| 174 |
+
"text_snippet": "Building a Tech Team: Look for developers with experience in blockchain technologies…"
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"post_id": "1c33eoz",
|
| 178 |
+
"post_url": "https://www.reddit.com/r/startups/comments/1c33eoz/what_has_been_your_biggest_challenge_when_growing/",
|
| 179 |
+
"text_snippet": "Getting a good team, a C level team... Still not able to and have been trying for 3 years now"
|
| 180 |
+
}
|
| 181 |
+
],
|
| 182 |
+
"impact_analysis": {
|
| 183 |
+
"frequency": "High",
|
| 184 |
+
"audience_size_affected": "Large",
|
| 185 |
+
"sentiment_trend": "Very Negative"
|
| 186 |
+
},
|
| 187 |
+
"actionable_recommendations": [
|
| 188 |
+
"Develop a clear hiring strategy and define roles and responsibilities carefully. Use tools like job scorecards and structured interviews.",
|
| 189 |
+
"Offer competitive compensation and benefits. Consider equity options and flexible work arrangements."
|
| 190 |
+
],
|
| 191 |
+
"tools_and_technologies": [
|
| 192 |
+
"LinkedIn",
|
| 193 |
+
"Indeed",
|
| 194 |
+
"Glassdoor",
|
| 195 |
+
"BambooHR"
|
| 196 |
+
],
|
| 197 |
+
"methods": [
|
| 198 |
+
"Employer branding",
|
| 199 |
+
"Employee referral programs",
|
| 200 |
+
"Culture fit assessments"
|
| 201 |
+
],
|
| 202 |
+
"case_studies": [
|
| 203 |
+
"Netflix's culture deck",
|
| 204 |
+
"Zappos' emphasis on company culture"
|
| 205 |
+
]
|
| 206 |
+
}
|
| 207 |
+
],
|
| 208 |
+
"opportunities_identified": [
|
| 209 |
+
"Focus on niche markets to reduce competition.",
|
| 210 |
+
"Highlight unique value proposition and early traction to attract investors."
|
| 211 |
+
],
|
| 212 |
+
"conclusion": {
|
| 213 |
+
"summary_of_key_findings": [
|
| 214 |
+
"Funding, competition, marketing & sales, and team building are the top challenges for startups.",
|
| 215 |
+
"Early-stage and bootstrapped startups are particularly affected."
|
| 216 |
+
],
|
| 217 |
+
"next_steps": [
|
| 218 |
+
"Prioritize product development based on identified pain points.",
|
| 219 |
+
"Develop targeted marketing strategies.",
|
| 220 |
+
"Continuously monitor Reddit and other relevant platforms for emerging trends."
|
| 221 |
+
]
|
| 222 |
+
}
|
| 223 |
+
}
|
reddit/load_env.py
CHANGED
|
@@ -36,3 +36,4 @@ reddit_username = os.getenv('REDDIT_USERNAME')
|
|
| 36 |
# ScraperANT
|
| 37 |
scraper_ant_api_key = os.getenv('SCRAPERANT_APIKEY')
|
| 38 |
scraper_ant_api_key2 = os.getenv('SCRAPERANT_APIKEY2')
|
|
|
|
|
|
| 36 |
# ScraperANT
|
| 37 |
scraper_ant_api_key = os.getenv('SCRAPERANT_APIKEY')
|
| 38 |
scraper_ant_api_key2 = os.getenv('SCRAPERANT_APIKEY2')
|
| 39 |
+
scraper_ant_api_key3 = os.getenv('SCRAPERANT_APIKEY3')
|
reddit/prompts.py
CHANGED
|
@@ -119,7 +119,25 @@ Return the response in the **JSON format** provided below, and include data for
|
|
| 119 |
Here is the required JSON format:
|
| 120 |
{{
|
| 121 |
"pain_point_analysis": {{
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
"pain_points": [
|
| 124 |
{{
|
| 125 |
"category": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
|
|
@@ -142,12 +160,30 @@ Here is the required JSON format:
|
|
| 142 |
.
|
| 143 |
.
|
| 144 |
],
|
| 145 |
-
"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
]
|
| 152 |
}},
|
| 153 |
.
|
|
@@ -155,16 +191,28 @@ Here is the required JSON format:
|
|
| 155 |
.
|
| 156 |
similarly, for all remaining categories
|
| 157 |
],
|
| 158 |
-
"
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
"impact_on_product_development": [
|
| 162 |
-
"Insight for development 1",
|
| 163 |
-
"Insight for development 2",
|
| 164 |
.
|
| 165 |
.
|
| 166 |
.
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
}}
|
| 169 |
}}
|
| 170 |
}}
|
|
|
|
| 119 |
Here is the required JSON format:
|
| 120 |
{{
|
| 121 |
"pain_point_analysis": {{
|
| 122 |
+
"target_audience": {{
|
| 123 |
+
"industry": "Industry Name",
|
| 124 |
+
"primary_subreddits": [
|
| 125 |
+
"Subreddit 1",
|
| 126 |
+
"Subreddit 2",
|
| 127 |
+
.
|
| 128 |
+
.
|
| 129 |
+
.
|
| 130 |
+
],
|
| 131 |
+
}},
|
| 132 |
+
"analysis_summary": {{
|
| 133 |
+
"key_findings": [
|
| 134 |
+
"Key finding 1",
|
| 135 |
+
"Key finding 2",
|
| 136 |
+
.
|
| 137 |
+
.
|
| 138 |
+
.
|
| 139 |
+
]
|
| 140 |
+
}},
|
| 141 |
"pain_points": [
|
| 142 |
{{
|
| 143 |
"category": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
|
|
|
|
| 160 |
.
|
| 161 |
.
|
| 162 |
],
|
| 163 |
+
"impact_analysis": {{
|
| 164 |
+
"frequency": "High/Medium/Low",
|
| 165 |
+
"audience_size_affected": "Large/Medium/Small",
|
| 166 |
+
}},
|
| 167 |
+
"actionable_recommendations": [
|
| 168 |
+
"Recommendation 1",
|
| 169 |
+
"Recommendation 2",
|
| 170 |
+
.
|
| 171 |
+
.
|
| 172 |
+
.
|
| 173 |
+
],
|
| 174 |
+
"methods": [
|
| 175 |
+
"Method 1",
|
| 176 |
+
"Method 2",
|
| 177 |
+
.
|
| 178 |
+
.
|
| 179 |
+
.
|
| 180 |
+
],
|
| 181 |
+
"case_studies": [
|
| 182 |
+
"Case study 1",
|
| 183 |
+
"Case study 2",
|
| 184 |
+
.
|
| 185 |
+
.
|
| 186 |
+
.
|
| 187 |
]
|
| 188 |
}},
|
| 189 |
.
|
|
|
|
| 191 |
.
|
| 192 |
similarly, for all remaining categories
|
| 193 |
],
|
| 194 |
+
"opportunities_identified": [
|
| 195 |
+
"Opportunity 1",
|
| 196 |
+
"Opportunity 2",
|
|
|
|
|
|
|
|
|
|
| 197 |
.
|
| 198 |
.
|
| 199 |
.
|
| 200 |
+
],
|
| 201 |
+
"conclusion": {{
|
| 202 |
+
"summary_of_key_findings": [
|
| 203 |
+
"Summary of Key Findings 1",
|
| 204 |
+
"Summary of Key Findings 2",
|
| 205 |
+
.
|
| 206 |
+
.
|
| 207 |
+
.
|
| 208 |
+
],
|
| 209 |
+
"next_steps": [
|
| 210 |
+
"Next Step 1",
|
| 211 |
+
"Next Step 2",
|
| 212 |
+
.
|
| 213 |
+
.
|
| 214 |
+
.
|
| 215 |
+
]
|
| 216 |
}}
|
| 217 |
}}
|
| 218 |
}}
|
reddit/reddit_competitor_analysis.py
CHANGED
|
@@ -127,8 +127,11 @@ async def getPostDataofCompetitor(fileName,user_query):
|
|
| 127 |
actual_list.append(index)
|
| 128 |
print("Fetched data for competitors")
|
| 129 |
fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
|
|
@@ -141,14 +144,14 @@ async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
|
|
| 141 |
await getPostComments(file_name=fileNames[i],is_for_competitor_analysis=True)
|
| 142 |
json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
|
| 143 |
c+=1
|
| 144 |
-
# if json_data does contain "details" field, then
|
| 145 |
-
if "details" in json_data.keys():
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
|
| 153 |
for file_path in fileNames:
|
| 154 |
# Check if the file exists before attempting to delete
|
|
|
|
| 127 |
actual_list.append(index)
|
| 128 |
print("Fetched data for competitors")
|
| 129 |
fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
|
| 130 |
+
if len(fileNames)!=0:
|
| 131 |
+
result=await preprocessingCompetitorsData(user_query=user_query,fileNames=fileNames,fileUniqueIds=actual_list)
|
| 132 |
+
return result
|
| 133 |
+
else:
|
| 134 |
+
return {'details':'No data found'}
|
| 135 |
|
| 136 |
|
| 137 |
async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
|
|
|
|
| 144 |
await getPostComments(file_name=fileNames[i],is_for_competitor_analysis=True)
|
| 145 |
json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
|
| 146 |
c+=1
|
| 147 |
+
# if json_data does not contain "details" field, then only save the json
|
| 148 |
+
if "details" not in json_data.keys():
|
| 149 |
+
# save json_data to json file
|
| 150 |
+
with open(f"competitor_analysis_report_{fileUniqueIds[i]}.json", "w") as outfile:
|
| 151 |
+
json.dump(json_data, outfile)
|
| 152 |
+
print("Competitor Analysis Report",f"competitor_analysis_report_{fileUniqueIds[i]}.json")
|
| 153 |
+
competitors_json_data.append(json_data)
|
| 154 |
+
|
| 155 |
|
| 156 |
for file_path in fileNames:
|
| 157 |
# Check if the file exists before attempting to delete
|
reddit/reddit_functions.py
CHANGED
|
@@ -27,8 +27,9 @@ async def getRedditData(user_query, search_keywords):
|
|
| 27 |
# Step 3: Get final data
|
| 28 |
try:
|
| 29 |
print("fileNames", fileNames)
|
| 30 |
-
getFinalData(user_query=user_query, filesNames=fileNames)
|
| 31 |
-
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
print(f"Failed at getFinalData: {e}")
|
| 34 |
|
|
|
|
| 27 |
# Step 3: Get final data
|
| 28 |
try:
|
| 29 |
print("fileNames", fileNames)
|
| 30 |
+
res=getFinalData(user_query=user_query, filesNames=fileNames)
|
| 31 |
+
if res is True:
|
| 32 |
+
successful_steps.append(('getFinalData')) # Mark this step as successful
|
| 33 |
except Exception as e:
|
| 34 |
print(f"Failed at getFinalData: {e}")
|
| 35 |
|
reddit/reddit_pain_point_analysis.py
CHANGED
|
@@ -17,7 +17,7 @@ def pain_point_analysis(user_query, fileName, uniqueFileId):
|
|
| 17 |
pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
|
| 18 |
generation_config = genai.GenerationConfig(response_mime_type="application/json") # Request JSON response
|
| 19 |
model = genai.GenerativeModel(
|
| 20 |
-
model_name="gemini-1.5-pro-002" if environment=
|
| 21 |
generation_config=generation_config,
|
| 22 |
)
|
| 23 |
|
|
|
|
| 17 |
pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
|
| 18 |
generation_config = genai.GenerationConfig(response_mime_type="application/json") # Request JSON response
|
| 19 |
model = genai.GenerativeModel(
|
| 20 |
+
model_name="gemini-1.5-pro-002" if environment!="PRODUCTION" else "gemini-1.5-flash",
|
| 21 |
generation_config=generation_config,
|
| 22 |
)
|
| 23 |
|
reddit/reddit_scraper.ipynb
CHANGED
|
@@ -7379,7 +7379,9 @@
|
|
| 7379 |
"text": [
|
| 7380 |
"post_elements 29\n",
|
| 7381 |
"another_post_elements 20\n",
|
| 7382 |
-
"49\n"
|
|
|
|
|
|
|
| 7383 |
]
|
| 7384 |
}
|
| 7385 |
],
|
|
@@ -7415,13 +7417,15 @@
|
|
| 7415 |
" \n",
|
| 7416 |
" # Extract post title\n",
|
| 7417 |
" post_title = post_title_element.text.strip() if post_title_element else None\n",
|
| 7418 |
-
"
|
|
|
|
|
|
|
| 7419 |
" # Extract votes count\n",
|
| 7420 |
-
" votes_element =
|
| 7421 |
" votes_count = votes_element.text.strip() if votes_element else None\n",
|
| 7422 |
" \n",
|
| 7423 |
" # Extract comments count\n",
|
| 7424 |
-
" comments_element =
|
| 7425 |
" comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
|
| 7426 |
" \n",
|
| 7427 |
" # Append data to the list\n",
|
|
@@ -7447,12 +7451,14 @@
|
|
| 7447 |
" # Extract post title\n",
|
| 7448 |
" post_title = post_title_element.text.strip() if post_title_element else None\n",
|
| 7449 |
" \n",
|
|
|
|
|
|
|
| 7450 |
" # Extract votes count\n",
|
| 7451 |
-
" votes_element =
|
| 7452 |
" votes_count = votes_element.text.strip() if votes_element else None\n",
|
| 7453 |
" \n",
|
| 7454 |
" # Extract comments count\n",
|
| 7455 |
-
" comments_element =
|
| 7456 |
" comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
|
| 7457 |
" \n",
|
| 7458 |
" # Append data to the list\n",
|
|
@@ -7470,10 +7476,13 @@
|
|
| 7470 |
"import pandas as pd \n",
|
| 7471 |
"\n",
|
| 7472 |
"df = pd.DataFrame(post_data_list)\n",
|
| 7473 |
-
"
|
|
|
|
| 7474 |
"# Print the list of posts data\n",
|
| 7475 |
"# for idx, post_data in enumerate(post_data_list, 1):\n",
|
| 7476 |
-
"# print(f\"Post {idx}: {post_data}\")\n"
|
|
|
|
|
|
|
| 7477 |
]
|
| 7478 |
},
|
| 7479 |
{
|
|
|
|
| 7379 |
"text": [
|
| 7380 |
"post_elements 29\n",
|
| 7381 |
"another_post_elements 20\n",
|
| 7382 |
+
"49\n",
|
| 7383 |
+
"49\n",
|
| 7384 |
+
"len 49\n"
|
| 7385 |
]
|
| 7386 |
}
|
| 7387 |
],
|
|
|
|
| 7417 |
" \n",
|
| 7418 |
" # Extract post title\n",
|
| 7419 |
" post_title = post_title_element.text.strip() if post_title_element else None\n",
|
| 7420 |
+
"\n",
|
| 7421 |
+
" bottom_element = post.find('div', {'data-testid': 'search-counter-row'})\n",
|
| 7422 |
+
"\n",
|
| 7423 |
" # Extract votes count\n",
|
| 7424 |
+
" votes_element = bottom_element.find('faceplate-number', {'pretty': True})\n",
|
| 7425 |
" votes_count = votes_element.text.strip() if votes_element else None\n",
|
| 7426 |
" \n",
|
| 7427 |
" # Extract comments count\n",
|
| 7428 |
+
" comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})\n",
|
| 7429 |
" comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
|
| 7430 |
" \n",
|
| 7431 |
" # Append data to the list\n",
|
|
|
|
| 7451 |
" # Extract post title\n",
|
| 7452 |
" post_title = post_title_element.text.strip() if post_title_element else None\n",
|
| 7453 |
" \n",
|
| 7454 |
+
" bottom_element = post.find('div', {'data-testid': 'search-counter-row'})\n",
|
| 7455 |
+
"\n",
|
| 7456 |
" # Extract votes count\n",
|
| 7457 |
+
" votes_element = bottom_element.find('faceplate-number', {'pretty': True})\n",
|
| 7458 |
" votes_count = votes_element.text.strip() if votes_element else None\n",
|
| 7459 |
" \n",
|
| 7460 |
" # Extract comments count\n",
|
| 7461 |
+
" comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})\n",
|
| 7462 |
" comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
|
| 7463 |
" \n",
|
| 7464 |
" # Append data to the list\n",
|
|
|
|
| 7476 |
"import pandas as pd \n",
|
| 7477 |
"\n",
|
| 7478 |
"df = pd.DataFrame(post_data_list)\n",
|
| 7479 |
+
"print(len(df))\n",
|
| 7480 |
+
"# df.to_csv(\"posts_data78.csv\",index=False)\n",
|
| 7481 |
"# Print the list of posts data\n",
|
| 7482 |
"# for idx, post_data in enumerate(post_data_list, 1):\n",
|
| 7483 |
+
"# print(f\"Post {idx}: {post_data}\")\n",
|
| 7484 |
+
"df=df[df[\"comment_count\"]!=0]\n",
|
| 7485 |
+
"print(\"len\",len(df))"
|
| 7486 |
]
|
| 7487 |
},
|
| 7488 |
{
|
reddit/reddit_search_scrapper.py
CHANGED
|
@@ -19,18 +19,21 @@ def preProcessPostData(filesNames):
|
|
| 19 |
df.to_csv(i, index=False)
|
| 20 |
|
| 21 |
def getFinalData(user_query,filesNames):
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
if
|
| 28 |
-
os.
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
|
|
|
|
| 19 |
df.to_csv(i, index=False)
|
| 20 |
|
| 21 |
def getFinalData(user_query,filesNames):
    """
    Merge the scraped post CSV files into one relevance-sorted CSV.

    The files are pre-processed in place, combined with ``topic_sort`` against
    the user query, the intermediate files are deleted, and the combined
    result is written back to the path of the first file.

    Args:
        user_query: Query string passed to ``topic_sort`` as ``query``.
        filesNames: Paths of the scraped CSV files; the first three entries
            are passed to ``topic_sort`` as ``path1``..``path3``.

    Returns:
        bool: True when the combined CSV was written, False when any step
        failed (the failure is printed, not raised).
    """
    try:
        preProcessPostData(filesNames=filesNames)
        final_df = topic_sort(
            path1=filesNames[0],
            path2=filesNames[1],
            path3=filesNames[2],
            query=user_query,
        )
        for file_path in filesNames:
            # Check if the file exists before attempting to delete
            if os.path.exists(file_path):
                os.remove(file_path)
                print("File deleted successfully")
            else:
                print("File does not exist")
        # The merged result reuses the first input path, which was just removed.
        final_df.to_csv(filesNames[0], index=False)

        print("Data saved to ",filesNames[0])
        return True
    except Exception as e:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and report why the step failed instead of hiding it.
        print(f"getFinalData failed: {e}")
        return False
|
| 39 |
|
reddit/scraping.py
CHANGED
|
@@ -2,6 +2,8 @@
|
|
| 2 |
Only Scraping related code.
|
| 3 |
'''
|
| 4 |
import asyncio
|
|
|
|
|
|
|
| 5 |
import asyncpraw
|
| 6 |
import json
|
| 7 |
import time
|
|
@@ -10,7 +12,7 @@ import base64
|
|
| 10 |
import re
|
| 11 |
from asyncpraw.models import Comment
|
| 12 |
from reddit.reddit_utils import topic_sort
|
| 13 |
-
from reddit.load_env import reddit_client_id, reddit_client_secret,reddit_password,reddit_user_agent,reddit_username,scraper_ant_api_key,scraper_ant_api_key2
|
| 14 |
import http.client
|
| 15 |
from bs4 import BeautifulSoup
|
| 16 |
|
|
@@ -47,12 +49,13 @@ def getDFofSearchPostData(htmlContent):
|
|
| 47 |
# Extract post title
|
| 48 |
post_title = post_title_element.text.strip() if post_title_element else None
|
| 49 |
|
|
|
|
| 50 |
# Extract votes count
|
| 51 |
-
votes_element =
|
| 52 |
votes_count = votes_element.text.strip() if votes_element else None
|
| 53 |
|
| 54 |
# Extract comments count
|
| 55 |
-
comments_element =
|
| 56 |
comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
|
| 57 |
|
| 58 |
# Append data to the list
|
|
@@ -77,13 +80,14 @@ def getDFofSearchPostData(htmlContent):
|
|
| 77 |
|
| 78 |
# Extract post title
|
| 79 |
post_title = post_title_element.text.strip() if post_title_element else None
|
| 80 |
-
|
|
|
|
| 81 |
# Extract votes count
|
| 82 |
-
votes_element =
|
| 83 |
votes_count = votes_element.text.strip() if votes_element else None
|
| 84 |
|
| 85 |
# Extract comments count
|
| 86 |
-
comments_element =
|
| 87 |
comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
|
| 88 |
|
| 89 |
# Append data to the list
|
|
@@ -116,10 +120,13 @@ def getHtmlContent(search_keyword,forCompetitorAnalysis=False,even=False):
|
|
| 116 |
'''
|
| 117 |
try:
|
| 118 |
base64_snippet = base64.b64encode(js_snippet.encode()).decode()
|
| 119 |
-
conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={
|
| 120 |
except:
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
| 123 |
else:
|
| 124 |
js_snippet = '''
|
| 125 |
window.scrollTo(0,document.body.scrollHeight);
|
|
@@ -155,11 +162,13 @@ def retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=False,tries=2,ev
|
|
| 155 |
# 1. Get Search Post Data
|
| 156 |
async def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False,even=False):
|
| 157 |
htmlContent = getHtmlContent(search_keyword,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
|
| 158 |
-
print("
|
| 159 |
-
htmlContent = retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
|
| 160 |
if htmlContent is None:
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
| 163 |
time.sleep(1)
|
| 164 |
print("reached this step")
|
| 165 |
df = getDFofSearchPostData(htmlContent)
|
|
@@ -210,52 +219,70 @@ async def process_comment(comment, reply_limit):
|
|
| 210 |
|
| 211 |
return comment_data
|
| 212 |
|
| 213 |
-
|
| 214 |
-
async def fetch_submission_comments(url, reddit,is_for_competitor_analysis):
|
| 215 |
-
|
| 216 |
-
|
| 217 |
"""
|
| 218 |
-
Fetch comments from a single Reddit submission given its URL.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
"""
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
submission = await asyncio.wait_for(reddit.submission(url=url), timeout=30)
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
| 226 |
|
| 227 |
-
|
| 228 |
-
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
comment_queue = list(submission.comments)
|
| 233 |
-
comment_count = 0
|
| 234 |
-
threshold = 20 if is_for_competitor_analysis else 40
|
| 235 |
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
if
|
| 241 |
-
comment_data = await process_comment(
|
| 242 |
-
comment, reply_limit=2 if is_for_competitor_analysis else 3
|
| 243 |
-
)
|
| 244 |
-
comments_data.append(comment_data)
|
| 245 |
-
comment_count += 1
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
| 257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
|
|
|
|
|
|
|
|
|
| 259 |
async def getPostComments(file_name, is_for_competitor_analysis=False):
|
| 260 |
"""
|
| 261 |
Fetch comments for posts listed in the CSV file and save the processed data.
|
|
|
|
| 2 |
Only Scraping related code.
|
| 3 |
'''
|
| 4 |
import asyncio
|
| 5 |
+
import logging
|
| 6 |
+
import random
|
| 7 |
import asyncpraw
|
| 8 |
import json
|
| 9 |
import time
|
|
|
|
| 12 |
import re
|
| 13 |
from asyncpraw.models import Comment
|
| 14 |
from reddit.reddit_utils import topic_sort
|
| 15 |
+
from reddit.load_env import reddit_client_id, reddit_client_secret,reddit_password,reddit_user_agent,reddit_username,scraper_ant_api_key,scraper_ant_api_key2,scraper_ant_api_key3
|
| 16 |
import http.client
|
| 17 |
from bs4 import BeautifulSoup
|
| 18 |
|
|
|
|
| 49 |
# Extract post title
|
| 50 |
post_title = post_title_element.text.strip() if post_title_element else None
|
| 51 |
|
| 52 |
+
bottom_element = post.find('div', {'data-testid': 'search-counter-row'})
|
| 53 |
# Extract votes count
|
| 54 |
+
votes_element = bottom_element.find('faceplate-number', {'pretty': True})
|
| 55 |
votes_count = votes_element.text.strip() if votes_element else None
|
| 56 |
|
| 57 |
# Extract comments count
|
| 58 |
+
comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})
|
| 59 |
comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
|
| 60 |
|
| 61 |
# Append data to the list
|
|
|
|
| 80 |
|
| 81 |
# Extract post title
|
| 82 |
post_title = post_title_element.text.strip() if post_title_element else None
|
| 83 |
+
|
| 84 |
+
bottom_element = post.find('div', {'data-testid': 'search-counter-row'})
|
| 85 |
# Extract votes count
|
| 86 |
+
votes_element = bottom_element.find('faceplate-number', {'pretty': True})
|
| 87 |
votes_count = votes_element.text.strip() if votes_element else None
|
| 88 |
|
| 89 |
# Extract comments count
|
| 90 |
+
comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})
|
| 91 |
comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
|
| 92 |
|
| 93 |
# Append data to the list
|
|
|
|
| 120 |
'''
|
| 121 |
try:
|
| 122 |
base64_snippet = base64.b64encode(js_snippet.encode()).decode()
|
| 123 |
+
conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key3}&js_snippet={base64_snippet}")
|
| 124 |
except:
|
| 125 |
+
try:
|
| 126 |
+
base64_snippet = base64.b64encode(js_snippet.encode()).decode()
|
| 127 |
+
conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key3}&js_snippet={base64_snippet}")
|
| 128 |
+
except:
|
| 129 |
+
return ''
|
| 130 |
else:
|
| 131 |
js_snippet = '''
|
| 132 |
window.scrollTo(0,document.body.scrollHeight);
|
|
|
|
| 162 |
# 1. Get Search Post Data
|
| 163 |
async def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False,even=False):
|
| 164 |
htmlContent = getHtmlContent(search_keyword,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
|
| 165 |
+
print("htmlcontentBefore",htmlContent[:100])
|
|
|
|
| 166 |
if htmlContent is None:
|
| 167 |
+
htmlContent = retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
|
| 168 |
+
|
| 169 |
+
if htmlContent is None:
|
| 170 |
+
return None
|
| 171 |
+
print("htmlcontentAfter",htmlContent[:100])
|
| 172 |
time.sleep(1)
|
| 173 |
print("reached this step")
|
| 174 |
df = getDFofSearchPostData(htmlContent)
|
|
|
|
| 219 |
|
| 220 |
return comment_data
|
| 221 |
|
| 222 |
+
async def fetch_submission_comments(url, reddit, is_for_competitor_analysis, max_retries=3):
    """
    Fetch comments from a single Reddit submission given its URL with retry mechanism.

    Args:
        url (str): The URL of the Reddit submission
        reddit (Reddit): Authenticated Reddit instance
        is_for_competitor_analysis (bool): Flag to modify comment fetching behavior
            (20 comments / 2 replies each when True, 40 comments / 3 replies otherwise)
        max_retries (int, optional): Maximum number of attempts. Defaults to 3.

    Returns:
        dict or None: ``{"comments": [...], "description": str}`` on success,
        or None if every attempt failed
    """
    logger = logging.getLogger(__name__)

    threshold = 20 if is_for_competitor_analysis else 40
    reply_limit = 2 if is_for_competitor_analysis else 3

    for attempt in range(max_retries):
        try:
            # Use asyncio.wait_for to add a timeout for loading the submission
            submission = await asyncio.wait_for(reddit.submission(url=url), timeout=30)

            # Load additional submission data
            await submission.load()

            # Expand comments up to the specified limit
            await submission.comments.replace_more(limit=2)

            comments_data = []
            for comment in submission.comments:
                if len(comments_data) >= threshold:
                    break
                # Skip anything that is not a real comment (e.g. unexpanded placeholders)
                if isinstance(comment, Comment):
                    comments_data.append(
                        await process_comment(comment, reply_limit=reply_limit)
                    )

            # Return processed comments
            return {
                "comments": comments_data,
                "description": submission.selftext if submission.selftext else ""
            }

        except asyncio.TimeoutError:
            logger.warning("Timeout on attempt %s for URL: %s", attempt + 1, url)
        except Exception as e:
            logger.error("Error on attempt %s for URL %s: %s", attempt + 1, url, e)

        # Exponential backoff with jitter; no wait after the final attempt.
        # The random component keeps concurrent fetches from retrying in lockstep.
        if attempt < max_retries - 1:
            wait_time = (2 ** attempt) + 1 + random.uniform(0, 1)
            logger.info("Waiting %.2f seconds before retry", wait_time)
            await asyncio.sleep(wait_time)

    # Log final failure if all retries are exhausted
    logger.error("Failed to fetch comments for URL after %s attempts: %s", max_retries, url)
    return None
|
| 286 |
async def getPostComments(file_name, is_for_competitor_analysis=False):
|
| 287 |
"""
|
| 288 |
Fetch comments for posts listed in the CSV file and save the processed data.
|
test.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
| 1 |
|
| 2 |
# from databases.firebase_db import get_firebase_user_from_token
|
|
|
|
| 3 |
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
# get_firebase_user_from_token(token="eyJhbGciOiJSUzI1NiIsImtpZCI6IjNmZDA3MmRmYTM4MDU2NzlmMTZmZTQxNzM4YzJhM2FkM2Y5MGIyMTQiLCJ0eXAiOiJKV1QifQ.eyJuYW1lIjoiaG9uZXkgYmFuc2FsIiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hL0FDZzhvY0tfUWpyTmtyeWhPbVd1eVkzTHZvTDN6YjcyNGstQzlaNGZnbjI1M21FdU1ndWFXbEE9czk2LWMiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vbmV4dGFuYWx5dGljcy0xM2JmYiIsImF1ZCI6Im5leHRhbmFseXRpY3MtMTNiZmIiLCJhdXRoX3RpbWUiOjE3MzMzOTY1MTQsInVzZXJfaWQiOiJIb3VvWjJOSWR5YkZZM05WbmtXRFozYlRBWjAzIiwic3ViIjoiSG91b1oyTklkeWJGWTNOVm5rV0RaM2JUQVowMyIsImlhdCI6MTczMzM5NjUxNCwiZXhwIjoxNzMzNDAwMTE0LCJlbWFpbCI6ImhvbmV5YmFuc2FsMjk2OEBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZmlyZWJhc2UiOnsiaWRlbnRpdGllcyI6eyJnb29nbGUuY29tIjpbIjExNTE0MDc4NTM4OTUzMTUxNDIyMSJdLCJlbWFpbCI6WyJob25leWJhbnNhbDI5NjhAZ21haWwuY29tIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ29vZ2xlLmNvbSJ9fQ.j15QwCVrfrF05m3Oq_Nr9WCGI4JNNtK9LTg2TkkjdQ592sDR78WyizKE-GDug1pxYEE36uPt2lARmMIid1xsH4ITwYLeCU7BoTEyHkxB8HknnvQC6VKLefxy9xFopqFwjdE90tPL2GkcwSFLw-_R5BwZ2QUOiK_8Sq48MfY08AiSwOmHgv1c1TRt4_XL0M-BvhxOGIqVappsm-x4iu75-81oiWA5eaY_HqzvruohYOMoKitVAN4NGnaxLecCE8GguByMIQ9mlc1lypqg6qGy16gYQotPEVfABCmk2bYY60OjdDXCGVUwSWO4BNSOLdSbcbiE_qRydBoSezpH262z2A")
|
| 6 |
|
| 7 |
# from reddit.reddit_competitor_analysis import getCompetitorAnalysisReport
|
|
|
|
| 1 |
|
| 2 |
# from databases.firebase_db import get_firebase_user_from_token
|
| 3 |
+
from apis.reddit_apis import call_get_competitor_analysis
|
| 4 |
|
| 5 |
|
| 6 |
+
competitor_analysis_result =call_get_competitor_analysis(user_query='significant challenges facing startups in 2024',fileName='posts_data_1734025420988523.csv')
|
| 7 |
+
print("competitor_analysis_result",competitor_analysis_result)
|
| 8 |
+
|
| 9 |
# get_firebase_user_from_token(token="eyJhbGciOiJSUzI1NiIsImtpZCI6IjNmZDA3MmRmYTM4MDU2NzlmMTZmZTQxNzM4YzJhM2FkM2Y5MGIyMTQiLCJ0eXAiOiJKV1QifQ.eyJuYW1lIjoiaG9uZXkgYmFuc2FsIiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hL0FDZzhvY0tfUWpyTmtyeWhPbVd1eVkzTHZvTDN6YjcyNGstQzlaNGZnbjI1M21FdU1ndWFXbEE9czk2LWMiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vbmV4dGFuYWx5dGljcy0xM2JmYiIsImF1ZCI6Im5leHRhbmFseXRpY3MtMTNiZmIiLCJhdXRoX3RpbWUiOjE3MzMzOTY1MTQsInVzZXJfaWQiOiJIb3VvWjJOSWR5YkZZM05WbmtXRFozYlRBWjAzIiwic3ViIjoiSG91b1oyTklkeWJGWTNOVm5rV0RaM2JUQVowMyIsImlhdCI6MTczMzM5NjUxNCwiZXhwIjoxNzMzNDAwMTE0LCJlbWFpbCI6ImhvbmV5YmFuc2FsMjk2OEBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZmlyZWJhc2UiOnsiaWRlbnRpdGllcyI6eyJnb29nbGUuY29tIjpbIjExNTE0MDc4NTM4OTUzMTUxNDIyMSJdLCJlbWFpbCI6WyJob25leWJhbnNhbDI5NjhAZ21haWwuY29tIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ29vZ2xlLmNvbSJ9fQ.j15QwCVrfrF05m3Oq_Nr9WCGI4JNNtK9LTg2TkkjdQ592sDR78WyizKE-GDug1pxYEE36uPt2lARmMIid1xsH4ITwYLeCU7BoTEyHkxB8HknnvQC6VKLefxy9xFopqFwjdE90tPL2GkcwSFLw-_R5BwZ2QUOiK_8Sq48MfY08AiSwOmHgv1c1TRt4_XL0M-BvhxOGIqVappsm-x4iu75-81oiWA5eaY_HqzvruohYOMoKitVAN4NGnaxLecCE8GguByMIQ9mlc1lypqg6qGy16gYQotPEVfABCmk2bYY60OjdDXCGVUwSWO4BNSOLdSbcbiE_qRydBoSezpH262z2A")
|
| 10 |
|
| 11 |
# from reddit.reddit_competitor_analysis import getCompetitorAnalysisReport
|