honey234 committed on
Commit
e1ce9ca
·
1 Parent(s): 3d00f61

fixed bugs and added session apis

Browse files
apis/reddit_apis.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import Annotated
2
  from fastapi import Depends, HTTPException, APIRouter
 
3
  from databases.firebase_db import get_firebase_user_from_token
4
  from databases.supabase_db import create_user_session, save_competitor_analysis, save_pain_point_analysis, update_user_session
5
  from models.competitor_analysis_model import CompetitorAnalysisModel
@@ -130,7 +131,7 @@ async def analyzeData(inputData:InputInfoModel,user_session:dict):
130
 
131
  reddit_data_result = await getRedditData(user_query=keywords['query'], search_keywords=keywords['top_3_combinations'])
132
 
133
- services_result = await getServices(
134
  user_id=user_session['id'],
135
  field_inputs=inputData.field_inputs,
136
  user_query=keywords['query'],
@@ -142,39 +143,61 @@ async def analyzeData(inputData:InputInfoModel,user_session:dict):
142
  'reddit_data': reddit_data_result,
143
  'services_result': services_result
144
  }
145
- update_user_session(user_session=user_session,process_info=process_info)
146
  except Exception as e:
147
  print("Failed to run analyzeData ", e)
148
  raise HTTPException(status_code=500, detail=str(f"Failed to run analyzeData : {e}"))
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  async def getServices( user_id:int, field_inputs:dict, user_query=None, fileName=None, uniqueFileId=None):
151
  final_result= {}
 
152
  if "Reddit" in field_inputs:
153
  analysis_list= field_inputs['Reddit']
 
154
  # Pain point analysis only
155
  if reddit_services_names[0] in analysis_list:
156
  pain_point_analysis_result=pain_point_analysis(user_query=user_query,fileName=fileName,uniqueFileId=uniqueFileId)
157
  final_result['Reddit'] = {'pain_point_analysis':pain_point_analysis_result[2]}
158
  if "details" not in pain_point_analysis_result[0].keys():
159
- save_pain_point_analysis(data=PainPointAnalysisModel(
160
  result=pain_point_analysis_result[0],
161
  platform="Reddit",
162
  query=user_query,
163
  user_id=user_id
164
  ))
 
 
165
  # Competitor analysis only
166
  if reddit_services_names[1] in analysis_list:
167
- competitor_analysis_result =await getCompetitorAnalysisData(user_query=user_query,fileName=fileName)
 
168
  print("competitor_analysis_result",competitor_analysis_result)
169
- temp=competitor_analysis_result
170
- print("temp",temp)
171
  final_result['Reddit'] = {'competitor_analysis':{"competitors_data": len(competitor_analysis_result['competitors_data']),
172
  'e_time': competitor_analysis_result['e_time']}}
173
- save_competitor_analysis(data=CompetitorAnalysisModel(
174
- result=competitor_analysis_result['competitors_data'],
175
  platform="Reddit",
176
  query=user_query,
177
  user_id=user_id,
178
  all_competitors=competitor_analysis_result['all_competitor_data']
179
  ))
180
- return final_result
 
 
 
1
  from typing import Annotated
2
  from fastapi import Depends, HTTPException, APIRouter
3
+ import requests
4
  from databases.firebase_db import get_firebase_user_from_token
5
  from databases.supabase_db import create_user_session, save_competitor_analysis, save_pain_point_analysis, update_user_session
6
  from models.competitor_analysis_model import CompetitorAnalysisModel
 
131
 
132
  reddit_data_result = await getRedditData(user_query=keywords['query'], search_keywords=keywords['top_3_combinations'])
133
 
134
+ services_result,session_info_result = await getServices(
135
  user_id=user_session['id'],
136
  field_inputs=inputData.field_inputs,
137
  user_query=keywords['query'],
 
143
  'reddit_data': reddit_data_result,
144
  'services_result': services_result
145
  }
146
+ update_user_session(user_session=user_session,session_info=session_info_result,process_info=process_info)
147
  except Exception as e:
148
  print("Failed to run analyzeData ", e)
149
  raise HTTPException(status_code=500, detail=str(f"Failed to run analyzeData : {e}"))
150
 
151
def call_get_competitor_analysis(user_query, fileName):
    """Call the competitor-analysis HTTP endpoint and return its JSON payload.

    Args:
        user_query: Search query forwarded to the analysis service.
        fileName: Name of the uploaded file to analyze (may be None).

    Returns:
        dict: The JSON body of a successful response. Callers (getServices)
        index into this dict (e.g. ``result['competitors_data']``), so the
        response must be returned rather than discarded.

    Raises:
        HTTPException: 502 when the downstream service responds with a
        non-200 status.
    """
    # NOTE(review): 127.0.0.31 looks like a typo for 127.0.0.1 — confirm.
    url = "http://127.0.0.31:7860/getCompetitorAnalysis"  # Replace with your actual API URL

    params = {
        "user_query": user_query,
        "fileName": fileName,
        "isSolo": True  # or False, depending on your needs
    }

    # Timeout prevents the worker from hanging forever on a dead service.
    response = requests.get(url, params=params, timeout=300)

    if response.status_code == 200:
        result = response.json()
        print("Response:", result)
        # Bug fix: the original printed the JSON but returned None, so the
        # caller crashed subscripting the result.
        return result

    print(f"Failed to call API. Status code: {response.status_code}, Response: {response.text}")
    # Fail loudly instead of letting the caller subscript None.
    raise HTTPException(status_code=502,
                        detail=f"Competitor analysis service failed with status {response.status_code}")
166
+
167
+
168
async def getServices(user_id: int, field_inputs: dict, user_query=None, fileName=None, uniqueFileId=None):
    """Run the Reddit analyses requested in ``field_inputs`` and persist results.

    Args:
        user_id: Database id of the requesting user (used for saved rows).
        field_inputs: Mapping of platform name -> list of requested analyses;
            only the "Reddit" key is handled here.
        user_query: Free-text query driving the analyses.
        fileName: Optional uploaded-file name passed through to the analyzers.
        uniqueFileId: Optional unique id of the uploaded file.

    Returns:
        tuple[dict, dict]: ``(final_result, session_info_result)`` where
        ``final_result['Reddit']`` holds per-analysis summaries and
        ``session_info_result['Reddit']`` lists the ids of saved DB rows.
    """
    final_result = {}
    session_info_result = {}
    if "Reddit" in field_inputs:
        analysis_list = field_inputs['Reddit']
        session_info_result['Reddit'] = []

        # Pain point analysis only
        if reddit_services_names[0] in analysis_list:
            pain_point_analysis_result = pain_point_analysis(user_query=user_query, fileName=fileName, uniqueFileId=uniqueFileId)
            # Bug fix: merge into the 'Reddit' dict instead of reassigning it,
            # so a later competitor analysis does not clobber this entry.
            final_result.setdefault('Reddit', {})['pain_point_analysis'] = pain_point_analysis_result[2]
            # A "details" key in the first element signals an error payload —
            # only persist successful analyses.
            if "details" not in pain_point_analysis_result[0].keys():
                p_session = save_pain_point_analysis(data=PainPointAnalysisModel(
                    result=pain_point_analysis_result[0],
                    platform="Reddit",
                    query=user_query,
                    user_id=user_id
                ))
                session_info_result['Reddit'].append({'Pain point analysis': p_session['id']})

        # Competitor analysis only
        if reddit_services_names[1] in analysis_list:
            # competitor_analysis_result = await getCompetitorAnalysisData(user_query=user_query, fileName=fileName)
            competitor_analysis_result = call_get_competitor_analysis(user_query=user_query, fileName=fileName)
            print("competitor_analysis_result", competitor_analysis_result)
            # Bug fix: same merge-not-replace as above, so selecting both
            # analyses keeps both results in final_result['Reddit'].
            final_result.setdefault('Reddit', {})['competitor_analysis'] = {
                "competitors_data": len(competitor_analysis_result['competitors_data']),
                'e_time': competitor_analysis_result['e_time'],
            }
            c_session = save_competitor_analysis(data=CompetitorAnalysisModel(
                # The model expects a list; wrap a bare dict defensively.
                result=competitor_analysis_result['competitors_data'] if isinstance(competitor_analysis_result['competitors_data'], list) else [competitor_analysis_result['competitors_data']],
                platform="Reddit",
                query=user_query,
                user_id=user_id,
                all_competitors=competitor_analysis_result['all_competitor_data']
            ))
            session_info_result['Reddit'].append({'Competitor analysis': c_session['id']})

    return final_result, session_info_result
apis/user.py CHANGED
@@ -13,7 +13,7 @@ supabase_client = get_db_client()
13
 
14
  @router.get("/users/profile",response_model=UserProfileResponseModel)
15
  @time_execution
16
- async def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
17
  """
18
  Retrieve the profile information of the authenticated user.
19
 
@@ -42,4 +42,38 @@ async def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_toke
42
  multidomain_cache.update("user",user_db["id"],user_data)
43
  user_profile = UserProfileResponse(**user_data)
44
  return UserProfileResponseModel(msg="user profile",data=user_profile)
45
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  @router.get("/users/profile",response_model=UserProfileResponseModel)
15
  @time_execution
16
+ def get_user(user_db : Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
17
  """
18
  Retrieve the profile information of the authenticated user.
19
 
 
42
  multidomain_cache.update("user",user_db["id"],user_data)
43
  user_profile = UserProfileResponse(**user_data)
44
  return UserProfileResponseModel(msg="user profile",data=user_profile)
45
+
46
+
47
+
48
@router.get("/users/sessions")
@time_execution
def get_user_sessions(user_db: Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
    """Return all non-deleted analysis sessions for the authenticated user.

    Bug fix: the previous body was a copy of the profile endpoint and
    returned the user profile instead of any session data.

    Raises:
        HTTPException: 500 when the sessions query fails.
    """
    try:
        sessions = (
            supabase_client
            .table("sessions")
            .select("*")
            .eq("user_id", user_db["id"])
            .eq("is_deleted", False)
            .execute()
            .data
        )
    except Exception as e:
        print("Failed to fetch user sessions:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user sessions")
    return {"msg": "user sessions", "data": sessions}
62
+
63
+
64
+
65
+
66
@router.get("/users/session/{session_id}")
@time_execution
def get_user_session_by_id(session_id: int, user_db: Annotated[dict, Depends(get_firebase_user_from_token)]) -> dict:
    """Return a single non-deleted session owned by the authenticated user.

    Bug fix: the route declares a ``{session_id}`` path parameter but the
    previous function never accepted or used it, and its body was a copy of
    the profile endpoint. The user_id filter prevents reading other users'
    sessions.

    Raises:
        HTTPException: 404 when no matching session exists; 500 on query failure.
    """
    try:
        session = (
            supabase_client
            .table("sessions")
            .select("*")
            .eq("id", session_id)
            .eq("user_id", user_db["id"])
            .eq("is_deleted", False)
            .execute()
            .data
        )
    except Exception as e:
        print("Failed to fetch user session:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user session")
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    return {"msg": "user session", "data": session[0]}
competitor_analysis_report_1734026339341401.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"competitor_analysis": {"competitor_name": "ClickUp", "overview": {"date_range": "2022-09-20 to 2024-12-02", "total_posts_analyzed": 7, "total_comments_analyzed": 112}, "market_sentiment": {"overall": {"positive": "30", "neutral": "20", "negative": "50"}, "trend_over_time": {"2022-09": {"positive": "25", "neutral": "30", "negative": "45"}, "2023-06": {"positive": "35", "neutral": "15", "negative": "50"}, "2023-07": {"positive": "20", "neutral": "30", "negative": "50"}, "2024-05": {"positive": "40", "neutral": "20", "negative": "40"}, "2024-10": {"positive": "20", "neutral": "20", "negative": "60"}, "2024-12": {"positive": "30", "neutral": "30", "negative": "40"}}}, "pain_points": {"key_insights": ["Performance issues are a major concern for users.", "Complexity and overwhelming features are frequently criticized.", "Inconsistent user experience across different features and updates is reported.", "Inadequate handling of permissions and privacy settings is mentioned."], "pain_points": [{"category": "Performance Issues", "pain_point": "Slow loading times and performance problems", "frequency": "10", "sentiment_analysis": {"positive": "10", "neutral": "5", "negative": "85"}, "related_features": ["General performance", "Loading times", "Email Functionality"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "If ClickUp worked properly, it would be a great choice. 
It doesn\u2019t, to the point that it because unusable for my team due to performance issues.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Investigate and address performance bottlenecks.", "Optimize database queries and server-side processing.", "Implement caching strategies to reduce load times."]}, {"category": "Complexity and User Experience", "pain_point": "Overly complex interface and feature set", "frequency": "10", "sentiment_analysis": {"positive": "20", "neutral": "20", "negative": "60"}, "related_features": ["User interface", "Feature set", "Onboarding process"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "ClickUp has way too many features. It feels like the team is struggling to keep everything running smoothly and bug-free, while also making sure the UX/UI stays clean and user-friendly.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Simplify the user interface.", "Provide better onboarding and tutorials.", "Prioritize core features and gradually introduce new ones."]}, {"category": "Permissions and Privacy", "pain_point": "Issues with permissions and accidental sharing of private information", "frequency": "5", "sentiment_analysis": {"positive": "10", "neutral": "15", "negative": "75"}, "related_features": ["Permissions", "Access control", "Data privacy"], "examples": [{"post_title": "Is ClickUp worth it?", "comment": "Plus, clickup is feature rich, and we really liked it even if it comes with complexity. The only issue we had with the complexity is regarding privacy and share configurations. 
Team members accidentally shared private information.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}], "recommended_actions": ["Improve permissions management.", "Enhance privacy settings.", "Provide better user education on privacy controls."]}, {"category": "Agile and Scrum Support", "pain_point": "Inadequate support for Agile methodologies", "frequency": "5", "sentiment_analysis": {"positive": "20", "neutral": "25", "negative": "55"}, "related_features": ["Sprint planning", "Agile workflows", "Scrum boards"], "examples": [{"post_title": "ClickUp for Software project management - would you recommend it?", "comment": "I would not use it for sprint planning. We have been using it for a while and it doesn't work very well. It falls short with organization and sorting. Which makes prioritizing difficult.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/xjiuph/clickup_for_software_project_management_would_you/"}], "recommended_actions": ["Enhance Agile features.", "Improve integration with Agile tools.", "Provide more training and resources on Agile workflows."]}], "overall_insights": {"top_pain_points": ["Performance Issues", "Complexity and User Experience", "Permissions and Privacy"], "user_segments_most_affected": ["Software development teams", "Marketing teams", "Agencies"], "impact_on_product_development": ["Focus on performance optimization", "Prioritize user experience improvements", "Improve permissions and data privacy controls"]}}, "features_and_differentiators": [{"feature": "Task Management", "sentiment": "mixed", "mentions": "50", "related_comments": [{"comment": "We've been using Clickup for at least 4 years now and honestly it's by far one of the best project management tools.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "Clickup is great when it comes to management of tasks and breakdown each one to 
make life easier however I HATE the new update.", "upvotes": "2", "post_url": "https://www.reddit.com/r/clickup/comments/1h5144v/which_is_the_best_project_management_tool_trello/"}]}, {"feature": "Project Management", "sentiment": "mixed", "mentions": "40", "related_comments": [{"comment": "I like notion for data storage and notes. I like clickup for project management.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "If you take advantage of emails, create your own project templates, record your screen and have a lot of client assets you manage, its excellent. I run a small agency and Clickup is awesome.", "upvotes": "8", "post_url": "https://www.reddit.com/r/clickup/comments/14pm40m/does_clickup_make_sense_for/"}]}, {"feature": "Integrations", "sentiment": "positive", "mentions": "20", "related_comments": [{"comment": "The GitHub integration is very strong for us and it connects into pull requests, branches and similar with ease", "upvotes": "3", "post_url": "https://www.reddit.com/r/clickup/comments/xjiuph/clickup_for_software_project_management_would_you/"}]}], "sentiment_by_feature": {"Task Management": {"positive": "40", "neutral": "30", "negative": "30"}, "Project Management": {"positive": "30", "neutral": "40", "negative": "30"}, "Integrations": {"positive": "60", "neutral": "30", "negative": "10"}}, "audience_analysis": {"popular_subreddits": ["r/clickup", "r/projectmanagement", "r/selfhosted"], "user_segments": ["Freelancers", "Solopreneurs", "Small agencies", "Software development teams"]}, "pricing_feedback": {"value_perception": {"positive": "30", "neutral": "40", "negative": "30"}, "related_comments": [{"comment": "For the price and vast amount of tools, Clickup has definitely surpassed all my expectations.", "upvotes": "5", "post_url": "https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}, {"comment": "Pricing is awesome.", "upvotes": "2", "post_url": 
"https://www.reddit.com/r/clickup/comments/1cil7cj/is_clickup_worth_it/"}]}, "competitor_strengths": ["Extensive feature set", "Wide range of integrations", "Free plan available"], "competitor_weaknesses": ["Performance issues", "Complexity", "User experience inconsistencies"], "user_recommendations": ["Improve performance", "Simplify user interface", "Enhance Agile support"], "competitive_strategy": {"pricing_strategy": "Competitive pricing with a freemium model", "feature_improvement": "Focus on improving core features and user experience", "marketing_strategy": "Target specific user segments with tailored marketing campaigns"}}}
competitor_analysis_report_1734026339341403.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"competitor_analysis": {"competitor_name": "Notion", "overview": {"date_range": "2019-11-18 to 2024-11-19", "total_posts_analyzed": 16, "total_comments_analyzed": 264}, "market_sentiment": {"overall": {"positive": "45", "neutral": "30", "negative": "25"}, "trend_over_time": {"2021-03": {"positive": "40", "neutral": "35", "negative": "25"}, "2021-06": {"positive": "50", "neutral": "30", "negative": "20"}, "2021-09": {"positive": "42", "neutral": "33", "negative": "25"}, "2021-10": {"positive": "48", "neutral": "28", "negative": "24"}, "2021-12": {"positive": "45", "neutral": "35", "negative": "20"}, "2022-10": {"positive": "40", "neutral": "40", "negative": "20"}, "2023-02": {"positive": "30", "neutral": "40", "negative": "30"}, "2023-03": {"positive": "50", "neutral": "30", "negative": "20"}, "2023-06": {"positive": "60", "neutral": "25", "negative": "15"}, "2023-09": {"positive": "55", "neutral": "30", "negative": "15"}, "2023-12": {"positive": "40", "neutral": "35", "negative": "25"}, "2024-01": {"positive": "60", "neutral": "30", "negative": "10"}, "2024-03": {"positive": "50", "neutral": "40", "negative": "10"}, "2024-07": {"positive": "70", "neutral": "20", "negative": "10"}, "2024-09": {"positive": "60", "neutral": "30", "negative": "10"}, "2024-10": {"positive": "50", "neutral": "35", "negative": "15"}, "2024-11": {"positive": "40", "neutral": "40", "negative": "20"}}}, "pain_points": [{"category": "Offline Access", "pain_point": "Lack of offline access is a major drawback for many users.", "frequency": "8", "sentiment_analysis": {"positive": "1", "neutral": "2", "negative": "5"}, "related_features": ["offline_editing", "sync_speed"], "examples": [{"post_title": "You can now try Microsoft Loop, a Notion competitor with futuristic Office documents", "comment": "\"No offline access kills it for me. 
Full stop.\"", "upvotes": "216", "post_url": "https://www.reddit.com/r/Notion/comments/11zuxhi/you_can_now_try_microsoft_loop_a_notion/"}], "recommended_actions": ["Implement offline capabilities", "Improve sync speed and reliability"]}, {"category": "Collaboration Features", "pain_point": "Some users find the collaboration features lacking or difficult to use.", "frequency": "6", "sentiment_analysis": {"positive": "2", "neutral": "2", "negative": "2"}, "related_features": ["real-time_collaboration", "co-editing", "commenting"], "examples": [{"post_title": "You can now try Microsoft Loop, a Notion competitor with futuristic Office documents", "comment": "\"I don't care for collaboration functions.\"", "upvotes": "216", "post_url": "https://www.reddit.com/r/Notion/comments/11zuxhi/you_can_now_try_microsoft_loop_a_notion/"}], "recommended_actions": ["Improve the user interface for collaboration features", "Add more advanced collaboration features"]}], "features_and_differentiators": [{"feature": "Databases", "sentiment": "positive", "mentions": "15", "related_comments": [{"comment": "\"Notion specializes in relational databases using blocks.\"", "upvotes": null, "post_url": null}]}, {"feature": "Templates", "sentiment": "positive", "mentions": "10", "related_comments": [{"comment": "\"Notion is still leagues ahead in terms of cross-document search, templates, databases, etc.\"", "upvotes": null, "post_url": null}]}, {"feature": "UI", "sentiment": "mixed", "mentions": "12", "related_comments": [{"comment": "\"If MS didn\u2019t copy Notions UI, the parallels between the two products would be minimal.\"", "upvotes": null, "post_url": null}]}], "sentiment_by_feature": {"Databases": {"positive": "70", "neutral": "20", "negative": "10"}, "Templates": {"positive": "80", "neutral": "15", "negative": "5"}, "UI": {"positive": "50", "neutral": "30", "negative": "20"}}, "audience_analysis": {"popular_subreddits": ["r/Notion", "r/selfhosted", "r/UI_Design", "r/todoist", 
"r/learnpython"], "user_segments": ["students", "small business owners", "knowledge workers", "designers", "developers"]}, "pricing_feedback": {"value_perception": {"positive": "60", "neutral": "30", "negative": "10"}, "related_comments": [{"comment": "\"Google is already free, while Notion charges per person and it's costly.\"", "upvotes": null, "post_url": null}]}, "competitor_strengths": ["intuitive interface", "powerful databases", "versatile features", "large user community", "extensive integrations"], "competitor_weaknesses": ["lack of offline access", "pricing", "some collaboration features need improvement", "not ideal for public-facing documentation"], "user_recommendations": ["improve offline functionality", "add version control", "enhance collaboration features", "consider a free tier"], "competitive_strategy": {"pricing_strategy": "freemium model", "feature_improvement": "continuously adding new features and improving existing ones"}}}
databases/supabase_db.py CHANGED
@@ -30,7 +30,7 @@ def create_user_with_id(external_id,email):
30
  return user
31
 
32
  # create user session
33
- def create_user_session(user_id: int, input_info: InputInfoModel) -> None:
34
  """
35
  Creates a new user session in the database.
36
 
@@ -89,6 +89,7 @@ def update_user_session(user_session: dict,session_info:dict=None, process_info:
89
  def save_pain_point_analysis(data:PainPointAnalysisModel)->None:
90
  try:
91
  data= db_client.table("pain_point_analysis").insert(data.model_dump()).execute().data
 
92
  except Exception as e:
93
  print("Failed to save pain point analysis:", e)
94
  raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
@@ -96,6 +97,22 @@ def save_pain_point_analysis(data:PainPointAnalysisModel)->None:
96
  def save_competitor_analysis(data:CompetitorAnalysisModel)->None:
97
  try:
98
  data= db_client.table("competitor_analysis").insert(data.model_dump()).execute().data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  except Exception as e:
100
  print("Failed to save pain point analysis:", e)
101
  raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
 
30
  return user
31
 
32
  # create user session
33
+ def create_user_session(user_id: int, input_info: InputInfoModel):
34
  """
35
  Creates a new user session in the database.
36
 
 
89
def save_pain_point_analysis(data: PainPointAnalysisModel) -> dict:
    """Insert a pain-point analysis row and return the created record.

    Returns the inserted row (dict) so callers can reference its ``id``
    (the ``-> None`` annotation was stale once the return was added).

    Raises:
        HTTPException: 500 when the insert fails.
    """
    try:
        # Avoid shadowing the `data` parameter with the query result.
        rows = db_client.table("pain_point_analysis").insert(data.model_dump()).execute().data
        return rows[0]
    except Exception as e:
        print("Failed to save pain point analysis:", e)
        raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
 
97
  def save_competitor_analysis(data:CompetitorAnalysisModel)->None:
98
  try:
99
  data= db_client.table("competitor_analysis").insert(data.model_dump()).execute().data
100
+ return data[0]
101
+ except Exception as e:
102
+ print("Failed to save pain point analysis:", e)
103
+ raise HTTPException(status_code=500, detail="Failed to save pain point analysis")
104
+
105
def get_user_sessions(user_id: int) -> list:
    """Fetch all non-deleted sessions belonging to ``user_id``.

    Bug fixes: the previous body never returned the fetched rows, contained a
    stray double assignment (``data = tasks = ...``), and its error messages
    were copy-pasted from the pain-point saver.

    Returns:
        list: Raw session rows (dicts) from the ``sessions`` table. The rows
        are not validated against UserSessionModel here — callers that need
        model instances must construct them.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        sessions = (
            db_client
            .table('sessions')
            .select("*")
            .eq('user_id', user_id)
            .eq('is_deleted', False)
            .execute()
            .data
        )
        return sessions
    except Exception as e:
        print("Failed to fetch user sessions:", e)
        raise HTTPException(status_code=500, detail="Failed to fetch user sessions")
models/session_model.py CHANGED
@@ -31,4 +31,4 @@ class UserSessionModel(BaseModel):
31
  session_info: dict
32
  process_info: Optional[dict] = None
33
  session_completed: Optional[bool] = False
34
-
 
31
  session_info: dict
32
  process_info: Optional[dict] = None
33
  session_completed: Optional[bool] = False
34
+ is_deleted: Optional[bool] = False
new_pain_point_report.json ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "report_title": "Pain Point Analysis Report for Startups",
3
+ "date_generated": "2024-12-07",
4
+ "target_audience": {
5
+ "industry": "Startups",
6
+ "primary_subreddits": [
7
+ "startups",
8
+ "Entrepreneur",
9
+ "Startup_Ideas",
10
+ "BlockchainStartups",
11
+ "ecommerce",
12
+ "smallbusiness",
13
+ "EntrepreneurRideAlong",
14
+ "StartUpIndia",
15
+ "SaaSToolbox",
16
+ "marketing",
17
+ "DACXI",
18
+ "biotech"
19
+ ],
20
+ "audience_demographics": {
21
+ "age_range": "25-45",
22
+ "interests": [
23
+ "entrepreneurship",
24
+ "business",
25
+ "technology",
26
+ "marketing",
27
+ "sales",
28
+ "funding",
29
+ "team building",
30
+ "AI"
31
+ ],
32
+ "regions": [
33
+ "Global"
34
+ ]
35
+ }
36
+ },
37
+ "analysis_summary": {
38
+ "total_posts_analyzed": 12,
39
+ "total_comments_analyzed": 598,
40
+ "time_period": "2023-12-17 - 2024-12-01",
41
+ "key_findings": [
42
+ "Recurring frustration with securing funding, particularly for early-stage startups.",
43
+ "High demand for effective marketing and sales strategies, especially for reaching target audiences.",
44
+ "Negative sentiment about the challenges of team building and talent acquisition."
45
+ ]
46
+ },
47
+ "pain_points": [
48
+ {
49
+ "pain_point_id": 1,
50
+ "description": "Difficulty securing funding, especially for early-stage startups.",
51
+ "examples_from_reddit": [
52
+ {
53
+ "post_id": "1b9g0l1",
54
+ "post_url": "https://www.reddit.com/r/Entrepreneur/comments/1b9g0l1/what_are_the_common_struggles_of_a_small_business/",
55
+ "text_snippet": "Money"
56
+ },
57
+ {
58
+ "post_id": "1h46s6w",
59
+ "post_url": "https://www.reddit.com/r/StartUpIndia/comments/1h46s6w/starting_a_startup_in_india_key_ventures_and/",
60
+ "text_snippet": "Funding Gaps: Tap into angel investors, crowdfunding platforms, and state-backed funds."
61
+ }
62
+ ],
63
+ "impact_analysis": {
64
+ "frequency": "High",
65
+ "audience_size_affected": "Large",
66
+ "sentiment_trend": "Negative"
67
+ },
68
+ "actionable_recommendations": [
69
+ "Explore alternative funding options like crowdfunding (e.g., Kickstarter, Indiegogo), angel investors (e.g., AngelList), or government grants (e.g., SBIR, STTR).",
70
+ "Develop a compelling business plan and pitch deck to attract investors. Utilize resources like the Sequoia Capital Pitch Deck template or the Y Combinator application advice."
71
+ ],
72
+ "tools_and_technologies": [
73
+ "Crunchbase",
74
+ "Pitchbook",
75
+ "Gust",
76
+ "DocSend"
77
+ ],
78
+ "methods": [
79
+ "Lean Startup methodology",
80
+ "Value Proposition Design"
81
+ ],
82
+ "case_studies": [
83
+ "Mailchimp's bootstrapping success",
84
+ "Airbnb's early crowdfunding campaign"
85
+ ]
86
+ },
87
+ {
88
+ "pain_point_id": 2,
89
+ "description": "Intense competition, requiring strong differentiation and effective marketing.",
90
+ "examples_from_reddit": [
91
+ {
92
+ "post_id": "1dwtb7l",
93
+ "post_url": "https://www.reddit.com/r/Entrepreneur/comments/1dwtb7l/whats_the_worse_business_to_start_in_2024/",
94
+ "text_snippet": "Restaurants gotta be no 1. No matter the circumstances. It’s most likely to fail."
95
+ },
96
+ {
97
+ "post_id": "1fdig1p",
98
+ "post_url": "https://www.reddit.com/r/marketing/comments/1fdig1p/what_are_some_marketing_challenges_that_startups/",
99
+ "text_snippet": "The startup market was the new gold rush 10-15 years ago, but nowadays, I think a lot of newer startups are dealing with the oversaturation problem."
100
+ }
101
+ ],
102
+ "impact_analysis": {
103
+ "frequency": "High",
104
+ "audience_size_affected": "Large",
105
+ "sentiment_trend": "Negative"
106
+ },
107
+ "actionable_recommendations": [
108
+ "Conduct thorough market research to identify unmet needs and differentiate from competitors. Use tools like SWOT analysis and Porter's Five Forces.",
109
+ "Develop a strong value proposition that resonates with the target audience. Consider using the Value Proposition Canvas."
110
+ ],
111
+ "tools_and_technologies": [
112
+ "SEMrush",
113
+ "Ahrefs",
114
+ "SimilarWeb",
115
+ "Brand24"
116
+ ],
117
+ "methods": [
118
+ "Blue Ocean Strategy",
119
+ "Competitive Analysis Framework"
120
+ ],
121
+ "case_studies": [
122
+ "Dollar Shave Club's disruption of the razor market",
123
+ "Tesla's creation of the electric vehicle market"
124
+ ]
125
+ },
126
+ {
127
+ "pain_point_id": 3,
128
+ "description": "Difficulty with marketing and sales, and reaching the target audience due to lack of expertise and resources.",
129
+ "examples_from_reddit": [
130
+ {
131
+ "post_id": "1c63gqn",
132
+ "post_url": "https://www.reddit.com/r/Startup_Ideas/comments/1c63gqn/what_are_your_problems_as_a_startup/",
133
+ "text_snippet": "…hiring the right marketing expert is often difficult and expensive…managing the workflow…can also be overwhelming"
134
+ },
135
+ {
136
+ "post_id": "18ktmtr",
137
+ "post_url": "https://www.reddit.com/r/startups/comments/18ktmtr/solo_technical_founders_when_it_came_to_sales/",
138
+ "text_snippet": "I like coding and building new stuff but I don't enjoy as much the marketing and sales side."
139
+ }
140
+ ],
141
+ "impact_analysis": {
142
+ "frequency": "Very High",
143
+ "audience_size_affected": "Large",
144
+ "sentiment_trend": "Negative"
145
+ },
146
+ "actionable_recommendations": [
147
+ "Develop a clear marketing strategy with measurable goals and KPIs. Utilize frameworks like the AIDA model or the marketing mix (4Ps).",
148
+ "Explore cost-effective advertising channels like social media ads (e.g., Facebook Ads, Twitter Ads), influencer marketing, content marketing, or search engine optimization (SEO)."
149
+ ],
150
+ "tools_and_technologies": [
151
+ "HubSpot",
152
+ "Mailchimp",
153
+ "Buffer",
154
+ "Google Analytics",
155
+ "Canva"
156
+ ],
157
+ "methods": [
158
+ "Inbound marketing",
159
+ "Growth hacking",
160
+ "Sales funnels"
161
+ ],
162
+ "case_studies": [
163
+ "HubSpot's inbound marketing success",
164
+ "Dropbox's referral program"
165
+ ]
166
+ },
167
+ {
168
+ "pain_point_id": 4,
169
+ "description": "Difficulty building a strong team and acquiring talent.",
170
+ "examples_from_reddit": [
171
+ {
172
+ "post_id": "1dr44rv",
173
+ "post_url": "https://www.reddit.com/r/BlockchainStartups/comments/1dr44rv/advice_needed_launching_a_blockchain_startup_in/",
174
+ "text_snippet": "Building a Tech Team: Look for developers with experience in blockchain technologies…"
175
+ },
176
+ {
177
+ "post_id": "1c33eoz",
178
+ "post_url": "https://www.reddit.com/r/startups/comments/1c33eoz/what_has_been_your_biggest_challenge_when_growing/",
179
+ "text_snippet": "Getting a good team, a C level team... Still not able to and have been trying for 3 years now"
180
+ }
181
+ ],
182
+ "impact_analysis": {
183
+ "frequency": "High",
184
+ "audience_size_affected": "Large",
185
+ "sentiment_trend": "Very Negative"
186
+ },
187
+ "actionable_recommendations": [
188
+ "Develop a clear hiring strategy and define roles and responsibilities carefully. Use tools like job scorecards and structured interviews.",
189
+ "Offer competitive compensation and benefits. Consider equity options and flexible work arrangements."
190
+ ],
191
+ "tools_and_technologies": [
192
+ "LinkedIn",
193
+ "Indeed",
194
+ "Glassdoor",
195
+ "BambooHR"
196
+ ],
197
+ "methods": [
198
+ "Employer branding",
199
+ "Employee referral programs",
200
+ "Culture fit assessments"
201
+ ],
202
+ "case_studies": [
203
+ "Netflix's culture deck",
204
+ "Zappos' emphasis on company culture"
205
+ ]
206
+ }
207
+ ],
208
+ "opportunities_identified": [
209
+ "Focus on niche markets to reduce competition.",
210
+ "Highlight unique value proposition and early traction to attract investors."
211
+ ],
212
+ "conclusion": {
213
+ "summary_of_key_findings": [
214
+ "Funding, competition, marketing & sales, and team building are the top challenges for startups.",
215
+ "Early-stage and bootstrapped startups are particularly affected."
216
+ ],
217
+ "next_steps": [
218
+ "Prioritize product development based on identified pain points.",
219
+ "Develop targeted marketing strategies.",
220
+ "Continuously monitor Reddit and other relevant platforms for emerging trends."
221
+ ]
222
+ }
223
+ }
reddit/load_env.py CHANGED
@@ -36,3 +36,4 @@ reddit_username = os.getenv('REDDIT_USERNAME')
36
  # ScraperANT
37
  scraper_ant_api_key = os.getenv('SCRAPERANT_APIKEY')
38
  scraper_ant_api_key2 = os.getenv('SCRAPERANT_APIKEY2')
 
 
36
  # ScraperANT
37
  scraper_ant_api_key = os.getenv('SCRAPERANT_APIKEY')
38
  scraper_ant_api_key2 = os.getenv('SCRAPERANT_APIKEY2')
39
+ scraper_ant_api_key3 = os.getenv('SCRAPERANT_APIKEY3')
reddit/prompts.py CHANGED
@@ -119,7 +119,25 @@ Return the response in the **JSON format** provided below, and include data for
119
  Here is the required JSON format:
120
  {{
121
  "pain_point_analysis": {{
122
- "key_insights": ["insight1", "insight2",...],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "pain_points": [
124
  {{
125
  "category": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
@@ -142,12 +160,30 @@ Here is the required JSON format:
142
  .
143
  .
144
  ],
145
- "recommended_actions": [
146
- "Recommended solution/action 1",
147
- "Recommended solution/action 2",
148
- .
149
- .
150
- .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  ]
152
  }},
153
  .
@@ -155,16 +191,28 @@ Here is the required JSON format:
155
  .
156
  similarly, for all remaining categories
157
  ],
158
- "overall_insights": {{
159
- "top_pain_points": ["pain_point1", "pain_point2",...],
160
- "user_segments_most_affected": ["segment1", "segment2",...],
161
- "impact_on_product_development": [
162
- "Insight for development 1",
163
- "Insight for development 2",
164
  .
165
  .
166
  .
167
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  }}
169
  }}
170
  }}
 
119
  Here is the required JSON format:
120
  {{
121
  "pain_point_analysis": {{
122
+ "target_audience": {{
123
+ "industry": "Industry Name",
124
+ "primary_subreddits": [
125
+ "Subreddit 1",
126
+ "Subreddit 2",
127
+ .
128
+ .
129
+ .
130
+ ],
131
+ }},
132
+ "analysis_summary": {{
133
+ "key_findings": [
134
+ "Key finding 1",
135
+ "Key finding 2",
136
+ .
137
+ .
138
+ .
139
+ ]
140
+ }},
141
  "pain_points": [
142
  {{
143
  "category": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
 
160
  .
161
  .
162
  ],
163
+ "impact_analysis": {{
164
+ "frequency": "High/Medium/Low",
165
+ "audience_size_affected": "Large/Medium/Small",
166
+ }},
167
+ "actionable_recommendations": [
168
+ "Recommendation 1",
169
+ "Recommendation 2",
170
+ .
171
+ .
172
+ .
173
+ ],
174
+ "methods": [
175
+ "Method 1",
176
+ "Method 2",
177
+ .
178
+ .
179
+ .
180
+ ],
181
+ "case_studies": [
182
+ "Case study 1",
183
+ "Case study 2",
184
+ .
185
+ .
186
+ .
187
  ]
188
  }},
189
  .
 
191
  .
192
  similarly, for all remaining categories
193
  ],
194
+ "opportunities_identified": [
195
+ "Opportunity 1",
196
+ "Opportunity 2",
 
 
 
197
  .
198
  .
199
  .
200
+ ],
201
+ "conclusion": {{
202
+ "summary_of_key_findings": [
203
+ "Summary of Key Findings 1",
204
+ "Summary of Key Findings 2",
205
+ .
206
+ .
207
+ .
208
+ ],
209
+ "next_steps": [
210
+ "Next Step 1",
211
+ "Next Step 2",
212
+ .
213
+ .
214
+ .
215
+ ]
216
  }}
217
  }}
218
  }}
reddit/reddit_competitor_analysis.py CHANGED
@@ -127,8 +127,11 @@ async def getPostDataofCompetitor(fileName,user_query):
127
  actual_list.append(index)
128
  print("Fetched data for competitors")
129
  fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
130
- result=await preprocessingCompetitorsData(user_query=user_query,fileNames=fileNames,fileUniqueIds=actual_list)
131
- return result
 
 
 
132
 
133
 
134
  async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
@@ -141,14 +144,14 @@ async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
141
  await getPostComments(file_name=fileNames[i],is_for_competitor_analysis=True)
142
  json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
143
  c+=1
144
- # if json_data does contain "details" field, then skip this file
145
- if "details" in json_data.keys():
146
- continue
147
- # save json_data to json file
148
- with open(f"competitor_analysis_report_{fileUniqueIds[i]}.json", "w") as outfile:
149
- json.dump(json_data, outfile)
150
- print("Competitor Analysis Report",f"competitor_analysis_report_{fileUniqueIds[i]}.json")
151
- competitors_json_data.append(json_data)
152
 
153
  for file_path in fileNames:
154
  # Check if the file exists before attempting to delete
 
127
  actual_list.append(index)
128
  print("Fetched data for competitors")
129
  fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
130
+ if len(fileNames)!=0:
131
+ result=await preprocessingCompetitorsData(user_query=user_query,fileNames=fileNames,fileUniqueIds=actual_list)
132
+ return result
133
+ else:
134
+ return {'details':'No data found'}
135
 
136
 
137
  async def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
 
144
  await getPostComments(file_name=fileNames[i],is_for_competitor_analysis=True)
145
  json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
146
  c+=1
147
+ # if json_data does not contain "details" field, then only save the json
148
+ if "details" not in json_data.keys():
149
+ # save json_data to json file
150
+ with open(f"competitor_analysis_report_{fileUniqueIds[i]}.json", "w") as outfile:
151
+ json.dump(json_data, outfile)
152
+ print("Competitor Analysis Report",f"competitor_analysis_report_{fileUniqueIds[i]}.json")
153
+ competitors_json_data.append(json_data)
154
+
155
 
156
  for file_path in fileNames:
157
  # Check if the file exists before attempting to delete
reddit/reddit_functions.py CHANGED
@@ -27,8 +27,9 @@ async def getRedditData(user_query, search_keywords):
27
  # Step 3: Get final data
28
  try:
29
  print("fileNames", fileNames)
30
- getFinalData(user_query=user_query, filesNames=fileNames)
31
- successful_steps.append(('getFinalData',)) # Mark this step as successful
 
32
  except Exception as e:
33
  print(f"Failed at getFinalData: {e}")
34
 
 
27
  # Step 3: Get final data
28
  try:
29
  print("fileNames", fileNames)
30
+ res=getFinalData(user_query=user_query, filesNames=fileNames)
31
+ if res is True:
32
+ successful_steps.append(('getFinalData')) # Mark this step as successful
33
  except Exception as e:
34
  print(f"Failed at getFinalData: {e}")
35
 
reddit/reddit_pain_point_analysis.py CHANGED
@@ -17,7 +17,7 @@ def pain_point_analysis(user_query, fileName, uniqueFileId):
17
  pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
18
  generation_config = genai.GenerationConfig(response_mime_type="application/json") # Request JSON response
19
  model = genai.GenerativeModel(
20
- model_name="gemini-1.5-pro-002" if environment=="PRODUCTION" else "gemini-1.5-flash",
21
  generation_config=generation_config,
22
  )
23
 
 
17
  pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
18
  generation_config = genai.GenerationConfig(response_mime_type="application/json") # Request JSON response
19
  model = genai.GenerativeModel(
20
+ model_name="gemini-1.5-pro-002" if environment!="PRODUCTION" else "gemini-1.5-flash",
21
  generation_config=generation_config,
22
  )
23
 
reddit/reddit_scraper.ipynb CHANGED
@@ -7379,7 +7379,9 @@
7379
  "text": [
7380
  "post_elements 29\n",
7381
  "another_post_elements 20\n",
7382
- "49\n"
 
 
7383
  ]
7384
  }
7385
  ],
@@ -7415,13 +7417,15 @@
7415
  " \n",
7416
  " # Extract post title\n",
7417
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7418
- " \n",
 
 
7419
  " # Extract votes count\n",
7420
- " votes_element = post.find('faceplate-number', {'pretty': True})\n",
7421
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7422
  " \n",
7423
  " # Extract comments count\n",
7424
- " comments_element = post.find_all('faceplate-number', {'pretty': True})\n",
7425
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7426
  " \n",
7427
  " # Append data to the list\n",
@@ -7447,12 +7451,14 @@
7447
  " # Extract post title\n",
7448
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7449
  " \n",
 
 
7450
  " # Extract votes count\n",
7451
- " votes_element = post.find('faceplate-number', {'pretty': True})\n",
7452
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7453
  " \n",
7454
  " # Extract comments count\n",
7455
- " comments_element = post.find_all('faceplate-number', {'pretty': True})\n",
7456
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7457
  " \n",
7458
  " # Append data to the list\n",
@@ -7470,10 +7476,13 @@
7470
  "import pandas as pd \n",
7471
  "\n",
7472
  "df = pd.DataFrame(post_data_list)\n",
7473
- "df.to_csv(\"posts_data78.csv\",index=False)\n",
 
7474
  "# Print the list of posts data\n",
7475
  "# for idx, post_data in enumerate(post_data_list, 1):\n",
7476
- "# print(f\"Post {idx}: {post_data}\")\n"
 
 
7477
  ]
7478
  },
7479
  {
 
7379
  "text": [
7380
  "post_elements 29\n",
7381
  "another_post_elements 20\n",
7382
+ "49\n",
7383
+ "49\n",
7384
+ "len 49\n"
7385
  ]
7386
  }
7387
  ],
 
7417
  " \n",
7418
  " # Extract post title\n",
7419
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7420
+ "\n",
7421
+ " bottom_element = post.find('div', {'data-testid': 'search-counter-row'})\n",
7422
+ "\n",
7423
  " # Extract votes count\n",
7424
+ " votes_element = bottom_element.find('faceplate-number', {'pretty': True})\n",
7425
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7426
  " \n",
7427
  " # Extract comments count\n",
7428
+ " comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})\n",
7429
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7430
  " \n",
7431
  " # Append data to the list\n",
 
7451
  " # Extract post title\n",
7452
  " post_title = post_title_element.text.strip() if post_title_element else None\n",
7453
  " \n",
7454
+ " bottom_element = post.find('div', {'data-testid': 'search-counter-row'})\n",
7455
+ "\n",
7456
  " # Extract votes count\n",
7457
+ " votes_element = bottom_element.find('faceplate-number', {'pretty': True})\n",
7458
  " votes_count = votes_element.text.strip() if votes_element else None\n",
7459
  " \n",
7460
  " # Extract comments count\n",
7461
+ " comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})\n",
7462
  " comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None\n",
7463
  " \n",
7464
  " # Append data to the list\n",
 
7476
  "import pandas as pd \n",
7477
  "\n",
7478
  "df = pd.DataFrame(post_data_list)\n",
7479
+ "print(len(df))\n",
7480
+ "# df.to_csv(\"posts_data78.csv\",index=False)\n",
7481
  "# Print the list of posts data\n",
7482
  "# for idx, post_data in enumerate(post_data_list, 1):\n",
7483
+ "# print(f\"Post {idx}: {post_data}\")\n",
7484
+ "df=df[df[\"comment_count\"]!=0]\n",
7485
+ "print(\"len\",len(df))"
7486
  ]
7487
  },
7488
  {
reddit/reddit_search_scrapper.py CHANGED
@@ -19,18 +19,21 @@ def preProcessPostData(filesNames):
19
  df.to_csv(i, index=False)
20
 
21
  def getFinalData(user_query,filesNames):
22
- preProcessPostData(filesNames=filesNames)
23
- # files_name=["posts_data_0.csv","posts_data_1.csv","posts_data_2.csv"]
24
- final_df = topic_sort(path1=filesNames[0],path2= filesNames[1],path3= filesNames[2],query= user_query,)
25
- for file_path in filesNames:
26
- # Check if the file exists before attempting to delete
27
- if os.path.exists(file_path):
28
- os.remove(file_path)
29
- print("File deleted successfully")
30
- else:
31
- print("File does not exist")
32
- final_df.to_csv(filesNames[0], index=False)
33
-
34
- print("Data saved to ",filesNames[0])
35
-
 
 
 
36
 
 
19
  df.to_csv(i, index=False)
20
 
21
  def getFinalData(user_query,filesNames):
22
+ try:
23
+ preProcessPostData(filesNames=filesNames)
24
+ # files_name=["posts_data_0.csv","posts_data_1.csv","posts_data_2.csv"]
25
+ final_df = topic_sort(path1=filesNames[0],path2= filesNames[1],path3= filesNames[2],query= user_query,)
26
+ for file_path in filesNames:
27
+ # Check if the file exists before attempting to delete
28
+ if os.path.exists(file_path):
29
+ os.remove(file_path)
30
+ print("File deleted successfully")
31
+ else:
32
+ print("File does not exist")
33
+ final_df.to_csv(filesNames[0], index=False)
34
+
35
+ print("Data saved to ",filesNames[0])
36
+ return True
37
+ except:
38
+ return False
39
 
reddit/scraping.py CHANGED
@@ -2,6 +2,8 @@
2
  Only Scraping related code.
3
  '''
4
  import asyncio
 
 
5
  import asyncpraw
6
  import json
7
  import time
@@ -10,7 +12,7 @@ import base64
10
  import re
11
  from asyncpraw.models import Comment
12
  from reddit.reddit_utils import topic_sort
13
- from reddit.load_env import reddit_client_id, reddit_client_secret,reddit_password,reddit_user_agent,reddit_username,scraper_ant_api_key,scraper_ant_api_key2
14
  import http.client
15
  from bs4 import BeautifulSoup
16
 
@@ -47,12 +49,13 @@ def getDFofSearchPostData(htmlContent):
47
  # Extract post title
48
  post_title = post_title_element.text.strip() if post_title_element else None
49
 
 
50
  # Extract votes count
51
- votes_element = post.find('faceplate-number', {'pretty': True})
52
  votes_count = votes_element.text.strip() if votes_element else None
53
 
54
  # Extract comments count
55
- comments_element = post.find_all('faceplate-number', {'pretty': True})
56
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
57
 
58
  # Append data to the list
@@ -77,13 +80,14 @@ def getDFofSearchPostData(htmlContent):
77
 
78
  # Extract post title
79
  post_title = post_title_element.text.strip() if post_title_element else None
80
-
 
81
  # Extract votes count
82
- votes_element = post.find('faceplate-number', {'pretty': True})
83
  votes_count = votes_element.text.strip() if votes_element else None
84
 
85
  # Extract comments count
86
- comments_element = post.find_all('faceplate-number', {'pretty': True})
87
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
88
 
89
  # Append data to the list
@@ -116,10 +120,13 @@ def getHtmlContent(search_keyword,forCompetitorAnalysis=False,even=False):
116
  '''
117
  try:
118
  base64_snippet = base64.b64encode(js_snippet.encode()).decode()
119
- conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key if even else scraper_ant_api_key2}&js_snippet={base64_snippet}")
120
  except:
121
- base64_snippet = base64.b64encode(js_snippet.encode()).decode()
122
- conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key if even==True else scraper_ant_api_key2}&js_snippet={base64_snippet}")
 
 
 
123
  else:
124
  js_snippet = '''
125
  window.scrollTo(0,document.body.scrollHeight);
@@ -155,11 +162,13 @@ def retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=False,tries=2,ev
155
  # 1. Get Search Post Data
156
  async def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False,even=False):
157
  htmlContent = getHtmlContent(search_keyword,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
158
- print("htmlcontent",htmlContent[:100])
159
- htmlContent = retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
160
  if htmlContent is None:
161
- return None
162
- print("htmlcontent",htmlContent[:100])
 
 
 
163
  time.sleep(1)
164
  print("reached this step")
165
  df = getDFofSearchPostData(htmlContent)
@@ -210,52 +219,70 @@ async def process_comment(comment, reply_limit):
210
 
211
  return comment_data
212
 
213
-
214
- async def fetch_submission_comments(url, reddit,is_for_competitor_analysis):
215
-
216
-
217
  """
218
- Fetch comments from a single Reddit submission given its URL.
 
 
 
 
 
 
 
 
 
219
  """
220
- try:
221
- # Use asyncio.wait_for to add a timeout for loading the submission
222
- submission = await asyncio.wait_for(reddit.submission(url=url), timeout=30)
223
 
224
- # Load additional submission data
225
- await submission.load()
 
 
226
 
227
- # Expand comments up to the specified limit
228
- await submission.comments.replace_more(limit=2)
229
 
230
- # Initialize variables for comment processing
231
- comments_data = []
232
- comment_queue = list(submission.comments)
233
- comment_count = 0
234
- threshold = 20 if is_for_competitor_analysis else 40
235
 
236
- while comment_queue:
237
- if comment_count >= threshold:
238
- break
239
- comment = comment_queue.pop(0)
240
- if isinstance(comment, Comment):
241
- comment_data = await process_comment(
242
- comment, reply_limit=2 if is_for_competitor_analysis else 3
243
- )
244
- comments_data.append(comment_data)
245
- comment_count += 1
246
 
247
- # Return processed comments
248
- return {"comments": comments_data,"description":submission.selftext if submission.selftext else ""}
 
 
 
 
 
 
 
 
249
 
250
- except asyncio.TimeoutError:
251
- print(f"Skipping due to timeout: {url}")
252
- except Exception as e:
253
- print(f"Skipping due to error: {url} - {e}")
 
254
 
255
- # Return None if an error occurs
256
- return None
 
 
257
 
 
 
 
 
 
258
 
 
 
 
259
  async def getPostComments(file_name, is_for_competitor_analysis=False):
260
  """
261
  Fetch comments for posts listed in the CSV file and save the processed data.
 
2
  Only Scraping related code.
3
  '''
4
  import asyncio
5
+ import logging
6
+ import random
7
  import asyncpraw
8
  import json
9
  import time
 
12
  import re
13
  from asyncpraw.models import Comment
14
  from reddit.reddit_utils import topic_sort
15
+ from reddit.load_env import reddit_client_id, reddit_client_secret,reddit_password,reddit_user_agent,reddit_username,scraper_ant_api_key,scraper_ant_api_key2,scraper_ant_api_key3
16
  import http.client
17
  from bs4 import BeautifulSoup
18
 
 
49
  # Extract post title
50
  post_title = post_title_element.text.strip() if post_title_element else None
51
 
52
+ bottom_element = post.find('div', {'data-testid': 'search-counter-row'})
53
  # Extract votes count
54
+ votes_element = bottom_element.find('faceplate-number', {'pretty': True})
55
  votes_count = votes_element.text.strip() if votes_element else None
56
 
57
  # Extract comments count
58
+ comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})
59
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
60
 
61
  # Append data to the list
 
80
 
81
  # Extract post title
82
  post_title = post_title_element.text.strip() if post_title_element else None
83
+
84
+ bottom_element = post.find('div', {'data-testid': 'search-counter-row'})
85
  # Extract votes count
86
+ votes_element = bottom_element.find('faceplate-number', {'pretty': True})
87
  votes_count = votes_element.text.strip() if votes_element else None
88
 
89
  # Extract comments count
90
+ comments_element = bottom_element.find_all('faceplate-number', {'pretty': True})
91
  comments_count = comments_element[1].text.strip() if len(comments_element) > 1 else None
92
 
93
  # Append data to the list
 
120
  '''
121
  try:
122
  base64_snippet = base64.b64encode(js_snippet.encode()).decode()
123
+ conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key3}&js_snippet={base64_snippet}")
124
  except:
125
+ try:
126
+ base64_snippet = base64.b64encode(js_snippet.encode()).decode()
127
+ conn.request("GET", f"/v2/general?url={encoded_url}&x-api-key={scraper_ant_api_key3}&js_snippet={base64_snippet}")
128
+ except:
129
+ return ''
130
  else:
131
  js_snippet = '''
132
  window.scrollTo(0,document.body.scrollHeight);
 
162
  # 1. Get Search Post Data
163
  async def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False,even=False):
164
  htmlContent = getHtmlContent(search_keyword,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
165
+ print("htmlcontentBefore",htmlContent[:100])
 
166
  if htmlContent is None:
167
+ htmlContent = retryCheck(search_keyword,htmlContent,forCompetitorAnalysis=forCompetitorAnalysis,even=even)
168
+
169
+ if htmlContent is None:
170
+ return None
171
+ print("htmlcontentAfter",htmlContent[:100])
172
  time.sleep(1)
173
  print("reached this step")
174
  df = getDFofSearchPostData(htmlContent)
 
219
 
220
  return comment_data
221
 
222
+ async def fetch_submission_comments(url, reddit, is_for_competitor_analysis, max_retries=3):
 
 
 
223
  """
224
+ Fetch comments from a single Reddit submission given its URL with retry mechanism.
225
+
226
+ Args:
227
+ url (str): The URL of the Reddit submission
228
+ reddit (Reddit): Authenticated Reddit instance
229
+ is_for_competitor_analysis (bool): Flag to modify comment fetching behavior
230
+ max_retries (int, optional): Maximum number of retry attempts. Defaults to 3.
231
+
232
+ Returns:
233
+ dict or None: Processed comments and submission description, or None if failed
234
  """
235
+ # Configure logging
236
+ logger = logging.getLogger(__name__)
 
237
 
238
+ for attempt in range(max_retries):
239
+ try:
240
+ # Use asyncio.wait_for to add a timeout for loading the submission
241
+ submission = await asyncio.wait_for(reddit.submission(url=url), timeout=30)
242
 
243
+ # Load additional submission data
244
+ await submission.load()
245
 
246
+ # Expand comments up to the specified limit
247
+ await submission.comments.replace_more(limit=2)
 
 
 
248
 
249
+ # Initialize variables for comment processing
250
+ comments_data = []
251
+ comment_queue = list(submission.comments)
252
+ comment_count = 0
253
+ threshold = 20 if is_for_competitor_analysis else 40
 
 
 
 
 
254
 
255
+ while comment_queue:
256
+ if comment_count >= threshold:
257
+ break
258
+ comment = comment_queue.pop(0)
259
+ if isinstance(comment, Comment):
260
+ comment_data = await process_comment(
261
+ comment, reply_limit=2 if is_for_competitor_analysis else 3
262
+ )
263
+ comments_data.append(comment_data)
264
+ comment_count += 1
265
 
266
+ # Return processed comments
267
+ return {
268
+ "comments": comments_data,
269
+ "description": submission.selftext if submission.selftext else ""
270
+ }
271
 
272
+ except asyncio.TimeoutError:
273
+ logger.warning(f"Timeout on attempt {attempt + 1} for URL: {url}")
274
+ except Exception as e:
275
+ logger.error(f"Error on attempt {attempt + 1} for URL {url}: {e}")
276
 
277
+ # Implement exponential backoff with jitter
278
+ if attempt < max_retries - 1:
279
+ wait_time = (2 ** attempt)+1
280
+ logger.info(f"Waiting {wait_time:.2f} seconds before retry")
281
+ await asyncio.sleep(wait_time)
282
 
283
+ # Log final failure if all retries are exhausted
284
+ logger.error(f"Failed to fetch comments for URL after {max_retries} attempts: {url}")
285
+ return None
286
  async def getPostComments(file_name, is_for_competitor_analysis=False):
287
  """
288
  Fetch comments for posts listed in the CSV file and save the processed data.
test.py CHANGED
@@ -1,7 +1,11 @@
1
 
2
  # from databases.firebase_db import get_firebase_user_from_token
 
3
 
4
 
 
 
 
5
  # get_firebase_user_from_token(token="eyJhbGciOiJSUzI1NiIsImtpZCI6IjNmZDA3MmRmYTM4MDU2NzlmMTZmZTQxNzM4YzJhM2FkM2Y5MGIyMTQiLCJ0eXAiOiJKV1QifQ.eyJuYW1lIjoiaG9uZXkgYmFuc2FsIiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hL0FDZzhvY0tfUWpyTmtyeWhPbVd1eVkzTHZvTDN6YjcyNGstQzlaNGZnbjI1M21FdU1ndWFXbEE9czk2LWMiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vbmV4dGFuYWx5dGljcy0xM2JmYiIsImF1ZCI6Im5leHRhbmFseXRpY3MtMTNiZmIiLCJhdXRoX3RpbWUiOjE3MzMzOTY1MTQsInVzZXJfaWQiOiJIb3VvWjJOSWR5YkZZM05WbmtXRFozYlRBWjAzIiwic3ViIjoiSG91b1oyTklkeWJGWTNOVm5rV0RaM2JUQVowMyIsImlhdCI6MTczMzM5NjUxNCwiZXhwIjoxNzMzNDAwMTE0LCJlbWFpbCI6ImhvbmV5YmFuc2FsMjk2OEBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZmlyZWJhc2UiOnsiaWRlbnRpdGllcyI6eyJnb29nbGUuY29tIjpbIjExNTE0MDc4NTM4OTUzMTUxNDIyMSJdLCJlbWFpbCI6WyJob25leWJhbnNhbDI5NjhAZ21haWwuY29tIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ29vZ2xlLmNvbSJ9fQ.j15QwCVrfrF05m3Oq_Nr9WCGI4JNNtK9LTg2TkkjdQ592sDR78WyizKE-GDug1pxYEE36uPt2lARmMIid1xsH4ITwYLeCU7BoTEyHkxB8HknnvQC6VKLefxy9xFopqFwjdE90tPL2GkcwSFLw-_R5BwZ2QUOiK_8Sq48MfY08AiSwOmHgv1c1TRt4_XL0M-BvhxOGIqVappsm-x4iu75-81oiWA5eaY_HqzvruohYOMoKitVAN4NGnaxLecCE8GguByMIQ9mlc1lypqg6qGy16gYQotPEVfABCmk2bYY60OjdDXCGVUwSWO4BNSOLdSbcbiE_qRydBoSezpH262z2A")
6
 
7
  # from reddit.reddit_competitor_analysis import getCompetitorAnalysisReport
 
1
 
2
  # from databases.firebase_db import get_firebase_user_from_token
3
+ from apis.reddit_apis import call_get_competitor_analysis
4
 
5
 
6
+ competitor_analysis_result =call_get_competitor_analysis(user_query='significant challenges facing startups in 2024',fileName='posts_data_1734025420988523.csv')
7
+ print("competitor_analysis_result",competitor_analysis_result)
8
+
9
  # get_firebase_user_from_token(token="eyJhbGciOiJSUzI1NiIsImtpZCI6IjNmZDA3MmRmYTM4MDU2NzlmMTZmZTQxNzM4YzJhM2FkM2Y5MGIyMTQiLCJ0eXAiOiJKV1QifQ.eyJuYW1lIjoiaG9uZXkgYmFuc2FsIiwicGljdHVyZSI6Imh0dHBzOi8vbGgzLmdvb2dsZXVzZXJjb250ZW50LmNvbS9hL0FDZzhvY0tfUWpyTmtyeWhPbVd1eVkzTHZvTDN6YjcyNGstQzlaNGZnbjI1M21FdU1ndWFXbEE9czk2LWMiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vbmV4dGFuYWx5dGljcy0xM2JmYiIsImF1ZCI6Im5leHRhbmFseXRpY3MtMTNiZmIiLCJhdXRoX3RpbWUiOjE3MzMzOTY1MTQsInVzZXJfaWQiOiJIb3VvWjJOSWR5YkZZM05WbmtXRFozYlRBWjAzIiwic3ViIjoiSG91b1oyTklkeWJGWTNOVm5rV0RaM2JUQVowMyIsImlhdCI6MTczMzM5NjUxNCwiZXhwIjoxNzMzNDAwMTE0LCJlbWFpbCI6ImhvbmV5YmFuc2FsMjk2OEBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZmlyZWJhc2UiOnsiaWRlbnRpdGllcyI6eyJnb29nbGUuY29tIjpbIjExNTE0MDc4NTM4OTUzMTUxNDIyMSJdLCJlbWFpbCI6WyJob25leWJhbnNhbDI5NjhAZ21haWwuY29tIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ29vZ2xlLmNvbSJ9fQ.j15QwCVrfrF05m3Oq_Nr9WCGI4JNNtK9LTg2TkkjdQ592sDR78WyizKE-GDug1pxYEE36uPt2lARmMIid1xsH4ITwYLeCU7BoTEyHkxB8HknnvQC6VKLefxy9xFopqFwjdE90tPL2GkcwSFLw-_R5BwZ2QUOiK_8Sq48MfY08AiSwOmHgv1c1TRt4_XL0M-BvhxOGIqVappsm-x4iu75-81oiWA5eaY_HqzvruohYOMoKitVAN4NGnaxLecCE8GguByMIQ9mlc1lypqg6qGy16gYQotPEVfABCmk2bYY60OjdDXCGVUwSWO4BNSOLdSbcbiE_qRydBoSezpH262z2A")
10
 
11
  # from reddit.reddit_competitor_analysis import getCompetitorAnalysisReport