honeybansal23 commited on
Commit
c3837c5
·
1 Parent(s): 3e5b99f

added code

Browse files
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Set the working directory to /app
WORKDIR /app

# Install build tooling (for C/C++ extensions) and ca-certificates
RUN apt-get update -qq \
    && apt-get install -y \
    build-essential \
    python3-dev \
    ca-certificates \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy the current directory contents into the container at /app
COPY . /app

# Create and set up a virtual environment inside the container
RUN python -m venv /app/venv \
    && /app/venv/bin/pip install --no-cache-dir --upgrade pip \
    && /app/venv/bin/pip install --no-cache-dir -r requirements.txt

# Ensure the virtual environment's Python and pip are used by default
ENV PATH="/app/venv/bin:$PATH"

# Document the listening port.
# FIX: was EXPOSE 8000 while uvicorn below binds --port 7860.
EXPOSE 7860

# Run the application when the container launches.
# FIX: bind 0.0.0.0 so the server is reachable from outside the container;
# the previous 127.0.0.23 is a loopback address, which made the published
# port unreachable from the host.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
venv/.gitignore ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environment
7
+ venv/
8
+ ENV/
9
+ env/
10
+ .venv/
11
+ .ENV/
12
+
13
+ # Jupyter Notebook checkpoints
14
+ .ipynb_checkpoints
15
+
16
+ # VS Code files
17
+ .vscode/
18
+
19
+ # PyCharm files
20
+ .idea/
21
+
22
+ # Distribution / Packaging
23
+ build/
24
+ dist/
25
+ Lib/
26
+ Include/
27
+ Scripts/
28
+ *.egg-info/
29
+ *.egg
30
+ *.whl
31
+
32
+ # Logs and debug files
33
+ *.log
34
+
35
+ # Test results
36
+ *.out
37
+ *.coverage
38
+ .coverage.*
39
+
40
+ # Environment variables and settings
41
+ .env
42
+ *.env
43
+
44
+ # Cache files
45
+ *.cache
46
+ .cache/
47
+ pip-log.txt
48
+ pip-delete-this-directory.txt
49
+
50
+ # macOS files
51
+ .DS_Store
52
+
53
+ # Windows files
54
+ Thumbs.db
venv/app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from collections import deque
import asyncio
from asyncio import TimeoutError

import google.generativeai as genai
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

from models.reddit_models import RedditPostDataModel
from reddit.api_keys import api_key, api_key2
from reddit.reddit_functions import getRedditData
from reddit.reddit_gemini import getKeywords
from reddit.reddit_pain_point_analysis import pain_point_analysis
from reddit.reddit_search_scrapper import getCompetitorAnalysisData
from utils import time_execution

# FIX: the module previously created FastAPI() twice — the second instance
# replaced the first one AFTER the CORS middleware had been registered,
# silently discarding the CORS configuration. Keep exactly one instance.
app = FastAPI()

# CORS configuration
origins = [
    "*",
    # Add more origins as needed
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,      # Allows specified origins
    allow_credentials=False,
    allow_methods=["*"],        # Allows all HTTP methods
    allow_headers=["*"],        # Allows all headers
)


class Config:
    """Process-wide mutable state shared by the endpoints."""

    def __init__(self):
        self.called = False        # True once genai.configure() has run
        self.retry = False         # when True, configure with the fallback key
        self.chat_session = None   # set by /getPainPointAnalysis, reused by /getCompetitorAnalysis
        self.queue = deque([])

    def configure(self, api_key):
        """Point the google-generativeai client at the given API key."""
        genai.configure(api_key=api_key)


config = Config()


@app.get("/")
@time_execution
def read_root():
    """Health check; lazily configures the Gemini client on first call."""
    if not config.called:
        print("called", config.called)
        if config.retry:
            config.configure(api_key=api_key2)
        else:
            config.configure(api_key=api_key)
        config.called = True
    return {"message": "Hello, World!"}


# Timeout handler: check if getKeywords takes too long
async def fetch_keywords_with_timeout(user_query: str, timeout: int = 60, retry: bool = True):
    """Run getKeywords in a worker thread with a timeout.

    On the first timeout, switch to the secondary API key and retry once.

    Raises:
        HTTPException(504): if the retry also times out.
    """
    try:
        return await asyncio.wait_for(asyncio.to_thread(getKeywords, user_query), timeout=timeout)
    except TimeoutError:
        print("Timeout exceeded, switching to api_key2")
        if not retry:
            # Already retried once — report the failure upstream.
            raise HTTPException(status_code=504, detail="Request timed out even after retrying")
        config.called = False
        config.retry = True
        read_root()  # re-run lazy configuration with the fallback key
        # retry=False prevents an infinite retry loop
        return await fetch_keywords_with_timeout(user_query, timeout, retry=False)


@app.get("/keywords")
@time_execution
async def fetch_keywords(user_query: str):
    """Return keyword suggestions for user_query (with timeout + one retry)."""
    if not user_query:
        raise HTTPException(status_code=400, detail="User query must not be empty")
    return await fetch_keywords_with_timeout(user_query=user_query)


@app.post("/getRedditPostsData")
@time_execution
def getRedditPostsData(request: RedditPostDataModel):
    """Requires user_query and search_keywords as arguments.
    Steps involved in this api:
    1. get posts data from reddit
    2. filter top 18 posts
    3. get comments data
    4. get sentiment data
    """
    # FIX: validate OUTSIDE the broad try so a 400 is not caught by
    # `except Exception` and re-raised as a 500.
    if not request.user_query:
        raise HTTPException(status_code=400, detail="User query must not be empty")
    if not request.search_keywords:
        raise HTTPException(status_code=400, detail="Search keywords must not be empty")
    print("user_query", request.user_query, "search_keywords", request.search_keywords)
    try:
        return getRedditData(user_query=request.user_query, search_keywords=request.search_keywords)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to run getRedditPostsData : {e}")


# pain point analysis api which takes user_query and fileName as arguments
@app.get("/getPainPointAnalysis")
@time_execution
def getPainPointAnalysis(user_query: str, fileName: str, uniqueFileId: str):
    """Run pain-point analysis on a scraped CSV and cache the resulting
    chat session for the follow-up /getCompetitorAnalysis call."""
    # FIX: validation moved outside the try (see getRedditPostsData).
    if not user_query:
        raise HTTPException(status_code=400, detail="User query must not be empty")
    if not fileName:
        raise HTTPException(status_code=400, detail="fileName must not be empty")
    print("user_query", user_query, "fileName", fileName)
    try:
        result = pain_point_analysis(user_query=user_query, fileName=fileName, uniqueFileId=uniqueFileId)
        config.chat_session = result[1]  # reused by /getCompetitorAnalysis
        return result[0]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to run getPainPointAnalysis : {e}")


# pain point analysis api which takes user_query and fileName as arguments
@app.get("/getCompetitorAnalysis")
@time_execution
def getCompetitorAnalysis(user_query: str, fileName: str, isSolo: bool = True):
    """Run competitor analysis, optionally reusing the cached chat session.

    FIX: isSolo is now typed as bool so FastAPI coerces `?isSolo=false`;
    previously the untyped query value arrived as a non-empty (truthy) string.
    """
    if not user_query:
        raise HTTPException(status_code=400, detail="User query must not be empty")
    if not fileName:
        raise HTTPException(status_code=400, detail="fileName must not be empty")
    print("user_query", user_query, "isSolo", isSolo, "fileName", fileName)
    try:
        return getCompetitorAnalysisData(user_query=user_query, isSolo=isSolo,
                                         chat_session=config.chat_session, fileName=fileName)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to run getCompetitorAnalysis : {e}")

# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run("main:app", host="0.0.0.0", workers=1, reload=True, port=7860)
venv/models/reddit_models.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from typing import List
from pydantic import BaseModel

class RedditPostDataModel(BaseModel):
    """Request body for the POST /getRedditPostsData endpoint."""
    # Free-text query the user typed; drives scraping and filtering.
    user_query: str
    # Reddit search phrases to scrape, typically produced by GET /keywords.
    search_keywords: List[str]
venv/posts_data_1732244547776200.csv ADDED
The diff for this file is too large to render. See raw diff
 
venv/pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Users\HP\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0
2
+ include-system-site-packages = false
3
+ version = 3.11.9
4
+ executable = C:\Users\HP\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe
5
+ command = C:\Users\HP\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m venv D:\development\nextAnalytics\venv
venv/reddit/api_keys.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file before reading the keys
# (a no-op if the file is absent).
load_dotenv()

# Pool of Gemini API keys. app.py uses api_key/api_key2 as primary/fallback;
# reddit_competitor_analysis.py rotates api_key3..api_key8 across report
# requests. Any of these is None if the environment variable is unset.
# NOTE(review): env var names are inconsistent (FOURTH vs FIVE/SIX/...) —
# confirm they match the deployed environment.
api_key = os.getenv('GEMINI_API_KEY')
api_key2 = os.getenv('GEMINI_SECOND_API_KEY')
api_key3 = os.getenv('GEMINI_THIRD_API_KEY')
api_key4 = os.getenv('GEMINI_FOURTH_API_KEY')
api_key5 = os.getenv('GEMINI_FIVE_API_KEY')
api_key6 = os.getenv('GEMINI_SIX_API_KEY')
api_key7 = os.getenv('GEMINI_SEVEN_API_KEY')
api_key8 = os.getenv('GEMINI_EIGHT_API_KEY')
venv/reddit/prompts.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def featureAnalysisPrompt():
    """Return the JSON skeleton the model must fill for per-feature
    competitor analysis (sentiment, emotions, personas, adoption,
    usage, SWOT, pain points and improvement suggestions).

    FIX: the template contains no runtime interpolation, so it is now a
    plain string literal; the original declared it as an f-string and
    therefore had to escape every brace as `{{`/`}}`. The rendered text
    is identical.
    """
    return '''
    {
        "competitor": "COMPETITOR_NAME",
        "user_query": "USER_QUERY",
        "competitor_data_source": "DATA_SOURCE",
        "overall_sentiment": {
            "positive_percentage": "PERCENTAGE",
            "negative_percentage": "PERCENTAGE",
            "neutral_percentage": "PERCENTAGE"
        },
        "features": [
            {
                "feature": "FEATURE_NAME",
                "feature_description": "FEATURE_DESCRIPTION",
                "sentiment_analysis": {
                    "positive": "PERCENTAGE",
                    "negative": "PERCENTAGE",
                    "neutral": "PERCENTAGE"
                },
                "emotion_analysis": {
                    "joy": "PERCENTAGE",
                    "anger": "PERCENTAGE",
                    "disappointment": "PERCENTAGE",
                    "surprise": "PERCENTAGE"
                },
                "user_personas": {
                    "age_groups": [
                        {
                            "age_range": "AGE_RANGE",
                            "positive_sentiment_count": "COUNT",
                            "negative_sentiment_count": "COUNT",
                            "neutral_sentiment_count": "COUNT"
                        },
                        {
                            "age_range": "AGE_RANGE",
                            "positive_sentiment_count": "COUNT",
                            "negative_sentiment_count": "COUNT",
                            "neutral_sentiment_count": "COUNT"
                        }
                    ],
                    "gender": [
                        {
                            "gender": "GENDER_TYPE",
                            "positive_sentiment_count": "COUNT",
                            "negative_sentiment_count": "COUNT",
                            "neutral_sentiment_count": "COUNT"
                        }
                    ]
                },
                "adoption_rate": {
                    "early_adopters": "PERCENTAGE",
                    "mainstream_users": "PERCENTAGE",
                    "dissatisfied_users": "PERCENTAGE"
                },
                "usage_behavior": {
                    "frequency_of_use": {
                        "daily": "COUNT",
                        "weekly": "COUNT",
                        "occasionally": "COUNT",
                        "never": "COUNT"
                    },
                    "engagement_level": {
                        "high": "COUNT",
                        "medium": "COUNT",
                        "low": "COUNT"
                    }
                },
                "feature_comparison": {
                    "strengths": [
                        "STRENGTH_1",
                        "STRENGTH_2"
                    ],
                    "weaknesses": [
                        "WEAKNESS_1",
                        "WEAKNESS_2"
                    ],
                    "opportunities_for_improvement": [
                        "OPPORTUNITY_1",
                        "OPPORTUNITY_2"
                    ],
                    "threats": [
                        "THREAT_1",
                        "THREAT_2"
                    ]
                },
                "pain_points": [
                    {
                        "issue": "ISSUE_DESCRIPTION",
                        "impact": "ISSUE_IMPACT",
                        "mentions": "COUNT",
                        "user_sentiment": "SENTIMENT"
                    }
                ],
                "feature_improvements_suggestions": [
                    {
                        "suggestion": "SUGGESTION_DESCRIPTION",
                        "priority": "PRIORITY"
                    }
                ]
            }
        ],
        "conclusion": {
            "summary": "SUMMARY_OF_KEY_FINDINGS",
            "recommendations": [
                "RECOMMENDATION_1",
                "RECOMMENDATION_2"
            ]
        }
    }
    '''
112
+
113
def getPainPointAnalysisPrompt(user_query):
    """Build the two-step pain-point-analysis prompt for *user_query*.

    Step 1 asks the model for ranked two-word pain-point categories;
    step 2 asks for a full per-category JSON analysis of the scraped
    CSV. Literal braces in the JSON skeletons are doubled because this
    is an f-string.
    """
    return f"""
    1. analyze the given csv data of reddit posts with sentiments and provide a list of pain point analysis categories for the user query ={user_query}. categories in pain point analysis should be set of 2 keyword only which emphasis the abstract of pain point and also rank them based on importance and relevancy to user query
    return categories title only which attracts the readers . get your data in json format {{
    "pain_point_categories": [
        "category1",
        "category2",
        "category3",
        .
        .
        .
    ]
    }}
    2. For all the pain points categories that you got from first step, analyze the file_with_sentiment.csv data and peform pain point analysis on it and return the response in JSON format provided below for all categories, nothing else.
    {{
    "pain_point_analysis": {{
        "key_insights": ["insight1", "insight2",...],
        "pain_points": [
            {{
                "category1": "Category of Pain Point (e.g., Product Issues, Customer Service, Pricing)",
                "pain_point": "Brief description of the issue (e.g., Slow Performance)",
                "frequency": "number",
                "sentiment_analysis": {{
                    "positive": "number",
                    "neutral": "number",
                    "negative": "number"
                }},
                "related_features": ["feature1", "feature2",...],
                "examples": [
                    {{
                        "post_title": "Title of the post/comment",
                        "comment": "Description or excerpt of user comment",
                        "upvotes": "number",
                        "post_url": "URL of the original post/comment"
                    }},
                    .
                    .
                    .
                ],
                "recommended_actions": [
                    "Recommended solution/action 1",
                    "Recommended solution/action 2",
                    .
                    .
                    .
                ]
            }},
            similarly for remaining categories
            .
            .
            .
        ],
        "overall_insights": {{
            "top_pain_points": ["pain_point1", "pain_point2",...],
            "user_segments_most_affected": ["segment1", "segment2",...],
            "impact_on_product_development": [
                "Insight for development 1",
                "Insight for development 2",
                .
                .
                .
            ]
        }}
    }}
    }}
    """
179
+
180
+
181
def getKeywordsPrompt(user_query):
    """Build the query-enhancement / keyword-extraction prompt for
    *user_query*.

    Asks the model to enhance the query, extract keywords, build 2-3
    keyword combinations and return everything as a single JSON object.
    Literal braces are doubled because this is an f-string.
    """
    return f"""1. Enhance the user query ="{user_query}" for better relevance to the main intent of query
    get the enhanced query in JSON:
    {{
        "query": "enhanced_user_query"
    }}.
    2. Extract keywords from the enhanced query and get them in JSON:
    {{
        "keywords": ["keyword1", ...]
    }}.
    3. Create relevant combinations of 2-3 keywords for the enhanced query context.
    4. Get 3 keyword combinations with 95% relevancy to the original user query in JSON:
    {{
        "top_3_combinations": ["combination phrase 1", "combination phrase 2", "combination phrase 3"]
    }}.
    5. Return the final output in JSON format:
    {{
        "query": "enhanced_user_query",
        "keywords": ["keyword1", ...],
        "top_3_combinations": ["combination phrase 1", "combination phrase 2", "combination phrase 3"]
    }}.
    """
203
+
204
def getCompetitorPrompt(user_query):
    """Build the combined pain-point + competitor-analysis prompt for
    *user_query*.

    Step 1 embeds the full pain-point prompt (getPainPointAnalysisPrompt);
    step 2 specifies the competitor-analysis JSON schema the model must
    return. Literal braces are doubled because this is an f-string.
    """
    pain_point_prompt = getPainPointAnalysisPrompt(user_query = user_query)
    return f'''
    1.{pain_point_prompt}
    2. Perform competitor analysis on the given csv file and return your final output in JSON format:
    {{
    "competitor_analysis": {{
        "competitor_name": "<CompetitorName>",
        "overview": {{
            "date_range": "<Start_Date> to <End_Date>",
            "total_posts_analyzed": "<Total_Posts>",
            "total_comments_analyzed": "<Total_Comments>"
        }},
        "market_sentiment": {{
            "overall": {{
                "positive": "<Positive_Percentage>",
                "neutral": "<Neutral_Percentage>",
                "negative": "<Negative_Percentage>"
            }},
            "trend_over_time": {{
                "<Year-Month>": {{
                    "positive": "<Positive_Percentage>",
                    "neutral": "<Neutral_Percentage>",
                    "negative": "<Negative_Percentage>"
                }},
                "<Year-Month>": {{
                    "positive": "<Positive_Percentage>",
                    "neutral": "<Neutral_Percentage>",
                    "negative": "<Negative_Percentage>"
                }}
            }}
        }},
        "pain_points": {{pain point from step 1}},
        "features_and_differentiators": [
            {{
                "feature": "<Feature_Name>",
                "sentiment": "<Sentiment_Type>",
                "mentions": "<Mentions>",
                "related_comments": [
                    {{
                        "comment": "<Comment_Text>",
                        "upvotes": "<Upvotes>",
                        "post_url": "<Post_URL>"
                    }}
                    .
                    .
                    .
                ]
            }}
            .
            .
            .
        ],
        "sentiment_by_feature": {{
            "<Feature_Name>": {{
                "positive": "<Positive_Percentage>",
                "neutral": "<Neutral_Percentage>",
                "negative": "<Negative_Percentage>"
            }},
            "<Feature_Name>": {{
                "positive": "<Positive_Percentage>",
                "neutral": "<Neutral_Percentage>",
                "negative": "<Negative_Percentage>"
            }}
        }},
        "audience_analysis": {{
            "popular_subreddits": [
                "<Subreddit_1>",
                "<Subreddit_2>"
            ],
            "user_segments": [
                "<User_Segment_1>",
                "<User_Segment_2>"
            ]
        }},
        "pricing_feedback": {{
            "value_perception": {{
                "positive": "<Positive_Percentage>",
                "neutral": "<Neutral_Percentage>",
                "negative": "<Negative_Percentage>"
            }},
            "related_comments": [
                {{
                    "comment": "<Comment_Text>",
                    "upvotes": "<Upvotes>",
                    "post_url": "<Post_URL>"
                }}
            ]
        }},
        "competitor_strengths": [
            "<Strength_1>",
            "<Strength_2>",
            .
            .
            .
        ],
        "competitor_weaknesses": [
            "<Weakness_1>",
            "<Weakness_2>",
            .
            .
            .
        ],
        "user_recommendations": [
            "<Recommendation_1>",
            "<Recommendation_2>",
            .
            .
            .
        ],
        "competitive_strategy": {{
            "pricing_strategy": "<Pricing_Strategy>",
            "feature_improvement": "<Feature_Strategy>",
            .
            .
            .
        }}
    }}
    }}

    '''
325
+
326
def getTop10CompetitorPrompt(reddit_data,gemini_data,user_query):
    """Build the prompt that merges the Reddit-sourced and Gemini-sourced
    competitor lists into the 10 most relevant for *user_query*, returned
    as a JSON list. Literal braces are doubled because this is an f-string.
    """
    return f'''
    Competitor names from reddit are:
    {reddit_data}

    Competitor name from gemini are:
    {gemini_data}

    both are for the user query: "{user_query}"

    get me 10 most relevant competitors for the given user query from both the data and return a combined list in following json format, nothing else and try not to include very general competitors into the list which are not directly related to user query, be specific according to the user query. competitor1 ,2,..10 should be the details given in the json data:
    {{
    list:[competitor1,competitor2,....competitor10]
    }}
    '''
venv/reddit/reddit_call.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Reddit Data Scrapper
#
# One-shot driver script for the scraping pipeline. Several stages are kept
# commented out so individual steps can be re-enabled while debugging.
from reddit_utils import get_microseconds_list
from reddit_pain_point_analysis import pain_point_analysis
from reddit_sentiment_analysis import SentimentAnalysis
from reddit_gemini import getKeywords
from reddit_search_scrapper import getCompetitorAnalysisData, getFinalData
import google.generativeai as genai
from scraping import driver, getPostComments, getSearchPostData
from api_keys import api_key


def redditScrapper(user_query):
    """Run the (partially disabled) scraping pipeline for *user_query*."""
    # Kept for the disabled search/scrape steps below.
    search_keywords = getKeywords(user_query=user_query)
    # unique_list = get_microseconds_list()
    # for i in range(len(search_keywords["top_3_combinations"])):
    #     getSearchPostData(search_keyword=search_keywords['top_3_combinations'][i], index=unique_list[i])

    # getFinalData(user_query=user_query)
    # getPostComments()
    sentiment_instance = SentimentAnalysis()
    sentiment_instance.generate_sentiment_and_emotion_from_data(fileName='posts_data_1732105228633815.csv')
    # pain_point_analysis(user_query="AI image generation techniques and applications", fileName="posts_data.csv")

    # NOTE(review): the competitor query below is hard-coded and ignores
    # user_query — confirm whether this is intentional.
    getCompetitorAnalysisData(user_query="social media analytics tools and strategies")

    driver.quit()


if __name__ == "__main__":
    # FIX: the genai configuration and the scrape now run only when this
    # file is executed as a script; importing the module previously kicked
    # off a full scrape as an import side effect.
    genai.configure(api_key=api_key)
    redditScrapper(user_query="AI image")
venv/reddit/reddit_community_post_scraper.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import pandas as pd
import praw

# SECURITY FIX: the Reddit client secret, account password and username were
# hard-coded here (and committed to the repository). They are now read from
# the environment; the previously committed credentials must be considered
# compromised and rotated.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    password=os.environ["REDDIT_PASSWORD"],
    user_agent="Curious",
    username=os.environ["REDDIT_USERNAME"],
)

subRed = reddit.subreddit("SkincareAddiction")

# [title, selftext, permalink] for the 25 hottest posts.
data = []
for posts in subRed.hot(limit=25):
    data.append([posts.title, posts.selftext,
                 f"https://www.reddit.com/r/{subRed}/comments/{posts.id}"])


def process_comment(comment):
    """Convert a praw comment — and, recursively, its replies — to a dict."""
    comment_data = {
        "user": comment.author.name if comment.author else "Unknown",
        "comment": comment.body,
        "replies": [],  # filled below
    }
    # Process replies recursively if any
    if comment.replies:
        for reply in comment.replies:
            comment_data["replies"].append(process_comment(reply))
    return comment_data


for url in range(len(data)):
    submission = reddit.submission(url=data[url][2])

    # limit=2 resolves only the first two "load more comments" stubs;
    # use limit=0 to fetch all comments.
    submission.comments.replace_more(limit=2)

    comments_data = []
    # Seed with top-level comments and process breadth-first.
    comment_queue = list(submission.comments)
    while comment_queue:
        comment = comment_queue.pop(0)
        comments_data.append(process_comment(comment))

    # Attach the structured comments as a fourth column.
    data[url].append({"comments": comments_data})

# Drop the URL column: keep [title, description, comments].
new_data = [[row[0], row[1], row[3]] for row in data]

# Convert the list to a DataFrame and specify column names
df = pd.DataFrame(new_data, columns=["Title", "Description", "Comments"])

# Save the DataFrame as a CSV file
df.to_csv('output.csv', index=False)

print("Data saved to output.csv")
venv/reddit/reddit_competitor_analysis.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import google.generativeai as genai
4
+
5
+ from reddit.prompts import getCompetitorPrompt, getTop10CompetitorPrompt
6
+ from reddit.reddit_utils import get_microseconds_list
7
+ from reddit.scraping import getPostComments, getSearchPostData
8
+ from reddit.reddit_gemini import getModelAndGenerationConfigCommon
9
+ from reddit.api_keys import api_key3,api_key4,api_key5,api_key6,api_key7,api_key8
10
+
11
def getCompetitorNames(user_query):
    """Ask Gemini for the top-6 competitors relevant to *user_query*.

    Returns the parsed JSON dict on success, or {"details": <error>}
    after one retry (the call can fail transiently, or return text that
    is not valid JSON).
    """
    prompt = f"""Extract a list of product names, alternatives, and competitors relevant to the query: {user_query}. Ensure that the results focus on tools, platforms, or services explicitly aligned with the domain and purpose of the query. Avoid including general or loosely related products unless they directly offer features tailored to the query's intent.

Additionally, provide the platform(s) (e.g., web, apps, integrations) on which each competitor operates and categorize their functionality. Include a frequency count indicating the number of times each entry is mentioned or relevant to the query. Also, aggregate the total frequency of each platform across all entries. Also give provide popularity score for each competitor out of 100.
give top 6 competitors details only.
return in given json format only:
{{
"competitors":[{{"name":"","platform":[],"category":"","count":number,"popularity":number}}],
"platforms":[{{"platform":name,count:number}}]
}}"""
    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "application/json",
    }
    model = genai.GenerativeModel(
        model_name="gemini-1.5-pro-002",
        generation_config=generation_config,
    )

    # One initial attempt plus a single retry, instead of the previous
    # copy-pasted try/except blocks with bare `except:` clauses.
    last_error = None
    for attempt in range(2):
        try:
            response = model.generate_content(prompt)
            data = response.text
            print("retry getCompetitorNames" if attempt else "getCompetitorNames", data)
            return json.loads(data)
        except Exception as e:  # FIX: was a bare `except:`
            last_error = e
    return {"details": str(last_error)}
47
+
48
+
49
def getCompetitorNamesFromReddit(user_query, fileName, isSolo=True, last_chat_session=None):
    """Extract the top-6 competitors for *user_query* from scraped Reddit CSV data.

    When isSolo is True, a fresh chat session is created over the CSV in
    *fileName*; otherwise *last_chat_session* (from a previous analysis run)
    is reused. Returns the parsed JSON dict, or {"details": <error>} after
    one retry.
    """
    prompt = f"""Extract a list of product names, alternatives, and competitors relevant to the query: {user_query} from the given csv data. Ensure that the results focus on tools, platforms, or services explicitly aligned with the domain and purpose of the query and do not include very general competitors into the list which are not directly related to user query use case or intent.

Additionally, provide the platform(s) (e.g., web, apps, integrations) on which each competitor operates and categorize their functionality.Category should be string. Include a frequency count indicating the number of times each entry is mentioned or relevant to the query. Also, aggregate the total frequency of each platform across all entries. Also give provide popularity score for each competitor out of 100.
give top 6 competitors details only.
return in given json format only:
{{
"competitors":[{{"name":"","platform":[],"category":"","count":number,"popularity":number}}],
"platforms":[{{"platform":name,count:number}}]
}}"""
    if isSolo:
        data = getModelAndGenerationConfigCommon(fileName=fileName, isFlash=False)
        model = data[0]
        chat_session = model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [
                        data[1],
                        prompt,
                    ],
                }
            ]
        )
    else:
        chat_session = last_chat_session

    # One initial attempt plus a single retry (previously duplicated
    # try/except blocks with bare `except:` clauses).
    last_error = None
    for attempt in range(2):
        try:
            response = chat_session.send_message("give your last response of competitor names")
            data = response.text
            print("retry getCompetitorNames" if attempt else "getCompetitorNames", data)
            return json.loads(data)
        except Exception as e:  # FIX: was a bare `except:`
            last_error = e
    return {"details": str(last_error)}
91
+
92
+
93
+ # def top10competitor(reddit_data,gemini_data):
94
+ # # ensure path 1 is reddit and path 2 is gemini
95
+ # d1 = reddit_data
96
+ # d2 = gemini_data
97
+ # popularity = {}
98
+ # count = {}
99
+ # for x in d1['competitors']:count[x['name']] = x['count']
100
+ # for x in d2['competitors']:
101
+ # if x['name'] in count.keys(): continue
102
+ # else:popularity[x['name']] = x['popularity']
103
+ # ma = sum(count.values())/len(count.values())
104
+ # mb = sum(popularity.values())/len(popularity.values())
105
+ # df = pd.DataFrame(d1['competitors'])
106
+ # df['extracted_by'] = 'reddit'
107
+ # df2 = pd.DataFrame(d2['competitors'])
108
+ # df2['extracted_by'] = 'gemini'
109
+ # df2['count'] = df2['popularity']/(mb/ma)
110
+ # df = pd.concat([df, df2], axis=0)
111
+ # df = df.drop_duplicates('name')
112
+ # df = df.reset_index(drop=True)
113
+ # df = df.sort_values(by ='count', ascending=False)
114
+ # df = df.reset_index(drop=True)
115
+ # df = df.head(10)
116
+ # df = df.drop('extracted_by', axis=1)
117
+ # return df
118
+
119
def getTop10Competitors(user_query, reddit_data, gemini_data):
    """Merge the Reddit- and Gemini-sourced competitor lists into the
    10 most relevant for *user_query*.

    Returns the parsed JSON dict, or None if the model call / JSON
    parsing fails (callers must handle None).
    """
    prompt = getTop10CompetitorPrompt(user_query=user_query, reddit_data=reddit_data, gemini_data=gemini_data)
    model = genai.GenerativeModel("gemini-exp-1114")

    generation_config = genai.GenerationConfig(response_mime_type="application/json")

    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        data = response.text
        print("getTop10Competitors:", data)
        return json.loads(data)
    except Exception as e:
        # FIX: print() does not perform %-interpolation (that is a logging
        # idiom) — the original printed the literal "%s" and the exception
        # as two separate arguments.
        print(f"Error while fetching getTop10Competitors: {e}")
132
+
133
def getPostDataofCompetitor(fileName,user_query):
    """Scrape Reddit posts for each competitor and hand the resulting CSVs
    to preprocessingCompetitorsData.

    NOTE(review): despite its name, `fileName` is used as a DataFrame with
    'name' and 'category' columns (df = fileName below) — confirm at call
    sites and consider renaming.
    """
    df = fileName
    # One unique microsecond-based id per competitor row, used in CSV names.
    unique_list = get_microseconds_list(length=len(df))
    actual_list=[]
    # getSearchPostData returns the index actually used (or None on failure);
    # only successful scrapes make it into actual_list.
    for i in range(len(df)):
        index=getSearchPostData(forCompetitorAnalysis=True,search_keyword=f"{df.iloc[i]['name']} {df.iloc[i]['category']}",name=df.iloc[i]['name'] ,index=unique_list[i])
        if index is not None:
            actual_list.append(index)
    print("actual_list",actual_list)
    print("Fetched data for competitors")
    fileNames = [f"posts_data_{actual_list[i]}.csv" for i in range(len(actual_list))]
    return preprocessingCompetitorsData(user_query=user_query,fileNames=fileNames,fileUniqueIds=actual_list)
145
+
146
+
147
def preprocessingCompetitorsData(user_query,fileNames,fileUniqueIds):
    """For each scraped competitor CSV: fetch its post comments, generate a
    competitor-analysis report, persist successful reports to JSON files,
    then delete all the CSVs. Returns the list of successful report dicts.
    """
    # c caps work at 6 files — getCompetitorAnalysisReport maps count 0..5
    # onto the six available API keys.
    c=0
    competitors_json_data = []
    for i in range(len(fileUniqueIds)):
        if c==6:break
        print(f"Processing file {fileNames[i]}")
        # get posts comments data
        getPostComments(fileName=fileNames[i])
        json_data = getCompetitorAnalysisReport(user_query=user_query,fileName=fileNames[i],count=c)
        c+=1
        # if json_data does contain "details" field, then skip this file
        # ("details" is the error marker returned by getCompetitorAnalysisReport)
        if "details" in json_data.keys():
            continue
        # save json_data to json file
        with open(f"competitor_analysis_report_{fileUniqueIds[i]}.json", "w") as outfile:
            json.dump(json_data, outfile)
        print("Competitor Analysis Report",f"competitor_analysis_report_{fileUniqueIds[i]}.json")
        competitors_json_data.append(json_data)

    # The scraped CSVs are intermediate artifacts; remove them all,
    # including any that were skipped above.
    for file_path in fileNames:
        # Check if the file exists before attempting to delete
        if os.path.exists(file_path):
            os.remove(file_path)
            print("File deleted successfully")
        else:
            print("File does not exist")
    return competitors_json_data
174
+
175
def getCompetitorAnalysisReport(user_query, fileName, count=0):
    """Generate one competitor-analysis JSON report from a scraped CSV.

    *count* selects which Gemini API key to use (api_key3..api_key8), so
    the per-key quota is spread across the six competitor files. Returns
    the parsed report dict, or {"details": <error>} after one retry.
    """
    prompt = getCompetitorPrompt(user_query=user_query)
    # Rotate across the key pool; counts beyond the map fall back to api_key8.
    api_key_map = {
        0: api_key3,
        1: api_key4,
        2: api_key5,
        3: api_key6,
        4: api_key7,
        5: api_key8,
    }
    selected_api_key = api_key_map.get(count, api_key8)  # Default to api_key8 if count > 5
    genai.configure(api_key=selected_api_key)

    data = getModelAndGenerationConfigCommon(fileName=fileName, isFlash=False)
    model = data[0]
    chat_session = model.start_chat(
        history=[
            {
                "role": "user",
                "parts": [
                    data[1],
                    prompt,
                ],
            }
        ]
    )

    # One initial attempt plus a single retry (previously duplicated
    # try/except blocks with bare `except:` clauses).
    last_error = None
    for attempt in range(2):
        try:
            response = chat_session.send_message("give your last response of competitor analysis")
            data = response.text
            print("retry getCompetitorNames" if attempt else "getCompetitorNames", data)
            return json.loads(data)
        except Exception as e:  # FIX: was a bare `except:`
            last_error = e
    print("competitor analysis error", str(last_error))
    return {"details": str(last_error)}
venv/reddit/reddit_functions.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from reddit.reddit_search_scrapper import getFinalData
3
+ from reddit.reddit_sentiment_analysis import SentimentAnalysis
4
+ from reddit.reddit_utils import get_microseconds_list
5
+ from reddit.scraping import getPostComments, getSearchPostData
6
+
7
+
8
def getRedditData(user_query, search_keywords):
    """Scrape Reddit for each keyword, merge/rank the results, fetch comments and emotions.

    Parameters:
        user_query: original user query, used for similarity ranking.
        search_keywords: list of search strings to scrape.

    Returns:
        dict with the final CSV name, its unique id, and the pipeline
        steps that completed successfully (each step is best-effort).
    """
    # One unique id per keyword. The original always requested the
    # default of 3 ids, which raised IndexError for >3 keywords.
    unique_list = get_microseconds_list(length=len(search_keywords))

    successful_steps = []

    # Step 1: scrape search results for every keyword (best-effort).
    for i, keyword in enumerate(search_keywords):
        try:
            getSearchPostData(search_keyword=keyword, index=unique_list[i])
            successful_steps.append(('getSearchPostData', i))  # Mark this step as successful
        except Exception as e:
            print(f"Failed at getSearchPostData for keyword {keyword}: {e}")

    # Step 2: derive the per-keyword CSV file names.
    fileNames = [f"posts_data_{uid}.csv" for uid in unique_list]

    # Step 3: merge and rank the scraped CSVs into fileNames[0].
    try:
        getFinalData(user_query=user_query, filesNames=fileNames)
        successful_steps.append(('getFinalData',))
    except Exception as e:
        print(f"Failed at getFinalData: {e}")

    # Step 4: attach comment trees to the merged file.
    try:
        getPostComments(fileName=fileNames[0])
        successful_steps.append(('getPostComments',))
    except Exception as e:
        print(f"Failed at getPostComments: {e}")

    # Step 5: annotate comments with sentiment/emotion scores.
    try:
        sentiment_instance = SentimentAnalysis()
        sentiment_instance.generate_sentiment_and_emotion_from_data(fileName=fileNames[0])
        successful_steps.append(('getPostSentiment',))
    except Exception as e:
        print(f"Failed at getPostSentiment: {e}")

    # Return the successful steps for logging or further processing.
    return {
        "fileName": fileNames[0],
        "fileUniqueId": str(unique_list[0]),
        "successful_steps": successful_steps,
    }
venv/reddit/reddit_gemini.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import time
4
+ import google.generativeai as genai
5
+
6
+ from reddit.prompts import getKeywordsPrompt
7
+
8
def getKeywords(user_query: str):
    """Ask Gemini for search keywords derived from *user_query*.

    Returns:
        The parsed JSON payload, or None when generation or JSON parsing
        fails. Failure is deliberately best-effort (the original swallowed
        the error and fell through to an implicit None); errors are logged
        and callers must handle the None case.
    """
    prompt = getKeywordsPrompt(user_query)
    model = genai.GenerativeModel("gemini-exp-1114")

    # Ask the model to respond with JSON so the output can be parsed directly.
    generation_config = genai.GenerationConfig(response_mime_type="application/json")

    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        data = response.text
        logging.info("Enhanced user query: %s", data)
        return json.loads(data)
    except Exception as e:
        logging.error("Error while fetching keywords: %s", e)
        # Explicit None instead of the original implicit fall-through.
        return None
22
+
23
+
24
def upload_to_gemini(path, mime_type=None):
    """Upload *path* to the Gemini Files API and return the uploaded file handle."""
    uploaded = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    return uploaded
28
+
29
def wait_for_files_active(files):
    """Block until every uploaded Gemini file leaves the PROCESSING state.

    Some files uploaded to the Gemini API need server-side processing
    before they can be used as prompt inputs; the current status is read
    from each file's "state" field. Polls every 10 seconds in a simple
    blocking loop — production code should use something more robust.

    Raises:
        Exception: if a file settles in any state other than ACTIVE.
    """
    print("Waiting for file processing...")
    names = [f.name for f in files]
    for file_name in names:
        current = genai.get_file(file_name)
        while current.state.name == "PROCESSING":
            print(".", end="", flush=True)
            time.sleep(10)
            current = genai.get_file(file_name)
        print("file.state.name", current.state.name)
        if current.state.name != "ACTIVE":
            raise Exception(f"File {current.name} failed to process")
    print("...all files ready")
    print()
51
def getModelAndGenerationConfigCommon(fileName, isFlash=True):
    """Build a JSON-output Gemini model and upload a CSV for prompting.

    Parameters:
        fileName: path of the CSV to upload.
        isFlash: choose the flash model when True, otherwise pro.

    Returns:
        [model, uploaded_file] — the file is guaranteed ACTIVE on return.
    """
    config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "application/json",
    }
    chosen_model = "gemini-1.5-flash" if isFlash else "gemini-1.5-pro-002"
    model = genai.GenerativeModel(
        model_name=chosen_model,
        generation_config=config,
    )

    uploaded = upload_to_gemini(fileName, mime_type="text/csv")
    # Some files have a processing delay. Wait for them to be ready.
    wait_for_files_active([uploaded])
    return [model, uploaded]
venv/reddit/reddit_pain_point_analysis.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import google.generativeai as genai
3
+
4
+ from reddit.prompts import getPainPointAnalysisPrompt
5
+ from reddit.reddit_gemini import upload_to_gemini, wait_for_files_active
6
+
7
+
8
def pain_point_analysis(user_query, fileName, uniqueFileId):
    """Run the pain-point-analysis prompt over a CSV and persist the JSON result.

    Parameters:
        user_query: topic fed into the prompt builder.
        fileName: CSV uploaded to Gemini as context.
        uniqueFileId: suffix for the pain_point_analysis_<id>.json output file.

    Returns:
        [json_data, chat_session]; on repeated failure json_data is
        {"details": "something went wrong"} (original error contract kept).
    """
    pain_point_prompt = getPainPointAnalysisPrompt(user_query=user_query)
    generation_config = genai.GenerationConfig(response_mime_type="application/json")  # Request JSON response
    model = genai.GenerativeModel(
        model_name="gemini-1.5-pro-002",
        generation_config=generation_config,
    )

    files = [
        upload_to_gemini(fileName, mime_type="text/csv"),
    ]
    # Some files have a processing delay. Wait for them to be ready.
    wait_for_files_active(files)
    chat_session = model.start_chat()
    chat_session.history = [
        {
            "role": "user",
            "parts": [
                files[0],
                pain_point_prompt,
            ],
        }
    ]

    def _request_and_save():
        # Ask for the analysis, parse it, and write it to disk.
        response = chat_session.send_message("give your pain point analysis output json as it is.")
        text = response.text
        print("pain point analysis output", text)
        json_data = json.loads(text)
        with open(f'pain_point_analysis_{uniqueFileId}.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4)
        return json_data

    try:
        return [_request_and_save(), chat_session]
    except Exception:
        try:
            # Retry once. BUG FIX: the original retry re-sent the message
            # but kept parsing the stale first response's text; this
            # helper always parses the fresh response.
            return [_request_and_save(), chat_session]
        except Exception:
            return [{"details": "something went wrong"}, chat_session]
venv/reddit/reddit_search_scrapper.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Reddit data pre processing code only.
3
+ '''
4
+
5
+ import os
6
+ import pandas as pd
7
+ from reddit.reddit_competitor_analysis import getCompetitorNames, getCompetitorNamesFromReddit, getPostDataofCompetitor, getTop10Competitors
8
+ from reddit.reddit_utils import topic_sort
9
+
10
def preProcessPostData(filesNames):
    """Clean each scraped CSV in place: dedupe titles, drop comment-less posts, rebuild index.

    Each file is read, filtered, given a fresh 0..n-1 "index" column, and
    written back to the same path.
    """
    for csv_path in filesNames:
        frame = pd.read_csv(csv_path)
        frame.drop_duplicates(subset=["title"], inplace=True)
        # Posts with no comments carry no discussion signal — drop them.
        frame = frame[frame["comment_count"] != 0]
        # Replace the stale index column with a fresh sequential one.
        frame.drop(columns=["index"], inplace=True)
        frame.insert(0, "index", list(range(len(frame))))
        frame.to_csv(csv_path, index=False)
22
+
23
def getFinalData(user_query, filesNames):
    """Merge the three scraped CSVs, rank them against the query, and save to filesNames[0].

    The three intermediate CSVs are deleted; only the merged/ranked file
    (written back to filesNames[0]) remains.
    """
    preProcessPostData(filesNames=filesNames)
    # BUG FIX: topic_sort's signature is (path1, query, path2, path3, ...).
    # The original passed (file0, file1, file2, user_query) positionally,
    # so the query ended up as path3 and a file path as the query.
    final_df = topic_sort(filesNames[0], user_query, filesNames[1], filesNames[2])
    for file_path in filesNames:
        # Check if the file exists before attempting to delete
        if os.path.exists(file_path):
            os.remove(file_path)
            print("File deleted successfully")
        else:
            print("File does not exist")
    final_df.to_csv(filesNames[0], index=False)

    print("Data saved to ", filesNames[0])
37
+
38
+
39
def getCompetitorAnalysisData(user_query, fileName, isSolo=True, chat_session=None):
    """Collect competitor names from Gemini and Reddit, merge to a top-10, and fetch their posts.

    Returns:
        dict with "competitors_data" (per-competitor scraped post data)
        and "all_competitor_data" (the merged top-10 competitor list).
    """
    gemini_names = getCompetitorNames(user_query=user_query)
    reddit_names = getCompetitorNamesFromReddit(
        user_query=user_query,
        isSolo=isSolo,
        last_chat_session=chat_session,
        fileName=fileName,
    )
    merged = getTop10Competitors(
        gemini_data=gemini_names,
        reddit_data=reddit_names,
        user_query=user_query,
    )
    competitor_frame = pd.DataFrame(merged["list"])
    competitors_data = getPostDataofCompetitor(user_query=user_query, fileName=competitor_frame)

    return {
        "competitors_data": competitors_data,
        "all_competitor_data": merged["list"],
    }
55
+
venv/reddit/reddit_sentiment_analysis.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import ast
3
+ from transformers import BertTokenizer, BertForSequenceClassification
4
+ from transformers import pipeline
5
# NOTE(review): this module-level DataFrame appears unused in this module —
# confirm before removing.
output=pd.DataFrame()
class SentimentAnalysis:
    """Per-comment emotion tagging for scraped Reddit comment trees.

    Loads a go_emotions text classifier plus a FinBERT model/tokenizer at
    construction time. NOTE(review): finbert and tokenizer are initialized
    but never used by the methods below — confirm whether they are needed.
    """
    def __init__(self):
        # Emotion classifier that returns scores for all labels (top_k=None).
        self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
        self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
        self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    def process_comment(self,comment):
        """Return *comment* annotated with its top-3 emotion scores; recurses into replies."""
        # Classifier input is truncated to 512 characters.
        sentence=[comment['comment'][:512]]
        model_outputs = self.classifier(sentence)
        # Prepare the comment data
        comment_data = {
            "comment": comment['comment'],
            'emotion':model_outputs[0][:3],
            "replies": []  # Initialize replies list
        }

        # Process replies recursively if any
        if comment['replies']:
            for reply in comment['replies']:
                reply_data = self.process_comment(reply)  # Recursive call for replies
                comment_data["replies"].append(reply_data)

        return comment_data
    def generate_sentiment_and_emotion_from_data(self,fileName):
        """Annotate every comment tree in the CSV's 'comments' column with emotions, in place.

        Reads the CSV, parses each row's stringified comment dict, tags
        each comment recursively, and writes the file back.
        """
        df = pd.read_csv(fileName)
        comments_data=[]
        for i in range(df.shape[0]):
            row=df.iloc[i]
            # 'comments' cells hold stringified dicts; literal_eval parses
            # them without executing arbitrary code.
            commentary=(ast.literal_eval(row['comments']))
            commentary=commentary['comments']
            while commentary:
                comment = commentary.pop(0)
                comment_data = self.process_comment(comment)
                comments_data.append(comment_data)
            # NOTE(review): comments_data is never reset between rows, so
            # each row accumulates all previous rows' comments — confirm
            # this is intended before relying on per-row contents.
            json_output = {
                "comments": comments_data
            }
            subset_data = df.iloc[i].copy()

            # Modify the subset
            subset_data['comments'] = json_output

            # Assign back if needed
            df.iloc[i] = subset_data
        df.to_csv(fileName, index=False)
        print("Sentiment Data saved to",fileName)
53
+
venv/reddit/reddit_utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ import pandas as pd
4
+ from sentence_transformers import SentenceTransformer
5
+
6
def get_microseconds_list(length=3):
    """Return *length* consecutive integers seeded from the current time in microseconds.

    Used as unique, sortable ids embedded in scraped-data file names.
    """
    base = int(time.time() * 1_000_000)
    return list(range(base, base + length))
12
+
13
+
14
def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
    """Rank scraped posts by semantic similarity to *query*, then by engagement.

    In competitor-analysis mode only *path1* is read; otherwise the three
    CSVs are concatenated and deduplicated by title. The 30 most similar
    titles (MiniLM sentence embeddings) are re-ranked by comment and vote
    counts, and the top 18 rows are returned as a DataFrame.

    NOTE(review): indentation reconstructed from a collapsed diff — the
    dedupe/reset/drop lines are assumed to belong to the merge branch
    only; confirm against the original file. Also note one caller
    (getFinalData) passes arguments positionally in a different order
    than this signature — verify call sites.
    """
    sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    if isForCompetitorAnalysis==True:
        df=pd.read_csv(path1)
    else:
        df0 = pd.read_csv(path1)
        df1 = pd.read_csv(path2)
        df2 = pd.read_csv(path3)
        df = pd.concat([df0, df1, df2],axis=0)
        df = df.drop_duplicates("title")
        df = df.reset_index(drop=True)
        df = df.drop("index", axis = 1)
    title = df["title"]
    # Row 0 of the embedding matrix is the query itself.
    sentences = [query] + list(title)
    embeddings = sentence_model.encode(sentences)
    similarities = sentence_model.similarity(embeddings[0], embeddings)
    print(len(similarities[0]))
    # Skip position 0 (the query's self-similarity).
    df["similarity"] = similarities[0][1:]
    df = df.sort_values(by='similarity', ascending=False)
    df = df.reset_index(drop=True)
    df = df.head(30)
    # Among the 30 most relevant posts, prefer the most discussed ones.
    df = df.sort_values(by=['comment_count','votes_count'], ascending=False)
    df = df.reset_index(drop=True)
    df = df.head(18)
    return df
venv/reddit/scraping.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Only Scraping related code.
3
+ '''
4
+ from selenium import webdriver
5
+ from selenium.webdriver.common.action_chains import ActionChains
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.firefox.options import Options as FirefoxOptions
8
+ import time
9
+ from fake_headers import Headers
10
+ import pandas as pd
11
+ import praw
12
+ import re
13
+
14
+ from reddit.reddit_utils import topic_sort
15
# # Set up WebDriver
# Randomize the browser user-agent so requests look less bot-like.
header = Headers().generate()["User-Agent"]
proxy=None
browser_option = FirefoxOptions()
browser_option.add_argument("--no-sandbox")
browser_option.add_argument("--disable-dev-shm-usage")
browser_option.add_argument("--ignore-certificate-errors")
browser_option.add_argument("--disable-gpu")
browser_option.add_argument("--log-level=3")
browser_option.add_argument("--disable-notifications")
browser_option.add_argument("--disable-popup-blocking")
browser_option.add_argument("--user-agent={}".format(header))
if proxy is not None:
    browser_option.add_argument("--proxy-server=%s" % proxy)

# For Hiding Browser
browser_option.add_argument("--headless")

# Module-level singletons: one shared Firefox session and one Reddit
# client are created at import time and reused by every function below.
driver = webdriver.Firefox(options=browser_option)
actions = ActionChains(driver)
# SECURITY: the Reddit client secret and account password are hardcoded
# and committed to version control — these credentials should be rotated
# immediately and loaded from environment variables or a secrets manager.
reddit = praw.Reddit(
    client_id="yjGfys3QZPpdCpNZl25Kig",
    client_secret="dqoc8LrQBQhB_IgjV-lKyrD9lBPftg",
    password="&honeyB90",
    user_agent="Curious",
    username="Final-Difference7055",
)
42
+
43
+
44
def extract_post_id(url):
    """Return the Reddit post id embedded in *url*, or None if absent.

    The id is the path segment following '/comments/' in a permalink.
    """
    found = re.search(r'/comments/([^/]+)/', url)
    return found.group(1) if found else None
50
+ # 1. Get Search Post Data
51
def getSearchPostData( search_keyword,index, name="",forCompetitorAnalysis=False):
    """Scrape Reddit search results for *search_keyword* into posts_data_{index}.csv.

    Drives the module-level Selenium Firefox session against reddit.com's
    search page. Title/URL, comment/vote counters, and timestamps come
    from three separate element lists that are assumed to line up by
    position — TODO confirm this alignment holds across Reddit layouts.

    Parameters:
        search_keyword: query typed into Reddit search.
        index: unique id used in the output file name.
        name: when non-empty (competitor mode), keep only posts whose
            title contains it; the CSV is written only when at least 6
            matching posts exist, and *index* is returned.
        forCompetitorAnalysis: scroll one extra page before scraping.

    NOTE(review): nesting reconstructed from a collapsed diff — the final
    else (unconditional save) is assumed to pair with the name!="" check;
    confirm against the original file.
    """
    # Navigate to the search results page
    url = f'https://www.reddit.com/search/?q={search_keyword}'
    driver.get(url)
    time.sleep(3)
    print("reached this step")

    # Scroll and extract data; each entry eventually carries
    # index / title / comment_count / votes_count / url / time.
    posts_data = []
    list_length=0 # posts count
    # Scroll down and wait for content to load
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)
        if forCompetitorAnalysis:
            # Competitor mode needs more rows, so load one extra page.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)
        # Find post cards
        post_cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-testid="post-title-text"]')
        post_cards_1 = driver.find_elements(By.CSS_SELECTOR, 'div[data-testid="search-counter-row"]')
        post_cards_2 = driver.find_elements(By.CSS_SELECTOR, 'faceplate-timeago')
        idx=list_length
        # First pass: create one record per counter row with vote/comment counts.
        for card in post_cards_1:
            try:
                # Extract votes count
                votes_count = card.find_element(By.XPATH, './/faceplate-number').text

                # Extract the comments count, checking for both "comment" and "comments"
                comments_count = card.find_element(By.XPATH,
                    './/span[contains(text(), "comment") or contains(text(), "comments")]/preceding-sibling::faceplate-number'
                ).text
                posts_data.append({
                    "index":idx,
                    "comment_count":comments_count,
                    "votes_count":votes_count
                })
                idx+=1
            except Exception as e:
                print("Error in post_card_1:", e)

        # Second pass: attach title and URL by position.
        idx=list_length
        for card in post_cards:
            try:
                url=card.get_attribute("href")
                title=card.text
                posts_data[idx]["title"]=title
                posts_data[idx]["url"]=url
                idx+=1
            except Exception as e:
                print("Error in post_cards:", e)
        # Third pass: attach post timestamps by position.
        idx=list_length

        for card in post_cards_2:
            try:
                time_element = card.find_element(By.XPATH,'./time')
                post_time=time_element.get_attribute('datetime')
                posts_data[idx]["time"]=post_time
                idx+=1
            except Exception as e:
                print("Error in post_cards_2:", e)
    except Exception as e:
        print("Error in scrolling:", e)

    df = pd.DataFrame(posts_data)
    if name!="":
        # Competitor mode: dedupe and keep only titles mentioning the name.
        df = df.drop_duplicates("title")
        df = df[df["title"].str.contains(name)]
        if len(df) >=6:
            df.to_csv(f'posts_data_{index}.csv', index=False)
            if len(df)>18:
                # Enough rows to rank: keep only the most relevant posts.
                getFinalDataOfCompetitor(user_query=search_keyword,fileName=f'posts_data_{index}.csv')
            print(f"Data saved to posts_data_{index}.csv")
            return index
    else:
        df.to_csv(f'posts_data_{index}.csv', index=False)
        print(f"Data saved to posts_data_{index}.csv")
140
def getFinalDataOfCompetitor(user_query,fileName):
    """Re-rank a competitor's scraped posts by similarity to the query and overwrite the CSV."""
    ranked = topic_sort(path1=fileName, isForCompetitorAnalysis=True, query=user_query)
    ranked.to_csv(fileName, index=False)
    print("getFinalDataOfCompetitor Data saved to ",fileName)
144
def getSearchPostDescription(url):
    """Return the first paragraph of the post body at *url*, or "" when unavailable.

    Uses the module-level Selenium driver; the post's text body lives in a
    div whose id embeds the post id extracted from the URL.
    """
    # Renamed from `id` (shadowed the builtin).
    post_id = extract_post_id(url)
    # Navigate to the post page
    driver.get(url)
    time.sleep(0.5)

    description = ""
    try:
        post_data = driver.find_element(By.CSS_SELECTOR, f'div[id="t3_{post_id}-post-rtjson-content"]')
        description = post_data.find_element(By.XPATH, './p').text
    except Exception:
        # Posts without a text body (image/link posts) have no rtjson
        # content div; treat them as having an empty description.
        # (Was a bare `except:` — narrowed so KeyboardInterrupt etc. escape.)
        description = ""
    return description
158
+
159
+
160
+
161
+
162
def process_comment(comment,reply_limit):
    """Convert a praw comment into a plain dict of user/comment/replies.

    Recurses into replies, decrementing *reply_limit* per level of depth;
    once it reaches 0 no further replies are collected. A deleted author
    is reported as "Unknown".
    """
    author = comment.author
    node = {
        "user": author.name if author else "Unknown",
        "comment": comment.body,
        "replies": [],
    }

    # Descend into replies (if any) until the depth budget runs out.
    for reply in comment.replies or []:
        if reply_limit == 0:
            break
        node["replies"].append(process_comment(reply, reply_limit=reply_limit - 1))

    return node
179
+
180
+ # 3. get post comments data
181
def getPostComments(fileName,isForCompetitorAnalysis=False):
    """Fetch comment trees (via PRAW) and post descriptions for every row of the CSV, in place.

    Each row's 'comments' cell is replaced with a nested dict of up to 40
    top-level comments (20 in competitor mode) with bounded reply depth;
    a 'descriptions' column is added by scraping each post page, and the
    file is written back to the same path.
    """
    data= pd.DataFrame(pd.read_csv(fileName))
    data["comments"]=""
    for i in range(len(data)):
        try:
            submission = reddit.submission(url=data.iloc[i]['url'])

            # Fetch and process comments; limit=2 caps "load more"
            # expansions (limit=0 would fetch all comments).
            submission.comments.replace_more(limit=2)
        except Exception as e:
            # Skip rows whose URL PRAW cannot resolve.
            print("skipping due to error",data.iloc[i]['url'], e)
            continue
        comments_data = []

        # Seed with top-level comments
        comment_queue = list(submission.comments)
        comment_count=0
        # Competitor mode keeps smaller trees to limit downstream token usage.
        threshold=20 if isForCompetitorAnalysis else 40
        while comment_queue:
            if(comment_count>=threshold):
                break
            comment = comment_queue.pop(0)
            comment_data = process_comment(comment,reply_limit=2 if isForCompetitorAnalysis else 3) # Process each comment
            comments_data.append(comment_data)
            comment_count+=1
        # Now, structure the data into the desired JSON format
        json_output = {
            "comments": comments_data
        }
        subset_data = data.iloc[i].copy()

        # Modify the subset
        subset_data['comments'] = json_output

        # Assign back if needed
        data.iloc[i] = subset_data
    # Remove rows where 'comments' is an empty string (skipped rows above).
    data = data[data['comments'] != ""]
    data["descriptions"] = data["url"].apply(getSearchPostDescription)
    data.to_csv(fileName, index=False)
    print("Data saved to",fileName)
venv/test.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # import pandas as pd
2
+ # # import json
3
+ # # import ast
4
+ # # # Load the CSV file
5
+ # # df = pd.read_csv('file_with_sentiment.csv')
6
+
7
+ # # # Convert the 'comments' column to a list
8
+ # # comments_list = []
9
+ # # for i in df['descriptions']:
10
+ # # # json_data= ast.literal_eval(i)
11
+ # # comments_list.append(i)
12
+
13
+ # # # print("comments_list",len())
14
+ # # # Recursive function to count non-empty replies
15
+ # # def count_non_empty_replies(comments):
16
+ # # count = 1
17
+ # # for comment in comments:
18
+ # # if comment.get("replies"):
19
+ # # count += 1 # Increment for this non-empty replies list
20
+ # # count += count_non_empty_replies(comment["replies"]) # Recursively count nested replies
21
+ # # return count
22
+
23
+ # # # Example usage
24
+ # # # total_non_empty_replies = count_non_empty_replies(comments_list[0]['comments'][0]['replies'])
25
+ # # # print("Total non-empty replies:", total_non_empty_replies)
26
+
27
+ # # # Sample data structure
28
+ # # # comments_list = [
29
+ # # # {
30
+ # # # "comments": [
31
+ # # # # Each post contains a list of comments with nested replies
32
+ # # # ]
33
+ # # # }
34
+ # # # # More comments_list here
35
+ # # # ]
36
+
37
+ # # # Recursive function to limit replies in a comment tree
38
+ # # def limit_replies(comment, reply_limit=3):
39
+ # # limited_comment = {
40
+ # # "user": comment["user"],
41
+ # # "comment": comment["comment"],
42
+ # # "replies": []
43
+ # # }
44
+ # # if reply_limit == 0:
45
+ # # return limited_comment
46
+
47
+ # # # Get up to reply_limit replies, recursively applying the function
48
+ # # if "replies" in comment:
49
+ # # for reply in comment["replies"][:reply_limit-1]:
50
+ # # limited_comment["replies"].append(limit_replies(reply, reply_limit-2))
51
+
52
+ # # return limited_comment
53
+
54
+ # # # Function to process each post, extracting 10 comments with limited replies
55
+ # # def get_limited_comments(comments_list, comment_limit=10, reply_limit=7):
56
+ # # limited_comments_list = []
57
+
58
+ # # for post in comments_list:
59
+ # # limited_post = {"comments": []}
60
+
61
+ # # # Get up to comment_limit comments for each post
62
+ # # for comment in post["comments"][:comment_limit]:
63
+ # # limited_comment = limit_replies(comment, reply_limit)
64
+ # # limited_post["comments"].append(limited_comment)
65
+
66
+ # # limited_comments_list.append(limited_post)
67
+
68
+ # # return limited_comments_list
69
+
70
+ # # Example usage
71
+ # # limited_comments_data = get_limited_comments(comments_list)
72
+ # # total_non_empty_replies = count_non_empty_replies(limited_comments_data[0]['comments'][0]['replies'])
73
+
74
+
75
+ # # Save the list to a JSON file
76
+ # # with open('comments2.json', 'w') as json_file:
77
+ # # json.dump(comments_list, json_file,indent=4)
78
+
79
+ # # # from reddit.scraping import getPostComments
80
+
81
+
82
+ # # # getPostComments(fileName="posts_data_1732105228633815.csv")
83
+ # # import time
84
+
85
+ # # from reddit.reddit_sentiment_analysis import SentimentAnalysis
86
+
87
+ # # # Create an instance of the SentimentAnalysis class
88
+ # # sentiment_instance = SentimentAnalysis()
89
+
90
+ # # # Record the start time
91
+ # # start_time = time.time()
92
+
93
+ # # # Call the method to generate sentiment and emotion
94
+ # # sentiment_instance.generate_sentiment_and_emotion_from_data(fileName='posts_data.csv')
95
+
96
+ # # # Record the end time
97
+ # # end_time = time.time()
98
+
99
+ # # # Calculate and print the processing time
100
+ # # process_time = end_time - start_time
101
+ # # print(f"Processing time: {process_time:.2f} seconds")
102
+ # # from reddit.reddit_pain_point_analysis import pain_point_analysis
103
+
104
+
105
+ # # pain_point_analysis(user_query="artificial intelligence applications in skincare and cosmetic industry",fileName="file_with_sentiment.csv")
106
+
107
+ # # import google.generativeai as genai
108
+ # # genai.configure(api_key='AIzaSyBtHE4Bg2ERWsKeGLxGPOSmtZeWRD6nNr0')
109
+ # # model = genai.GenerativeModel("gemini-1.5-flash")
110
+
111
+ # # generation_config = genai.GenerationConfig(response_mime_type="application/json")
112
+ # # response = model.generate_content("skin care ai ", generation_config=generation_config) # Adjust if the library supports async
113
+ # # data = response.text
114
+ # # print(data)
115
+
116
+ # '''
117
+ # Only Scraping related code.
118
+ # '''
119
+ # from selenium import webdriver
120
+ # from selenium.webdriver.common.action_chains import ActionChains
121
+ # from selenium.webdriver.common.by import By
122
+ # from selenium.webdriver.firefox.options import Options as FirefoxOptions
123
+ # import time
124
+ # from fake_headers import Headers
125
+ # import pandas as pd
126
+ # import praw
127
+ # # from reddit_call import sentence_model
128
+ # import re
129
+ # # # Set up WebDriver
130
+ # header = Headers().generate()["User-Agent"]
131
+ # proxy=None
132
+ # browser_option = FirefoxOptions()
133
+ # browser_option.add_argument("--no-sandbox")
134
+ # browser_option.add_argument("--disable-dev-shm-usage")
135
+ # browser_option.add_argument("--ignore-certificate-errors")
136
+ # browser_option.add_argument("--disable-gpu")
137
+ # browser_option.add_argument("--log-level=3")
138
+ # browser_option.add_argument("--disable-notifications")
139
+ # browser_option.add_argument("--disable-popup-blocking")
140
+ # browser_option.add_argument("--user-agent={}".format(header))
141
+ # if proxy is not None:
142
+ # browser_option.add_argument("--proxy-server=%s" % proxy)
143
+
144
+ # # For Hiding Browser
145
+ # browser_option.add_argument("--headless")
146
+
147
+ # driver = webdriver.Firefox(options=browser_option)
148
+ # actions = ActionChains(driver)
149
+ # reddit = praw.Reddit(
150
+ # client_id="yjGfys3QZPpdCpNZl25Kig",
151
+ # client_secret="dqoc8LrQBQhB_IgjV-lKyrD9lBPftg",
152
+ # password="&honeyB90",
153
+ # user_agent="Curious",
154
+ # username="Final-Difference7055",
155
+ # )
156
+ # fileName="posts_data_1732244765294548.csv"
157
+ # data= pd.DataFrame(pd.read_csv(fileName))
158
+ # data["comments"]=""
159
+ # for i in range(len(data)):
160
+ # # comment_data_sub=[]
161
+ # submission = reddit.submission(url=data.iloc[i]['url'])
162
+
163
+ # # Fetch and process comments
164
+ # submission.comments.replace_more(limit=2) # Use limit=0 to get all comments
165
+ # comments_data = []
166
+
167
+ # # Function to process a comment and its replies
168
+ # # Seed with top-level comments
169
+ # comment_queue = list(submission.comments)
170
+ # comment_count=0
171
+ # threshold=20
172
+ # while comment_queue:
173
+ # if(comment_count>=threshold):
174
+ # break
175
+ # comment = comment_queue.pop(0)
176
+ # comment_data = process_comment(comment,reply_limit=2 if isForCompetitorAnalysis else 3) # Process each comment
177
+ # comments_data.append(comment_data)
178
+ # comment_count+=1
179
+ # # Now, structure the data into the desired JSON format
180
+ # json_output = {
181
+ # "comments": comments_data
182
+ # }
183
+ # subset_data = data.iloc[i].copy()
184
+
185
+ # # Modify the subset
186
+ # subset_data['comments'] = json_output
187
+
188
+ # # Assign back if needed
189
+ # data.iloc[i] = subset_data
190
+ # # Remove rows where 'comments' is an empty string
191
+ # data = data[data['comments'] != ""]
192
+ # data["descriptions"] = data["url"].apply(getSearchPostDescription)
193
+ # data.to_csv(fileName, index=False)
194
+ # print("Data saved to",fileName)
195
+
196
# Ad-hoc manual test: configure Gemini with one of the rotated API keys
# and run the Reddit competitor-name extraction against a previously
# scraped CSV.
# NOTE(review): this executes at import time — importing this module
# triggers a live Selenium scrape and LLM calls; guard with
# `if __name__ == "__main__":` if that is not intended.
from reddit.reddit_competitor_analysis import getCompetitorNamesFromReddit
from reddit.api_keys import api_key,api_key2, api_key3
import google.generativeai as genai
genai.configure(api_key=api_key3)
getCompetitorNamesFromReddit(user_query='AI powered personalized skin care recommendations',isSolo=True,fileName='posts_data_1732244547776200.csv',last_chat_session=None)
venv/utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from functools import wraps
3
+ import time
4
+
5
def time_execution(func):
    """Decorator that prints how long *func* took to run.

    Transparently supports both coroutine functions (awaited) and plain
    callables; the wrapped function's metadata is preserved via wraps.
    """
    def _report(started):
        # Shared reporting helper so both wrappers format identically.
        elapsed = time.time() - started
        print(f"Function '{func.__name__}' executed in {elapsed:.4f} seconds")

    if asyncio.iscoroutinefunction(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            started = time.time()
            result = await func(*args, **kwargs)
            _report(started)
            return result
    else:
        @wraps(func)
        def wrapper(*args, **kwargs):
            started = time.time()
            result = func(*args, **kwargs)
            _report(started)
            return result
    return wrapper
+ return sync_wrapper
26
+