nihalaninihal committed on
Commit
ec05dfb
·
verified ·
1 Parent(s): 6ecf42a

Delete prompt_analyzer.py

Browse files
Files changed (1) hide show
  1. prompt_analyzer.py +0 -216
prompt_analyzer.py DELETED
@@ -1,216 +0,0 @@
1
- import os
2
- import re
3
- import json
4
- import time
5
- from typing import Dict, Any, Optional, List
6
- import google.generativeai as genai
7
- from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception
8
- from threading import Lock
9
- import traceback
10
-
11
-
12
# System instruction sent with every Gemini request; frames the model as a
# code-style/identity analyst and demands JSON-only output.
SYSTEM_PROMPT = "You are an experienced software engineer and data analyst tasked with building a report on developer's coding style, technical background, approach to problem solving, architectural thinking, technology choices, re-used frameworks etc. There will be a set of prompts, divided into CODE STYLE ANALYSIS, TEMPORAL ANALYSIS, PROJECT PREFERENCES ANALYSIS and IDENTITY CONFIDENCE CALCULATION together with data samples provided to you. You'll summarize your findings from all of the modules in a single comprehensive IDENTITY CALCULATION CONFIDENCE output. Output a valid JSON, avoid including too many strings into the list objects! Follow the instructions provided for this section:"
13
-
14
- def _should_retry_error(exception: Exception) -> bool:
15
- """Check if the exception is one we should retry"""
16
- error_str = str(exception).lower()
17
- return any(
18
- msg in error_str
19
- for msg in [
20
- "resource exhaust",
21
- "429",
22
- "too many requests",
23
- "quota exceeded",
24
- "rate limit",
25
- ]
26
- )
27
-
28
-
29
class RateLimiter:
    """Token bucket rate limiter.

    Allows at most ``rate`` requests per ``per`` seconds. Callers invoke
    :meth:`acquire`; a return of ``0.0`` means a token was consumed and the
    request may proceed, otherwise the caller should sleep for the returned
    number of seconds and call :meth:`acquire` again. Thread-safe.
    """

    def __init__(self, rate: int, per: int):
        self.rate = rate            # Number of requests allowed per time period
        self.per = per              # Time period in seconds
        self.tokens = float(rate)   # Current token balance (bucket starts full)
        # FIX: use time.monotonic() instead of time.time() — the wall clock
        # can jump (NTP sync, DST, manual changes), which would refill or
        # drain the bucket incorrectly; the monotonic clock cannot.
        self.last_update = time.monotonic()
        self.lock = Lock()

    def _add_tokens(self) -> None:
        """Refill the bucket proportionally to the time elapsed since last update."""
        now = time.monotonic()
        time_passed = now - self.last_update
        new_tokens = time_passed * (self.rate / self.per)
        if new_tokens > 0:
            # Never exceed the bucket capacity.
            self.tokens = min(self.rate, self.tokens + new_tokens)
            self.last_update = now

    def acquire(self) -> float:
        """Try to acquire a token.

        Returns:
            ``0.0`` if a token was consumed, otherwise the number of seconds
            to wait before the next token becomes available.
        """
        with self.lock:
            self._add_tokens()

            if self.tokens >= 1:
                self.tokens -= 1
                return 0.0

            # Time needed to refill the fractional deficit up to one token.
            wait_time = (1 - self.tokens) * (self.per / self.rate)
            return wait_time
62
-
63
-
64
class PromptAnalyzer:
    """Handles LLM prompting for code analysis tasks.

    Wraps a Gemini model with client-side rate limiting, retries on
    transient quota errors, and extraction/repair of JSON model output.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialize Gemini handler with API key.

        Args:
            api_key: Explicit Gemini API key. When omitted, falls back to
                the GEMINI_API_KEY environment variable.

        Raises:
            ValueError: If no API key is available from either source.
        """
        # SECURITY FIX: the key was previously hard-coded in source (a
        # leaked credential) and the `api_key` parameter was ignored.
        # Honor the caller-supplied key or the environment variable, as
        # the error message below already documents.
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Gemini API key must be provided or set in GEMINI_API_KEY environment variable"
            )

        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-001", system_instruction=SYSTEM_PROMPT
        )
        # self.chat = self.model.start_chat()
        self.token_count = 0   # cumulative token usage across all prompts
        self.prompt_count = 0  # number of prompts issued so far
        # Throttle to 5 requests per 60s window (free-tier style limit).
        self.rate_limiter = RateLimiter(rate=5, per=60)

    def count_tokens(self, text: str) -> int:
        """Count tokens in a text string, with an approximate fallback."""
        try:
            token_count = self.model.count_tokens(text)
            return token_count.total_tokens
        except Exception as e:
            print(f"Warning: Error counting tokens: {str(e)}")
            # Fallback to approximate count if token counting fails
            return len(text) // 4  # Rough approximation (~4 chars per token)

    def _clean_json_response(self, response_text: str) -> str:
        """Strip a surrounding markdown code fence (```json ... ```) if present."""
        if "```" in response_text:
            match = re.search(r"```(?:json)?\n(.*?)```", response_text, re.DOTALL)
            if match:
                return match.group(1).strip()
        return response_text.strip()

    @retry(
        retry=retry_if_exception(_should_retry_error),
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=2, min=4, max=60),
        before_sleep=lambda retry_state: print(
            f"Retrying due to rate limit/resource exhaustion... (attempt {retry_state.attempt_number})"
        ),
    )
    def _rate_limited_generate(self, prompt: str) -> Any:
        """Generate content, waiting on the local rate limiter and retrying
        (via the decorator) on rate-limit/quota errors from the API."""
        while True:
            wait_time = self.rate_limiter.acquire()

            if wait_time == 0:
                try:
                    # Direct call to generate_content instead of using chat
                    return self.model.generate_content(prompt)
                except Exception as e:
                    if _should_retry_error(e):
                        print(
                            f"Rate limit/resource exhaustion error, will retry: {str(e)}"
                        )
                        raise  # Let the retry decorator handle it
                    else:
                        print(f"Non-retryable error occurred: {str(e)}")
                        raise

            print(f"Rate limit reached. Waiting {wait_time:.2f} seconds...")
            time.sleep(wait_time)

    @retry(
        stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    def generate_json_response(self, prompt: str) -> Dict[str, Any]:
        """Generate and parse a JSON response with robust error handling.

        On JSON parse failure the prompt is re-sent (up to 3 times) with
        explicit feedback instructing the model to emit valid JSON only.

        Args:
            prompt: The user prompt to send to the model.

        Returns:
            The parsed JSON object from the model's response.

        Raises:
            json.JSONDecodeError: If no parseable JSON is produced after
                all retries.
            Exception: Any non-retryable generation error is re-raised.
        """
        try:
            self.prompt_count += 1
            print(f"\n📝 Processing prompt #{self.prompt_count}...")

            # Count input tokens
            token_count = self.model.count_tokens(prompt)
            input_tokens = token_count.total_tokens
            print(f"📊 Sending prompt with {input_tokens:,} tokens...")

            # Track retries for JSON parsing
            max_json_retries = 3
            last_response = None
            last_error = None

            for attempt in range(max_json_retries):
                try:
                    # Generate with rate limiting
                    start_time = time.time()
                    # Here's the actual model call
                    response = self._rate_limited_generate(prompt)
                    elapsed_time = time.time() - start_time

                    # Track token usage
                    # NOTE(review): usage_metadata.total_token_count likely
                    # already includes prompt tokens, so adding input_tokens
                    # may double-count — confirm against the Gemini SDK.
                    output_token_count = response.usage_metadata.total_token_count
                    prompt_total_tokens = input_tokens + output_token_count
                    self.token_count += prompt_total_tokens

                    print(f"✓ Response received in {elapsed_time:.2f} seconds")
                    print(f"📊 Prompt #{self.prompt_count} token usage:")
                    print(f"   - Input tokens: {input_tokens:,}")
                    print(f"   - Output tokens: {output_token_count:,}")
                    print(f"   - Total tokens: {prompt_total_tokens:,}")
                    print(f"📈 Cumulative token usage: {self.token_count:,}")

                    # Try to parse JSON with advanced error recovery
                    last_response = response.text
                    result = self._clean_json_response(last_response)
                    return json.loads(result)

                except json.JSONDecodeError as e:
                    last_error = e

                    if attempt < max_json_retries - 1:
                        print(f"⚠️ Attempt {attempt + 1}/{max_json_retries}: JSON parsing failed, retrying with feedback...")

                        # Add feedback about the JSON parsing failure and retry
                        error_feedback = f"""Your previous response could not be parsed as valid JSON. The specific error was: {str(e)}

IMPORTANT: You must provide a response that:
1. Contains ONLY valid JSON
2. Has NO markdown code blocks
3. Has NO explanatory text
4. Follows the exact schema requested
5. Uses proper JSON syntax (quotes, commas, brackets)
6. AVOID falling into recursive loops when retrieving data from the prompt

Here is the original prompt again:
"""
                        # Combine feedback with original prompt
                        prompt = error_feedback + prompt
                        continue
                    else:
                        print(f"❌ Failed to parse JSON after {max_json_retries} attempts")
                        print("Last response received:")
                        print(last_response)
                        print(f"Last error: {str(last_error)}")
                        raise

        except Exception as e:
            print(f"❌ Error in generate_json_response: {str(e)}")
            print("Stack trace:")
            print(traceback.format_exc())
            if "last_response" in locals():
                print("\nLast response received:")
                print(last_response)
            raise
211
-
212
def create_handler(api_key: Optional[str] = None) -> PromptAnalyzer:
    """Factory helper: build and return a configured PromptAnalyzer."""
    analyzer = PromptAnalyzer(api_key)
    return analyzer