Delete prompt_analyzer.py
Browse files- prompt_analyzer.py +0 -216
prompt_analyzer.py
DELETED
|
@@ -1,216 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import json
|
| 4 |
-
import time
|
| 5 |
-
from typing import Dict, Any, Optional, List
|
| 6 |
-
import google.generativeai as genai
|
| 7 |
-
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception
|
| 8 |
-
from threading import Lock
|
| 9 |
-
import traceback
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# System instruction attached to every GenerativeModel instance below; steers
# the model to merge the CODE STYLE / TEMPORAL / PROJECT PREFERENCES / IDENTITY
# CONFIDENCE prompt modules into one JSON report.
# NOTE(review): the wording has typos ("etc,.", "to many"); left untouched
# because this string is sent to the model at runtime — it is behavior, not a comment.
SYSTEM_PROMPT = "You are an experienced software engineer and data analyst tasked with building a report on developer's coding style, technical background, approach to problem solving, architectural thinking, technology choices, re-used frameworks etc,. There will be a set of prompts, divided into CODE STYLE ANALYSIS, TEMPORAL ANALYSIS, PROJECT PREFERENCES ANALYSIS and IDENTITY CONFIDENCE CALCULATION together with data samples provided to you. You'll summarize your findings from all of the modules in a single comprehensive IDENTITY CALCULATION CONFIDENCE output. Output a valid JSON, avoid including to many strings into the list objects! Follow the instructions provided for this section:"
|
| 13 |
-
|
| 14 |
-
def _should_retry_error(exception: Exception) -> bool:
|
| 15 |
-
"""Check if the exception is one we should retry"""
|
| 16 |
-
error_str = str(exception).lower()
|
| 17 |
-
return any(
|
| 18 |
-
msg in error_str
|
| 19 |
-
for msg in [
|
| 20 |
-
"resource exhaust",
|
| 21 |
-
"429",
|
| 22 |
-
"too many requests",
|
| 23 |
-
"quota exceeded",
|
| 24 |
-
"rate limit",
|
| 25 |
-
]
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
class RateLimiter:
    """Thread-safe token-bucket limiter: at most `rate` requests per `per` seconds."""

    def __init__(self, rate: int, per: int):
        self.rate = rate                # bucket capacity (requests allowed per window)
        self.per = per                  # window length in seconds
        self.tokens = rate              # tokens currently available (starts full)
        self.last_update = time.time()  # timestamp of the last refill
        self.lock = Lock()              # guards token accounting across threads

    def _add_tokens(self):
        """Refill the bucket proportionally to the time elapsed since the last refill."""
        now = time.time()
        earned = (now - self.last_update) * (self.rate / self.per)
        if earned > 0:
            self.tokens = min(self.rate, self.tokens + earned)
            self.last_update = now

    def acquire(self) -> float:
        """
        Consume one token if available and return 0.0; otherwise return the
        number of seconds the caller should wait before trying again.
        """
        with self.lock:
            self._add_tokens()
            if self.tokens < 1:
                # Seconds until the current deficit (1 - tokens) is refilled.
                return (1 - self.tokens) * (self.per / self.rate)
            self.tokens -= 1
            return 0.0
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
class PromptAnalyzer:
    """Handles LLM prompting for code analysis tasks via the Gemini API.

    Wraps a `genai.GenerativeModel` with local rate limiting, retry on
    quota/429 errors, cumulative token accounting, and JSON-response
    parsing with feedback-driven recovery.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialize Gemini handler with API key.

        Args:
            api_key: Explicit Gemini API key; falls back to the
                GEMINI_API_KEY environment variable when not given.

        Raises:
            ValueError: If no key is supplied and the environment
                variable is unset.
        """
        # SECURITY FIX: the key was previously hard-coded in source (a leaked,
        # committed credential) and the `api_key` parameter was silently
        # ignored. Prefer the explicit argument, then the environment
        # variable — matching what the error message below has always claimed.
        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Gemini API key must be provided or set in GEMINI_API_KEY environment variable"
            )

        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-001", system_instruction=SYSTEM_PROMPT
        )
        # self.chat = self.model.start_chat()
        self.token_count = 0   # cumulative tokens consumed across all prompts
        self.prompt_count = 0  # number of prompts issued so far
        # 5 requests per 60 seconds — conservative default for this model tier.
        self.rate_limiter = RateLimiter(rate=5, per=60)

    def count_tokens(self, text: str) -> int:
        """Count tokens in a text string, approximating if the API call fails."""
        try:
            token_count = self.model.count_tokens(text)
            return token_count.total_tokens
        except Exception as e:
            print(f"Warning: Error counting tokens: {str(e)}")
            # Fallback to approximate count if token counting fails
            return len(text) // 4  # Rough approximation

    def _clean_json_response(self, response_text: str) -> str:
        """Extract JSON content from a response that may be wrapped in ``` fences."""
        if "```" in response_text:
            match = re.search(r"```(?:json)?\n(.*?)```", response_text, re.DOTALL)
            if match:
                return match.group(1).strip()
        return response_text.strip()

    @retry(
        retry=retry_if_exception(_should_retry_error),
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=2, min=4, max=60),
        before_sleep=lambda retry_state: print(
            f"Retrying due to rate limit/resource exhaustion... (attempt {retry_state.attempt_number})"
        ),
    )
    def _rate_limited_generate(self, prompt: str) -> Any:
        """Handle rate-limited generation with waiting and resource exhaustion.

        Sleeps until the local rate limiter grants a token, then calls the
        model. Quota/429 errors are re-raised so the `retry` decorator can
        back off and retry; other errors propagate immediately.
        """
        while True:
            wait_time = self.rate_limiter.acquire()

            if wait_time == 0:
                try:
                    # Direct call to generate_content instead of using chat
                    return self.model.generate_content(prompt)
                except Exception as e:
                    if _should_retry_error(e):
                        print(
                            f"Rate limit/resource exhaustion error, will retry: {str(e)}"
                        )
                        raise  # Let the retry decorator handle it
                    else:
                        print(f"Non-retryable error occurred: {str(e)}")
                        raise

            print(f"Rate limit reached. Waiting {wait_time:.2f} seconds...")
            time.sleep(wait_time)

    @retry(
        stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    def generate_json_response(self, prompt: str) -> Dict[str, Any]:
        """Generate and parse a JSON response with robust error handling.

        Sends `prompt` to the model, tracks token usage, and parses the reply
        as JSON. On a parse failure the prompt is re-sent (up to 3 times)
        prefixed with corrective feedback about the JSON error.

        Raises:
            json.JSONDecodeError: If every parsing attempt fails.
        """
        try:
            self.prompt_count += 1
            print(f"\n📝 Processing prompt #{self.prompt_count}...")

            # CONSISTENCY FIX: use the fallback-aware helper instead of calling
            # self.model.count_tokens directly, so a counting failure cannot
            # abort the whole request.
            input_tokens = self.count_tokens(prompt)
            print(f"📊 Sending prompt with {input_tokens:,} tokens...")

            # Track retries for JSON parsing
            max_json_retries = 3
            last_response = None
            last_error = None

            for attempt in range(max_json_retries):
                try:
                    # Generate with rate limiting
                    start_time = time.time()
                    # Here's the actual model call
                    response = self._rate_limited_generate(prompt)
                    elapsed_time = time.time() - start_time

                    # Track token usage
                    output_token_count = response.usage_metadata.total_token_count
                    prompt_total_tokens = input_tokens + output_token_count
                    self.token_count += prompt_total_tokens

                    print(f"✓ Response received in {elapsed_time:.2f} seconds")
                    print(f"📊 Prompt #{self.prompt_count} token usage:")
                    print(f" - Input tokens: {input_tokens:,}")
                    print(f" - Output tokens: {output_token_count:,}")
                    print(f" - Total tokens: {prompt_total_tokens:,}")
                    print(f"📈 Cumulative token usage: {self.token_count:,}")

                    # Try to parse JSON with advanced error recovery
                    last_response = response.text
                    result = self._clean_json_response(last_response)
                    return json.loads(result)

                except json.JSONDecodeError as e:
                    last_error = e

                    if attempt < max_json_retries - 1:
                        print(f"⚠️ Attempt {attempt + 1}/{max_json_retries}: JSON parsing failed, retrying with feedback...")

                        # Add feedback about the JSON parsing failure and retry
                        error_feedback = f"""Your previous response could not be parsed as valid JSON. The specific error was: {str(e)}

IMPORTANT: You must provide a response that:
1. Contains ONLY valid JSON
2. Has NO markdown code blocks
3. Has NO explanatory text
4. Follows the exact schema requested
5. Uses proper JSON syntax (quotes, commas, brackets)
6. AVOID falling into recursive loops when retrieving data from the prompt

Here is the original prompt again:
"""
                        # Combine feedback with original prompt
                        prompt = error_feedback + prompt
                        continue
                    else:
                        print(f"❌ Failed to parse JSON after {max_json_retries} attempts")
                        print("Last response received:")
                        print(last_response)
                        print(f"Last error: {str(last_error)}")
                        raise

        except Exception as e:
            print(f"❌ Error in generate_json_response: {str(e)}")
            print("Stack trace:")
            print(traceback.format_exc())
            if "last_response" in locals():
                print("\nLast response received:")
                print(last_response)
            raise
|
| 211 |
-
|
| 212 |
-
def create_handler(api_key: Optional[str] = None) -> PromptAnalyzer:
    """Factory: build and return a PromptAnalyzer.

    Args:
        api_key: Optional Gemini API key forwarded to the analyzer.
    """
    return PromptAnalyzer(api_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|