Anshul Prasad committed on
Commit
1c631a4
·
1 Parent(s): fa1422d

simplify code by removing overprotection

Browse files
Files changed (1) hide show
  1. api/generate_response.py +41 -70
api/generate_response.py CHANGED
@@ -1,8 +1,11 @@
 
1
  from openai import OpenAI
2
- import logging, tiktoken, random, threading, time
 
 
3
  from config import API_URL, MODEL, GH_API_TOKEN
4
 
5
- logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
6
 
7
  try:
8
  encoder = tiktoken.encoding_for_model(MODEL)
@@ -10,87 +13,55 @@ except KeyError:
10
  # fallback for custom or unrecognized model names
11
  encoder = tiktoken.get_encoding("cl100k_base")
12
 
13
-
14
def count_tokens(text: str) -> int:
    """Return how many tokens *text* occupies under the model's tokenizer."""
    tokens = encoder.encode(text)
    return len(tokens)
17
-
18
-
19
  try:
20
  client = OpenAI(base_url=API_URL, api_key=GH_API_TOKEN, timeout=60)
21
  logging.info("OpenAI client initialized.")
22
  except Exception as e:
23
- logging.critical(f"Failed to initialize OpenAI client: {e}")
24
  client = None
25
 
26
- # --- Minimal concurrency limiter (per-process) ---
27
- # Tune this to 1 or 2 depending on your provider/account limits.
28
- SEMAPHORE_LIMIT = 1
29
- _semaphore = threading.Semaphore(SEMAPHORE_LIMIT)
30
-
31
 
32
- def generate_response(query, context, max_retries: int = 4, base_backoff: float = 0.5):
 
 
 
33
 
34
  if client is None:
35
  return "Error: AI client not configured."
36
 
37
- logging.info("Starting answer generation...")
38
-
39
  prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
40
- logging.info("Prepared prompt for generation.")
41
- logging.info(f"Total number of tokens in prompt: {count_tokens(prompt)}")
42
-
43
- for attempt in range(max_retries):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  try:
45
- # Acquire semaphore (blocks this thread until allowed)
46
- with _semaphore:
47
- response = client.chat.completions.create(
48
- messages=[
49
- {
50
- "role": "system",
51
- "content": "You are a helpful assistant.",
52
- },
53
- {
54
- "role": "user",
55
- "content": prompt,
56
- },
57
- ],
58
- temperature=1,
59
- top_p=1,
60
- model=MODEL,
61
- stream=False,
62
- )
63
-
64
- # Extract text defensively (depends on SDK return shape)
65
- try:
66
-
67
- response = response.choices[0].message.content
68
- except Exception:
69
- response = getattr(response, "text", None) or str(response)
70
- logging.warning("Fallback used for response parsing.")
71
-
72
- logging.info("Answer generation succeeded.")
73
- return response
74
 
 
75
  except Exception as e:
76
- msg = str(e)
77
- # Heuristic detection for rate-limit / 429
78
- is_rate_limit = (
79
- "429" in msg
80
- or "RateLimit" in msg
81
- or "Rate limit" in msg
82
- or "RateLimitReached" in msg
83
- )
84
 
85
- if is_rate_limit and attempt < max_retries - 1:
86
- wait = base_backoff * (2**attempt) + random.random() * 0.1
87
- logging.warning(
88
- f"Rate limited by API (attempt {attempt+1}/{max_retries}). "
89
- f"Sleeping {wait:.2f}s before retry. Error: {msg}"
90
- )
91
- time.sleep(wait)
92
- continue
93
- # Non-retryable error or retries exhausted
94
- logging.error(f"Error during API call: {e}")
95
- return "Sorry, there was an error generating the response."
96
- return "Sorry, there was an error generating the response."
 
1
+ import logging
2
  from openai import OpenAI
3
+ import tiktoken
4
+
5
+ from api.generate_response import count_tokens
6
  from config import API_URL, MODEL, GH_API_TOKEN
7
 
8
+ logger = logging.getLogger(__name__)
9
 
10
# Resolve the tokenizer for MODEL once at import time.
try:
    encoder = tiktoken.encoding_for_model(MODEL)
except KeyError:
    # encoding_for_model raises KeyError for custom or unrecognized
    # model names; fall back to the general-purpose cl100k_base encoding.
    # (The `except KeyError:` line is elided in this diff view but is
    # visible in the hunk header — restored here.)
    encoder = tiktoken.get_encoding("cl100k_base")
15
 
 
 
 
 
 
 
16
# Initialize the shared OpenAI client; on failure, leave `client` as None
# so generate_response() can degrade gracefully instead of crashing at import.
try:
    client = OpenAI(base_url=API_URL, api_key=GH_API_TOKEN, timeout=60)
    logger.info("OpenAI client initialized.")
except Exception as e:
    # Use the module logger (defined above) rather than the root logger.
    logger.critical("Failed to initialize OpenAI client: %s", e)
    client = None
22
 
 
 
 
 
 
23
 
24
def generate_response(
    query: str,
    context: str,
) -> str:
    """Answer *query* grounded in *context* via the chat-completions API.

    Builds a single-turn prompt from the given context and question, sends
    it to the configured model, and returns the model's answer text.

    Returns:
        The model's answer string, or a human-readable error message when
        the client is not configured or the API call fails.
    """
    if client is None:
        return "Error: AI client not configured."

    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    # Count tokens via the module-level encoder directly: this commit
    # deleted the count_tokens helper yet re-imported it from
    # api.generate_response (a circular self-import of this very module),
    # which breaks at import time. The inline call removes that dependency.
    logger.info("Total number of tokens in prompt: %s", len(encoder.encode(prompt)))

    # Keep the try narrow: only the network call can meaningfully fail here.
    try:
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant.",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
            temperature=1,
            top_p=1,
            model=MODEL,
            stream=False,
        )
    except Exception as e:
        logger.error("Error during API call: %s", e)
        return "Sorry, there was an error generating the response."

    # Extract text defensively (depends on SDK return shape).
    try:
        answer = response.choices[0].message.content
    except Exception as e:
        answer = getattr(response, "text", None) or str(response)
        logger.warning("Fallback used for response parsing: %s", e)

    logger.info("Answer generation succeeded.")
    return answer