NavyDevilDoc committed on
Commit
9ceba29
·
verified ·
1 Parent(s): 74f60fc

Update src/llm_client.py

Browse files
Files changed (1) hide show
  1. src/llm_client.py +23 -18
src/llm_client.py CHANGED
@@ -1,6 +1,7 @@
1
  import requests
2
  import os
3
- import google.genai as genai
 
4
  from .prompts import get_system_prompt
5
 
6
  def ask_llm(query, context, mode="Executive Summary", model_provider="Gemini"):
@@ -8,34 +9,40 @@ def ask_llm(query, context, mode="Executive Summary", model_provider="Gemini"):
8
  Switchable Brain: Defaults to Gemini (Powerful), falls back to Granite (Private).
9
  """
10
 
11
- # 1. Get the Persona/Prompt from our new library
12
- # This ensures "Instructor Mode" works for BOTH models
13
  system_instruction = get_system_prompt(mode)
14
 
15
- # --- OPTION A: GOOGLE GEMINI (The "Big Gun") ---
16
  if model_provider == "Gemini":
17
  try:
18
  google_key = os.environ.get("GOOGLE_API_KEY")
19
-
20
  if not google_key:
21
  return "Error: GOOGLE_API_KEY not found in Secrets."
22
 
23
- genai.configure(api_key=google_key)
24
-
25
- model = genai.GenerativeModel(
26
- model_name='gemini-1.5-flash',
27
- system_instruction=system_instruction
28
- )
29
 
 
30
  full_prompt = f"USER QUERY: {query}\n\nDOCUMENT CONTEXT:\n{context[:30000]}"
31
 
32
- response = model.generate_content(full_prompt)
 
 
 
 
 
 
 
 
 
 
 
33
  return response.text
34
 
35
  except Exception as e:
36
  return f"Gemini Error: {str(e)}"
37
 
38
- # --- OPTION B: GRANITE / LOCAL SPACE (The "Private Option") ---
39
  else:
40
  hf_token = os.environ.get("HF_TOKEN")
41
  if not hf_token:
@@ -45,10 +52,9 @@ def ask_llm(query, context, mode="Executive Summary", model_provider="Gemini"):
45
 
46
  payload = {
47
  "text": f"USER QUESTION: {query}\n\nDOCUMENT CONTEXT:\n{context[:6000]}",
48
- # CRITICAL FIX: We now inject the dynamic persona here
49
  "persona": system_instruction,
50
- "model": "granite4:latest", # Or gemma3:latest
51
- "max_tokens": 5000
52
  }
53
 
54
  headers = {
@@ -57,8 +63,7 @@ def ask_llm(query, context, mode="Executive Summary", model_provider="Gemini"):
57
  }
58
 
59
  try:
60
- # We increased timeout because local models can be slower
61
- response = requests.post(api_url, json=payload, headers=headers, timeout=600)
62
  if response.status_code == 200:
63
  return response.json().get("response", "Error: Empty response.")
64
  else:
 
1
  import requests
2
  import os
3
+ from google import genai
4
+ from google.genai import types # The new configuration handler
5
  from .prompts import get_system_prompt
6
 
7
  def ask_llm(query, context, mode="Executive Summary", model_provider="Gemini"):
 
9
  Switchable Brain: Defaults to Gemini (Powerful), falls back to Granite (Private).
10
  """
11
 
12
+ # 1. Get the Persona/Prompt
 
13
  system_instruction = get_system_prompt(mode)
14
 
15
+ # --- OPTION A: GOOGLE GEMINI (New SDK) ---
16
  if model_provider == "Gemini":
17
  try:
18
  google_key = os.environ.get("GOOGLE_API_KEY")
 
19
  if not google_key:
20
  return "Error: GOOGLE_API_KEY not found in Secrets."
21
 
22
+ # NEW SYNTAX: Instantiate a Client object directly
23
+ client = genai.Client(api_key=google_key)
 
 
 
 
24
 
25
+ # Construct the prompt
26
  full_prompt = f"USER QUERY: {query}\n\nDOCUMENT CONTEXT:\n{context[:30000]}"
27
 
28
+ # NEW SYNTAX: Call generate_content via the 'models' attribute
29
+ response = client.models.generate_content(
30
+ model='gemini-1.5-flash', # or 'gemini-2.0-flash' if available to you
31
+ contents=full_prompt,
32
+ config=types.GenerateContentConfig(
33
+ system_instruction=system_instruction,
34
+ max_output_tokens=1000,
35
+ temperature=0.3 # Low temperature for factual accuracy
36
+ )
37
+ )
38
+
39
+ # The response object structure is slightly different now
40
  return response.text
41
 
42
  except Exception as e:
43
  return f"Gemini Error: {str(e)}"
44
 
45
+ # --- OPTION B: GRANITE / LOCAL SPACE (Unchanged) ---
46
  else:
47
  hf_token = os.environ.get("HF_TOKEN")
48
  if not hf_token:
 
52
 
53
  payload = {
54
  "text": f"USER QUESTION: {query}\n\nDOCUMENT CONTEXT:\n{context[:6000]}",
 
55
  "persona": system_instruction,
56
+ "model": "granite4:latest",
57
+ "max_tokens": 1024
58
  }
59
 
60
  headers = {
 
63
  }
64
 
65
  try:
66
+ response = requests.post(api_url, json=payload, headers=headers, timeout=120)
 
67
  if response.status_code == 200:
68
  return response.json().get("response", "Error: Empty response.")
69
  else: