import os
from google import genai
from google.genai import types
from .prompts import get_system_prompt  # <--- Essential import

def ask_llm(query, context, mode="Executive Summary", model_provider="Gemini"):
    """
    Switchable "brain": uses Google Gemini (powerful, cloud) by default, or routes
    to a private Granite endpoint when model_provider is anything else.
    """
    
    # 1. Get the Persona/Prompt
    system_instruction = get_system_prompt(mode)
    
    # --- OPTION A: GOOGLE GEMINI (New SDK) ---
    if model_provider == "Gemini":
        try:
            google_key = os.environ.get("GOOGLE_API_KEY")
            if not google_key:
                return "Error: GOOGLE_API_KEY not found in Secrets."
            
            # DEBUG PRINT: Verify we are running the new code
            print("DEBUG: Initializing new Gemini Client...")

            # NEW SYNTAX: Instantiate a Client object directly
            # (the legacy genai.configure() call from the old SDK is not used here)
            client = genai.Client(api_key=google_key)
            
            # Construct the prompt
            full_prompt = f"USER QUERY: {query}\n\nDOCUMENT CONTEXT:\n{context[:30000]}"
            
            # NEW SYNTAX: Call generate_content via the 'models' attribute
            response = client.models.generate_content(
                model='gemini-2.0-flash', 
                contents=full_prompt,
                config=types.GenerateContentConfig(
                    system_instruction=system_instruction,
                    max_output_tokens=1000,
                    temperature=0.3
                )
            )
            
            return response.text
            
        except Exception as e:
            print(f"DEBUG ERROR: {e}")
            return f"Gemini Error: {str(e)}"

    # --- OPTION B: GRANITE / LOCAL SPACE (Private Option) ---
    else:
        import requests  # Imported locally so requests is only needed for the Granite path
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            return "Error: HF_TOKEN is missing."

        api_url = "https://navydevildoc-private-granite.hf.space/generate"
        
        payload = {
            "text": f"USER QUESTION: {query}\n\nDOCUMENT CONTEXT:\n{context[:6000]}",
            "persona": system_instruction, 
            "model": "granite4:latest", 
            "max_tokens": 1024
        }
        
        headers = {
            "Authorization": f"Bearer {hf_token}",
            "Content-Type": "application/json"
        }
        
        try:
            response = requests.post(api_url, json=payload, headers=headers, timeout=120)
            if response.status_code == 200:
                return response.json().get("response", "Error: Empty response.")
            else:
                return f"Error {response.status_code}: {response.text}"
        except Exception as e:
            return f"Connection Error: {str(e)}"