Vishwanath77 committed on
Commit
926755f
·
verified ·
1 Parent(s): 028cbc2

Upload llm.py

Browse files
Files changed (1) hide show
  1. src/apps/utils/llm.py +128 -152
src/apps/utils/llm.py CHANGED
@@ -1,152 +1,128 @@
1
- import openai
2
- import os
3
- from dotenv import load_dotenv
4
-
5
- # Explicitly load .env from the src/apps directory
6
- # llm.py is in src/apps/utils/llm.py, so we go up two levels
7
- BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
8
- env_path = os.path.join(BASE_DIR, '.env')
9
- load_dotenv(env_path)
10
-
11
- def nemotron_llama(query, context, chat_history, role="General"):
12
-
13
- prompt_template = """You are a multi-role expert AI assistant named "Law Bot" with strict role-based reasoning.
14
- Answer federal and state law questions ONLY based on the provided context.
15
-
16
- ## Role Behavior Rules:
17
- Current Active Role: {role}
18
-
19
- 1. Judge Mode:
20
- - Answer like an experienced judge.
21
- - Focus on legality, neutrality, precedents, logic, and final judgment.
22
- - Avoid emotional language and advocacy.
23
- - Think professionally, critically, and decisively.
24
-
25
- 2. Advocate Mode:
26
- - Answer like a skilled advocate/lawyer.
27
- - Focus on arguments, strategies, loopholes, and persuasion.
28
- - Slightly less neutral than Judge mode.
29
- - More practical and tactical.
30
-
31
- 3. Woman Mode:
32
- - Answer strictly from a woman’s perspective.
33
- - Consider safety, social reality, emotional intelligence, and lived experience.
34
- - Do not generalize or switch to male viewpoints.
35
-
36
- 4. Minor Mode:
37
- - Use very simple language with short explanations.
38
- - Focus only on what is necessary and appropriate for a minor.
39
- - No complex terms, no adult framing.
40
-
41
- 5. Student Mode:
42
- - Answer based on student needs.
43
- - Be clear, structured, and learning-focused.
44
- - Use examples, steps, and explanations helpful for studying or exams.
45
-
46
- 6. Citizen Mode:
47
- - Answer as a helpful legal guide for a common citizen.
48
- - Focus on practical rights, duties, and actionable steps.
49
- - Explain legal jargon in simple, everyday language.
50
- - Be empathetic but objective and informative.
51
-
52
- ## Mandatory Performance Requirements:
53
- - Prioritize clarity over verbosity.
54
- - Responses must be fast and concise.
55
- - Avoid unnecessary explanations unless asked.
56
- - Optimize reasoning speed and reduce delay.
57
- - Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
58
- **Title**: [Name]
59
- **Page Numbers**: [Number]
60
- - Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
61
- **Title**: [Name]
62
- **Page Numbers**: [Number]
63
- - You are currently acting as {role}. You MUST stay in this character. Do NOT switch roles or ask for clarification.
64
-
65
- Context: {context}
66
- Chat History: {chat_history}
67
- """
68
- # print(f"DEBUG: LLM Prompt Configured for Role: {role}")
69
- formatted_prompt = prompt_template.format(role=role, context=context, chat_history=chat_history)
70
-
71
- # Merge system prompt into user message to support models that reject 'system' role
72
- messages = [
73
- {"role": "user", "content": f"{formatted_prompt}\n\nUser Query: {query}"}
74
- ]
75
-
76
- # Use OPENROUTER_API_KEY from .env
77
- api_key = os.getenv("OPENROUTER_API_KEY", "").strip()
78
-
79
- # Emergency cleanup for common copy-paste errors
80
- if "sk-or-v1-sk-or-v1-" in api_key:
81
- api_key = api_key.replace("sk-or-v1-sk-or-v1-", "sk-or-v1-")
82
-
83
- if not api_key:
84
- # Fallback to general API_KEY if exists (per ChatGPT advice)
85
- api_key = os.getenv("API_KEY", "").strip()
86
-
87
- print(f"API KEY EXISTS: {bool(api_key)}")
88
- if api_key:
89
- print(f"DEBUG: Key starts with {api_key[:10]}...")
90
-
91
- if not api_key:
92
- raise ValueError(
93
- "Set OPENROUTER_API_KEY in src/apps/.env (get a key at https://openrouter.ai/keys)"
94
- )
95
-
96
- # Switched to Mistral Small 3.1 to avoid "PROHIBITED_CONTENT" errors
97
- # Mistral is much better for professional/legal reasoning without over-censorship.
98
- primary_model = "mistralai/mistral-small-3.1-24b-instruct:free"
99
-
100
- # Initialize client with recommended OpenRouter headers
101
- client = openai.OpenAI(
102
- base_url="https://openrouter.ai/api/v1",
103
- api_key=api_key,
104
- default_headers={
105
- "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"), # Required for some free models
106
- "X-Title": "Law Bot AI"
107
- }
108
- )
109
-
110
- # Reordered fallbacks
111
- fallback_models = [
112
- "qwen/qwen3-coder:free",
113
- "meta-llama/llama-3.2-3b-instruct:free",
114
- "google/gemma-3-4b-it:free"
115
- ]
116
-
117
- current_model = primary_model
118
- try:
119
- completion = client.chat.completions.create(
120
- model=current_model,
121
- messages=messages,
122
- temperature=0,
123
- stream=True,
124
- max_tokens=1024
125
- )
126
- return completion
127
- except Exception as e:
128
- print(f"Error in primary model ({current_model}): {e}")
129
-
130
- # Sequentially try fallbacks
131
- for fallback_model in fallback_models:
132
- try:
133
- print(f"Attempting fallback to {fallback_model}...")
134
- completion = client.chat.completions.create(
135
- model=fallback_model,
136
- messages=messages,
137
- temperature=0,
138
- stream=True,
139
- max_tokens=1024
140
- )
141
- return completion
142
- except Exception as fallback_error:
143
- print(f"Fallback to {fallback_model} failed: {fallback_error}")
144
- continue
145
-
146
- # If all fail, re-raise the original error or a final one
147
- print("All LLM attempts failed.")
148
- raise e
149
-
150
- def nemotron_llama_raw(query, context, chat_history, role="General"):
151
- # This is a legacy alias if needed by other modules
152
- return nemotron_llama(query, context, chat_history, role)
 
1
+ import openai
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
# Explicitly load .env from the src/apps directory.
# This module lives at src/apps/utils/llm.py, so the .env file sits two
# directory levels above this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
env_path = os.path.join(BASE_DIR, '.env')
load_dotenv(env_path)
9
+
10
def nemotron_llama(query, context, chat_history, role="General"):
    """Stream a role-conditioned legal answer from OpenRouter.

    Builds a persona-driven prompt, then tries a sequence of free models
    until one accepts the request (sequential fallback sidesteps 429
    rate limits on individual free models).

    Args:
        query: The user's question.
        context: Retrieved document text the answer must be grounded in.
        chat_history: Prior conversation text, interpolated into the prompt.
        role: Active persona — "Judge", "Advocate", "Woman", "Minor",
            "Student", "Citizen", or the default "General".

    Returns:
        A streaming chat-completion object (``stream=True``) from the first
        model that accepts the request.

    Raises:
        ValueError: If no API key is found in the environment.
        RuntimeError: If every candidate model fails (chained from the last
            provider error).
    """
    prompt_template = """You are a multi-role expert AI assistant named "Law Bot" with strict role-based reasoning.
Answer federal and state law questions ONLY based on the provided context.

## Role Behavior Rules:
Current Active Role: {role}

1. Judge Mode:
- Answer like an experienced judge.
- Focus on legality, neutrality, precedents, logic, and final judgment.
- Avoid emotional language and advocacy.
- Think professionally, critically, and decisively.

2. Advocate Mode:
- Answer like a skilled advocate/lawyer.
- Focus on arguments, strategies, loopholes, and persuasion.
- Slightly less neutral than Judge mode.
- More practical and tactical.

3. Woman Mode:
- Answer strictly from a woman’s perspective.
- Consider safety, social reality, emotional intelligence, and lived experience.
- Do not generalize or switch to male viewpoints.

4. Minor Mode:
- Use very simple language with short explanations.
- Focus only on what is necessary and appropriate for a minor.
- No complex terms, no adult framing.

5. Student Mode:
- Answer based on student needs.
- Be clear, structured, and learning-focused.
- Use examples, steps, and explanations helpful for studying or exams.

6. Citizen Mode:
- Answer as a helpful legal guide for a common citizen.
- Focus on practical rights, duties, and actionable steps.
- Explain legal jargon in simple, everyday language.
- Be empathetic but objective and informative.

## Mandatory Performance Requirements:
- Prioritize clarity over verbosity.
- Responses must be fast and concise.
- Avoid unnecessary explanations unless asked.
- Optimize reasoning speed and reduce delay.
- Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
**Title**: [Name]
**Page Numbers**: [Number]
- You are currently acting as {role}. You MUST stay in this character. Do NOT switch roles or ask for clarification.

Context: {context}
Chat History: {chat_history}
"""
    formatted_prompt = prompt_template.format(role=role, context=context, chat_history=chat_history)

    # Merge system prompt into user message to support models that reject 'system' role
    messages = [
        {"role": "user", "content": f"{formatted_prompt}\n\nUser Query: {query}"}
    ]

    # Use OPENROUTER_API_KEY from .env
    api_key = os.getenv("OPENROUTER_API_KEY", "").strip()

    # Emergency cleanup for a common copy-paste error (key prefix pasted twice)
    if "sk-or-v1-sk-or-v1-" in api_key:
        api_key = api_key.replace("sk-or-v1-sk-or-v1-", "sk-or-v1-")

    if not api_key:
        # Fall back to a generic API_KEY variable if present
        api_key = os.getenv("API_KEY", "").strip()

    if not api_key:
        raise ValueError(
            "Set OPENROUTER_API_KEY in src/apps/.env (get a key at https://openrouter.ai/keys)"
        )

    # Models tried sequentially - helps avoid 429 errors on free-tier models
    models = [
        "google/gemma-3-4b-it:free",
        "mistralai/mistral-small-3.1-24b-instruct:free",
        "meta-llama/llama-3.2-3b-instruct:free",
        "qwen/qwen3-coder:free"
    ]

    client = openai.OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
        default_headers={
            "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"),  # Required for some free models
            "X-Title": "Law Bot AI"
        }
    )

    # Try all models in order, remembering the most recent failure so it can
    # be surfaced as the cause if every model is exhausted.
    last_error = None
    for current_model in models:
        try:
            print(f"Attempting model: {current_model}...")
            completion = client.chat.completions.create(
                model=current_model,
                messages=messages,
                temperature=0,
                stream=True,
                max_tokens=1024
            )
            return completion
        except Exception as e:  # any provider error triggers fallback to the next model
            print(f"Error in {current_model}: {e}")
            last_error = e
            continue

    # All models failed: raise with the last provider error chained as cause.
    raise RuntimeError(
        "All LLM models are currently rate-limited or unavailable. Please try again in 1 minute."
    ) from last_error
125
+
126
def nemotron_llama_raw(query, context, chat_history, role="General"):
    """Legacy alias retained for modules that still import the old name.

    Delegates directly to ``nemotron_llama`` with identical arguments.
    """
    return nemotron_llama(query, context, chat_history, role=role)