Vishwanath77 committed on
Commit
fc64d75
·
verified ·
1 Parent(s): 83e406c

Upload llm.py

Browse files
Files changed (1) hide show
  1. src/apps/utils/llm.py +39 -89
src/apps/utils/llm.py CHANGED
@@ -2,127 +2,77 @@ import openai
2
  import os
3
  from dotenv import load_dotenv
4
 
5
- # Explicitly load .env from the src/apps directory
6
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
7
  env_path = os.path.join(BASE_DIR, '.env')
8
  load_dotenv(env_path)
9
 
10
  def nemotron_llama(query, context, chat_history, role="General"):
11
-
12
- prompt_template = """You are a multi-role expert AI assistant named "Law Bot" with strict role-based reasoning.
13
- Answer federal and state law questions ONLY based on the provided context.
14
-
15
- ## Role Behavior Rules:
16
- Current Active Role: {role}
17
-
18
- 1. Judge Mode:
19
- - Answer like an experienced judge.
20
- - Focus on legality, neutrality, precedents, logic, and final judgment.
21
- - Avoid emotional language and advocacy.
22
- - Think professionally, critically, and decisively.
23
-
24
- 2. Advocate Mode:
25
- - Answer like a skilled advocate/lawyer.
26
- - Focus on arguments, strategies, loopholes, and persuasion.
27
- - Slightly less neutral than Judge mode.
28
- - More practical and tactical.
29
-
30
- 3. Woman Mode:
31
- - Answer strictly from a woman’s perspective.
32
- - Consider safety, social reality, emotional intelligence, and lived experience.
33
- - Do not generalize or switch to male viewpoints.
34
-
35
- 4. Minor Mode:
36
- - Use very simple language with short explanations.
37
- - Focus only on what is necessary and appropriate for a minor.
38
- - No complex terms, no adult framing.
39
-
40
- 5. Student Mode:
41
- - Answer based on student needs.
42
- - Be clear, structured, and learning-focused.
43
- - Use examples, steps, and explanations helpful for studying or exams.
44
-
45
- 6. Citizen Mode:
46
- - Answer as a helpful legal guide for a common citizen.
47
- - Focus on practical rights, duties, and actionable steps.
48
- - Explain legal jargon in simple, everyday language.
49
- - Be empathetic but objective and informative.
50
-
51
- ## Mandatory Performance Requirements:
52
- - Prioritize clarity over verbosity.
53
- - Responses must be fast and concise.
54
- - Avoid unnecessary explanations unless asked.
55
- - Optimize reasoning speed and reduce delay.
56
- - Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
57
- **Title**: [Name]
58
- **Page Numbers**: [Number]
59
- - Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
60
- **Title**: [Name]
61
- **Page Numbers**: [Number]
62
- - You are currently acting as {role}. You MUST stay in this character. Do NOT switch roles or ask for clarification.
63
-
64
- Context: {context}
65
  Chat History: {chat_history}
66
- """
67
- # print(f"DEBUG: LLM Prompt Configured for Role: {role}")
68
- formatted_prompt = prompt_template.format(role=role, context=context, chat_history=chat_history)
69
 
70
- # Merge system prompt into user message to support models that reject 'system' role
71
- messages = [
72
- {"role": "user", "content": f"{formatted_prompt}\n\nUser Query: {query}"}
73
- ]
74
 
75
- # Use OPENROUTER_API_KEY from .env
76
- api_key = os.getenv("OPENROUTER_API_KEY", "").strip()
77
-
78
- # Emergency cleanup for common copy-paste errors
79
- if "sk-or-v1-sk-or-v1-" in api_key:
80
- api_key = api_key.replace("sk-or-v1-sk-or-v1-", "sk-or-v1-")
81
-
82
- if not api_key:
83
- api_key = os.getenv("API_KEY", "").strip()
84
 
85
- if not api_key:
86
- raise ValueError(
87
- "Set OPENROUTER_API_KEY in src/apps/.env (get a key at https://openrouter.ai/keys)"
88
- )
89
 
90
- # List of models to try sequentially - helps avoid 429 errors
91
  models = [
92
- "google/gemma-3-4b-it:free",
93
- "mistralai/mistral-small-3.1-24b-instruct:free",
94
  "meta-llama/llama-3.2-3b-instruct:free",
95
- "qwen/qwen3-coder:free"
 
 
 
 
 
96
  ]
97
 
98
  client = openai.OpenAI(
99
  base_url="https://openrouter.ai/api/v1",
100
  api_key=api_key,
101
  default_headers={
102
- "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"), # Required for some free models
103
- "X-Title": "Law Bot AI"
104
  }
105
  )
106
 
107
- # Try all models in order
108
  for current_model in models:
109
  try:
110
- print(f"Attempting model: {current_model}...")
111
  completion = client.chat.completions.create(
112
  model=current_model,
113
  messages=messages,
114
- temperature=0,
115
  stream=True,
116
  max_tokens=1024
117
  )
118
  return completion
119
  except Exception as e:
120
- print(f"Error in {current_model}: {e}")
121
- continue # Try next fallback model
 
 
 
 
 
122
 
123
- # If all models fail, raise the last error
124
- raise Exception("All LLM models are currently rate-limited or unavailable. Please try again in 1 minute.")
125
 
126
  def nemotron_llama_raw(query, context, chat_history, role="General"):
127
- # This is a legacy alias if needed by other modules
128
  return nemotron_llama(query, context, chat_history, role)
 
2
  import os
3
  from dotenv import load_dotenv
4
 
5
+ # Load local environment
6
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
7
  env_path = os.path.join(BASE_DIR, '.env')
8
  load_dotenv(env_path)
9
 
10
def nemotron_llama(query, context, chat_history, role="General"):
    """Stream a role-conditioned legal answer from an OpenRouter-hosted model.

    Builds a concise prompt from the retrieved legal context and chat
    history, then tries a list of free models in order until one accepts
    the request, returning the streaming completion object.

    Args:
        query: The user's legal question.
        context: Retrieved legal context the answer must be grounded in.
        chat_history: Prior conversation turns, included in the prompt.
        role: Persona the model must stay in (e.g. "Judge", "Advocate").

    Returns:
        The streaming chat-completion object from the first model that
        accepts the request.

    Raises:
        ValueError: If no API key is configured in the environment.
        Exception: If every model in the fallback list fails; the last
            provider error is chained as the cause.
    """
    # Precise, concise prompt for legal reasoning.
    prompt = f"""Role: {role}
Legal Context: {context}
Chat History: {chat_history}

Task: Answer based strictly on context.
1. Be concise & professional.
2. If citing, use format: 'Title: [Name] | Page Numbers: [Number]'
3. Stay strictly in character as a {role}."""

    messages = [{"role": "user", "content": f"{prompt}\n\nUser Query: {query}"}]

    # API configuration: prefer OPENROUTER_API_KEY, fall back to API_KEY.
    api_key = os.getenv("OPENROUTER_API_KEY", "").strip()
    if not api_key:
        api_key = os.getenv("API_KEY", "").strip()
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY missing.")

    # Free models tried in order for instant fallback on errors/rate limits.
    models = [
        "meta-llama/llama-3.2-3b-instruct:free",
        "qwen/qwen-2.5-72b-instruct:free",
        "mistralai/mistral-small-3.1-24b-instruct:free",
        "google/gemma-3-4b-it:free",
        "liquid/lfm-2.5-1.2b-instruct:free",
        "nvidia/llama-3.1-nemotron-70b-instruct:free",
        "qwen/qwen2.5-7b-instruct:free",
    ]

    client = openai.OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
        default_headers={
            "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"),
            "X-Title": "Law Bot Pro",
        },
    )

    # Markers indicating a provider-side safety filter rejected the prompt;
    # hoisted so the tuple is not rebuilt on every failed attempt.
    block_markers = ("PROHIBITED", "SAFETY", "FILTER", "BLOCKED")

    last_error = None
    for current_model in models:
        try:
            # Very low temperature (0.1) for legal precision.
            return client.chat.completions.create(
                model=current_model,
                messages=messages,
                temperature=0.1,
                stream=True,
                max_tokens=1024,
            )
        except Exception as e:
            last_error = e
            err_text = str(e).upper()
            if any(marker in err_text for marker in block_markers):
                print(f"DEBUG: {current_model} blocked. Switching...")
            else:
                print(f"DEBUG: {current_model} error: {e}")
            # Fall through to the next model in the list.

    # All models failed: chain the last provider error for easier debugging.
    raise Exception(
        "System overloaded. Please wait 30 seconds and try again."
    ) from last_error
 
76
 
77
def nemotron_llama_raw(query, context, chat_history, role="General"):
    """Legacy alias kept for backward compatibility; delegates to nemotron_llama."""
    return nemotron_llama(
        query=query,
        context=context,
        chat_history=chat_history,
        role=role,
    )