Spaces:

Legal-Assistant
/

LawBot

Running

App Files Files Community

Vishwanath77 committed 26 days ago

Commit

fc64d75

verified ·

1 Parent(s): 83e406c

Upload llm.py

Browse files

Files changed (1) hide show

src/apps/utils/llm.py +39 -89

src/apps/utils/llm.py CHANGED Viewed

@@ -2,127 +2,77 @@ import openai
 import os
 from dotenv import load_dotenv
-# Explicitly load .env from the src/apps directory
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 env_path = os.path.join(BASE_DIR, '.env')
 load_dotenv(env_path)
 def nemotron_llama(query, context, chat_history, role="General"):
-    prompt_template = """You are a multi-role expert AI assistant named "Law Bot" with strict role-based reasoning.
-Answer federal and state law questions ONLY based on the provided context.
-## Role Behavior Rules:
-Current Active Role: {role}
-1. Judge Mode:
-   - Answer like an experienced judge.
-   - Focus on legality, neutrality, precedents, logic, and final judgment.
-   - Avoid emotional language and advocacy.
-   - Think professionally, critically, and decisively.
-2. Advocate Mode:
-   - Answer like a skilled advocate/lawyer.
-   - Focus on arguments, strategies, loopholes, and persuasion.
-   - Slightly less neutral than Judge mode.
-   - More practical and tactical.
-3. Woman Mode:
-   - Answer strictly from a woman’s perspective.
-   - Consider safety, social reality, emotional intelligence, and lived experience.
-   - Do not generalize or switch to male viewpoints.
-4. Minor Mode:
-   - Use very simple language with short explanations.
-   - Focus only on what is necessary and appropriate for a minor.
-   - No complex terms, no adult framing.
-5. Student Mode:
-   - Answer based on student needs.
-   - Be clear, structured, and learning-focused.
-   - Use examples, steps, and explanations helpful for studying or exams.
-6. Citizen Mode:
-   - Answer as a helpful legal guide for a common citizen.
-   - Focus on practical rights, duties, and actionable steps.
-   - Explain legal jargon in simple, everyday language.
-   - Be empathetic but objective and informative.
-## Mandatory Performance Requirements:
-- Prioritize clarity over verbosity.
-- Responses must be fast and concise.
-- Avoid unnecessary explanations unless asked.
-- Optimize reasoning speed and reduce delay.
-- Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
-**Title**: [Name]
-**Page Numbers**: [Number]
-- Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
-**Title**: [Name]
-**Page Numbers**: [Number]
-- You are currently acting as {role}. You MUST stay in this character. Do NOT switch roles or ask for clarification.
-Context: {context}
 Chat History: {chat_history}
-"""
-    # print(f"DEBUG: LLM Prompt Configured for Role: {role}")
-    formatted_prompt = prompt_template.format(role=role, context=context, chat_history=chat_history)
-    # Merge system prompt into user message to support models that reject 'system' role
-    messages = [
-        {"role": "user", "content": f"{formatted_prompt}\n\nUser Query: {query}"}
-    ]
-    # Use OPENROUTER_API_KEY from .env
-    api_key = os.getenv("OPENROUTER_API_KEY", "").strip()
-    # Emergency cleanup for common copy-paste errors
-    if "sk-or-v1-sk-or-v1-" in api_key:
-        api_key = api_key.replace("sk-or-v1-sk-or-v1-", "sk-or-v1-")
-    if not api_key:
-        api_key = os.getenv("API_KEY", "").strip()
-    if not api_key:
-        raise ValueError(
-            "Set OPENROUTER_API_KEY in src/apps/.env (get a key at https://openrouter.ai/keys)"
-        )
-    # List of models to try sequentially - helps avoid 429 errors
     models = [
-        "google/gemma-3-4b-it:free",
-        "mistralai/mistral-small-3.1-24b-instruct:free",
         "meta-llama/llama-3.2-3b-instruct:free",
-        "qwen/qwen3-coder:free"
     ]
     client = openai.OpenAI(
         base_url="https://openrouter.ai/api/v1",
         api_key=api_key,
         default_headers={
-            "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"), # Required for some free models
-            "X-Title": "Law Bot AI"
         }
     )
-    # Try all models in order
     for current_model in models:
         try:
-            print(f"Attempting model: {current_model}...")
             completion = client.chat.completions.create(
                 model=current_model,
                 messages=messages,
-                temperature=0,
                 stream=True,
                 max_tokens=1024
             )
             return completion
         except Exception as e:
-            print(f"Error in {current_model}: {e}")
-            continue # Try next fallback model
-    # If all models fail, raise the last error
-    raise Exception("All LLM models are currently rate-limited or unavailable. Please try again in 1 minute.")
 def nemotron_llama_raw(query, context, chat_history, role="General"):
-    # This is a legacy alias if needed by other modules
     return nemotron_llama(query, context, chat_history, role)

 import os
 from dotenv import load_dotenv
+# Load local environment
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 env_path = os.path.join(BASE_DIR, '.env')
 load_dotenv(env_path)
 def nemotron_llama(query, context, chat_history, role="General"):
     """Request a streamed Law Bot answer from the first OpenRouter model that responds.

     The role, context and chat history are merged into one user message
     (no separate system message is sent). The models in the list below are
     then tried in order and the first successful call is returned.

     Args:
         query: The user's question; appended to the prompt as "User Query".
         context: Text the model is told to base its answer on.
         chat_history: Prior conversation, interpolated into the prompt as-is.
         role: Persona the model is told to stay in (default "General").

     Returns:
         The result of ``client.chat.completions.create(..., stream=True)``
         for the first model whose call does not raise. It is returned
         unconsumed.

     Raises:
         ValueError: If neither OPENROUTER_API_KEY nor API_KEY is set.
         Exception: If every model fails. The error raised by the last
             attempted model is attached as ``__cause__``.
     """
     # Fail fast on configuration problems before building anything else.
     api_key = (
         os.getenv("OPENROUTER_API_KEY", "").strip()
         or os.getenv("API_KEY", "").strip()
     )
     if not api_key:
         raise ValueError("OPENROUTER_API_KEY missing.")
     # Built line by line so the prompt text does not depend on source indentation.
     prompt = "\n".join(
         (
             f"Role: {role}",
             f"Legal Context: {context}",
             f"Chat History: {chat_history}",
             "Task: Answer based strictly on context.",
             "1. Be concise & professional.",
             "2. If citing, use format: 'Title: [Name] | Page Numbers: [Number]'",
             f"3. Stay strictly in character as a {role}.",
         )
     )
     # The whole prompt travels in a single user-role message.
     messages = [{"role": "user", "content": f"{prompt}\n\nUser Query: {query}"}]
     # Free-tier models, tried strictly in this order.
     models = (
         "meta-llama/llama-3.2-3b-instruct:free",
         "qwen/qwen-2.5-72b-instruct:free",
         "mistralai/mistral-small-3.1-24b-instruct:free",
         "google/gemma-3-4b-it:free",
         "liquid/lfm-2.5-1.2b-instruct:free",
         "nvidia/llama-3.1-nemotron-70b-instruct:free",
         "qwen/qwen2.5-7b-instruct:free",
     )
     client = openai.OpenAI(
         base_url="https://openrouter.ai/api/v1",
         api_key=api_key,
         default_headers={
             "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"),
             "X-Title": "Law Bot Pro",
         },
     )
     # Markers that identify a provider-side content/safety rejection.
     blocked_markers = ("PROHIBITED", "SAFETY", "FILTER", "BLOCKED")
     last_error = None
     for current_model in models:
         try:
             # Low temperature (0.1) keeps answers close to deterministic.
             return client.chat.completions.create(
                 model=current_model,
                 messages=messages,
                 temperature=0.1,
                 stream=True,
                 max_tokens=1024,
             )
         except Exception as e:
             # Deliberately broad: any failure moves on to the next model.
             # The error is kept so the final exception can report it.
             last_error = e
             err_text = str(e).upper()
             if any(marker in err_text for marker in blocked_markers):
                 print(f"DEBUG: {current_model} blocked. Switching...")
             else:
                 print(f"DEBUG: {current_model} error: {e}")
     # Chain the last failure so callers and logs can see the real cause.
     raise Exception(
         "System overloaded. Please wait 30 seconds and try again."
     ) from last_error
 def nemotron_llama_raw(query, context, chat_history, role="General"):
     """Alias of ``nemotron_llama``: forward every argument unchanged and return its result."""
     completion = nemotron_llama(query, context, chat_history, role)
     return completion