Upload llm.py

src/apps/utils/llm.py  CHANGED  (+128 -152)
@@ -1,152 +1,128 @@
-import openai
-import os
-from dotenv import load_dotenv
-
-# Explicitly load .env from the src/apps directory
…
-    # Sequentially try fallbacks
-    for fallback_model in fallback_models:
-        try:
-            print(f"Attempting fallback to {fallback_model}...")
-            completion = client.chat.completions.create(
-                model=fallback_model,
-                messages=messages,
-                temperature=0,
-                stream=True,
-                max_tokens=1024
-            )
-            return completion
-        except Exception as fallback_error:
-            print(f"Fallback to {fallback_model} failed: {fallback_error}")
-            continue
-
-    # If all fail, re-raise the original error or a final one
-    print("All LLM attempts failed.")
-    raise e
-
-def nemotron_llama_raw(query, context, chat_history, role="General"):
-    # This is a legacy alias if needed by other modules
-    return nemotron_llama(query, context, chat_history, role)
+import openai
+import os
+from dotenv import load_dotenv
+
+# Explicitly load .env from the src/apps directory
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+env_path = os.path.join(BASE_DIR, '.env')
+load_dotenv(env_path)
+
+def nemotron_llama(query, context, chat_history, role="General"):
+
+    prompt_template = """You are a multi-role expert AI assistant named "Law Bot" with strict role-based reasoning.
+Answer federal and state law questions ONLY based on the provided context.
+
+## Role Behavior Rules:
+Current Active Role: {role}
+
+1. Judge Mode:
+- Answer like an experienced judge.
+- Focus on legality, neutrality, precedents, logic, and final judgment.
+- Avoid emotional language and advocacy.
+- Think professionally, critically, and decisively.
+
+2. Advocate Mode:
+- Answer like a skilled advocate/lawyer.
+- Focus on arguments, strategies, loopholes, and persuasion.
+- Slightly less neutral than Judge mode.
+- More practical and tactical.
+
+3. Woman Mode:
+- Answer strictly from a woman’s perspective.
+- Consider safety, social reality, emotional intelligence, and lived experience.
+- Do not generalize or switch to male viewpoints.
+
+4. Minor Mode:
+- Use very simple language with short explanations.
+- Focus only on what is necessary and appropriate for a minor.
+- No complex terms, no adult framing.
+
+5. Student Mode:
+- Answer based on student needs.
+- Be clear, structured, and learning-focused.
+- Use examples, steps, and explanations helpful for studying or exams.
+
+6. Citizen Mode:
+- Answer as a helpful legal guide for a common citizen.
+- Focus on practical rights, duties, and actionable steps.
+- Explain legal jargon in simple, everyday language.
+- Be empathetic but objective and informative.
+
+## Mandatory Performance Requirements:
+- Prioritize clarity over verbosity.
+- Responses must be fast and concise.
+- Avoid unnecessary explanations unless asked.
+- Optimize reasoning speed and reduce delay.
+- Cite your answer with Title and Page Number from the context at the very end of your response in this EXACT format:
+**Title**: [Name]
+**Page Numbers**: [Number]
+- You are currently acting as {role}. You MUST stay in this character. Do NOT switch roles or ask for clarification.
+
+Context: {context}
+Chat History: {chat_history}
+"""
+    # print(f"DEBUG: LLM Prompt Configured for Role: {role}")
+    formatted_prompt = prompt_template.format(role=role, context=context, chat_history=chat_history)
+
+    # Merge the system prompt into the user message to support models that reject the 'system' role
+    messages = [
+        {"role": "user", "content": f"{formatted_prompt}\n\nUser Query: {query}"}
+    ]
+
+    # Use OPENROUTER_API_KEY from .env
+    api_key = os.getenv("OPENROUTER_API_KEY", "").strip()
+
+    # Emergency cleanup for a common copy-paste error: the key prefix pasted twice
+    if "sk-or-v1-sk-or-v1-" in api_key:
+        api_key = api_key.replace("sk-or-v1-sk-or-v1-", "sk-or-v1-")
+
+    if not api_key:
+        api_key = os.getenv("API_KEY", "").strip()
+
+    if not api_key:
+        raise ValueError(
+            "Set OPENROUTER_API_KEY in src/apps/.env (get a key at https://openrouter.ai/keys)"
+        )
+
+    # List of models to try sequentially - helps avoid 429 errors
+    models = [
+        "google/gemma-3-4b-it:free",
+        "mistralai/mistral-small-3.1-24b-instruct:free",
+        "meta-llama/llama-3.2-3b-instruct:free",
+        "qwen/qwen3-coder:free"
+    ]
+
+    client = openai.OpenAI(
+        base_url="https://openrouter.ai/api/v1",
+        api_key=api_key,
+        default_headers={
+            "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"),  # Required for some free models
+            "X-Title": "Law Bot AI"
+        }
+    )
+
+    # Try all models in order
+    for current_model in models:
+        try:
+            print(f"Attempting model: {current_model}...")
+            completion = client.chat.completions.create(
+                model=current_model,
+                messages=messages,
+                temperature=0,
+                stream=True,
+                max_tokens=1024
+            )
+            return completion
+        except Exception as e:
+            print(f"Error in {current_model}: {e}")
+            continue  # Try the next fallback model
+
+    # If every model fails, raise a final error (a new one; the per-model errors were only printed)
+    raise Exception("All LLM models are currently rate-limited or unavailable. Please try again in 1 minute.")
+
+def nemotron_llama_raw(query, context, chat_history, role="General"):
+    # This is a legacy alias if needed by other modules
+    return nemotron_llama(query, context, chat_history, role)
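Since every successful attempt returns the raw stream handle from client.chat.completions.create (stream=True) rather than a finished string, the caller has to drain the chunks itself. A minimal caller sketch; the import path, query, and context are illustrative and assume the repo root is on sys.path:

# Hypothetical caller; module path and inputs are illustrative only.
from src.apps.utils.llm import nemotron_llama

stream = nemotron_llama(
    query="What rights does a tenant have before eviction?",
    context="(retrieved statute passages would be injected here)",
    chat_history="",
    role="Citizen",
)

# With stream=True the OpenAI client yields ChatCompletionChunk objects; the
# text lives in choices[0].delta.content, which can be None for some chunks.
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)

One consequence of returning the stream directly: the fallback loop only guards the initial request. A model that accepts the call but fails mid-stream raises in the caller, after the function has already returned.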
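A note on the new .env handling: os.path.dirname is applied twice to this file's absolute path, so the lookup climbs out of utils/ and lands on src/apps/.env, matching the comment at the top of the file. A quick sketch of that resolution, using a hypothetical absolute prefix:

import os

# Hypothetical location of src/apps/utils/llm.py; only the suffix matters.
module_path = "/home/user/app/src/apps/utils/llm.py"

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(module_path)))
print(BASE_DIR)                        # /home/user/app/src/apps
print(os.path.join(BASE_DIR, ".env"))  # /home/user/app/src/apps/.env

The only key that file must define is OPENROUTER_API_KEY (API_KEY is read as a fallback); APP_URL is optional and feeds the HTTP-Referer header.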