Spaces:

NexusInstruments
/

OmniscientIRIS

Sleeping

App Files Files Community

NexusInstruments committed on Feb 26

Commit

23bd97d

verified ·

1 Parent(s): 03cd1d0

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -21

app.py CHANGED Viewed

@@ -14,9 +14,26 @@ except ImportError:
 # =========================================
 MODEL_OPTIONS = {
-    "Stable (Mistral 7B)": "mistralai/Mistral-7B-Instruct-v0.2",
 }
 PERSONA_PRESETS = {
     "Balanced Assistant": "You are a helpful, intelligent AI assistant.",
@@ -35,7 +52,6 @@ TASK_MODES = {
     "Behavioral Analysis": """
 Conduct a behavioral analysis.
 1. Behavioral Indicators
 2. Emotional Tone
 3. Cognitive Patterns
@@ -43,7 +59,6 @@ Conduct a behavioral analysis.
 5. Risk-Relevant Observations
 6. Alternative Explanations
 7. Limitations
 Material:
 {user_input}
 """
@@ -58,11 +73,12 @@ MAX_INPUT_CHARS = 3000
 # Helpers
 # =========================================
-def get_client(model_name):
     token = os.environ.get("HF_TOKEN")
     if not token:
         raise RuntimeError("HF_TOKEN not set in Space Secrets.")
-    return InferenceClient(model=model_name, token=token)
 def extract_text(file):
@@ -86,8 +102,19 @@ def extract_text(file):
     return ""
 # =========================================
-# Core Logic (Non-Streaming)
 # =========================================
 def generate_response(message, history, model_label, persona_label, task_mode, uploaded_file):
@@ -99,13 +126,12 @@ def generate_response(message, history, model_label, persona_label, task_mode, u
         message = message[:MAX_INPUT_CHARS]
         history = history[-MAX_HISTORY_PAIRS * 2:]
-        model_name = MODEL_OPTIONS[model_label]
-        system_prompt = PERSONA_PRESETS[persona_label]
         temperature = 0.4 if "Forensic" in persona_label else 0.7
-        client = get_client(model_name)
         file_text = extract_text(uploaded_file)
         if file_text:
             file_text = file_text[:MAX_CONTEXT_CHARS]
@@ -117,15 +143,20 @@ def generate_response(message, history, model_label, persona_label, task_mode, u
         formatted_input = TASK_MODES[task_mode].format(user_input=message)
         messages.append({"role": "user", "content": formatted_input})
-        response = client.chat_completion(
-            messages=messages,
-            max_tokens=700,
-            temperature=temperature,
-            top_p=0.95,
-            stream=False,
-        )
-        answer = response.choices[0].message.content
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": answer})
@@ -157,7 +188,7 @@ def export_chat(history):
 with gr.Blocks(theme=gr.themes.Soft(), title="Omniscient IRIS") as demo:
-    gr.Markdown("## Omniscient IRIS — Stable Analysis Assistant")
     with gr.Row():
@@ -176,7 +207,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Omniscient IRIS") as demo:
             model_selector = gr.Dropdown(
                 choices=list(MODEL_OPTIONS.keys()),
-                value="Stable (Mistral 7B)",
                 label="Model"
             )
@@ -226,4 +257,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Omniscient IRIS") as demo:
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()

 # =========================================
 MODEL_OPTIONS = {
+    # Serverless-safe models
+    "Zephyr 7B (Serverless)": {
+        "id": "HuggingFaceH4/zephyr-7b-beta",
+        "fallback": False
+    },
+    "Mixtral 8x7B (Serverless)": {
+        "id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "fallback": False
+    },
+    # Requires Dedicated Endpoint
+    "Mistral 7B (Endpoint Required)": {
+        "id": "mistralai/Mistral-7B-Instruct-v0.2",
+        "fallback": True
+    }
 }
+FALLBACK_MODEL_KEY = "Zephyr 7B (Serverless)"
 PERSONA_PRESETS = {
     "Balanced Assistant": "You are a helpful, intelligent AI assistant.",
     "Behavioral Analysis": """
 Conduct a behavioral analysis.
 1. Behavioral Indicators
 2. Emotional Tone
 3. Cognitive Patterns
 5. Risk-Relevant Observations
 6. Alternative Explanations
 7. Limitations
 Material:
 {user_input}
 """
 # Helpers
 # =========================================
+def get_client(model_id):
     token = os.environ.get("HF_TOKEN")
     if not token:
         raise RuntimeError("HF_TOKEN not set in Space Secrets.")
+    return InferenceClient(model=model_id, token=token)
 def extract_text(file):
     return ""
+def run_model(client, messages, temperature):
+    response = client.chat_completion(
+        messages=messages,
+        max_tokens=700,
+        temperature=temperature,
+        top_p=0.95,
+        stream=False,
+    )
+    return response.choices[0].message.content
 # =========================================
+# Core Logic
 # =========================================
 def generate_response(message, history, model_label, persona_label, task_mode, uploaded_file):
         message = message[:MAX_INPUT_CHARS]
         history = history[-MAX_HISTORY_PAIRS * 2:]
+        model_config = MODEL_OPTIONS[model_label]
+        model_id = model_config["id"]
+        system_prompt = PERSONA_PRESETS[persona_label]
         temperature = 0.4 if "Forensic" in persona_label else 0.7
         file_text = extract_text(uploaded_file)
         if file_text:
             file_text = file_text[:MAX_CONTEXT_CHARS]
         formatted_input = TASK_MODES[task_mode].format(user_input=message)
         messages.append({"role": "user", "content": formatted_input})
+        # Primary model attempt
+        try:
+            client = get_client(model_id)
+            answer = run_model(client, messages, temperature)
+        # Automatic fallback
+        except Exception:
+            fallback_model = MODEL_OPTIONS[FALLBACK_MODEL_KEY]["id"]
+            fallback_client = get_client(fallback_model)
+            answer = (
+                "⚠️ Selected model unavailable. Fallback model used.\n\n"
+                + run_model(fallback_client, messages, temperature)
+            )
         history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": answer})
 with gr.Blocks(theme=gr.themes.Soft(), title="Omniscient IRIS") as demo:
+    gr.Markdown("## Omniscient IRIS — Adaptive Analysis Assistant")
     with gr.Row():
             model_selector = gr.Dropdown(
                 choices=list(MODEL_OPTIONS.keys()),
+                value=FALLBACK_MODEL_KEY,
                 label="Model"
             )
 if __name__ == "__main__":
     demo.queue()
+    demo.launch()