Spaces:

Pontonkid
/

Shinui

Sleeping

App Files Community

Pontonkid committed on Nov 21, 2025

Commit

37b828a

verified ·

1 Parent(s): 0ad7393

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +82 -78

src/streamlit_app.py CHANGED Viewed

@@ -1,14 +1,13 @@
 import streamlit as st
 import os
-from datetime import datetime
 from PIL import Image
-from huggingface_hub import InferenceClient
 # -----------------------------------------------------------------------------
-# 0. AUTO-FIX FOR UPLOAD ERROR (RUNS FIRST)
 # -----------------------------------------------------------------------------
-# This creates the hidden folder and config file automatically.
-# You do NOT need to create folders manually.
 config_dir = ".streamlit"
 if not os.path.exists(config_dir):
     os.makedirs(config_dir)
@@ -16,20 +15,33 @@ with open(os.path.join(config_dir, "config.toml"), "w") as f:
     f.write("[server]\nenableXsrfProtection=false\nenableCORS=false\nmaxUploadSize=200\n")
 # -----------------------------------------------------------------------------
-# 1. SETUP & CONFIGURATION
 # -----------------------------------------------------------------------------
-st.set_page_config(page_title="SHINUI | Meta Llama AI", page_icon="✨", layout="wide")
-# RETRIEVE API KEY
-# Ensure you have accepted the license for Llama 3.2 on Hugging Face!
-HF_TOKEN = os.environ.get("HF_TOKEN")
-if not HF_TOKEN:
-    st.error("⚠️ API Key missing! Please add 'HF_TOKEN' in Space Settings > Secrets.")
-    st.stop()
-# INITIALIZE CLIENT
-client = InferenceClient(token=HF_TOKEN)
 # -----------------------------------------------------------------------------
 # 2. STATE MANAGEMENT
@@ -41,50 +53,39 @@ if 'history' not in st.session_state: st.session_state.history = []
 if 'result' not in st.session_state: st.session_state.result = None
 # -----------------------------------------------------------------------------
-# 3. THE BRAIN (Meta Llama 3.2 Vision Logic)
 # -----------------------------------------------------------------------------
-def get_llama_insight(input_type, content):
-    """
-    Handles analysis using Meta-Llama-3.2-11B-Vision-Instruct
-    """
-    # System instruction for the model
-    prompt_text = "You are SHINUI, a medical AI assistant. Analyze the input provided. Structure your answer with: 1. Observation 2. Risk Assessment 3. Recommended Actions. Keep it concise and professional."
     try:
-        # A. VISION (Image Analysis)
         if input_type == "Image":
-            # The client handles the image object directly for this model
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "image"},
-                        {"type": "text", "text": prompt_text}
-                    ]
-                }
-            ]
-            response = client.chat_completion(
-                model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-                messages=messages,
-                max_tokens=500,
-                image=content  # Passing the PIL image here
-            )
-            return response.choices[0].message.content
-        # B. TEXT (Clinical Notes)
         elif input_type == "Text":
-            messages = [
-                {"role": "user", "content": f"{prompt_text}\n\nPatient Notes: {content}"}
-            ]
-            response = client.chat_completion(
-                model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-                messages=messages,
-                max_tokens=500
-            )
-            return response.choices[0].message.content
     except Exception as e:
-        return f"⚠️ Analysis Error: {str(e)}\n(Check if your HF Token has permission for Llama 3.2)"
 # -----------------------------------------------------------------------------
 # 4. UI STYLING (Clean Dark Theme)
@@ -111,7 +112,7 @@ st.markdown("""
 """, unsafe_allow_html=True)
 # -----------------------------------------------------------------------------
-# 5. NAVIGATION HELPERS
 # -----------------------------------------------------------------------------
 def nav_to(page):
     st.session_state.page = page
@@ -138,10 +139,10 @@ def show_landing():
     with c1:
         st.markdown("""
         <h1 style='font-size: 4rem; line-height: 1.1; margin-bottom: 20px;'>
-            Medical Intelligence.<br><span style='color:#38bdf8;'>Powered by Meta Llama.</span>
         </h1>
         <p style='font-size: 1.2rem; color: #94a3b8; margin-bottom: 40px;'>
-            Advanced multimodal analysis for medical imaging and clinical notes.
         </p>
         """, unsafe_allow_html=True)
         b1, b2 = st.columns([1, 2])
@@ -153,8 +154,8 @@ def show_landing():
     with c2:
         st.markdown("""
         <div class='shinui-card'>
-            <h3>🧬 Llama 3.2 Vision</h3>
-            <p style='color:#94a3b8;'>11B Parameter Multimodal Model.</p>
         </div>
         """, unsafe_allow_html=True)
@@ -166,14 +167,14 @@ def show_about():
     <div class='shinui-card'>
         <h2 style='color:#38bdf8'>About SHINUI</h2>
         <p style='font-size:1.1rem; line-height:1.6'>
-            SHINUI leverages the <b>Meta Llama 3.2 11B Vision</b> model to provide state-of-the-art analysis.
-            It allows healthcare professionals and individuals to interpret complex visual and textual data instantly.
         </p>
         <hr style='border-color:#333'>
         <h3>Capabilities</h3>
         <ul>
-            <li><b>Visual Diagnostics:</b> Interpretation of X-rays, MRI scans, and visible symptoms.</li>
-            <li><b>Clinical Notes:</b> Deep understanding of medical text and handwriting.</li>
         </ul>
     </div>
     """, unsafe_allow_html=True)
@@ -195,10 +196,9 @@ def show_login():
 # --- DASHBOARD ---
 def show_dashboard():
-    # SIDEBAR (Sign Out is here)
     with st.sidebar:
         st.markdown(f"### 👤 {st.session_state.user_email}")
-        if st.button("Internal About"): nav_to('about_internal')
         st.markdown("---")
         st.write("HISTORY")
         if st.session_state.history:
@@ -209,8 +209,7 @@ def show_dashboard():
         st.markdown("---")
         if st.button("Sign Out"): sign_out()
-    # MAIN UI
-    st.title("Llama 3.2 Vision Interface")
     t1, t2 = st.tabs(["📷 Image Scan", "📝 Text Analysis"])
     # TAB 1: IMAGE
@@ -218,12 +217,15 @@ def show_dashboard():
         st.markdown("<div class='shinui-card'>", unsafe_allow_html=True)
         img_file = st.file_uploader("Upload Medical Image", type=['png','jpg','jpeg'])
         if img_file and st.button("Analyze Visual"):
-            image = Image.open(img_file)
-            st.image(image, caption="Input", width=300)
-            with st.spinner("Llama Vision Processing..."):
-                res = get_llama_insight("Image", image)
-                st.session_state.result = res
-                st.session_state.history.append(f"Image: {res[:30]}...")
         st.markdown("</div>", unsafe_allow_html=True)
     # TAB 2: TEXT
@@ -231,13 +233,15 @@ def show_dashboard():
         st.markdown("<div class='shinui-card'>", unsafe_allow_html=True)
         txt = st.text_area("Clinical Notes / Symptoms")
         if txt and st.button("Analyze Notes"):
-            with st.spinner("Llama Text Processing..."):
-                res = get_llama_insight("Text", txt)
-                st.session_state.result = res
-                st.session_state.history.append(f"Text: {res[:30]}...")
         st.markdown("</div>", unsafe_allow_html=True)
-    # RESULTS AREA
     if st.session_state.result:
         st.markdown(f"""
         <div class='shinui-card' style='border-left: 5px solid #38bdf8;'>
@@ -253,8 +257,8 @@ def show_about_internal():
     st.markdown("""
     <div class='shinui-card'>
         <h2 style='color:#38bdf8'>System Status</h2>
-        <p><b>Model:</b> Meta Llama 3.2 11B Vision Instruct</p>
-        <p><b>Status:</b> Online</p>
     </div>
     """, unsafe_allow_html=True)

 import streamlit as st
 import os
+import torch
 from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor
 # -----------------------------------------------------------------------------
+# 0. AUTO-FIX FOR UPLOAD ERROR (RUNS INSTANTLY)
 # -----------------------------------------------------------------------------
+# This creates the config.toml automatically so uploads work.
 config_dir = ".streamlit"
 if not os.path.exists(config_dir):
     os.makedirs(config_dir)
     f.write("[server]\nenableXsrfProtection=false\nenableCORS=false\nmaxUploadSize=200\n")
 # -----------------------------------------------------------------------------
+# 1. SETUP & MODEL LOADING (AarambhAI Gemma)
 # -----------------------------------------------------------------------------
+st.set_page_config(page_title="SHINUI | Gemma AI", page_icon="✨", layout="wide")
+@st.cache_resource
+def load_model():
+    model_id = "AarambhAI/gemma-like-multimodal-speech-vision-text"
+    # Load Processor and Model
+    # We use trust_remote_code=True because this is a custom architecture
+    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float32, # float32 is safer for CPU
+        device_map="auto",
+        trust_remote_code=True
+    )
+    return model, processor
+# Load Model on App Start
+try:
+    with st.spinner("Initializing Gemma Multimodal Model..."):
+        model, processor = load_model()
+    MODEL_LOADED = True
+except Exception as e:
+    st.error(f"⚠️ Model Load Error: {e}")
+    MODEL_LOADED = False
 # -----------------------------------------------------------------------------
 # 2. STATE MANAGEMENT
 if 'result' not in st.session_state: st.session_state.result = None
 # -----------------------------------------------------------------------------
+# 3. THE BRAIN (Gemma Logic)
 # -----------------------------------------------------------------------------
+def get_gemma_insight(input_type, content):
+    if not MODEL_LOADED:
+        return "Error: Model not loaded."
     try:
+        # A. VISION ANALYSIS
         if input_type == "Image":
+            text_prompt = "Analyze this medical image and list observations."
+            # Gemma format input
+            inputs = processor(text=text_prompt, images=content, return_tensors="pt")
+            # Generate
+            with torch.no_grad():
+                output = model.generate(**inputs, max_new_tokens=200)
+            return processor.batch_decode(output, skip_special_tokens=True)[0]
+        # B. TEXT ANALYSIS
         elif input_type == "Text":
+            text_prompt = f"Medical analysis for: {content}"
+            inputs = processor(text=text_prompt, return_tensors="pt")
+            with torch.no_grad():
+                output = model.generate(**inputs, max_new_tokens=200)
+            return processor.batch_decode(output, skip_special_tokens=True)[0]
     except Exception as e:
+        return f"⚠️ Processing Error: {str(e)}"
 # -----------------------------------------------------------------------------
 # 4. UI STYLING (Clean Dark Theme)
 """, unsafe_allow_html=True)
 # -----------------------------------------------------------------------------
+# 5. NAVIGATION
 # -----------------------------------------------------------------------------
 def nav_to(page):
     st.session_state.page = page
     with c1:
         st.markdown("""
         <h1 style='font-size: 4rem; line-height: 1.1; margin-bottom: 20px;'>
+            Medical Intelligence.<br><span style='color:#38bdf8;'>Runs Locally.</span>
         </h1>
         <p style='font-size: 1.2rem; color: #94a3b8; margin-bottom: 40px;'>
+            SHINUI runs the specialized Gemma Multimodal model for secure analysis.
         </p>
         """, unsafe_allow_html=True)
         b1, b2 = st.columns([1, 2])
     with c2:
         st.markdown("""
         <div class='shinui-card'>
+            <h3>🧬 Gemma Multimodal</h3>
+            <p style='color:#94a3b8;'>Vision, Text & Speech capable.</p>
         </div>
         """, unsafe_allow_html=True)
     <div class='shinui-card'>
         <h2 style='color:#38bdf8'>About SHINUI</h2>
         <p style='font-size:1.1rem; line-height:1.6'>
+            SHINUI utilizes the <b>AarambhAI Gemma-like Multimodal</b> model.
+            This model is unique because it understands images, text, and speech natively in a single architecture.
         </p>
         <hr style='border-color:#333'>
         <h3>Capabilities</h3>
         <ul>
+            <li><b>Visual Diagnostics:</b> Reads medical images.</li>
+            <li><b>Clinical Text:</b> Analyzes symptoms and notes.</li>
         </ul>
     </div>
     """, unsafe_allow_html=True)
 # --- DASHBOARD ---
 def show_dashboard():
     with st.sidebar:
         st.markdown(f"### 👤 {st.session_state.user_email}")
+        if st.button("About System"): nav_to('about_internal')
         st.markdown("---")
         st.write("HISTORY")
         if st.session_state.history:
         st.markdown("---")
         if st.button("Sign Out"): sign_out()
+    st.title("Gemma Interface")
     t1, t2 = st.tabs(["📷 Image Scan", "📝 Text Analysis"])
     # TAB 1: IMAGE
         st.markdown("<div class='shinui-card'>", unsafe_allow_html=True)
         img_file = st.file_uploader("Upload Medical Image", type=['png','jpg','jpeg'])
         if img_file and st.button("Analyze Visual"):
+            if not MODEL_LOADED:
+                st.error("Model failed to load (Check Space Logs).")
+            else:
+                image = Image.open(img_file)
+                st.image(image, width=300)
+                with st.spinner("Gemma Processing..."):
+                    res = get_gemma_insight("Image", image)
+                    st.session_state.result = res
+                    st.session_state.history.append(f"Image: {res[:30]}...")
         st.markdown("</div>", unsafe_allow_html=True)
     # TAB 2: TEXT
         st.markdown("<div class='shinui-card'>", unsafe_allow_html=True)
         txt = st.text_area("Clinical Notes / Symptoms")
         if txt and st.button("Analyze Notes"):
+            if not MODEL_LOADED:
+                st.error("Model failed to load.")
+            else:
+                with st.spinner("Gemma Processing..."):
+                    res = get_gemma_insight("Text", txt)
+                    st.session_state.result = res
+                    st.session_state.history.append(f"Text: {res[:30]}...")
         st.markdown("</div>", unsafe_allow_html=True)
     if st.session_state.result:
         st.markdown(f"""
         <div class='shinui-card' style='border-left: 5px solid #38bdf8;'>
     st.markdown("""
     <div class='shinui-card'>
         <h2 style='color:#38bdf8'>System Status</h2>
+        <p><b>Model:</b> AarambhAI Gemma-like Multimodal</p>
+        <p><b>Backend:</b> Local Transformers</p>
     </div>
     """, unsafe_allow_html=True)