Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+139, -38)

@@ -1,40 +1,141 @@
-import
-import
-import
 import streamlit as st
 
(old lines 6-40, also removed by this commit, and the module names on old lines 1-3 are not shown in this view)
+import os
+import threading
+import torch
+import requests
 import streamlit as st
+from chats import init_db, get_all_chats, create_new_chat, save_message, get_messages, system_prompt
 
+# Set HF cache directory
+os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+
+# ------------------ FASTAPI BACKEND ------------------
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse, JSONResponse
+from pydantic import BaseModel
+import uvicorn
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+app = FastAPI()
+
+class GenerationRequest(BaseModel):
+    system_message: str
+    user_prompt: str
+
+# Load model/tokenizer once
+tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+
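Note on the model loading above: Streamlit re-executes this script on every user interaction, so the module-level from_pretrained calls run again on each rerun. A minimal sketch of the usual workaround, assuming the rest of the file stays as committed (the helper name below is illustrative, not part of this commit):

@st.cache_resource
def load_model_once():
    # Cached across reruns and sessions, so the weights are created a single time per process.
    tok = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    mdl = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    mdl.to("cuda" if torch.cuda.is_available() else "cpu")
    return tok, mdl

tokenizer, model = load_model_once()
device = next(model.parameters()).device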
+@app.post("/api/ai-generate")
+async def generate_text_stream(request: GenerationRequest):
+    try:
+        messages = [
+            {"role": "system", "content": request.system_message},
+            {"role": "user", "content": request.user_prompt}
+        ]
+
+        inputs = tokenizer.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(device)
+
+        def token_stream():
+            generated = inputs["input_ids"]
+            # Generate tokens with return_dict_in_generate=True to access sequences
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=200,
+                do_sample=False,
+                temperature=0.5,
+                top_p=0.9,
+                eos_token_id=None,
+                pad_token_id=tokenizer.eos_token_id,
+                return_dict_in_generate=True,
+                output_scores=False
+            )
+            sequence = outputs.sequences[0]
+            # Decode tokens one by one as they come after prompt length
+            for i in range(generated.shape[-1], sequence.shape[-1]):
+                token_id = sequence[i].unsqueeze(0)
+                text = tokenizer.decode(token_id, skip_special_tokens=True)
+                if text.strip():
+                    yield text
+            yield "\n"
+
+        return StreamingResponse(token_stream(), media_type="text/plain")
+
+    except Exception as e:
+        return JSONResponse(status_code=500, content={"error": str(e)})
+
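As committed, token_stream() only starts yielding after model.generate() has produced the entire completion, so the HTTP response streams decoded tokens but the generation itself is not incremental; decoding one token id at a time can also drop the spaces the tokenizer encodes on word boundaries. A sketch of incremental streaming with transformers' TextIteratorStreamer, assuming the endpoint otherwise stays as above:

from transformers import TextIteratorStreamer

        def token_stream():
            # The streamer decodes text incrementally while generate() runs in a worker thread.
            streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
            generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=200,
                                     pad_token_id=tokenizer.eos_token_id)
            threading.Thread(target=model.generate, kwargs=generation_kwargs, daemon=True).start()
            for new_text in streamer:
                yield new_text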
+def start_fastapi():
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+
+# Start FastAPI server in background thread
+threading.Thread(target=start_fastapi, daemon=True).start()
+
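Because Streamlit reruns the script on each interaction, this thread-start line is also reached again on every rerun, and the second uvicorn.run() typically fails to bind port 8000 (the error is swallowed inside the daemon thread). A minimal guard, sketched with st.cache_resource so the backend thread starts once per process; the helper name is illustrative:

@st.cache_resource
def ensure_backend_running():
    # Runs once per Streamlit process; later reruns get the cached, already-started thread.
    thread = threading.Thread(target=start_fastapi, daemon=True)
    thread.start()
    return thread

ensure_backend_running()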
+# ------------------ STREAMLIT FRONTEND ------------------
+
+init_db()
+
+st.set_page_config(page_title="AI Assistant", page_icon="🤖")
+st.title("🤖 Juma's Assistant")
+
+st.sidebar.title("💬 Previous Chats")
+all_chats = get_all_chats()
+
+chat_titles = [f"{title} (ID: {chat_id})" for chat_id, title in all_chats]
+selected_chat_index = st.sidebar.selectbox(
+    "Select Chat", range(len(all_chats)), format_func=lambda i: chat_titles[i] if all_chats else "No chats available"
+)
+
+selected_chat_id = all_chats[selected_chat_index][0] if all_chats else None
+
+if st.sidebar.button("🆕 Start New Chat"):
+    selected_chat_id = create_new_chat()
+    st.experimental_rerun()
+
+if selected_chat_id is None:
+    st.warning("Please start a new chat or select one from the sidebar.")
+    st.stop()
+
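st.experimental_rerun() still works on older Streamlit releases but is deprecated in favour of st.rerun(). If the Space's Streamlit version is not pinned (an assumption, not stated in this commit), a small compatibility variant of the button handler:

if st.sidebar.button("🆕 Start New Chat"):
    selected_chat_id = create_new_chat()
    # st.rerun() replaces st.experimental_rerun() on newer Streamlit releases.
    (st.rerun if hasattr(st, "rerun") else st.experimental_rerun)()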
+messages = get_messages(selected_chat_id)
+for role, content in messages:
+    with st.chat_message(role):
+        st.markdown(content)
+
+user_input = st.chat_input("Type your message...")
+if user_input:
+    st.chat_message("user").markdown(user_input)
+    save_message(selected_chat_id, "user", user_input)
+
+    with st.spinner("Thinking..."):
+        try:
+            response = requests.post(
+                "http://localhost:8000/api/ai-generate",
+                json={
+                    "system_message": system_prompt(),
+                    "user_prompt": user_input
+                },
+                stream=True,
+                timeout=120,
+            )
+
+            if response.status_code == 200:
+                full_response = ""
+                placeholder = st.empty()
+                # Stream tokens chunk by chunk
+                for chunk in response.iter_content(chunk_size=1):
+                    if chunk:
+                        decoded = chunk.decode("utf-8")
+                        full_response += decoded
+                        placeholder.markdown(full_response)
+                st.chat_message("assistant").markdown(full_response)
+                save_message(selected_chat_id, "assistant", full_response)
+            else:
+                st.error("API call failed.")
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
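One further note on the streaming client: chunk.decode("utf-8") on one-byte chunks raises UnicodeDecodeError whenever a multi-byte character is split across chunks. requests can do the incremental decoding itself; a sketch of the inner loop under that assumption, with the rest of the handler as committed:

            if response.status_code == 200:
                full_response = ""
                placeholder = st.empty()
                response.encoding = "utf-8"  # tell requests how to decode the text/plain stream
                # decode_unicode=True uses an incremental decoder, so bytes of a split
                # multi-byte character are buffered instead of raising UnicodeDecodeError.
                for chunk in response.iter_content(chunk_size=1, decode_unicode=True):
                    if chunk:
                        full_response += chunk
                        placeholder.markdown(full_response)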