Upload 5 files
- Dockerfile +28 -0
- main.py +44 -0
- requirements.txt +7 -0
- start.sh +66 -0
- streamlit_app.py +69 -0
Dockerfile
ADDED
@@ -0,0 +1,28 @@
+FROM python:3.9-slim
+
+# Install curl and Ollama
+RUN apt-get update && apt-get install -y curl && \
+    curl -fsSL https://ollama.ai/install.sh | sh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set up user and environment
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR $HOME/app
+
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . .
+
+# Make the start script executable
+RUN chmod +x start.sh
+
+# Expose ports for FastAPI (7860) and Streamlit (8501)
+EXPOSE 7860
+EXPOSE 8501
+
+CMD ["./start.sh"]
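
For local testing outside the Space, a minimal build-and-run sketch (the `ollama-assistant` image tag is a hypothetical choice, not part of this commit):

    # Build the image from this Dockerfile
    docker build -t ollama-assistant .
    # Run it with both service ports mapped: FastAPI on 7860, Streamlit on 8501
    docker run -p 7860:7860 -p 8501:8501 ollama-assistant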
main.py
ADDED
@@ -0,0 +1,44 @@
+import os
+import logging
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from langchain_community.llms import Ollama
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+MODEL_NAME = 'krishna_choudhary/AI_Assistant_Chatbot'
+
+def get_llm():
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)
+
+class Question(BaseModel):
+    text: str
+
+@app.get("/")
+def read_root():
+    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}
+
+@app.post("/ask")
+async def ask_question(question: Question):
+    try:
+        llm = get_llm()
+        response = llm.invoke(question.text)
+        return {"response": response}
+    except Exception as e:
+        logger.error(f"Error processing question: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.on_event("startup")
+async def startup_event():
+    logger.info(f"Starting up with model: {MODEL_NAME}")
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    logger.info("Shutting down")
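
As a quick smoke test of the /ask route above, a curl sketch (host and port assume the local setup from start.sh; the prompt text is arbitrary):

    # POST a Question payload ({"text": ...}) to the FastAPI endpoint
    curl -X POST http://localhost:7860/ask \
      -H "Content-Type: application/json" \
      -d '{"text": "What can you help me with?"}'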
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+fastapi
+uvicorn
+langchain
+langchain_community
+ollama
+streamlit
+requests
start.sh
ADDED
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# Set environment variables for optimization
+export OMP_NUM_THREADS=4
+export MKL_NUM_THREADS=4
+export CUDA_VISIBLE_DEVICES=0
+
+echo "--- Starting Ollama, FastAPI, and Streamlit ---"
+
+# Start Ollama in the background
+echo "Starting Ollama server..."
+ollama serve &
+
+# Wait for the Ollama server to come up before touching the model registry
+max_attempts=90 # generous timeout, as cold starts can be slow
+attempt=0
+echo "Waiting for Ollama to start (max $max_attempts seconds)..."
+while ! curl -s http://localhost:11434/api/tags >/dev/null; do
+    sleep 1
+    attempt=$((attempt + 1))
+    if [ $attempt -eq $max_attempts ]; then
+        echo "Ollama failed to start within $max_attempts seconds. Exiting."
+        exit 1
+    fi
+done
+echo "Ollama is ready."
+
+# Pull the model if not already present (the server must be running for this)
+MODEL_TO_PULL="krishna_choudhary/AI_Assistant_Chatbot"
+if ! ollama list | grep -q "$MODEL_TO_PULL"; then
+    echo "Pulling Ollama model: $MODEL_TO_PULL"
+    ollama pull "$MODEL_TO_PULL"
+else
+    echo "Ollama model $MODEL_TO_PULL already present."
+fi
+
+# --- DEBUGGING: List files in current directory ---
+echo "--- Files in current directory ($PWD): ---"
+ls -l
+echo "-------------------------------------------"
+
+# Start the FastAPI server in the background
+echo "Starting FastAPI server..."
+uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1 --limit-concurrency 20 &
+
+# Store the PID of the FastAPI server
+FASTAPI_PID=$!
+echo "FastAPI server started with PID: $FASTAPI_PID"
+
+# Wait a moment for FastAPI to start (optional, but good practice)
+sleep 5
+
+# Start the Streamlit server in the background
+echo "Starting Streamlit app..."
+# Use 'python -m streamlit' for robustness in Docker environments
+python -m streamlit run streamlit_app.py --server.port 8501 --server.address 0.0.0.0 &
+
+# Store the PID of the Streamlit server
+STREAMLIT_PID=$!
+echo "Streamlit app started with PID: $STREAMLIT_PID"
+
+echo "All services initiated. Keeping container alive..."
+
+# Keep the script running indefinitely, so the Docker container doesn't exit.
+# This is more robust than `wait -n` if one background process exits unexpectedly.
+tail -f /dev/null
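
To verify that all three services came up, a minimal health-check sketch (ports as used in this commit; run inside the container or with the ports published):

    curl -s http://localhost:11434/api/tags                          # Ollama API
    curl -s http://localhost:7860/                                   # FastAPI root
    curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8501   # Streamlit (expect 200)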
streamlit_app.py
ADDED
@@ -0,0 +1,69 @@
+import streamlit as st
+import requests
+import json
+
+# FastAPI configuration
+FASTAPI_URL = "http://localhost:7860/ask"
+
+st.set_page_config(page_title="Ollama AI Assistant", page_icon="🤖", layout="wide")
+
+# --- Session state for chat history ---
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = [
+        {"role": "assistant", "message": "Hello! How can I assist you today?"}
+    ]
+
+# --- App Header ---
+st.title("🤖 Ollama AI Assistant")
+st.caption("Start chatting with our AI assistant. Type your message below and press send.")
+
+# --- Chat Display ---
+st.markdown("---")
+for chat in st.session_state.chat_history:
+    if chat["role"] == "assistant":
+        with st.chat_message("assistant", avatar="🤖"):
+            st.write(chat["message"])
+    else:
+        with st.chat_message("user"):
+            st.write(chat["message"])
+
+# --- Input Area ---
+with st.form("chat_form", clear_on_submit=True):
+    user_prompt = st.text_area(
+        "Type your message here...",
+        height=100,
+        placeholder="e.g., Explain quantum computing in simple terms.",
+        label_visibility="collapsed",
+        key="user_input_text_area"
+    )
+    submitted = st.form_submit_button("Send")
+
+if submitted and user_prompt:
+    st.session_state.chat_history.append({"role": "user", "message": user_prompt})
+    with st.chat_message("assistant", avatar="🤖"):
+        st.write("Thinking...")
+
+    try:
+        payload = {"text": user_prompt}
+        headers = {"Content-Type": "application/json"}
+        response = requests.post(FASTAPI_URL, data=json.dumps(payload), headers=headers)
+
+        if response.status_code == 200:
+            llm_response = response.json().get("response", "No response received.")
+        else:
+            llm_response = f"Error: FastAPI server returned {response.status_code}. Details: {response.text}"
+
+    except requests.exceptions.ConnectionError:
+        llm_response = f"Error: Cannot connect to the FastAPI server at {FASTAPI_URL}."
+    except Exception as e:
+        llm_response = f"Unexpected error: {e}"
+
+    st.session_state.chat_history.append({"role": "assistant", "message": llm_response})
+    st.rerun()
+
+elif submitted and not user_prompt:
+    st.warning("Please enter a prompt before clicking 'Send'.")
+
+# --- Footer ---
+st.markdown("---")
+st.caption("Powered by Ollama, FastAPI, and Streamlit.")