Spaces:

mrfirdauss
/

QALocalLLM

Sleeping

App Files Community

mrfirdauss committed on Sep 10, 2025

Commit

70db80b

1 Parent(s): 2362139

fix: change ollama to api

Browse files

Files changed (7) hide show

Dockerfile +1 -11
docker-compose.yaml +0 -44
entrypoint.sh +0 -9
requirements.txt +0 -1
src/FinancialAgent.py +1 -3
src/FinancialAgentOllama.py +24 -16
src/OllamaAPI.py +0 -0

Dockerfile CHANGED Viewed

@@ -8,23 +8,13 @@ RUN apt-get update && apt-get install -y \
     git \
     && rm -rf /var/lib/apt/lists/*
-RUN curl -fsSL https://ollama.com/install.sh | sh
 COPY requirements.txt ./
 RUN pip3 install --no-cache-dir -r requirements.txt
-ENV OLLAMA_HOME=/app/.ollama
-ENV HF_HOME=/app/.cache/huggingface
-RUN mkdir -p $OLLAMA_HOME $HF_HOME
-RUN chmod 777 $OLLAMA_HOME $HF_HOME
 COPY src/ ./src/
 EXPOSE 8501
-EXPOSE 11434
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1
-CMD ollama serve & \
-    streamlit run src/streamlit_app.py --server.port=8501 --server.address=0.0.0.0

     git \
     && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt ./
 RUN pip3 install --no-cache-dir -r requirements.txt
 COPY src/ ./src/
 EXPOSE 8501
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1
+CMD ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

docker-compose.yaml DELETED Viewed

@@ -1,44 +0,0 @@
-version: "3.9"
-services:
-  ollama:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    command: >
-      sh -c "
-        ollama serve &
-        sleep 3 &&
-        ollama pull qwen3:4b &&
-        tail -f /dev/null
-      "
-    volumes:
-      - ollama-models:/root/.ollama
-    ports:
-      - "11434:11434"   # Ollama server port
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-  streamlit:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    command: >
-      streamlit run src/streamlit_app.py
-      --server.port=8501
-      --server.address=0.0.0.0
-    ports:
-      - "8501:8501"
-    depends_on:
-      ollama:
-        condition: service_healthy
-    environment:
-      OLLAMA_HOST: http://ollama:11434
-    volumes:
-      - .:/app
-volumes:
-  ollama-models:

entrypoint.sh DELETED Viewed

@@ -1,9 +0,0 @@
-ollama serve &
-echo "serving ollama"
-echo "sleeping for 5 sec"
-sleep 5
-echo "start steamlit"
-streamlit run src/streamlit_app.py --server.port=8501 --server.address=0.0.0.0

requirements.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-ollama
 pandas
 streamlit
 matplotlib

 pandas
 streamlit
 matplotlib

src/FinancialAgent.py CHANGED Viewed

@@ -20,11 +20,9 @@ class FinancialAgentFactory(ABC):
             self.st.session_state.messages = []
         self.st.session_state["openai_model"] = self.model_name
-    @abstractmethod
     def render_header(self, header="Financial Agent"):
         self.st.title(header)
-    @abstractmethod
     def render_messages(self):
         """Render previous chat messages."""
         for message in self.st.session_state.messages:

             self.st.session_state.messages = []
         self.st.session_state["openai_model"] = self.model_name
     def render_header(self, header="Financial Agent"):
         self.st.title(header)
     def render_messages(self):
         """Render previous chat messages."""
         for message in self.st.session_state.messages:

src/FinancialAgentOllama.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from ollama import chat, pull
 from FinancialAgent import FinancialAgentFactory
 from prompt import REFINERY_PROMPT
 from models import ResponseState
@@ -8,24 +8,25 @@ import streamlit as st
 import matplotlib.pyplot as plt
 from langchain_community.vectorstores import FAISS
 class FinancialAgentOllama(FinancialAgentFactory):
     """Concrete Financial Agent using Ollama."""
-    def __init__(self, st, model_name="deepseek-r1:8b", embedding=None):
-        pull(model_name)
         super().__init__(st, model_name)
-        self.client = chat
         self.vector_db = FAISS.load_local("vs_68bf713eea2c81919ac08298a05d6704", embedding, allow_dangerous_deserialization=True)
     def __stream_answer__(self, instructions, input_messages):
-        response_stream = self.client(
-            message=input_messages + [{"role": "user", "content": instructions}],
             model=self.model_name,
             stream=True
         )
         for chunk in response_stream:
-            yield chunk.message.content
     def generate_final_answer(self, context_prompt):
         """Generate final answer using context."""
@@ -68,20 +69,27 @@ class FinancialAgentOllama(FinancialAgentFactory):
             self.st.markdown(prompt)
         # Step 1: Run refinery prompt
-        response = self.client(
-            message=[{"role": m["role"], "content": m["content"]} for m in self.st.session_state.messages] +
-                    [{"role": "user", "content": REFINERY_PROMPT.format(
                         response_format=ResponseState.model_json_schema(),
                         df_head=self.df.head().to_markdown(),
                         df_columns=self.df.columns.tolist(),
                         df_sample=self.df.sample(5).to_markdown()
-                    )}],
-            model=self.model_name,
-            stream=False,
-            format=ResponseState
         )
-        response_state: ResponseState = ResponseState.model_validate_json(response.message.content)
         # Step 2: Check if context is needed
         if response_state.isNeedContext:

 from FinancialAgent import FinancialAgentFactory
 from prompt import REFINERY_PROMPT
 from models import ResponseState
 import matplotlib.pyplot as plt
 from langchain_community.vectorstores import FAISS
+from OllamaAPI import OllamaAPIClient
 class FinancialAgentOllama(FinancialAgentFactory):
     """Concrete Financial Agent using Ollama."""
+    def __init__(self, st, model_name="qwen3:4b", url="https://mrfirdauss-ollama-api.hf.space", embedding=None):
         super().__init__(st, model_name)
+        self.client = OllamaAPIClient(url)
         self.vector_db = FAISS.load_local("vs_68bf713eea2c81919ac08298a05d6704", embedding, allow_dangerous_deserialization=True)
     def __stream_answer__(self, instructions, input_messages):
+        response_stream = self.client.chat(
             model=self.model_name,
+            messages=input_messages + [{"role": "user", "content": instructions}],
             stream=True
         )
         for chunk in response_stream:
+            if "message" in chunk and "content" in chunk["message"]:
+                yield chunk["message"]["content"]
     def generate_final_answer(self, context_prompt):
         """Generate final answer using context."""
             self.st.markdown(prompt)
         # Step 1: Run refinery prompt
+        response = self.client.chat(
+            model=self.model_name,
+            messages=[{"role": m["role"], "content": m["content"]}
+                    for m in self.st.session_state.messages] + [
+                {
+                    "role": "user",
+                    "content": REFINERY_PROMPT.format(
                         response_format=ResponseState.model_json_schema(),
                         df_head=self.df.head().to_markdown(),
                         df_columns=self.df.columns.tolist(),
                         df_sample=self.df.sample(5).to_markdown()
+                    )
+                }
+            ],
+            format= ResponseState,
+            stream=False
         )
+        response_state: ResponseState = ResponseState.model_validate_json(
+            response["message"]["content"]
+        )
         # Step 2: Check if context is needed
         if response_state.isNeedContext:

src/OllamaAPI.py ADDED Viewed

File without changes