teofizzy committed on
Commit
f8266e7
·
1 Parent(s): c60e74e

Changed to use the Hugging Face serverless endpoint, with the local CPU as a fallback

Browse files
Dockerfile CHANGED
@@ -34,15 +34,16 @@ COPY --chown=user . $HOME/app
34
 
35
  # 7. Startup
36
  # We clone the dataset. git-lfs ensures we get the big files.
 
37
  CMD git clone https://huggingface.co/datasets/teofizzy/mshauri-data data_download && \
38
  mv data_download/mshauri_fedha_v6.db . && \
39
  mv data_download/mshauri_fedha_chroma_db . && \
40
  rm -rf data_download && \
41
- echo "⬇️ Starting Ollama..." && \
42
  ollama serve & \
43
  sleep 10 && \
44
- echo "⬇️ Pulling Models..." && \
45
- ollama pull qwen2.5:7b && \
46
  ollama pull nomic-embed-text && \
47
- echo "Models Ready. Launching App..." && \
48
  streamlit run src/app.py --server.port 7860 --server.address 0.0.0.0
 
34
 
35
  # 7. Startup
36
  # We clone the dataset. git-lfs ensures we get the big files.
37
+ # CHANGE: Pulling 'qwen2.5:3b' instead of '7b' for a faster fallback.
38
  CMD git clone https://huggingface.co/datasets/teofizzy/mshauri-data data_download && \
39
  mv data_download/mshauri_fedha_v6.db . && \
40
  mv data_download/mshauri_fedha_chroma_db . && \
41
  rm -rf data_download && \
42
+ echo "Starting Ollama..." && \
43
  ollama serve & \
44
  sleep 10 && \
45
+ echo "Pulling Fallback Model (3B)..." && \
46
+ ollama pull qwen2.5:3b && \
47
  ollama pull nomic-embed-text && \
48
+ echo "Models Ready. Launching App..." && \
49
  streamlit run src/app.py --server.port 7860 --server.address 0.0.0.0
requirements.txt CHANGED
@@ -3,7 +3,8 @@ pandas
3
  numpy
4
  langchain-ollama
5
  langchain-community
 
6
  langchain-chroma
7
  chromadb
8
  huggingface_hub
9
- pysqlite3-binary
 
3
  numpy
4
  langchain-ollama
5
  langchain-community
6
+ langchain-huggingface
7
  langchain-chroma
8
  chromadb
9
  huggingface_hub
10
+ pysqlite3-binary
src/app.py CHANGED
@@ -28,7 +28,6 @@ if "messages" not in st.session_state:
28
 
29
  if "agent" not in st.session_state:
30
  with st.spinner("Initializing Mshauri Brain (Loading Models & Data)..."):
31
- # --- THE FIX IS HERE ---
32
  # SQLAlchemy requires a URI starting with sqlite:///
33
  # The resulting URI has 4 slashes (sqlite:////...): the 3-slash prefix plus the leading '/' of the absolute path on Linux
34
  sql_path = f"sqlite:///{os.path.join(current_dir, 'mshauri_fedha_v6.db')}"
@@ -37,15 +36,14 @@ if "agent" not in st.session_state:
37
  # Check if data exists (Debugging for Space deployment)
38
  real_db_path = os.path.join(current_dir, "mshauri_fedha_v6.db")
39
  if not os.path.exists(real_db_path):
40
- st.error(f"Database not found at {real_db_path}. Did the clone fail?")
41
  st.stop()
42
 
43
  try:
44
- # Force the 7b model here to ensure CPU compatibility
45
  st.session_state.agent = create_mshauri_agent(
46
  sql_db_path=sql_path,
47
- vector_db_path=vector_path,
48
- llm_model="qwen2.5:7b"
49
  )
50
  st.success("System Ready!")
51
  except Exception as e:
 
28
 
29
  if "agent" not in st.session_state:
30
  with st.spinner("Initializing Mshauri Brain (Loading Models & Data)..."):
 
31
  # SQLAlchemy requires a URI starting with sqlite:///
32
  # The resulting URI has 4 slashes (sqlite:////...): the 3-slash prefix plus the leading '/' of the absolute path on Linux
33
  sql_path = f"sqlite:///{os.path.join(current_dir, 'mshauri_fedha_v6.db')}"
 
36
  # Check if data exists (Debugging for Space deployment)
37
  real_db_path = os.path.join(current_dir, "mshauri_fedha_v6.db")
38
  if not os.path.exists(real_db_path):
39
+ st.error(f"Database not found at {real_db_path}. Did the clone fail?")
40
  st.stop()
41
 
42
  try:
43
+ # Let mshauri_demo.py intelligently pick the API or local model.
44
  st.session_state.agent = create_mshauri_agent(
45
  sql_db_path=sql_path,
46
+ vector_db_path=vector_path
 
47
  )
48
  st.success("System Ready!")
49
  except Exception as e:
src/load/mshauri_demo.py CHANGED
@@ -2,7 +2,8 @@ import os
2
  import re
3
  import sys
4
  import io
5
- from contextlib import redirect_stdout # <--- FIXED: Missing Import
 
6
  from langchain_ollama import ChatOllama
7
  from langchain_community.utilities import SQLDatabase
8
  from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
@@ -56,39 +57,39 @@ class SimpleReActAgent:
56
 
57
  # IMPROVED PROMPT: Explicitly tells agent to switch strategies if SQL fails
58
  self.prompt_template = """You are Mshauri Fedha, a senior financial advisor for Kenya.
59
- Your goal is to provide accurate, data-backed advice.
60
-
61
- RULES:
62
- 1. CITATIONS: You MUST cite your sources (,).
63
- - SQL Data ->
64
- - Text Data ->
65
- - Code -> PythonREPLTool
66
- 2. STRATEGY:
67
- - First, check SQL tables ('sql_db_list_tables').
68
- - IF the tables listed do NOT match the user's question, IMMEDIATELY switch to 'search_financial_reports_and_news'.
69
- - Do NOT keep asking for tables if they are clearly not there.
70
- 3. ADVICE: After presenting facts, add an "Advisory Opinion" section.
71
- 4. CONFIDENCE: If data is old, state "Low Confidence".
72
-
73
- Tools Available:
74
- {tool_desc}
75
-
76
- Use the following format:
77
-
78
- Question: the input question you must answer
79
- Thought: you should always think about what to do
80
- Thought: look at the tools and the question. Which tool is best?
81
- Action: the action to take, should be one of [{tool_names}]
82
- Action Input: the input to the action
83
- Observation: the result of the action
84
- ... (repeat Thought/Action/Observation as needed)
85
- Thought: I have enough info.
86
- Final Answer: the final answer with citations.
87
-
88
- Begin!
89
-
90
- Question: {input}
91
- Thought:{agent_scratchpad}"""
92
 
93
  def invoke(self, inputs):
94
  query = inputs["input"]
@@ -128,7 +129,7 @@ Thought:{agent_scratchpad}"""
128
 
129
  if action_name in self.tools:
130
  if self.verbose:
131
- print(f"🛠️ Calling '{action_name}' with: '{action_input}'")
132
 
133
  try:
134
  tool = self.tools[action_name]
@@ -142,7 +143,7 @@ Thought:{agent_scratchpad}"""
142
  # --- ADDED LOGGING HERE ---
143
  if self.verbose:
144
  # Print first 200 chars so we can see if it worked
145
- print(f"👀 Observation: {str(tool_result)[:200]}...")
146
 
147
  observation = f"\nObservation: {tool_result}\n"
148
  else:
@@ -163,22 +164,31 @@ def create_mshauri_agent(
163
  sql_db_path=DEFAULT_SQL_DB,
164
  vector_db_path=DEFAULT_VECTOR_DB,
165
  llm_model=DEFAULT_LLM_MODEL,
166
- ollama_url=DEFAULT_OLLAMA_URL
167
- ):
168
  print(f"⚙️ Initializing Mshauri Fedha (Model: {llm_model})...")
169
 
170
  # 1. Initialize LLM
171
- try:
172
- llm = ChatOllama(model=llm_model, base_url=ollama_url, temperature=0.1)
173
- except Exception as e:
174
- print(f" Error connecting to Ollama: {e}")
175
- return None
176
-
 
 
 
 
 
 
 
 
 
 
177
  # 2. LEFT BRAIN (SQL)
178
  if "sqlite" in sql_db_path:
179
  real_path = sql_db_path.replace("sqlite:///", "")
180
  if not os.path.exists(real_path):
181
- print(f"⚠️ Warning: SQL Database not found at {real_path}")
182
 
183
  db = SQLDatabase.from_uri(sql_db_path)
184
  sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)
@@ -205,7 +215,7 @@ def create_mshauri_agent(
205
  tools = sql_tools + [retriever_tool, repl_tool]
206
  agent = SimpleReActAgent(llm, tools)
207
 
208
- print(" Mshauri Agent Ready (Zero-Dependency Mode).")
209
  return agent
210
 
211
  def ask_mshauri(agent, query):
 
2
  import re
3
  import sys
4
  import io
5
+ from contextlib import redirect_stdout
6
+ from langchain_huggingface import HuggingFaceEndpoint
7
  from langchain_ollama import ChatOllama
8
  from langchain_community.utilities import SQLDatabase
9
  from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
 
57
 
58
  # IMPROVED PROMPT: Explicitly tells agent to switch strategies if SQL fails
59
  self.prompt_template = """You are Mshauri Fedha, a senior financial advisor for Kenya.
60
+ Your goal is to provide accurate, data-backed advice.
61
+
62
+ RULES:
63
+ 1. CITATIONS: You MUST cite your sources (,).
64
+ - SQL Data ->
65
+ - Text Data ->
66
+ - Code -> PythonREPLTool
67
+ 2. STRATEGY:
68
+ - First, check SQL tables ('sql_db_list_tables').
69
+ - IF the tables listed do NOT match the user's question, IMMEDIATELY switch to 'search_financial_reports_and_news'.
70
+ - Do NOT keep asking for tables if they are clearly not there.
71
+ 3. ADVICE: After presenting facts, add an "Advisory Opinion" section.
72
+ 4. CONFIDENCE: If data is old, state "Low Confidence".
73
+
74
+ Tools Available:
75
+ {tool_desc}
76
+
77
+ Use the following format:
78
+
79
+ Question: the input question you must answer
80
+ Thought: you should always think about what to do
81
+ Thought: look at the tools and the question. Which tool is best?
82
+ Action: the action to take, should be one of [{tool_names}]
83
+ Action Input: the input to the action
84
+ Observation: the result of the action
85
+ ... (repeat Thought/Action/Observation as needed)
86
+ Thought: I have enough info.
87
+ Final Answer: the final answer with citations.
88
+
89
+ Begin!
90
+
91
+ Question: {input}
92
+ Thought:{agent_scratchpad}"""
93
 
94
  def invoke(self, inputs):
95
  query = inputs["input"]
 
129
 
130
  if action_name in self.tools:
131
  if self.verbose:
132
+ print(f"Calling '{action_name}' with: '{action_input}'")
133
 
134
  try:
135
  tool = self.tools[action_name]
 
143
  # --- ADDED LOGGING HERE ---
144
  if self.verbose:
145
  # Print first 200 chars so we can see if it worked
146
+ print(f"Observation: {str(tool_result)[:200]}...")
147
 
148
  observation = f"\nObservation: {tool_result}\n"
149
  else:
 
164
  sql_db_path=DEFAULT_SQL_DB,
165
  vector_db_path=DEFAULT_VECTOR_DB,
166
  llm_model=DEFAULT_LLM_MODEL,
167
+ ollama_url=DEFAULT_OLLAMA_URL):
 
168
  print(f"⚙️ Initializing Mshauri Fedha (Model: {llm_model})...")
169
 
170
  # 1. Initialize LLM
171
+ hf_token = os.getenv("HF_TOKEN")
172
+
173
+ if hf_token:
174
+ print("Using Hugging Face Serverless API")
175
+ # We can use the massive 72B model because we aren't hosting it!
176
+ llm = HuggingFaceEndpoint(
177
+ repo_id="Qwen/Qwen2.5-72B-Instruct",
178
+ task="text-generation",
179
+ max_new_tokens=512,
180
+ repetition_penalty=1.1,
181
+ temperature=0.2,
182
+ huggingfacehub_api_token=hf_token
183
+ )
184
+ else:
185
+ print("Using Local CPU Ollama (Slow)")
186
+ llm = ChatOllama(model="qwen2.5:7b", base_url=ollama_url, temperature=0.1)
187
  # 2. LEFT BRAIN (SQL)
188
  if "sqlite" in sql_db_path:
189
  real_path = sql_db_path.replace("sqlite:///", "")
190
  if not os.path.exists(real_path):
191
+ print(f"Warning: SQL Database not found at {real_path}")
192
 
193
  db = SQLDatabase.from_uri(sql_db_path)
194
  sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)
 
215
  tools = sql_tools + [retriever_tool, repl_tool]
216
  agent = SimpleReActAgent(llm, tools)
217
 
218
+ print(" Mshauri Agent Ready (Zero-Dependency Mode).")
219
  return agent
220
 
221
  def ask_mshauri(agent, query):
src/load/start_ollama.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  import subprocess
3
  import time
4
  import requests
 
 
5
  from pathlib import Path
6
 
7
  def start_ollama_server():
@@ -49,9 +51,7 @@ def start_ollama_server():
49
  print(f" Failed to start server: {e}")
50
  return False
51
 
52
- import requests
53
- import json
54
- import sys
55
 
56
  def pull_embedding_model(model_name="nomic-embed-text"):
57
  url = "http://127.0.0.1:25000/api/pull"
 
2
  import subprocess
3
  import time
4
  import requests
5
+ import json
6
+ import sys
7
  from pathlib import Path
8
 
9
  def start_ollama_server():
 
51
  print(f" Failed to start server: {e}")
52
  return False
53
 
54
+
 
 
55
 
56
  def pull_embedding_model(model_name="nomic-embed-text"):
57
  url = "http://127.0.0.1:25000/api/pull"