FocusFlow Assistant committed on
Commit
8cab7c7
·
1 Parent(s): 7676567

Add hybrid deployment: Ollama (local) + Hugging Face (cloud)

Browse files

- Created backend/config.py for LLM provider switching
- Added get_llm() function with environment-based configuration
- Supports LLM_PROVIDER env var (ollama/huggingface)
- Updated requirements.txt with HuggingFace dependencies
- Replaced all hardcoded Ollama calls with get_llm()

Local mode (default): Uses Ollama llama3.2:1b for offline
Cloud mode: Uses HF Llama-3-8B via Inference API for demo

Files changed (3) hide show
  1. backend/config.py +50 -0
  2. backend/rag_engine.py +31 -4
  3. requirements.txt +4 -0
backend/config.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration system for FocusFlow LLM providers.
3
+ Supports both local (Ollama) and cloud (Hugging Face) deployments.
4
+ """
5
+ import os
6
+ from enum import Enum
7
+
8
class LLMProvider(Enum):
    """Supported LLM backends: local Ollama or the Hugging Face Inference API."""

    OLLAMA = "ollama"
    HUGGINGFACE = "huggingface"
12
+
13
# Provider selection comes from the LLM_PROVIDER env var; the default is the
# local Ollama backend so the app works offline out of the box.
USE_PROVIDER = os.getenv("LLM_PROVIDER", "ollama").lower()

# NOTE(review): any value other than "ollama" (including typos) silently
# selects the Hugging Face cloud backend — confirm this fall-through is
# intentional before relying on it.
CONFIG = {
    "llm_provider": (
        LLMProvider.OLLAMA if USE_PROVIDER == "ollama" else LLMProvider.HUGGINGFACE
    ),

    # Local Ollama configuration (offline mode)
    "ollama": {
        "model": "llama3.2:1b",
        "base_url": "http://localhost:11434",
    },

    # Hugging Face configuration (cloud demo mode)
    "huggingface": {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "api_token": os.getenv("HUGGINGFACE_API_TOKEN", ""),
        "max_length": 512,
        "temperature": 0.7,
    },
}
34
+
35
def get_llm_provider():
    """Return the LLMProvider selected by the current configuration."""
    return CONFIG["llm_provider"]
38
+
39
def get_llm_config():
    """Return the provider-specific settings dict for the active provider."""
    active = get_llm_provider()
    return CONFIG[active.value]
43
+
44
def is_local_mode():
    """Return True when configured for offline (local Ollama) inference."""
    # Enum members are singletons, so identity comparison is equivalent to ==.
    return get_llm_provider() is LLMProvider.OLLAMA
47
+
48
def is_cloud_mode():
    """Return True when configured for the Hugging Face cloud demo mode."""
    # Enum members are singletons, so identity comparison is equivalent to ==.
    return get_llm_provider() is LLMProvider.HUGGINGFACE
backend/rag_engine.py CHANGED
@@ -4,9 +4,36 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
  from langchain_chroma import Chroma
5
  from langchain_community.embeddings import OllamaEmbeddings
6
  from langchain_community.llms import Ollama
 
7
 
8
  CACHE_DIR = "./chroma_db"
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def ingest_document(file_path: str):
11
  """
12
  Ingests a PDF document into the vector database.
@@ -105,7 +132,7 @@ def generate_study_plan(user_request: str):
105
  persist_directory=CACHE_DIR,
106
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
107
  )
108
- llm = Ollama(model="llama3.2:1b")
109
 
110
  # 1. Extract number of days from request (default to 5 if not specified)
111
  import re
@@ -215,7 +242,7 @@ def generate_lesson_content(topic_title: str):
215
  persist_directory=CACHE_DIR,
216
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
217
  )
218
- llm = Ollama(model="llama3.2:1b")
219
 
220
  # 1. Search DB for comprehensive context (increased from 4 to 8 chunks)
221
  docs = vector_store.similarity_search(topic_title, k=8)
@@ -309,7 +336,7 @@ def query_knowledge_base(question: str, history: list = []):
309
  persist_directory=CACHE_DIR,
310
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
311
  )
312
- llm = Ollama(model="llama3.2:1b")
313
 
314
  # 1. Search
315
  docs = vector_store.similarity_search(question, k=3)
@@ -350,7 +377,7 @@ def generate_quiz_data(topic_title: str):
350
  persist_directory=CACHE_DIR,
351
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
352
  )
353
- llm = Ollama(model="llama3.2:1b")
354
 
355
  # 1. Search Context
356
  docs = vector_store.similarity_search(topic_title, k=3)
 
4
  from langchain_chroma import Chroma
5
  from langchain_community.embeddings import OllamaEmbeddings
6
  from langchain_community.llms import Ollama
7
+ from backend.config import get_llm_provider, get_llm_config, LLMProvider
8
 
9
  CACHE_DIR = "./chroma_db"
10
 
11
def get_llm():
    """
    Build an LLM instance for the provider selected via LLM_PROVIDER.

    Returns:
        A LangChain LLM: ``Ollama`` in local mode (offline inference), or a
        ``HuggingFaceEndpoint`` in cloud mode (HF Inference API).

    Raises:
        ValueError: in cloud mode when HUGGINGFACE_API_TOKEN is empty, so the
            misconfiguration fails fast with a clear message instead of an
            opaque HTTP auth error on the first inference call.
    """
    provider = get_llm_provider()
    config = get_llm_config()

    if provider == LLMProvider.OLLAMA:
        # Local mode - uses Ollama for offline inference
        return Ollama(
            model=config["model"],
            base_url=config.get("base_url", "http://localhost:11434"),
        )

    # Cloud mode - uses Hugging Face Inference API.
    # Imported lazily so local-only deployments don't need the HF packages.
    from langchain_huggingface import HuggingFaceEndpoint

    token = config.get("api_token", "")
    if not token:
        raise ValueError(
            "LLM_PROVIDER is 'huggingface' but HUGGINGFACE_API_TOKEN is not "
            "set; export the token or switch LLM_PROVIDER to 'ollama'."
        )

    # NOTE(review): recent langchain-huggingface releases expect
    # `max_new_tokens` rather than `max_length` — confirm against the pinned
    # package version before relying on this knob.
    return HuggingFaceEndpoint(
        repo_id=config["model"],
        huggingfacehub_api_token=token,
        max_length=config.get("max_length", 512),
        temperature=config.get("temperature", 0.7),
        task="text-generation",
    )
35
+
36
+
37
  def ingest_document(file_path: str):
38
  """
39
  Ingests a PDF document into the vector database.
 
132
  persist_directory=CACHE_DIR,
133
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
134
  )
135
+ llm = get_llm()
136
 
137
  # 1. Extract number of days from request (default to 5 if not specified)
138
  import re
 
242
  persist_directory=CACHE_DIR,
243
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
244
  )
245
+ llm = get_llm()
246
 
247
  # 1. Search DB for comprehensive context (increased from 4 to 8 chunks)
248
  docs = vector_store.similarity_search(topic_title, k=8)
 
336
  persist_directory=CACHE_DIR,
337
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
338
  )
339
+ llm = get_llm()
340
 
341
  # 1. Search
342
  docs = vector_store.similarity_search(question, k=3)
 
377
  persist_directory=CACHE_DIR,
378
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
379
  )
380
+ llm = get_llm()
381
 
382
  # 1. Search Context
383
  docs = vector_store.similarity_search(topic_title, k=3)
requirements.txt CHANGED
@@ -17,3 +17,7 @@ beautifulsoup4>=4.12.0
17
  youtube-transcript-api>=0.6.0
18
  pypdf>=3.17.0
19
  python-dotenv>=1.0.0
 
 
 
 
 
17
  youtube-transcript-api>=0.6.0
18
  pypdf>=3.17.0
19
  python-dotenv>=1.0.0
20
+
21
+ # Hugging Face dependencies for cloud deployment
22
+ huggingface-hub>=0.20.0
23
+ langchain-huggingface>=0.0.1