FocusFlow Assistant committed on
Commit ·
8cab7c7
1
Parent(s): 7676567
Add hybrid deployment: Ollama (local) + Hugging Face (cloud)
- Created backend/config.py for LLM provider switching
- Added get_llm() function with environment-based configuration
- Supports LLM_PROVIDER env var (ollama/huggingface)
- Updated requirements.txt with HuggingFace dependencies
- Replaced all hardcoded Ollama calls with get_llm()
Local mode (default): Uses Ollama llama3.2:1b for offline
Cloud mode: Uses HF Llama-3-8B via Inference API for demo
- backend/config.py +50 -0
- backend/rag_engine.py +31 -4
- requirements.txt +4 -0
backend/config.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration system for FocusFlow LLM providers.
|
| 3 |
+
Supports both local (Ollama) and cloud (Hugging Face) deployments.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
from enum import Enum
|
| 7 |
+
|
| 8 |
+
class LLMProvider(Enum):
    """Enumerates the supported LLM backends.

    The member values match the strings accepted by the
    ``LLM_PROVIDER`` environment variable.
    """

    OLLAMA = "ollama"            # local, offline inference
    HUGGINGFACE = "huggingface"  # cloud inference via the HF Inference API
|
| 12 |
+
|
| 13 |
+
# Read the provider selection from the environment; default to Ollama (local).
USE_PROVIDER = os.getenv("LLM_PROVIDER", "ollama").lower()

# Configuration for both providers.
# FIX: only the exact value "huggingface" selects cloud mode. Previously any
# non-"ollama" string (including typos like "olama") silently enabled cloud
# mode, contradicting the documented default of local Ollama.
CONFIG = {
    "llm_provider": (
        LLMProvider.HUGGINGFACE
        if USE_PROVIDER == "huggingface"
        else LLMProvider.OLLAMA
    ),

    # Local Ollama configuration (offline mode)
    "ollama": {
        "model": "llama3.2:1b",
        "base_url": "http://localhost:11434",
    },

    # Hugging Face configuration (cloud demo mode)
    "huggingface": {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        # Empty token means unauthenticated access; set HUGGINGFACE_API_TOKEN
        # for gated models such as Meta-Llama-3.
        "api_token": os.getenv("HUGGINGFACE_API_TOKEN", ""),
        "max_length": 512,
        "temperature": 0.7,
    },
}
|
| 34 |
+
|
| 35 |
+
def get_llm_provider():
    """Return the :class:`LLMProvider` member selected at import time."""
    return CONFIG["llm_provider"]
|
| 38 |
+
|
| 39 |
+
def get_llm_config():
    """Return the configuration dict for whichever provider is active."""
    active = get_llm_provider()
    return CONFIG[active.value]
|
| 43 |
+
|
| 44 |
+
def is_local_mode():
    """True when the Ollama (offline) backend is selected."""
    return get_llm_provider() is LLMProvider.OLLAMA
|
| 47 |
+
|
| 48 |
+
def is_cloud_mode():
    """True when the Hugging Face (cloud demo) backend is selected."""
    return get_llm_provider() is LLMProvider.HUGGINGFACE
|
backend/rag_engine.py
CHANGED
|
@@ -4,9 +4,36 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
| 4 |
from langchain_chroma import Chroma
|
| 5 |
from langchain_community.embeddings import OllamaEmbeddings
|
| 6 |
from langchain_community.llms import Ollama
|
|
|
|
| 7 |
|
| 8 |
CACHE_DIR = "./chroma_db"
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def ingest_document(file_path: str):
|
| 11 |
"""
|
| 12 |
Ingests a PDF document into the vector database.
|
|
@@ -105,7 +132,7 @@ def generate_study_plan(user_request: str):
|
|
| 105 |
persist_directory=CACHE_DIR,
|
| 106 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 107 |
)
|
| 108 |
-
llm =
|
| 109 |
|
| 110 |
# 1. Extract number of days from request (default to 5 if not specified)
|
| 111 |
import re
|
|
@@ -215,7 +242,7 @@ def generate_lesson_content(topic_title: str):
|
|
| 215 |
persist_directory=CACHE_DIR,
|
| 216 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 217 |
)
|
| 218 |
-
llm =
|
| 219 |
|
| 220 |
# 1. Search DB for comprehensive context (increased from 4 to 8 chunks)
|
| 221 |
docs = vector_store.similarity_search(topic_title, k=8)
|
|
@@ -309,7 +336,7 @@ def query_knowledge_base(question: str, history: list = []):
|
|
| 309 |
persist_directory=CACHE_DIR,
|
| 310 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 311 |
)
|
| 312 |
-
llm =
|
| 313 |
|
| 314 |
# 1. Search
|
| 315 |
docs = vector_store.similarity_search(question, k=3)
|
|
@@ -350,7 +377,7 @@ def generate_quiz_data(topic_title: str):
|
|
| 350 |
persist_directory=CACHE_DIR,
|
| 351 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 352 |
)
|
| 353 |
-
llm =
|
| 354 |
|
| 355 |
# 1. Search Context
|
| 356 |
docs = vector_store.similarity_search(topic_title, k=3)
|
|
|
|
| 4 |
from langchain_chroma import Chroma
|
| 5 |
from langchain_community.embeddings import OllamaEmbeddings
|
| 6 |
from langchain_community.llms import Ollama
|
| 7 |
+
from backend.config import get_llm_provider, get_llm_config, LLMProvider
|
| 8 |
|
| 9 |
CACHE_DIR = "./chroma_db"
|
| 10 |
|
| 11 |
+
def get_llm():
    """Build the LLM instance for the currently configured provider.

    Reads the provider/config pair from ``backend.config`` so the same
    code path serves both the local Ollama deployment and the Hugging
    Face cloud demo.
    """
    active = get_llm_provider()
    settings = get_llm_config()

    if active == LLMProvider.OLLAMA:
        # Local mode - uses Ollama for offline inference
        return Ollama(
            model=settings["model"],
            base_url=settings.get("base_url", "http://localhost:11434"),
        )

    # Cloud mode - uses Hugging Face Inference API.
    # Imported lazily so local-only installs need not have the package.
    from langchain_huggingface import HuggingFaceEndpoint

    return HuggingFaceEndpoint(
        repo_id=settings["model"],
        huggingfacehub_api_token=settings["api_token"],
        max_length=settings.get("max_length", 512),
        temperature=settings.get("temperature", 0.7),
        task="text-generation",
    )
|
| 35 |
+
|
| 36 |
+
|
| 37 |
def ingest_document(file_path: str):
|
| 38 |
"""
|
| 39 |
Ingests a PDF document into the vector database.
|
|
|
|
| 132 |
persist_directory=CACHE_DIR,
|
| 133 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 134 |
)
|
| 135 |
+
llm = get_llm()
|
| 136 |
|
| 137 |
# 1. Extract number of days from request (default to 5 if not specified)
|
| 138 |
import re
|
|
|
|
| 242 |
persist_directory=CACHE_DIR,
|
| 243 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 244 |
)
|
| 245 |
+
llm = get_llm()
|
| 246 |
|
| 247 |
# 1. Search DB for comprehensive context (increased from 4 to 8 chunks)
|
| 248 |
docs = vector_store.similarity_search(topic_title, k=8)
|
|
|
|
| 336 |
persist_directory=CACHE_DIR,
|
| 337 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 338 |
)
|
| 339 |
+
llm = get_llm()
|
| 340 |
|
| 341 |
# 1. Search
|
| 342 |
docs = vector_store.similarity_search(question, k=3)
|
|
|
|
| 377 |
persist_directory=CACHE_DIR,
|
| 378 |
embedding_function=OllamaEmbeddings(model="nomic-embed-text")
|
| 379 |
)
|
| 380 |
+
llm = get_llm()
|
| 381 |
|
| 382 |
# 1. Search Context
|
| 383 |
docs = vector_store.similarity_search(topic_title, k=3)
|
requirements.txt
CHANGED
|
@@ -17,3 +17,7 @@ beautifulsoup4>=4.12.0
|
|
| 17 |
youtube-transcript-api>=0.6.0
|
| 18 |
pypdf>=3.17.0
|
| 19 |
python-dotenv>=1.0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
youtube-transcript-api>=0.6.0
|
| 18 |
pypdf>=3.17.0
|
| 19 |
python-dotenv>=1.0.0
|
| 20 |
+
|
| 21 |
+
# Hugging Face dependencies for cloud deployment
|
| 22 |
+
huggingface-hub>=0.20.0
|
| 23 |
+
langchain-huggingface>=0.0.1
|