FocusFlow Assistant committed on
Commit
8cab7c7
·
1 Parent(s): 7676567

Add hybrid deployment: Ollama (local) + Hugging Face (cloud)

Browse files

- Created backend/config.py for LLM provider switching
- Added get_llm() function with environment-based configuration
- Supports LLM_PROVIDER env var (ollama/huggingface)
- Updated requirements.txt with HuggingFace dependencies
- Replaced all hardcoded Ollama calls with get_llm()

Local mode (default): Uses Ollama llama3.2:1b for offline
Cloud mode: Uses HF Llama-3-8B via Inference API for demo

Files changed (3) hide show
  1. backend/config.py +50 -0
  2. backend/rag_engine.py +31 -4
  3. requirements.txt +4 -0
backend/config.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration system for FocusFlow LLM providers.
3
+ Supports both local (Ollama) and cloud (Hugging Face) deployments.
4
+ """
5
+ import os
6
+ from enum import Enum
7
+
8
class LLMProvider(Enum):
    """Supported LLM backends: local Ollama or the Hugging Face Inference API."""

    OLLAMA = "ollama"
    HUGGINGFACE = "huggingface"
12
+
13
# Provider selection comes from the LLM_PROVIDER env var; the default is the
# local Ollama backend so the app works offline out of the box.
USE_PROVIDER = os.getenv("LLM_PROVIDER", "ollama").lower()

# NOTE(review): any value other than "ollama" (including typos) silently
# selects the Hugging Face cloud backend — confirm this fall-through is
# intentional before relying on it.
CONFIG = {
    "llm_provider": (
        LLMProvider.OLLAMA if USE_PROVIDER == "ollama" else LLMProvider.HUGGINGFACE
    ),

    # Local Ollama configuration (offline mode)
    "ollama": {
        "model": "llama3.2:1b",
        "base_url": "http://localhost:11434",
    },

    # Hugging Face configuration (cloud demo mode)
    "huggingface": {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "api_token": os.getenv("HUGGINGFACE_API_TOKEN", ""),
        "max_length": 512,
        "temperature": 0.7,
    },
}
34
+
35
def get_llm_provider():
    """Return the LLMProvider selected by the current configuration."""
    return CONFIG["llm_provider"]
38
+
39
def get_llm_config():
    """Return the provider-specific settings dict for the active provider."""
    active = get_llm_provider()
    return CONFIG[active.value]
43
+
44
def is_local_mode():
    """Return True when configured for offline (local Ollama) inference."""
    # Enum members are singletons, so identity comparison is equivalent to ==.
    return get_llm_provider() is LLMProvider.OLLAMA
47
+
48
def is_cloud_mode():
    """Return True when configured for the Hugging Face cloud demo mode."""
    # Enum members are singletons, so identity comparison is equivalent to ==.
    return get_llm_provider() is LLMProvider.HUGGINGFACE
backend/rag_engine.py CHANGED
@@ -4,9 +4,36 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
  from langchain_chroma import Chroma
5
  from langchain_community.embeddings import OllamaEmbeddings
6
  from langchain_community.llms import Ollama
 
7
 
8
  CACHE_DIR = "./chroma_db"
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def ingest_document(file_path: str):
11
  """
12
  Ingests a PDF document into the vector database.
@@ -105,7 +132,7 @@ def generate_study_plan(user_request: str):
105
  persist_directory=CACHE_DIR,
106
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
107
  )
108
- llm = Ollama(model="llama3.2:1b")
109
 
110
  # 1. Extract number of days from request (default to 5 if not specified)
111
  import re
@@ -215,7 +242,7 @@ def generate_lesson_content(topic_title: str):
215
  persist_directory=CACHE_DIR,
216
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
217
  )
218
- llm = Ollama(model="llama3.2:1b")
219
 
220
  # 1. Search DB for comprehensive context (increased from 4 to 8 chunks)
221
  docs = vector_store.similarity_search(topic_title, k=8)
@@ -309,7 +336,7 @@ def query_knowledge_base(question: str, history: list = []):
309
  persist_directory=CACHE_DIR,
310
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
311
  )
312
- llm = Ollama(model="llama3.2:1b")
313
 
314
  # 1. Search
315
  docs = vector_store.similarity_search(question, k=3)
@@ -350,7 +377,7 @@ def generate_quiz_data(topic_title: str):
350
  persist_directory=CACHE_DIR,
351
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
352
  )
353
- llm = Ollama(model="llama3.2:1b")
354
 
355
  # 1. Search Context
356
  docs = vector_store.similarity_search(topic_title, k=3)
 
4
  from langchain_chroma import Chroma
5
  from langchain_community.embeddings import OllamaEmbeddings
6
  from langchain_community.llms import Ollama
7
+ from backend.config import get_llm_provider, get_llm_config, LLMProvider
8
 
9
  CACHE_DIR = "./chroma_db"
10
 
11
def get_llm():
    """
    Build an LLM instance for the provider selected via LLM_PROVIDER.

    Returns:
        A LangChain LLM: ``Ollama`` in local mode (offline inference), or a
        ``HuggingFaceEndpoint`` in cloud mode (HF Inference API).

    Raises:
        ValueError: in cloud mode when HUGGINGFACE_API_TOKEN is empty, so the
            misconfiguration fails fast with a clear message instead of an
            opaque HTTP auth error on the first inference call.
    """
    provider = get_llm_provider()
    config = get_llm_config()

    if provider == LLMProvider.OLLAMA:
        # Local mode - uses Ollama for offline inference
        return Ollama(
            model=config["model"],
            base_url=config.get("base_url", "http://localhost:11434"),
        )

    # Cloud mode - uses Hugging Face Inference API.
    # Imported lazily so local-only deployments don't need the HF packages.
    from langchain_huggingface import HuggingFaceEndpoint

    token = config.get("api_token", "")
    if not token:
        raise ValueError(
            "LLM_PROVIDER is 'huggingface' but HUGGINGFACE_API_TOKEN is not "
            "set; export the token or switch LLM_PROVIDER to 'ollama'."
        )

    # NOTE(review): recent langchain-huggingface releases expect
    # `max_new_tokens` rather than `max_length` — confirm against the pinned
    # package version before relying on this knob.
    return HuggingFaceEndpoint(
        repo_id=config["model"],
        huggingfacehub_api_token=token,
        max_length=config.get("max_length", 512),
        temperature=config.get("temperature", 0.7),
        task="text-generation",
    )
35
+
36
+
37
  def ingest_document(file_path: str):
38
  """
39
  Ingests a PDF document into the vector database.
 
132
  persist_directory=CACHE_DIR,
133
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
134
  )
135
+ llm = get_llm()
136
 
137
  # 1. Extract number of days from request (default to 5 if not specified)
138
  import re
 
242
  persist_directory=CACHE_DIR,
243
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
244
  )
245
+ llm = get_llm()
246
 
247
  # 1. Search DB for comprehensive context (increased from 4 to 8 chunks)
248
  docs = vector_store.similarity_search(topic_title, k=8)
 
336
  persist_directory=CACHE_DIR,
337
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
338
  )
339
+ llm = get_llm()
340
 
341
  # 1. Search
342
  docs = vector_store.similarity_search(question, k=3)
 
377
  persist_directory=CACHE_DIR,
378
  embedding_function=OllamaEmbeddings(model="nomic-embed-text")
379
  )
380
+ llm = get_llm()
381
 
382
  # 1. Search Context
383
  docs = vector_store.similarity_search(topic_title, k=3)
requirements.txt CHANGED
@@ -17,3 +17,7 @@ beautifulsoup4>=4.12.0
17
  youtube-transcript-api>=0.6.0
18
  pypdf>=3.17.0
19
  python-dotenv>=1.0.0
 
 
 
 
 
17
  youtube-transcript-api>=0.6.0
18
  pypdf>=3.17.0
19
  python-dotenv>=1.0.0
20
+
21
+ # Hugging Face dependencies for cloud deployment
22
+ huggingface-hub>=0.20.0
23
+ langchain-huggingface>=0.0.1