37-AN committed on
Commit ·
31cd25b
1
Parent(s): 8faa239
Initial commit for Hugging Face Space deployment
Browse files- Dockerfile +5 -1
- app/core/llm.py +58 -17
Dockerfile
CHANGED
|
@@ -30,10 +30,14 @@ COPY . .
|
|
| 30 |
RUN mkdir -p data/documents data/vector_db && \
|
| 31 |
chmod -R 777 data
|
| 32 |
|
| 33 |
-
# Set environment
|
| 34 |
ENV TOKENIZERS_PARALLELISM=false
|
| 35 |
ENV HF_HOME=/app/.cache
|
| 36 |
ENV XDG_CACHE_HOME=/app/.cache
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# Expose the port required by Hugging Face Spaces
|
| 39 |
EXPOSE 7860
|
|
|
|
| 30 |
RUN mkdir -p data/documents data/vector_db && \
|
| 31 |
chmod -R 777 data
|
| 32 |
|
| 33 |
+
# Set environment variables
|
| 34 |
ENV TOKENIZERS_PARALLELISM=false
|
| 35 |
ENV HF_HOME=/app/.cache
|
| 36 |
ENV XDG_CACHE_HOME=/app/.cache
|
| 37 |
+
ENV HUGGINGFACEHUB_API_TOKEN=""
|
| 38 |
+
ENV HF_API_KEY=""
|
| 39 |
+
ENV LLM_MODEL="google/flan-t5-small"
|
| 40 |
+
ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
|
| 41 |
|
| 42 |
# Expose the port required by Hugging Face Spaces
|
| 43 |
EXPOSE 7860
|
app/core/llm.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from langchain.llms import HuggingFaceHub
|
|
|
|
| 2 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 3 |
from langchain.chains import LLMChain
|
| 4 |
from langchain.prompts import PromptTemplate
|
|
@@ -11,20 +12,53 @@ from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATU
|
|
| 11 |
|
| 12 |
def get_llm():
|
| 13 |
"""Initialize and return the language model."""
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def get_embeddings():
|
| 30 |
"""Initialize and return the embeddings model."""
|
|
@@ -39,10 +73,17 @@ def get_embeddings():
|
|
| 39 |
cache_dir = None
|
| 40 |
|
| 41 |
# SentenceTransformers can be used locally without an API key
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def get_chat_model():
|
| 48 |
"""
|
|
|
|
| 1 |
from langchain.llms import HuggingFaceHub
|
| 2 |
+
from langchain_community.llms import HuggingFaceEndpoint
|
| 3 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 4 |
from langchain.chains import LLMChain
|
| 5 |
from langchain.prompts import PromptTemplate
|
|
|
|
| 12 |
|
| 13 |
def get_llm():
    """Initialize and return the language model.

    Returns a LangChain LLM instance, chosen by configuration:
      * ``HuggingFaceHub`` when ``HF_API_KEY`` is set (authenticated),
      * ``HuggingFaceEndpoint`` against the public inference API otherwise,
      * ``FakeListLLM`` as a last-resort fallback if initialization fails,
        so callers always receive a usable LLM object and never crash here.
    """
    # Set up a cache directory with open permissions; Hugging Face Spaces
    # containers often restrict writes, so fall back gracefully on failure.
    # NOTE(review): cache_dir is prepared but not passed to the LLM
    # constructors below — presumably consumed via env elsewhere; confirm.
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")
            cache_dir = None

    # Export the Hub token only when one is actually configured.
    # Fix: the original assigned HF_API_KEY unconditionally, which raises
    # TypeError when HF_API_KEY is None (os.environ values must be str),
    # preventing the no-key branch below from ever running.
    if HF_API_KEY:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY

    # For Hugging Face Spaces, we'll use a simpler model approach
    # that doesn't require authentication for free models.
    try:
        if HF_API_KEY:
            # Authenticated path: use the HuggingFaceHub wrapper.
            llm = HuggingFaceHub(
                huggingfacehub_api_token=HF_API_KEY,
                repo_id=LLM_MODEL,
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS,
                },
            )
        else:
            # If no API key, inform the user and hit the public endpoint.
            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
            llm = HuggingFaceEndpoint(
                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
                task="text-generation",
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS,
                },
            )
        return llm
    except Exception as e:
        # Best-effort degradation: report the failure and hand back a mock
        # LLM rather than propagating the error to the application startup.
        print(f"Error initializing Hugging Face LLM: {e}")
        print("Using a fallback approach with a mock LLM.")

        # Create a very simple mock LLM for fallback
        from langchain.llms.fake import FakeListLLM
        return FakeListLLM(
            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
        )
|
| 62 |
|
| 63 |
def get_embeddings():
|
| 64 |
"""Initialize and return the embeddings model."""
|
|
|
|
| 73 |
cache_dir = None
|
| 74 |
|
| 75 |
# SentenceTransformers can be used locally without an API key
|
| 76 |
+
try:
|
| 77 |
+
return HuggingFaceEmbeddings(
|
| 78 |
+
model_name=EMBEDDING_MODEL,
|
| 79 |
+
cache_folder=cache_dir
|
| 80 |
+
)
|
| 81 |
+
except Exception as e:
|
| 82 |
+
print(f"Error initializing embeddings: {e}")
|
| 83 |
+
|
| 84 |
+
# Create mock embeddings that return random vectors for fallback
|
| 85 |
+
from langchain.embeddings.fake import FakeEmbeddings
|
| 86 |
+
return FakeEmbeddings(size=384) # Standard size for small embedding models
|
| 87 |
|
| 88 |
def get_chat_model():
|
| 89 |
"""
|