37-AN committed on
Commit ·
31cd25b
1
Parent(s): 8faa239
Initial commit for Hugging Face Space deployment
Browse files- Dockerfile +5 -1
- app/core/llm.py +58 -17
Dockerfile
CHANGED
|
@@ -30,10 +30,14 @@ COPY . .
|
|
| 30 |
RUN mkdir -p data/documents data/vector_db && \
|
| 31 |
chmod -R 777 data
|
| 32 |
|
| 33 |
-
# Set environment
|
| 34 |
ENV TOKENIZERS_PARALLELISM=false
|
| 35 |
ENV HF_HOME=/app/.cache
|
| 36 |
ENV XDG_CACHE_HOME=/app/.cache
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# Expose the port required by Hugging Face Spaces
|
| 39 |
EXPOSE 7860
|
|
|
|
| 30 |
RUN mkdir -p data/documents data/vector_db && \
|
| 31 |
chmod -R 777 data
|
| 32 |
|
| 33 |
+
# Set environment variables
|
| 34 |
ENV TOKENIZERS_PARALLELISM=false
|
| 35 |
ENV HF_HOME=/app/.cache
|
| 36 |
ENV XDG_CACHE_HOME=/app/.cache
|
| 37 |
+
ENV HUGGINGFACEHUB_API_TOKEN=""
|
| 38 |
+
ENV HF_API_KEY=""
|
| 39 |
+
ENV LLM_MODEL="google/flan-t5-small"
|
| 40 |
+
ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
|
| 41 |
|
| 42 |
# Expose the port required by Hugging Face Spaces
|
| 43 |
EXPOSE 7860
|
app/core/llm.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from langchain.llms import HuggingFaceHub
|
|
|
|
| 2 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 3 |
from langchain.chains import LLMChain
|
| 4 |
from langchain.prompts import PromptTemplate
|
|
@@ -11,20 +12,53 @@ from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATU
|
|
| 11 |
|
| 12 |
def get_llm():
|
| 13 |
"""Initialize and return the language model."""
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def get_embeddings():
|
| 30 |
"""Initialize and return the embeddings model."""
|
|
@@ -39,10 +73,17 @@ def get_embeddings():
|
|
| 39 |
cache_dir = None
|
| 40 |
|
| 41 |
# SentenceTransformers can be used locally without an API key
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def get_chat_model():
|
| 48 |
"""
|
|
|
|
| 1 |
from langchain.llms import HuggingFaceHub
|
| 2 |
+
from langchain_community.llms import HuggingFaceEndpoint
|
| 3 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 4 |
from langchain.chains import LLMChain
|
| 5 |
from langchain.prompts import PromptTemplate
|
|
|
|
| 12 |
|
| 13 |
def get_llm():
    """Initialize and return the language model.

    Returns a LangChain LLM instance, chosen by configuration:
      * ``HuggingFaceHub`` when ``HF_API_KEY`` is set (authenticated),
      * ``HuggingFaceEndpoint`` against the public inference API otherwise,
      * ``FakeListLLM`` as a last-resort fallback if initialization fails,
        so callers always receive a usable LLM object and never crash here.
    """
    # Set up a cache directory with open permissions; Hugging Face Spaces
    # containers often restrict writes, so fall back gracefully on failure.
    # NOTE(review): cache_dir is prepared but not passed to the LLM
    # constructors below — presumably consumed via env elsewhere; confirm.
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")
            cache_dir = None

    # Export the Hub token only when one is actually configured.
    # Fix: the original assigned HF_API_KEY unconditionally, which raises
    # TypeError when HF_API_KEY is None (os.environ values must be str),
    # preventing the no-key branch below from ever running.
    if HF_API_KEY:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY

    # For Hugging Face Spaces, we'll use a simpler model approach
    # that doesn't require authentication for free models.
    try:
        if HF_API_KEY:
            # Authenticated path: use the HuggingFaceHub wrapper.
            llm = HuggingFaceHub(
                huggingfacehub_api_token=HF_API_KEY,
                repo_id=LLM_MODEL,
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS,
                },
            )
        else:
            # If no API key, inform the user and hit the public endpoint.
            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
            llm = HuggingFaceEndpoint(
                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
                task="text-generation",
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS,
                },
            )
        return llm
    except Exception as e:
        # Best-effort degradation: report the failure and hand back a mock
        # LLM rather than propagating the error to the application startup.
        print(f"Error initializing Hugging Face LLM: {e}")
        print("Using a fallback approach with a mock LLM.")

        # Create a very simple mock LLM for fallback
        from langchain.llms.fake import FakeListLLM
        return FakeListLLM(
            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
        )
|
| 62 |
|
| 63 |
def get_embeddings():
|
| 64 |
"""Initialize and return the embeddings model."""
|
|
|
|
| 73 |
cache_dir = None
|
| 74 |
|
| 75 |
# SentenceTransformers can be used locally without an API key
|
| 76 |
+
try:
|
| 77 |
+
return HuggingFaceEmbeddings(
|
| 78 |
+
model_name=EMBEDDING_MODEL,
|
| 79 |
+
cache_folder=cache_dir
|
| 80 |
+
)
|
| 81 |
+
except Exception as e:
|
| 82 |
+
print(f"Error initializing embeddings: {e}")
|
| 83 |
+
|
| 84 |
+
# Create mock embeddings that return random vectors for fallback
|
| 85 |
+
from langchain.embeddings.fake import FakeEmbeddings
|
| 86 |
+
return FakeEmbeddings(size=384) # Standard size for small embedding models
|
| 87 |
|
| 88 |
def get_chat_model():
|
| 89 |
"""
|