Spaces:

Sebunya
/

AskXeno

Build error

App Files Files Community

github-actions committed on Jan 19

Commit

0aa781d

1 Parent(s): 96380a9

Sync from GitHub

Browse files

Files changed (7) hide show

.github/workflows/hugging_face_sync_dev.yml +32 -0
app.py +8 -5
docker-compose.yml +5 -0
requirements.txt +5 -4
src/config.py +4 -3
src/response_generator.py +7 -6
src/vector_store.py +17 -15

.github/workflows/hugging_face_sync_dev.yml ADDED Viewed

	@@ -0,0 +1,32 @@

+name: Sync to Hugging Face Space
+on:
+  push:
+    branches:
+      - development
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout GitHub repo
+        uses: actions/checkout@v3
+      - name: Clone Hugging Face Space
+        run: |
+          git clone https://mukiibi:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mukiibi/AskXeno hf_space
+      - name: Sync files
+        run: |
+          rsync -av --delete --exclude ".git" ./ hf_space/ --ignore-errors --exclude "hf_space"
+      - name: Commit & push
+        run: |
+          cd hf_space
+          git config user.email "actions@github.com"
+          git config user.name "github-actions"
+          git add .
+          git commit -m "Sync from GitHub" || echo "No changes to commit"
+          git push origin main

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import pandas as pd
 import torch
 import numpy as np
 from sentence_transformers import util
-import google.generativeai as genai
 import chromadb
 from langchain_chroma import Chroma
 import gspread
@@ -47,8 +47,9 @@ timer = PipelineTimer()
 # Ensure API Key is set
 if "GEMINI_API_KEY" not in os.environ:
     print("WARNING: GEMINI_API_KEY environment variable not found.")
-genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
 embedding_model = "models/embedding-001"
 llm_model_name = "models/gemma-3-4b-it"
 collection_name = "xeno_collection"
@@ -356,14 +357,16 @@ def process_context(results, cosine_scores, max_results=2):
 # === LLM Generation ===
 def generate_xeno_response(context, question, chat_history):
     with timer.time_step("llm_generation"):
-        model = genai.GenerativeModel(llm_model_name)
         formatted_history = "\n".join(
             [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
         ) if chat_history else "None"
         prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
-        response = model.generate_content(prompt)
         return response.text.strip()
 # === Main Interface Logic ===

 import torch
 import numpy as np
 from sentence_transformers import util
+from google import genai
 import chromadb
 from langchain_chroma import Chroma
 import gspread
 # Ensure API Key is set
 if "GEMINI_API_KEY" not in os.environ:
     print("WARNING: GEMINI_API_KEY environment variable not found.")
+# Initialize the client
+genai_client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
 embedding_model = "models/embedding-001"
 llm_model_name = "models/gemma-3-4b-it"
 collection_name = "xeno_collection"
 # === LLM Generation ===
 def generate_xeno_response(context, question, chat_history):
     with timer.time_step("llm_generation"):
         formatted_history = "\n".join(
             [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
         ) if chat_history else "None"
         prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
+        response = genai_client.models.generate_content(
+            model=llm_model_name,
+            contents={"text": prompt},
+        )
         return response.text.strip()
 # === Main Interface Logic ===

docker-compose.yml CHANGED Viewed

@@ -9,6 +9,11 @@ services:
       - GEMINI_API_KEY=${GEMINI_API_KEY}
       - GOOGLE_SHEETS_CREDENTIALS=${GOOGLE_SHEETS_CREDENTIALS}
     volumes:
       - xeno_db:/tmp/xeno_db
       - chroma_cache:/root/.cache/chroma
       - ./xeno_memory.db:/app/xeno_memory.db

       - GEMINI_API_KEY=${GEMINI_API_KEY}
       - GOOGLE_SHEETS_CREDENTIALS=${GOOGLE_SHEETS_CREDENTIALS}
     volumes:
+      # Mount source code for live debugging (no rebuild needed)
+      - ./app.py:/app/app.py
+      - ./src:/app/src
+      - ./XENO_Uganda_KnowledgeBase_Advisory.json:/app/XENO_Uganda_KnowledgeBase_Advisory.json
+      # Data volumes
       - xeno_db:/tmp/xeno_db
       - chroma_cache:/root/.cache/chroma
       - ./xeno_memory.db:/app/xeno_memory.db

requirements.txt CHANGED Viewed

@@ -1,20 +1,21 @@
-huggingface_hub
 gradio
 pydantic==2.10.6
 pandas
-torch
 numpy
 sentence-transformers
-google-generativeai
 chromadb
 langgraph
 langgraph-checkpoint-sqlite
 langchain-chroma
 gspread
 google-auth
 python-dateutil
 # Testing dependencies
 pytest>=7.0.0
 pytest-cov>=4.0.0
-pytest-mock>=0.10.0 # Note: I corrected the mock version based on common practice, but check your actual need.

+huggingface_hub==0.25.2
 gradio
 pydantic==2.10.6
 pandas
+torch==2.3.1
 numpy
 sentence-transformers
+google-genai
 chromadb
 langgraph
 langgraph-checkpoint-sqlite
 langchain-chroma
 gspread
 google-auth
 python-dateutil
 # Testing dependencies
 pytest>=7.0.0
 pytest-cov>=4.0.0
+pytest-mock>=0.10.0

src/config.py CHANGED Viewed

@@ -3,17 +3,18 @@ Configuration module for XENO Bot
 Handles environment variables and application settings
 """
 import os
-import google.generativeai as genai
 # === API Configuration ===
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     raise ValueError("GEMINI_API_KEY environment variable not set.")
-genai.configure(api_key=GEMINI_API_KEY)
 # === Model Configuration ===
-EMBEDDING_MODEL = "models/embedding-001"
 LLM_MODEL_NAME = "models/gemma-3-4b-it"
 # === Database Configuration ===

 Handles environment variables and application settings
 """
 import os
+from google import genai
 # === API Configuration ===
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     raise ValueError("GEMINI_API_KEY environment variable not set.")
+# Initialize the genai client
+client = genai.Client(api_key=GEMINI_API_KEY)
 # === Model Configuration ===
+EMBEDDING_MODEL = "text-embedding-004"
 LLM_MODEL_NAME = "models/gemma-3-4b-it"
 # === Database Configuration ===

src/response_generator.py CHANGED Viewed

@@ -2,9 +2,9 @@
 Response Generation module for XENO Bot
 Handles LLM response generation
 """
-import google.generativeai as genai
 from typing import List, Dict
-from src.config import LLM_MODEL_NAME, SYSTEM_PROMPT
 def generate_xeno_response(context: str, question: str, chat_history: List[Dict[str, str]], timer=None) -> str:
@@ -29,8 +29,6 @@ def generate_xeno_response(context: str, question: str, chat_history: List[Dict[
 def _generate_response_impl(context: str, question: str, chat_history: List[Dict[str, str]]) -> str:
     """Internal implementation of response generation"""
-    model = genai.GenerativeModel(LLM_MODEL_NAME)
     # Format chat history
     formatted_history = "\n".join(
         [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
@@ -40,9 +38,12 @@ def _generate_response_impl(context: str, question: str, chat_history: List[Dict
     prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
     # Generate response
-    response = model.generate_content(prompt)
-    return response.text.strip()
 def format_chat_history(messages: List[Dict[str, str]]) -> str:

 Response Generation module for XENO Bot
 Handles LLM response generation
 """
+from google import genai
 from typing import List, Dict
+from src.config import LLM_MODEL_NAME, SYSTEM_PROMPT, client
 def generate_xeno_response(context: str, question: str, chat_history: List[Dict[str, str]], timer=None) -> str:
 def _generate_response_impl(context: str, question: str, chat_history: List[Dict[str, str]]) -> str:
     """Internal implementation of response generation"""
     # Format chat history
     formatted_history = "\n".join(
         [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
     prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
     # Generate response
+    response = client.generate_content(
+        model=LLM_MODEL_NAME,
+        contents={"text": prompt}
+    )
+    return response.text
 def format_chat_history(messages: List[Dict[str, str]]) -> str:

src/vector_store.py CHANGED Viewed

@@ -8,8 +8,9 @@ import torch
 from langchain_chroma import Chroma
 from sentence_transformers import util
 from typing import List, Tuple, Any
-import google.generativeai as genai
 from src.config import (
     COLLECTION_NAME,
     CHROMA_DB_PATH,
     RAG_TOP_K,
@@ -78,22 +79,23 @@ def generate_embeddings(query: str, documents: List[Any], timer=None) -> Tuple[L
 def _generate_embeddings_impl(query: str, documents: List[Any]) -> Tuple[List[float], List[List[float]]]:
     """Internal implementation of embedding generation"""
-    # Generate query embedding
-    query_embedding = genai.embed_content(
         model=EMBEDDING_MODEL,
-        content=query,
-        task_type="retrieval_query"
-    )['embedding']
-    # Generate document embeddings
-    doc_embeddings = [
-        genai.embed_content(
-            model=EMBEDDING_MODEL,
-            content=doc.page_content,
-            task_type="retrieval_document"
-        )['embedding']
-        for doc in documents
-    ]
     return query_embedding, doc_embeddings

 from langchain_chroma import Chroma
 from sentence_transformers import util
 from typing import List, Tuple, Any
+from google import genai
 from src.config import (
+    client,
     COLLECTION_NAME,
     CHROMA_DB_PATH,
     RAG_TOP_K,
 def _generate_embeddings_impl(query: str, documents: List[Any]) -> Tuple[List[float], List[List[float]]]:
     """Internal implementation of embedding generation"""
+    # 1. Update query embedding access
+    query_result = client.models.embed_content(
         model=EMBEDDING_MODEL,
+        contents=query
+    )
+    # The SDK returns an EmbedContentResponse object with an 'embeddings' attribute
+    query_embedding = query_result.embeddings[0].values
+    # 2. Update document embeddings access
+    doc_contents = [doc.page_content for doc in documents]
+    doc_results = client.models.embed_content(
+        model=EMBEDDING_MODEL,
+        contents=doc_contents
+    )
+    # Map the list of embedding objects to a list of vector values
+    doc_embeddings = [e.values for e in doc_results.embeddings]
     return query_embedding, doc_embeddings