github-actions committed on
Commit
0aa781d
·
1 Parent(s): 96380a9

Sync from GitHub

Browse files
.github/workflows/hugging_face_sync_dev.yml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face Space
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - development
7
+
8
+ jobs:
9
+ sync:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout GitHub repo
14
+ uses: actions/checkout@v3
15
+
16
+ - name: Clone Hugging Face Space
17
+ run: |
18
+ git clone https://mukiibi:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mukiibi/AskXeno hf_space
19
+
20
+ - name: Sync files
21
+ run: |
22
+ rsync -av --delete --exclude ".git" ./ hf_space/ --ignore-errors --exclude "hf_space"
23
+
24
+
25
+ - name: Commit & push
26
+ run: |
27
+ cd hf_space
28
+ git config user.email "actions@github.com"
29
+ git config user.name "github-actions"
30
+ git add .
31
+ git commit -m "Sync from GitHub" || echo "No changes to commit"
32
+ git push origin main
app.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
9
  import torch
10
  import numpy as np
11
  from sentence_transformers import util
12
- import google.generativeai as genai
13
  import chromadb
14
  from langchain_chroma import Chroma
15
  import gspread
@@ -47,8 +47,9 @@ timer = PipelineTimer()
47
  # Ensure API Key is set
48
  if "GEMINI_API_KEY" not in os.environ:
49
  print("WARNING: GEMINI_API_KEY environment variable not found.")
50
-
51
- genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
 
52
  embedding_model = "models/embedding-001"
53
  llm_model_name = "models/gemma-3-4b-it"
54
  collection_name = "xeno_collection"
@@ -356,14 +357,16 @@ def process_context(results, cosine_scores, max_results=2):
356
  # === LLM Generation ===
357
  def generate_xeno_response(context, question, chat_history):
358
  with timer.time_step("llm_generation"):
359
- model = genai.GenerativeModel(llm_model_name)
360
  formatted_history = "\n".join(
361
  [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
362
  ) if chat_history else "None"
363
 
364
  prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
365
 
366
- response = model.generate_content(prompt)
 
 
 
367
  return response.text.strip()
368
 
369
  # === Main Interface Logic ===
 
9
  import torch
10
  import numpy as np
11
  from sentence_transformers import util
12
+ from google import genai
13
  import chromadb
14
  from langchain_chroma import Chroma
15
  import gspread
 
47
  # Ensure API Key is set
48
  if "GEMINI_API_KEY" not in os.environ:
49
  print("WARNING: GEMINI_API_KEY environment variable not found.")
50
+
51
+ # Initialize the client
52
+ genai_client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
53
  embedding_model = "models/embedding-001"
54
  llm_model_name = "models/gemma-3-4b-it"
55
  collection_name = "xeno_collection"
 
357
  # === LLM Generation ===
358
  def generate_xeno_response(context, question, chat_history):
359
  with timer.time_step("llm_generation"):
 
360
  formatted_history = "\n".join(
361
  [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
362
  ) if chat_history else "None"
363
 
364
  prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
365
 
366
+ response = genai_client.models.generate_content(
367
+ model=llm_model_name,
368
+ contents={"text": prompt},
369
+ )
370
  return response.text.strip()
371
 
372
  # === Main Interface Logic ===
docker-compose.yml CHANGED
@@ -9,6 +9,11 @@ services:
9
  - GEMINI_API_KEY=${GEMINI_API_KEY}
10
  - GOOGLE_SHEETS_CREDENTIALS=${GOOGLE_SHEETS_CREDENTIALS}
11
  volumes:
 
 
 
 
 
12
  - xeno_db:/tmp/xeno_db
13
  - chroma_cache:/root/.cache/chroma
14
  - ./xeno_memory.db:/app/xeno_memory.db
 
9
  - GEMINI_API_KEY=${GEMINI_API_KEY}
10
  - GOOGLE_SHEETS_CREDENTIALS=${GOOGLE_SHEETS_CREDENTIALS}
11
  volumes:
12
+ # Mount source code for live debugging (no rebuild needed)
13
+ - ./app.py:/app/app.py
14
+ - ./src:/app/src
15
+ - ./XENO_Uganda_KnowledgeBase_Advisory.json:/app/XENO_Uganda_KnowledgeBase_Advisory.json
16
+ # Data volumes
17
  - xeno_db:/tmp/xeno_db
18
  - chroma_cache:/root/.cache/chroma
19
  - ./xeno_memory.db:/app/xeno_memory.db
requirements.txt CHANGED
@@ -1,20 +1,21 @@
1
- huggingface_hub
2
  gradio
3
  pydantic==2.10.6
4
  pandas
5
- torch
6
  numpy
7
  sentence-transformers
8
- google-generativeai
9
  chromadb
10
  langgraph
11
  langgraph-checkpoint-sqlite
12
  langchain-chroma
13
  gspread
14
  google-auth
 
15
  python-dateutil
16
 
17
  # Testing dependencies
18
  pytest>=7.0.0
19
  pytest-cov>=4.0.0
20
- pytest-mock>=0.10.0 # Note: I corrected the mock version based on common practice, but check your actual need.
 
1
+ huggingface_hub==0.25.2
2
  gradio
3
  pydantic==2.10.6
4
  pandas
5
+ torch==2.3.1
6
  numpy
7
  sentence-transformers
8
+ google-genai
9
  chromadb
10
  langgraph
11
  langgraph-checkpoint-sqlite
12
  langchain-chroma
13
  gspread
14
  google-auth
15
+
16
  python-dateutil
17
 
18
  # Testing dependencies
19
  pytest>=7.0.0
20
  pytest-cov>=4.0.0
21
+ pytest-mock>=0.10.0
src/config.py CHANGED
@@ -3,17 +3,18 @@ Configuration module for XENO Bot
3
  Handles environment variables and application settings
4
  """
5
  import os
6
- import google.generativeai as genai
7
 
8
  # === API Configuration ===
9
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
10
  if not GEMINI_API_KEY:
11
  raise ValueError("GEMINI_API_KEY environment variable not set.")
12
 
13
- genai.configure(api_key=GEMINI_API_KEY)
 
14
 
15
  # === Model Configuration ===
16
- EMBEDDING_MODEL = "models/embedding-001"
17
  LLM_MODEL_NAME = "models/gemma-3-4b-it"
18
 
19
  # === Database Configuration ===
 
3
  Handles environment variables and application settings
4
  """
5
  import os
6
+ from google import genai
7
 
8
  # === API Configuration ===
9
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
10
  if not GEMINI_API_KEY:
11
  raise ValueError("GEMINI_API_KEY environment variable not set.")
12
 
13
+ # Initialize the genai client
14
+ client = genai.Client(api_key=GEMINI_API_KEY)
15
 
16
  # === Model Configuration ===
17
+ EMBEDDING_MODEL = "text-embedding-004"
18
  LLM_MODEL_NAME = "models/gemma-3-4b-it"
19
 
20
  # === Database Configuration ===
src/response_generator.py CHANGED
@@ -2,9 +2,9 @@
2
  Response Generation module for XENO Bot
3
  Handles LLM response generation
4
  """
5
- import google.generativeai as genai
6
  from typing import List, Dict
7
- from src.config import LLM_MODEL_NAME, SYSTEM_PROMPT
8
 
9
 
10
  def generate_xeno_response(context: str, question: str, chat_history: List[Dict[str, str]], timer=None) -> str:
@@ -29,8 +29,6 @@ def generate_xeno_response(context: str, question: str, chat_history: List[Dict[
29
 
30
  def _generate_response_impl(context: str, question: str, chat_history: List[Dict[str, str]]) -> str:
31
  """Internal implementation of response generation"""
32
- model = genai.GenerativeModel(LLM_MODEL_NAME)
33
-
34
  # Format chat history
35
  formatted_history = "\n".join(
36
  [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
@@ -40,9 +38,12 @@ def _generate_response_impl(context: str, question: str, chat_history: List[Dict
40
  prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
41
 
42
  # Generate response
43
- response = model.generate_content(prompt)
 
 
 
44
 
45
- return response.text.strip()
46
 
47
 
48
  def format_chat_history(messages: List[Dict[str, str]]) -> str:
 
2
  Response Generation module for XENO Bot
3
  Handles LLM response generation
4
  """
5
+ from google import genai
6
  from typing import List, Dict
7
+ from src.config import LLM_MODEL_NAME, SYSTEM_PROMPT, client
8
 
9
 
10
  def generate_xeno_response(context: str, question: str, chat_history: List[Dict[str, str]], timer=None) -> str:
 
29
 
30
  def _generate_response_impl(context: str, question: str, chat_history: List[Dict[str, str]]) -> str:
31
  """Internal implementation of response generation"""
 
 
32
  # Format chat history
33
  formatted_history = "\n".join(
34
  [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
 
38
  prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
39
 
40
  # Generate response
41
+ response = client.models.generate_content(
42
+ model=LLM_MODEL_NAME,
43
+ contents={"text": prompt}
44
+ )
45
 
46
+ return response.text
47
 
48
 
49
  def format_chat_history(messages: List[Dict[str, str]]) -> str:
src/vector_store.py CHANGED
@@ -8,8 +8,9 @@ import torch
8
  from langchain_chroma import Chroma
9
  from sentence_transformers import util
10
  from typing import List, Tuple, Any
11
- import google.generativeai as genai
12
  from src.config import (
 
13
  COLLECTION_NAME,
14
  CHROMA_DB_PATH,
15
  RAG_TOP_K,
@@ -78,22 +79,23 @@ def generate_embeddings(query: str, documents: List[Any], timer=None) -> Tuple[L
78
 
79
  def _generate_embeddings_impl(query: str, documents: List[Any]) -> Tuple[List[float], List[List[float]]]:
80
  """Internal implementation of embedding generation"""
81
- # Generate query embedding
82
- query_embedding = genai.embed_content(
83
  model=EMBEDDING_MODEL,
84
- content=query,
85
- task_type="retrieval_query"
86
- )['embedding']
 
87
 
88
- # Generate document embeddings
89
- doc_embeddings = [
90
- genai.embed_content(
91
- model=EMBEDDING_MODEL,
92
- content=doc.page_content,
93
- task_type="retrieval_document"
94
- )['embedding']
95
- for doc in documents
96
- ]
97
 
98
  return query_embedding, doc_embeddings
99
 
 
8
  from langchain_chroma import Chroma
9
  from sentence_transformers import util
10
  from typing import List, Tuple, Any
11
+ from google import genai
12
  from src.config import (
13
+ client,
14
  COLLECTION_NAME,
15
  CHROMA_DB_PATH,
16
  RAG_TOP_K,
 
79
 
80
  def _generate_embeddings_impl(query: str, documents: List[Any]) -> Tuple[List[float], List[List[float]]]:
81
  """Internal implementation of embedding generation"""
82
+ # 1. Generate the query embedding
83
+ query_result = client.models.embed_content(
84
  model=EMBEDDING_MODEL,
85
+ contents=query
86
+ )
87
+ # The SDK returns an EmbedContentResponse object with an 'embeddings' attribute
88
+ query_embedding = query_result.embeddings[0].values
89
 
90
+ # 2. Generate the document embeddings in a single batched request
91
+ doc_contents = [doc.page_content for doc in documents]
92
+ doc_results = client.models.embed_content(
93
+ model=EMBEDDING_MODEL,
94
+ contents=doc_contents
95
+ )
96
+
97
+ # Map the list of embedding objects to a list of vector values
98
+ doc_embeddings = [e.values for e in doc_results.embeddings]
99
 
100
  return query_embedding, doc_embeddings
101