update model embedding, fix dockerfile
Browse files
- Dockerfile +3 -2
- README.md +3 -1
- app/api/mongodb_routes.py +4 -0
- app/api/rag_routes.py +24 -4
- app/database/pinecone.py +32 -3
- app/models/mongodb_models.py +1 -1
- app/utils/pdf_processor.py +6 -2
Dockerfile
CHANGED
|
@@ -2,14 +2,15 @@ FROM python:3.11-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
# Cài đặt các gói hệ thống cần thiết
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
build-essential \
|
| 8 |
curl \
|
| 9 |
-
software-properties-common \
|
| 10 |
git \
|
| 11 |
gcc \
|
| 12 |
python3-dev \
|
|
|
|
|
|
|
| 13 |
&& rm -rf /var/lib/apt/lists/*
|
| 14 |
|
| 15 |
# Sao chép các file yêu cầu trước để tận dụng cache của Docker
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
+
# Cài đặt các gói hệ thống cần thiết (tối giản, tương thích Debian trixie)
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
build-essential \
|
| 8 |
curl \
|
|
|
|
| 9 |
git \
|
| 10 |
gcc \
|
| 11 |
python3-dev \
|
| 12 |
+
libpq-dev \
|
| 13 |
+
pkg-config \
|
| 14 |
&& rm -rf /var/lib/apt/lists/*
|
| 15 |
|
| 16 |
# Sao chép các file yêu cầu trước để tận dụng cache của Docker
|
README.md
CHANGED
|
@@ -190,7 +190,9 @@ PINECONE_API_KEY=your-pinecone-api-key
|
|
| 190 |
PINECONE_INDEX_NAME=your-pinecone-index-name
|
| 191 |
PINECONE_ENVIRONMENT=gcp-starter
|
| 192 |
|
| 193 |
-
# Google Gemini API key
|
|
|
|
|
|
|
| 194 |
GOOGLE_API_KEY=your-google-api-key
|
| 195 |
|
| 196 |
# WebSocket configuration
|
|
|
|
| 190 |
PINECONE_INDEX_NAME=your-pinecone-index-name
|
| 191 |
PINECONE_ENVIRONMENT=gcp-starter
|
| 192 |
|
| 193 |
+
# Google Gemini API key (REQUIRED for embeddings)
|
| 194 |
+
# Get your API key from: https://makersuite.google.com/app/apikey
|
| 195 |
+
# Note: This project uses the Gemini text-embedding-004 model, which is subject to free-tier quota limits
|
| 196 |
GOOGLE_API_KEY=your-google-api-key
|
| 197 |
|
| 198 |
# WebSocket configuration
|
app/api/mongodb_routes.py
CHANGED
|
@@ -167,6 +167,10 @@ async def get_history(user_id: str, n: int = Query(3, ge=1, le=10)):
|
|
| 167 |
|
| 168 |
- **user_id**: User's ID from Telegram
|
| 169 |
- **n**: Number of most recent interactions to return (default: 3, min: 1, max: 10)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
"""
|
| 171 |
try:
|
| 172 |
# Kiểm tra kết nối MongoDB
|
|
|
|
| 167 |
|
| 168 |
- **user_id**: User's ID from Telegram
|
| 169 |
- **n**: Number of most recent interactions to return (default: 3, min: 1, max: 10)
|
| 170 |
+
|
| 171 |
+
Returns a formatted string with conversation history in the format:
|
| 172 |
+
User: [question]
|
| 173 |
+
Bot: [answer]
|
| 174 |
"""
|
| 175 |
try:
|
| 176 |
# Kiểm tra kết nối MongoDB
|
app/api/rag_routes.py
CHANGED
|
@@ -58,7 +58,8 @@ logger = logging.getLogger(__name__)
|
|
| 58 |
|
| 59 |
# Configure Google Gemini API
|
| 60 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
| 61 |
-
|
|
|
|
| 62 |
KEYWORD_LIST = os.getenv("KEYWORDS")
|
| 63 |
|
| 64 |
# Create router
|
|
@@ -159,8 +160,19 @@ Your response:
|
|
| 159 |
async def get_embedding(text: str):
|
| 160 |
"""Get embedding from Google Gemini API"""
|
| 161 |
try:
|
| 162 |
-
#
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
# Generate embedding
|
| 166 |
result = await embedding_model.aembed_query(text)
|
|
@@ -169,10 +181,18 @@ async def get_embedding(text: str):
|
|
| 169 |
return {
|
| 170 |
"embedding": result,
|
| 171 |
"text": text,
|
| 172 |
-
"model": "embedding-
|
| 173 |
}
|
| 174 |
except Exception as e:
|
| 175 |
logger.error(f"Error generating embedding: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
raise HTTPException(status_code=500, detail=f"Failed to generate embedding: {str(e)}")
|
| 177 |
|
| 178 |
# Endpoint for generating embeddings
|
|
|
|
| 58 |
|
| 59 |
# Configure Google Gemini API
|
| 60 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
| 61 |
+
if GOOGLE_API_KEY:
|
| 62 |
+
genai.configure(api_key=GOOGLE_API_KEY)
|
| 63 |
KEYWORD_LIST = os.getenv("KEYWORDS")
|
| 64 |
|
| 65 |
# Create router
|
|
|
|
| 160 |
async def get_embedding(text: str):
|
| 161 |
"""Get embedding from Google Gemini API"""
|
| 162 |
try:
|
| 163 |
+
# Check if Google API key is available
|
| 164 |
+
if not GOOGLE_API_KEY:
|
| 165 |
+
raise HTTPException(status_code=500, detail="Google API key not configured")
|
| 166 |
+
|
| 167 |
+
# Log API key for debugging (masked)
|
| 168 |
+
masked_key = GOOGLE_API_KEY[:8] + "..." + GOOGLE_API_KEY[-4:] if len(GOOGLE_API_KEY) > 12 else "***"
|
| 169 |
+
logger.info(f"Using Google API key for embedding: {masked_key}")
|
| 170 |
+
|
| 171 |
+
# Initialize embedding model (using latest model)
|
| 172 |
+
embedding_model = GoogleGenerativeAIEmbeddings(
|
| 173 |
+
model="models/text-embedding-004", # Updated to latest model with correct prefix
|
| 174 |
+
google_api_key=GOOGLE_API_KEY
|
| 175 |
+
)
|
| 176 |
|
| 177 |
# Generate embedding
|
| 178 |
result = await embedding_model.aembed_query(text)
|
|
|
|
| 181 |
return {
|
| 182 |
"embedding": result,
|
| 183 |
"text": text,
|
| 184 |
+
"model": "models/text-embedding-004"
|
| 185 |
}
|
| 186 |
except Exception as e:
|
| 187 |
logger.error(f"Error generating embedding: {e}")
|
| 188 |
+
|
| 189 |
+
# Check for quota exceeded error
|
| 190 |
+
if "quota" in str(e).lower() or "429" in str(e):
|
| 191 |
+
raise HTTPException(
|
| 192 |
+
status_code=429,
|
| 193 |
+
detail="Google API quota exceeded. Please check your billing or wait for quota reset."
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
raise HTTPException(status_code=500, detail=f"Failed to generate embedding: {str(e)}")
|
| 197 |
|
| 198 |
# Endpoint for generating embeddings
|
app/database/pinecone.py
CHANGED
|
@@ -410,8 +410,25 @@ class ThresholdRetriever(BaseRetriever):
|
|
| 410 |
embedding = self.embeddings.embed_query(query)
|
| 411 |
except Exception as e:
|
| 412 |
logger.error(f"Error generating embedding: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
# Fallback to creating a new embedding model if needed
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
embedding = embedding_model.embed_query(query)
|
| 416 |
|
| 417 |
# Perform search with advanced options - avoid asyncio.run()
|
|
@@ -523,8 +540,20 @@ def get_chain(
|
|
| 523 |
start_time = time.time()
|
| 524 |
logger.info("Initializing new retriever chain with threshold-based filtering")
|
| 525 |
|
| 526 |
-
#
|
| 527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
|
| 529 |
# Get index
|
| 530 |
pinecone_index = get_pinecone_index()
|
|
|
|
| 410 |
embedding = self.embeddings.embed_query(query)
|
| 411 |
except Exception as e:
|
| 412 |
logger.error(f"Error generating embedding: {e}")
|
| 413 |
+
|
| 414 |
+
# Check for quota exceeded error
|
| 415 |
+
if "quota" in str(e).lower() or "429" in str(e):
|
| 416 |
+
logger.error("Google API quota exceeded. Please check your billing or wait for quota reset.")
|
| 417 |
+
return None
|
| 418 |
+
|
| 419 |
# Fallback to creating a new embedding model if needed
|
| 420 |
+
if not GOOGLE_API_KEY:
|
| 421 |
+
logger.error("Google API key not configured for fallback embedding")
|
| 422 |
+
return None
|
| 423 |
+
|
| 424 |
+
# Log API key for debugging (masked)
|
| 425 |
+
masked_key = GOOGLE_API_KEY[:8] + "..." + GOOGLE_API_KEY[-4:] if len(GOOGLE_API_KEY) > 12 else "***"
|
| 426 |
+
logger.info(f"Using Google API key for fallback: {masked_key}")
|
| 427 |
+
|
| 428 |
+
embedding_model = GoogleGenerativeAIEmbeddings(
|
| 429 |
+
model="models/text-embedding-004", # Updated to latest model with correct prefix
|
| 430 |
+
google_api_key=GOOGLE_API_KEY
|
| 431 |
+
)
|
| 432 |
embedding = embedding_model.embed_query(query)
|
| 433 |
|
| 434 |
# Perform search with advanced options - avoid asyncio.run()
|
|
|
|
| 540 |
start_time = time.time()
|
| 541 |
logger.info("Initializing new retriever chain with threshold-based filtering")
|
| 542 |
|
| 543 |
+
# Check if Google API key is available
|
| 544 |
+
if not GOOGLE_API_KEY:
|
| 545 |
+
logger.error("Google API key not configured")
|
| 546 |
+
return None
|
| 547 |
+
|
| 548 |
+
# Log API key for debugging (masked)
|
| 549 |
+
masked_key = GOOGLE_API_KEY[:8] + "..." + GOOGLE_API_KEY[-4:] if len(GOOGLE_API_KEY) > 12 else "***"
|
| 550 |
+
logger.info(f"Using Google API key: {masked_key}")
|
| 551 |
+
|
| 552 |
+
# Initialize embeddings model (using latest model)
|
| 553 |
+
embeddings = GoogleGenerativeAIEmbeddings(
|
| 554 |
+
model="models/text-embedding-004", # Updated to latest model with correct prefix
|
| 555 |
+
google_api_key=GOOGLE_API_KEY
|
| 556 |
+
)
|
| 557 |
|
| 558 |
# Get index
|
| 559 |
pinecone_index = get_pinecone_index()
|
app/models/mongodb_models.py
CHANGED
|
@@ -52,4 +52,4 @@ class QuestionAnswer(BaseModel):
|
|
| 52 |
|
| 53 |
class HistoryResponse(BaseModel):
|
| 54 |
"""Response model for history"""
|
| 55 |
-
history:
|
|
|
|
| 52 |
|
| 53 |
class HistoryResponse(BaseModel):
|
| 54 |
"""Response model for history"""
|
| 55 |
+
history: str
|
app/utils/pdf_processor.py
CHANGED
|
@@ -99,6 +99,10 @@ class PDFProcessor:
|
|
| 99 |
if not self.google_api_key:
|
| 100 |
raise ValueError("Google API key not found in environment variables")
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
genai.configure(api_key=self.google_api_key)
|
| 103 |
|
| 104 |
# First, get the expected dimensions from Pinecone
|
|
@@ -110,9 +114,9 @@ class PDFProcessor:
|
|
| 110 |
pinecone_dimension = stats.dimension
|
| 111 |
logger.info(f"[{self.correlation_id}] Pinecone index dimension: {pinecone_dimension}")
|
| 112 |
|
| 113 |
-
# Create embedding model
|
| 114 |
embedding_model = GoogleGenerativeAIEmbeddings(
|
| 115 |
-
model="models/embedding-
|
| 116 |
google_api_key=self.google_api_key,
|
| 117 |
task_type="retrieval_document" # Use document embedding mode for longer text
|
| 118 |
)
|
|
|
|
| 99 |
if not self.google_api_key:
|
| 100 |
raise ValueError("Google API key not found in environment variables")
|
| 101 |
|
| 102 |
+
# Log API key for debugging (masked)
|
| 103 |
+
masked_key = self.google_api_key[:8] + "..." + self.google_api_key[-4:] if len(self.google_api_key) > 12 else "***"
|
| 104 |
+
logger.info(f"[{self.correlation_id}] Using Google API key: {masked_key}")
|
| 105 |
+
|
| 106 |
genai.configure(api_key=self.google_api_key)
|
| 107 |
|
| 108 |
# First, get the expected dimensions from Pinecone
|
|
|
|
| 114 |
pinecone_dimension = stats.dimension
|
| 115 |
logger.info(f"[{self.correlation_id}] Pinecone index dimension: {pinecone_dimension}")
|
| 116 |
|
| 117 |
+
# Create embedding model (using latest model)
|
| 118 |
embedding_model = GoogleGenerativeAIEmbeddings(
|
| 119 |
+
model="models/text-embedding-004", # Updated to latest model with correct prefix
|
| 120 |
google_api_key=self.google_api_key,
|
| 121 |
task_type="retrieval_document" # Use document embedding mode for longer text
|
| 122 |
)
|