Claude committed on
Commit ·
9d930b9
1
Parent(s): 67b2647
Fix memory issue and improve mobile UI
Browse files
Memory optimization (fixes Render 512MB limit):
- Replace SentenceTransformer with Azure OpenAI embeddings API
- Remove sentence-transformers and torch dependencies (~400MB saved)
- App now fits within Render free tier memory limits
Mobile UI improvements:
- Add tablet breakpoint (992px)
- Improve small mobile breakpoint (480px)
- Better responsive buttons, cards, and grid layouts
- Improve chat input container for mobile
- Better font sizing across all breakpoints
- app/main.py +20 -12
- app/requirements.txt +2 -4
app/main.py
CHANGED
|
@@ -26,7 +26,6 @@ from pydantic import BaseModel
|
|
| 26 |
from dotenv import load_dotenv
|
| 27 |
from openai import AzureOpenAI
|
| 28 |
from pinecone import Pinecone
|
| 29 |
-
from sentence_transformers import SentenceTransformer
|
| 30 |
|
| 31 |
# Load environment variables
|
| 32 |
load_dotenv()
|
|
@@ -88,7 +87,6 @@ templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
|
|
| 88 |
# Initialize clients (lazy loading for faster startup)
|
| 89 |
azure_client = None
|
| 90 |
pinecone_index = None
|
| 91 |
-
embedding_model = None
|
| 92 |
|
| 93 |
|
| 94 |
def get_azure_client():
|
|
@@ -112,13 +110,24 @@ def get_pinecone_index():
|
|
| 112 |
return pinecone_index
|
| 113 |
|
| 114 |
|
| 115 |
-
def
|
| 116 |
-
"""
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
# Request/Response models
|
|
@@ -158,10 +167,9 @@ def retrieve_documents(query: str, top_k: int = 3) -> List[Dict]:
|
|
| 158 |
Best strategy from benchmark: vanilla top-3
|
| 159 |
"""
|
| 160 |
index = get_pinecone_index()
|
| 161 |
-
embed_model = get_embedding_model()
|
| 162 |
|
| 163 |
-
# Generate query embedding
|
| 164 |
-
query_embedding =
|
| 165 |
|
| 166 |
# Search vector database
|
| 167 |
results = index.query(
|
|
|
|
| 26 |
from dotenv import load_dotenv
|
| 27 |
from openai import AzureOpenAI
|
| 28 |
from pinecone import Pinecone
|
|
|
|
| 29 |
|
| 30 |
# Load environment variables
|
| 31 |
load_dotenv()
|
|
|
|
| 87 |
# Initialize clients (lazy loading for faster startup)
|
| 88 |
azure_client = None
|
| 89 |
pinecone_index = None
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def get_azure_client():
|
|
|
|
| 110 |
return pinecone_index
|
| 111 |
|
| 112 |
|
| 113 |
+
def get_embedding(text: str) -> List[float]:
|
| 114 |
+
"""
|
| 115 |
+
Get embedding using Azure OpenAI API instead of local model.
|
| 116 |
+
This saves ~400MB memory by not loading SentenceTransformer locally.
|
| 117 |
+
"""
|
| 118 |
+
client = get_azure_client()
|
| 119 |
+
embedding_model = os.getenv("AZURE_EMBEDDING_MODEL", "text-embedding-ada-002")
|
| 120 |
+
|
| 121 |
+
try:
|
| 122 |
+
response = client.embeddings.create(
|
| 123 |
+
input=text,
|
| 124 |
+
model=embedding_model
|
| 125 |
+
)
|
| 126 |
+
return response.data[0].embedding
|
| 127 |
+
except Exception as e:
|
| 128 |
+
# Fallback: return zero vector if embedding fails
|
| 129 |
+
print(f"Embedding error: {e}")
|
| 130 |
+
return [0.0] * 1536 # ada-002 returns 1536 dimensions
|
| 131 |
|
| 132 |
|
| 133 |
# Request/Response models
|
|
|
|
| 167 |
Best strategy from benchmark: vanilla top-3
|
| 168 |
"""
|
| 169 |
index = get_pinecone_index()
|
|
|
|
| 170 |
|
| 171 |
+
# Generate query embedding using Azure OpenAI (memory efficient)
|
| 172 |
+
query_embedding = get_embedding(query)
|
| 173 |
|
| 174 |
# Search vector database
|
| 175 |
results = index.query(
|
app/requirements.txt
CHANGED
|
@@ -12,10 +12,8 @@ openai==1.54.0
|
|
| 12 |
# Vector database
|
| 13 |
pinecone-client==5.0.0
|
| 14 |
|
| 15 |
-
#
|
| 16 |
-
|
| 17 |
-
torch==2.5.1
|
| 18 |
-
numpy<2.0.0
|
| 19 |
|
| 20 |
# PDF processing and OCR
|
| 21 |
PyMuPDF==1.23.8
|
|
|
|
| 12 |
# Vector database
|
| 13 |
pinecone-client==5.0.0
|
| 14 |
|
| 15 |
+
# Note: Using Azure OpenAI embeddings API instead of local sentence-transformers
|
| 16 |
+
# This saves ~400MB memory making it suitable for Render free tier (512MB limit)
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# PDF processing and OCR
|
| 19 |
PyMuPDF==1.23.8
|