Claude commited on
Commit
9d930b9
·
1 Parent(s): 67b2647

Fix memory issue and improve mobile UI

Browse files

Memory optimization (fixes Render 512MB limit):
- Replace SentenceTransformer with Azure OpenAI embeddings API
- Remove sentence-transformers and torch dependencies (~400MB saved)
- App now fits within Render free tier memory limits

Mobile UI improvements:
- Add tablet breakpoint (992px)
- Improve small mobile breakpoint (480px)
- Better responsive buttons, cards, and grid layouts
- Improve chat input container for mobile
- Better font sizing across all breakpoints

Files changed (2) hide show
  1. app/main.py +20 -12
  2. app/requirements.txt +2 -4
app/main.py CHANGED
@@ -26,7 +26,6 @@ from pydantic import BaseModel
26
  from dotenv import load_dotenv
27
  from openai import AzureOpenAI
28
  from pinecone import Pinecone
29
- from sentence_transformers import SentenceTransformer
30
 
31
  # Load environment variables
32
  load_dotenv()
@@ -88,7 +87,6 @@ templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
88
  # Initialize clients (lazy loading for faster startup)
89
  azure_client = None
90
  pinecone_index = None
91
- embedding_model = None
92
 
93
 
94
  def get_azure_client():
@@ -112,13 +110,24 @@ def get_pinecone_index():
112
  return pinecone_index
113
 
114
 
115
- def get_embedding_model():
116
- """Lazy load embedding model"""
117
- global embedding_model
118
- if embedding_model is None:
119
- # Best performing model from benchmark
120
- embedding_model = SentenceTransformer("BAAI/bge-large-en-v1.5")
121
- return embedding_model
 
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  # Request/Response models
@@ -158,10 +167,9 @@ def retrieve_documents(query: str, top_k: int = 3) -> List[Dict]:
158
  Best strategy from benchmark: vanilla top-3
159
  """
160
  index = get_pinecone_index()
161
- embed_model = get_embedding_model()
162
 
163
- # Generate query embedding
164
- query_embedding = embed_model.encode(query).tolist()
165
 
166
  # Search vector database
167
  results = index.query(
 
26
  from dotenv import load_dotenv
27
  from openai import AzureOpenAI
28
  from pinecone import Pinecone
 
29
 
30
  # Load environment variables
31
  load_dotenv()
 
87
  # Initialize clients (lazy loading for faster startup)
88
  azure_client = None
89
  pinecone_index = None
 
90
 
91
 
92
  def get_azure_client():
 
110
  return pinecone_index
111
 
112
 
113
+ def get_embedding(text: str) -> List[float]:
114
+ """
115
+ Get embedding using Azure OpenAI API instead of local model.
116
+ This saves ~400MB memory by not loading SentenceTransformer locally.
117
+ """
118
+ client = get_azure_client()
119
+ embedding_model = os.getenv("AZURE_EMBEDDING_MODEL", "text-embedding-ada-002")
120
+
121
+ try:
122
+ response = client.embeddings.create(
123
+ input=text,
124
+ model=embedding_model
125
+ )
126
+ return response.data[0].embedding
127
+ except Exception as e:
128
+ # Fallback: return zero vector if embedding fails
129
+ print(f"Embedding error: {e}")
130
+ return [0.0] * 1536 # ada-002 returns 1536 dimensions
131
 
132
 
133
  # Request/Response models
 
167
  Best strategy from benchmark: vanilla top-3
168
  """
169
  index = get_pinecone_index()
 
170
 
171
+ # Generate query embedding using Azure OpenAI (memory efficient)
172
+ query_embedding = get_embedding(query)
173
 
174
  # Search vector database
175
  results = index.query(
app/requirements.txt CHANGED
@@ -12,10 +12,8 @@ openai==1.54.0
12
  # Vector database
13
  pinecone-client==5.0.0
14
 
15
- # Embeddings - Updated for compatibility
16
- sentence-transformers==3.3.1
17
- torch==2.5.1
18
- numpy<2.0.0
19
 
20
  # PDF processing and OCR
21
  PyMuPDF==1.23.8
 
12
  # Vector database
13
  pinecone-client==5.0.0
14
 
15
+ # Note: Using Azure OpenAI embeddings API instead of local sentence-transformers
16
+ # This saves ~400MB memory making it suitable for Render free tier (512MB limit)
 
 
17
 
18
  # PDF processing and OCR
19
  PyMuPDF==1.23.8