bhoomika19 committed on
Commit
30ac8b8
·
verified ·
1 Parent(s): 9b60645

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env.example +243 -0
  2. Dockerfile +34 -0
  3. app.py +16 -0
  4. docker-compose.yml +86 -0
  5. requirements.txt +34 -0
.env.example ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment Configuration Template
2
+ # Copy this file to .env and fill in your actual values
3
+
4
+ # =============================================================================
5
+ # Core Application Settings
6
+ # =============================================================================
7
+ ENVIRONMENT=development
8
+ DEBUG=true
9
+ LOG_LEVEL=INFO
10
+ APP_NAME=Context-Aware Research Assistant
11
+ APP_VERSION=1.0.0
12
+
13
+ # =============================================================================
14
+ # API Keys (Required)
15
+ # =============================================================================
16
+
17
+ # Google Gemini API Key (Required)
18
+ # Get your free API key from Google AI Studio: https://aistudio.google.com/app/apikey
19
+ GOOGLE_API_KEY=your_gemini_api_key_here
20
+
21
+ # Google Custom Search API (Required for web search)
22
+ # Get the Search Engine ID from: https://cse.google.com/cse/ (the API key itself is created at https://console.cloud.google.com/)
23
+ GOOGLE_CSE_ID=your_custom_search_engine_id_here
24
+
25
+ # Together AI API Key (Required for Llama models)
26
+ # Sign up at: https://together.ai/
27
+ TOGETHER_API_KEY=your_together_api_key_here
28
+
29
+ # Tavily Search API Key (Optional but recommended)
30
+ # Sign up at: https://tavily.com/
31
+ TAVILY_API_KEY=your_tavily_api_key_here
32
+
33
+ # LangSmith (Optional - for monitoring and debugging; set LANGCHAIN_TRACING_V2=false if you do not supply a key)
34
+ LANGSMITH_API_KEY=your_langsmith_api_key_here
35
+ LANGCHAIN_TRACING_V2=true
36
+ LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
37
+ LANGCHAIN_PROJECT=context-aware-research-app
38
+
39
+ # =============================================================================
40
+ # Application Configuration
41
+ # =============================================================================
42
+
43
+ # Supabase Configuration (Optional - for database features)
44
+ # Sign up at: https://supabase.com/
45
+ SUPABASE_URL=your_supabase_project_url_here
46
+ SUPABASE_KEY=your_supabase_anon_key_here
47
+ SUPABASE_SERVICE_KEY=your_supabase_service_role_key_here
48
+ DATABASE_URL=postgresql://postgres:your_password@db.your-project-ref.supabase.co:5432/postgres
49
+
50
+ # Server Configuration
51
+ API_HOST=0.0.0.0
52
+ API_PORT=8000
53
+ API_WORKERS=1
54
+ FRONTEND_PORT=8501
55
+
56
+ # Request Settings
57
+ API_TIMEOUT=60
58
+ MAX_SOURCES=10
59
+ MAX_RETRIES=3
60
+ REQUEST_TIMEOUT=300
61
+
62
+ # Rate Limiting
63
+ REQUESTS_PER_MINUTE=60
64
+ RATE_LIMIT_PER_MINUTE=30
65
+ BURST_LIMIT=100
66
+
67
+ # =============================================================================
68
+ # Research Configuration
69
+ # =============================================================================
70
+
71
+ # Search Settings
72
+ DEFAULT_SEARCH_DEPTH=2
73
+ MAX_SEARCH_RESULTS=20
74
+ CONTENT_FETCH_TIMEOUT=30
75
+ MAX_CONTENT_LENGTH=50000
76
+
77
+ # Quality Thresholds
78
+ MIN_CONFIDENCE_SCORE=5.0
79
+ MIN_WORD_COUNT=100
80
+
81
+ # Context Management
82
+ MAX_CONTEXT_HISTORY=5
83
+ CONTEXT_SUMMARY_LENGTH=1000
84
+
85
+ # =============================================================================
86
+ # LLM Configuration
87
+ # =============================================================================
88
+
89
+ # Model Selection
90
+ DEFAULT_LLM_PROVIDER=gemini
91
+ GEMINI_MODEL=gemini-1.5-flash
92
+ PRIMARY_MODEL=gemini-1.5-flash
93
+ FALLBACK_MODEL=gemini-1.5-flash
94
+ ANALYSIS_MODEL=gemini-1.5-pro
95
+
96
+ # Token Limits
97
+ MAX_TOKENS=4096
98
+ MAX_INPUT_TOKENS=30000
99
+ MAX_OUTPUT_TOKENS=8000
100
+ TEMPERATURE=0.1
101
+
102
+ # =============================================================================
103
+ # Cache and Performance
104
+ # =============================================================================
105
+
106
+ # Caching
107
+ CACHE_TTL=3600
108
+ ENABLE_CACHE=true
109
+ CACHE_TYPE=memory
110
+
111
+ # Performance
112
+ PARALLEL_REQUESTS=5
113
+ BATCH_SIZE=10
114
+ ENABLE_STREAMING=true
115
+
116
+ # =============================================================================
117
+ # Security Settings
118
+ # =============================================================================
119
+
120
+ # CORS
121
+ CORS_ORIGINS=["http://localhost:8501", "http://127.0.0.1:8501"]
122
+ CORS_METHODS=["GET", "POST", "OPTIONS"]
123
+ CORS_HEADERS=["*"]
124
+
125
+ # Security Headers
126
+ ENABLE_SECURITY_HEADERS=true
127
+ SECRET_KEY=your_secret_key_here_change_in_production
128
+
129
+ # =============================================================================
130
+ # Monitoring and Observability
131
+ # =============================================================================
132
+
133
+ # Logging
134
+ LOG_FILE=logs/app.log
135
+ LOG_ROTATION=daily
136
+ LOG_RETENTION_DAYS=30
137
+
138
+ # Metrics
139
+ ENABLE_METRICS=true
140
+ METRICS_PORT=9090
141
+
142
+ # =============================================================================
143
+ # Feature Flags
144
+ # =============================================================================
145
+
146
+ # Experimental Features
147
+ ENABLE_PDF_EXPORT=false
148
+ ENABLE_WORD_CLOUD=false
149
+ ENABLE_ANALYTICS_DASHBOARD=false
150
+ ENABLE_VOICE_INTERFACE=false
151
+
152
+ # Data Sources
153
+ ENABLE_ACADEMIC_SEARCH=false
154
+ ENABLE_NEWS_SEARCH=true
155
+ ENABLE_SOCIAL_MEDIA=false
156
+
157
+ # =============================================================================
158
+ # Development Settings
159
+ # =============================================================================
160
+
161
+ # Development Only
162
+ RELOAD_ON_CHANGE=true
163
+ ENABLE_DEBUG_TOOLBAR=true
164
+ SHOW_SQL_QUERIES=false
165
+ MOCK_EXTERNAL_APIS=false
166
+
167
+ # Testing
168
+ ENABLE_TEST_MODE=false
169
+
170
+ # =============================================================================
171
+ # Production Overrides
172
+ # =============================================================================
173
+
174
+ # Uncomment these for production deployment
175
+ # ENVIRONMENT=production
176
+ # DEBUG=false
177
+ # LOG_LEVEL=WARNING
178
+ # ENABLE_SECURITY_HEADERS=true
179
+ # ENABLE_RATE_LIMITING=true
180
+ # CACHE_TYPE=redis
181
+ # REDIS_URL=redis://localhost:6379/0
182
+
183
+ # =============================================================================
184
+ # STREAMLIT CLOUD DEPLOYMENT
185
+ # =============================================================================
186
+ # When deploying to Streamlit Cloud, add these to your app secrets:
187
+ #
188
+ # Required secrets:
189
+ # GOOGLE_API_KEY = "your_gemini_api_key_here"
190
+ # GOOGLE_CSE_ID = "your_custom_search_engine_id_here"
191
+ # TOGETHER_API_KEY = "your_together_api_key_here"
192
+ # TAVILY_API_KEY = "your_tavily_api_key_here"  # optional but recommended
193
+ # LANGSMITH_API_KEY = "your_langsmith_api_key_here"  # optional (monitoring and debugging only)
194
+ #
195
+ # Optional secrets:
196
+ # SUPABASE_URL = "your_supabase_project_url_here"
197
+ # SUPABASE_KEY = "your_supabase_anon_key_here"
198
+ # SUPABASE_SERVICE_KEY = "your_supabase_service_role_key_here"
199
+ #
200
+ # Configuration secrets:
201
+ # LANGCHAIN_TRACING_V2 = "true"
202
+ # LANGCHAIN_PROJECT = "context-aware-research-app"
203
+ # DEFAULT_LLM_PROVIDER = "gemini"
204
+ # GEMINI_MODEL = "gemini-1.5-flash"
205
+ # MAX_TOKENS = "4096"
206
+ # TEMPERATURE = "0.1"
207
+ #
208
+ # =============================================================================
209
+ # API SETUP INSTRUCTIONS
210
+ # =============================================================================
211
+ #
212
+ # 1. Google API Setup:
213
+ # a) Go to: https://console.cloud.google.com/
214
+ # b) Create new project or select existing
215
+ # c) Enable "Custom Search API"
216
+ # d) Go to "Credentials" → "Create Credentials" → "API Key"
217
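+ # e) Copy the API key to GOOGLE_API_KEY (NOTE(review): this variable is also documented as the Gemini key - confirm one key is meant to serve both APIs)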
218
+ # f) Go to: https://cse.google.com/cse/
219
+ # g) Create a new search engine
220
+ # h) Copy the Search Engine ID to GOOGLE_CSE_ID
221
+ #
222
+ # 2. Together AI Setup:
223
+ # a) Sign up at: https://together.ai/
224
+ # b) Go to "API Keys" section
225
+ # c) Generate new API key
226
+ # d) Copy to TOGETHER_API_KEY
227
+ #
228
+ # 3. LangSmith Setup (Optional):
229
+ # a) Sign up at: https://smith.langchain.com/
230
+ # b) Go to "Settings" → "API Keys"
231
+ # c) Generate new API key
232
+ # d) Copy to LANGSMITH_API_KEY
233
+ #
234
+ # 4. Tavily Setup (Optional):
235
+ # a) Sign up at: https://tavily.com/
236
+ # b) Get API key from dashboard
237
+ # c) Copy to TAVILY_API_KEY
238
+ #
239
+ # 5. Supabase Setup (Optional):
240
+ # a) Sign up at: https://supabase.com/
241
+ # b) Create new project
242
+ # c) Go to "Settings" → "API"
243
+ # d) Copy URL and keys to respective variables
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONDONTWRITEBYTECODE=1
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PYTHONPATH=/app
7
+
8
+ # Set work directory
9
+ WORKDIR /app
10
+
11
+ # Install system dependencies; curl is required by the HEALTHCHECK below.
12
+ # software-properties-common was dropped: nothing here calls add-apt-repository,
13
+ # and the package is reportedly unavailable on the Debian release behind current python:3.11-slim images (confirm).
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ build-essential \
16
+ curl \
17
+ git \
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ # Install Python dependencies
20
+ COPY requirements.txt .
21
+ RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
22
+
23
+ # Copy application code
24
+ COPY . .
25
+
26
+ # Expose port 7860 (Hugging Face Spaces standard)
27
+ EXPOSE 7860
28
+
29
+ # Health check for backend; probes the port app.py binds (PORT, default 7860), expanded by the container shell at runtime
30
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
31
+ CMD curl -f http://localhost:${PORT:-7860}/health || exit 1
32
+
33
+ # Run the application via app.py entry point
34
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Entry point for Hugging Face Spaces deployment.
3
+ """
4
+ import uvicorn
5
+ import os
6
+ from app.main import app
7
+
8
+ if __name__ == "__main__":
9
+ port = int(os.getenv("PORT", 7860)) # Hugging Face uses port 7860
10
+ uvicorn.run(
11
+ app,
12
+ host="0.0.0.0",
13
+ port=port,
14
+ workers=1,
15
+ log_level="info"
16
+ )
docker-compose.yml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
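+ # Full-stack service (both backend and frontend). NOTE(review): the image CMD runs app.py, which listens on $PORT (default 7860); unless PORT=8000 is provided (e.g. via .env), the 8000 port mapping and healthcheck below will not reach the server - confirm.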
5
+ research-app:
6
+ build: .
7
+ container_name: research-app-fullstack
8
+ ports:
9
+ - "8000:8000" # FastAPI backend
10
+ - "8501:8501" # Streamlit frontend
11
+ environment:
12
+ - START_MODE=both
13
+ - GOOGLE_API_KEY=${GOOGLE_API_KEY}
14
+ - GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
15
+ - TOGETHER_API_KEY=${TOGETHER_API_KEY}
16
+ - LANGSMITH_API_KEY=${LANGSMITH_API_KEY}
17
+ - TAVILY_API_KEY=${TAVILY_API_KEY}
18
+ - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2:-true}
19
+ - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT:-context-aware-research-app}
20
+ - DEFAULT_LLM_PROVIDER=${DEFAULT_LLM_PROVIDER:-gemini}
21
+ - GEMINI_MODEL=${GEMINI_MODEL:-gemini-1.5-flash}
22
+ - SUPABASE_URL=${SUPABASE_URL}
23
+ - SUPABASE_KEY=${SUPABASE_KEY}
24
+ - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY}
25
+ - DATABASE_URL=${DATABASE_URL}
26
+ env_file:
27
+ - .env
28
+ restart: unless-stopped
29
+ healthcheck:
30
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
31
+ interval: 30s
32
+ timeout: 10s
33
+ retries: 3
34
+ start_period: 40s
35
+
36
+ # Backend-only service
37
+ backend:
38
+ build: .
39
+ container_name: research-app-backend
40
+ ports:
41
+ - "8000:8000"
42
+ environment:
43
+ - START_MODE=backend
44
+ - GOOGLE_API_KEY=${GOOGLE_API_KEY}
45
+ - GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
46
+ - TOGETHER_API_KEY=${TOGETHER_API_KEY}
47
+ - LANGSMITH_API_KEY=${LANGSMITH_API_KEY}
48
+ - TAVILY_API_KEY=${TAVILY_API_KEY}
49
+ - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2:-true}
50
+ - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT:-context-aware-research-app}
51
+ - DEFAULT_LLM_PROVIDER=${DEFAULT_LLM_PROVIDER:-gemini}
52
+ - GEMINI_MODEL=${GEMINI_MODEL:-gemini-1.5-flash}
53
+ - SUPABASE_URL=${SUPABASE_URL}
54
+ - SUPABASE_KEY=${SUPABASE_KEY}
55
+ - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY}
56
+ - DATABASE_URL=${DATABASE_URL}
57
+ env_file:
58
+ - .env
59
+ restart: unless-stopped
60
+ profiles:
61
+ - backend-only
62
+
63
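+ # Frontend-only service (connects to external backend). Inside this container "localhost" is the frontend itself, so set API_URL to a reachable backend address (e.g. http://backend:8000 when the compose backend listens on 8000) instead of relying on the default.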
64
+ frontend:
65
+ build: .
66
+ container_name: research-app-frontend
67
+ ports:
68
+ - "8501:8501"
69
+ environment:
70
+ - START_MODE=frontend
71
+ - API_URL=${API_URL:-http://localhost:8000}
72
+ restart: unless-stopped
73
+ profiles:
74
+ - frontend-only
75
+ depends_on:
76
+ - backend
77
+
78
+ # Networks
79
+ networks:
80
+ default:
81
+ name: research-app-network
82
+
83
+ # Volumes for persistent data (if using local database); app_data is declared here but not mounted by any service above
84
+ volumes:
85
+ app_data:
86
+ driver: local
requirements.txt ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core LangChain and LangGraph dependencies
2
+ langgraph>=0.2.0
3
+ langchain>=0.2.0
4
+ langchain-core>=0.2.0
5
+ langchain-google-genai>=1.0.0
6
+ langchain-together>=0.1.0
7
+ langsmith>=0.1.0
8
+
9
+ # API and Web Framework
10
+ fastapi>=0.104.0
11
+ uvicorn[standard]>=0.24.0
12
+ pydantic>=2.5.0
13
+ pydantic-settings>=2.1.0
14
+
15
+ # HTTP and Web Scraping
16
+ httpx>=0.25.0
17
+ requests>=2.31.0
18
+ beautifulsoup4>=4.12.0
19
+ aiohttp>=3.9.0
20
+ brotli>=1.0.0
21
+ ddgs>=9.5.0
22
+
23
+ # External APIs for enhanced search
24
+ tavily-python>=0.3.0
25
+
26
+ # Utilities
27
+ python-dotenv>=1.0.0
28
+ structlog>=23.2.0
29
+ typer>=0.9.0
30
+ rich>=13.7.0
31
+
32
+ # Data processing
33
+ pandas>=2.1.0
34
+ numpy>=1.24.0