sliitguy committed on
Commit
f0eeb06
·
0 Parent(s):

updated initial files

Browse files
Files changed (10) hide show
  1. .gitattributes +1 -0
  2. .gitignore +21 -0
  3. .python-version +1 -0
  4. Dockerfile +17 -0
  5. README.md +34 -0
  6. app.py +442 -0
  7. main.py +6 -0
  8. pyproject.toml +23 -0
  9. requirements.txt +15 -0
  10. uv.lock +0 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ .env
13
+
14
+ __pycache__/
15
+ *.pyc
16
+ .env
17
+ .venv/
18
+ venv/
19
+ greenstep_education.db/
20
+ *.log
21
+ .DS_Store
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.11 as base image (matches .python-version and requires-python >=3.11)
FROM python:3.11

# Set working directory
WORKDIR /app

# Install Python dependencies first so this layer stays cached until
# requirements.txt itself changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the remaining files to the container
COPY . /app

# Expose port 7860 (Hugging Face Spaces default port)
EXPOSE 7860

# Run the FastAPI app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GreenStep Education Chatbot
3
+ emoji: 🌱
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: mit
10
+ ---
11
+
12
+ # GreenStep Education Assistant
13
+
14
+ An AI-powered chatbot for reforestation education, providing information about tree planting, forest conservation, and environmental sustainability.
15
+
16
+ ## Features
17
+ - RAG-based question answering using PDF educational content
18
+ - MongoDB chat history persistence
19
+ - Multi-method PDF text extraction
20
+ - Conversational AI with context awareness
21
+
22
+ ## API Endpoints
23
+ - `POST /ask` - Ask questions about reforestation
24
+ - `POST /history` - Get chat history for a session
25
+ - `DELETE /history/{session_id}` - Clear session history
26
+ - `GET /health` - Health check endpoint
27
+ - `GET /` - API information
28
+
29
+ ## Environment Variables Required
30
+ Set these in your Space's Settings:
31
+ - `HF_TOKEN` - Hugging Face API token
32
+ - `GROQ_API_KEY` - Groq API key
33
+ - `MONGODB_URL` - MongoDB connection string
34
+ - `PDF_PATH` - Path to educational PDF (default: ./reforestation_content.pdf)
app.py ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import json
4
+ import re
5
+ import logging
6
+ import tempfile
7
+ import base64
8
+ from uuid import uuid4
9
+ from typing import Optional, List
10
+ from fastapi import FastAPI, UploadFile, File, HTTPException
11
+ from fastapi.responses import JSONResponse
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from pydantic import BaseModel
14
+ from dotenv import load_dotenv
15
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
16
+ from langchain.chains.combine_documents import create_stuff_documents_chain
17
+ from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
18
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
19
+ from langchain_core.documents import Document
20
+ from langchain_groq import ChatGroq
21
+ from langchain_huggingface import HuggingFaceEmbeddings
22
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
23
+ from langchain_community.document_loaders import PyPDFLoader
24
+ from langchain_chroma import Chroma
25
+ from pymongo import MongoClient
26
+
27
+ # Alternative PDF libraries for fallback
28
+ try:
29
+ from pypdf import PdfReader
30
+ PYPDF_AVAILABLE = True
31
+ except ImportError:
32
+ PYPDF_AVAILABLE = False
33
+
34
+ try:
35
+ import fitz # PyMuPDF
36
+ PYMUPDF_AVAILABLE = True
37
+ except ImportError:
38
+ PYMUPDF_AVAILABLE = False
39
+
40
+
41
+ # Configure logging
42
+ logging.basicConfig(level=logging.INFO)
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
# Load environment variables (values from a local .env file are picked up too)
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MONGODB_URL = os.getenv("MONGODB_URL")
MONGODB_DATABASE = os.getenv("MONGODB_DATABASE", "greenstep_education")
MONGODB_COLLECTION = os.getenv("MONGODB_COLLECTION", "chat_history")
HOST = os.getenv("HOST", "0.0.0.0")
PORT = int(os.getenv("PORT", 5000))
PDF_PATH = os.getenv("PDF_PATH", "./reforestation_content.pdf")


# Validate environment variables and report exactly which ones are missing,
# so a misconfigured deployment can be fixed without guessing.
_required_settings = {
    "HF_TOKEN": HF_TOKEN,
    "GROQ_API_KEY": GROQ_API_KEY,
    "PDF_PATH": PDF_PATH,
    "MONGODB_URL": MONGODB_URL,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    logger.error("Missing required environment variables: %s", ", ".join(_missing_settings))
    raise RuntimeError(
        "Environment variables not set. Please check HF_TOKEN, GROQ_API_KEY, PDF_PATH, and MONGODB_URL "
        f"(missing: {', '.join(_missing_settings)})"
    )
62
+
63
+
64
# Create the shared MongoDB client and verify the server answers a ping;
# the module refuses to start when the database cannot be reached.
try:
    mongo_client = MongoClient(MONGODB_URL)
    mongo_client.admin.command('ping')
except Exception as connect_error:
    logger.error(f"Failed to connect to MongoDB: {str(connect_error)}")
    raise RuntimeError("MongoDB connection failed")
else:
    logger.info("MongoDB connection successful")
72
+
73
+
74
# Initialize FastAPI app
app = FastAPI(
    title="GreenStep Education API",
    description="Educational chatbot API for GreenStep reforestation app.",
    version="1.0.0",
)


# Configure CORS
# Credentials stay disabled while every origin is allowed: combining wildcard
# origins with allow_credentials=True lets any website issue credentialed
# requests. The API identifies sessions through the request body and path,
# so it does not need cookies. List explicit origins before re-enabling it.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["GET", "POST", "DELETE"],
    allow_headers=["*"],
)


# Initialize RAG components: the embedding model used to index and query the
# PDF chunks, and the Groq-hosted chat model that writes the answers.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = ChatGroq(model_name="openai/gpt-oss-20b")
95
+
96
+
97
def extract_text_with_pypdf(file_path: str) -> List[Document]:
    """Read a PDF with pypdf and return one Document per page that has text.

    Pages whose extracted text is only whitespace are skipped. Each Document
    records the source path and the zero-based page index in its metadata.
    Any failure is logged and an empty list is returned.
    """
    try:
        page_texts = (
            (index, pdf_page.extract_text())
            for index, pdf_page in enumerate(PdfReader(file_path).pages)
        )
        pages_with_text = [
            Document(
                page_content=body,
                metadata={"source": file_path, "page": index},
            )
            for index, body in page_texts
            if body.strip()  # keep non-empty pages only
        ]
    except Exception as error:
        logger.error(f"pypdf extraction failed: {str(error)}")
        return []

    logger.info(f"pypdf extracted text from {len(pages_with_text)} pages")
    return pages_with_text
117
+
118
+
119
def extract_text_with_pymupdf(file_path: str) -> List[Document]:
    """Extract text using PyMuPDF (fitz) library - often better for complex PDFs.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        One Document per page that contains non-whitespace text, with the
        source path and zero-based page index in its metadata. An empty list
        is returned when extraction fails for any reason (the error is logged).
    """
    try:
        documents = []

        # The context manager closes the file handle even when a page fails
        # to load or extract; previously an exception skipped doc.close().
        with fitz.open(file_path) as doc:
            for page_num, page in enumerate(doc):
                text = page.get_text()
                if text.strip():  # Only add non-empty pages
                    documents.append(
                        Document(
                            page_content=text,
                            metadata={"source": file_path, "page": page_num},
                        )
                    )

        logger.info(f"PyMuPDF extracted text from {len(documents)} pages")
        return documents
    except Exception as e:
        logger.error(f"PyMuPDF extraction failed: {str(e)}")
        return []
141
+
142
+
143
def process_pdf(file_path: str):
    """Process PDF with multiple fallback methods for robust text extraction.

    The PDF is read with PyPDFLoader first, then with pypdf directly, then
    with PyMuPDF (each fallback only when the previous method produced no
    text and the library is available). The text is split into overlapping
    chunks and stored in a Chroma vectorstore persisted under
    ``./greenstep_education.db``.

    Args:
        file_path: Path of the PDF to index.

    Returns:
        The Chroma vectorstore built from the PDF chunks.

    Raises:
        RuntimeError: If the file is missing, no text can be extracted, or
            any other step fails. The original exception is attached as the
            cause.
    """
    # Local import keeps this block self-contained; hashlib is standard library.
    import hashlib

    try:
        # Check if file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"PDF file not found at: {file_path}")

        logger.info(f"Processing PDF from: {file_path}")
        documents = []

        # Method 1: Try LangChain's PyPDFLoader (uses pypdf internally)
        try:
            logger.info("Attempting extraction with PyPDFLoader...")
            loader = PyPDFLoader(file_path)
            documents = loader.load()

            if documents and any(doc.page_content.strip() for doc in documents):
                logger.info(f"PyPDFLoader successfully loaded {len(documents)} pages")
            else:
                documents = []
                logger.warning("PyPDFLoader returned empty documents")
        except Exception as e:
            # Deliberately best-effort: fall through to the next method.
            logger.warning(f"PyPDFLoader failed: {str(e)}")

        # Method 2: Try direct pypdf if available and previous method failed
        if not documents and PYPDF_AVAILABLE:
            logger.info("Attempting extraction with pypdf directly...")
            documents = extract_text_with_pypdf(file_path)

        # Method 3: Try PyMuPDF as fallback (often best for complex PDFs)
        if not documents and PYMUPDF_AVAILABLE:
            logger.info("Attempting extraction with PyMuPDF (fitz)...")
            documents = extract_text_with_pymupdf(file_path)

        # Validate that documents were loaded
        if not documents:
            raise ValueError(
                "Failed to extract text from PDF with all available methods. "
                "The PDF might be:\n"
                "1. Empty or corrupted\n"
                "2. Password-protected\n"
                "3. Scanned images without OCR (consider using pytesseract)\n"
                "4. Using unsupported encryption"
            )

        # Check if any text was actually extracted
        total_text = "".join(doc.page_content for doc in documents)
        if not total_text.strip():
            raise ValueError("No text content found in PDF. It may contain only images.")

        logger.info(f"Successfully extracted {len(total_text)} characters from {len(documents)} pages")

        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=5000,
            chunk_overlap=500,
            length_function=len,
            separators=["\n\n", "\n", ". ", " ", ""]
        )
        splits = text_splitter.split_documents(documents)

        # Filter out empty chunks
        splits = [doc for doc in splits if doc.page_content.strip()]

        if not splits:
            raise ValueError("Text splitting resulted in zero valid chunks.")

        logger.info(f"Created {len(splits)} text chunks for vectorization")

        # Derive a stable id for every chunk. The store is persisted on disk,
        # so without explicit ids each application start would append the same
        # chunks again under fresh random ids and retrieval would return
        # duplicates. The position is part of the hash so that two chunks with
        # identical text still get distinct ids within one run.
        chunk_ids = [
            hashlib.sha256(f"{position}:{chunk.page_content}".encode("utf-8")).hexdigest()
            for position, chunk in enumerate(splits)
        ]

        # Create vectorstore
        vectorstore = Chroma.from_documents(
            documents=splits,
            embedding=embeddings,
            ids=chunk_ids,
            persist_directory="./greenstep_education.db"
        )

        logger.info("Vectorstore created successfully")
        return vectorstore

    except FileNotFoundError as e:
        logger.error(f"File not found: {str(e)}")
        raise RuntimeError(f"PDF file not found: {str(e)}") from e
    except ValueError as e:
        logger.error(f"Invalid PDF content: {str(e)}")
        raise RuntimeError(f"PDF processing failed: {str(e)}") from e
    except Exception as e:
        logger.error(f"Unexpected error processing PDF: {str(e)}", exc_info=True)
        raise RuntimeError(f"PDF processing failed: {str(e)}") from e
231
+
232
+
233
def get_session_history(session_id: str) -> MongoDBChatMessageHistory:
    """Return the MongoDB-backed chat history object for ``session_id``.

    The history uses the connection string, database and collection that
    were configured at module level.
    """
    history_settings = {
        "connection_string": MONGODB_URL,
        "session_id": session_id,
        "database_name": MONGODB_DATABASE,
        "collection_name": MONGODB_COLLECTION,
        "create_index": True,
    }
    return MongoDBChatMessageHistory(**history_settings)
242
+
243
+
244
# Build the vectorstore and its retriever once at import time; when that
# fails, log troubleshooting hints and abort start-up.
try:
    logger.info(f"Initializing vectorstore from PDF: {PDF_PATH}")
    vectorstore = process_pdf(PDF_PATH)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
    logger.info("Vectorstore initialized successfully")
except Exception as init_error:
    _failure_message = f"Vectorstore initialization failed: {str(init_error)}"
    logger.error(_failure_message)
    for _hint in (
        "\nTroubleshooting steps:",
        "1. Verify PDF file exists at the specified path",
        "2. Ensure PDF contains extractable text (not just scanned images)",
        "3. Check if PDF is password-protected",
        "4. Try opening the PDF manually to verify it's not corrupted",
        "\nInstall additional libraries for better PDF support:",
        " pip install pypdf pymupdf",
    ):
        logger.error(_hint)
    raise RuntimeError(_failure_message)
260
+
261
+
262
class QuestionRequest(BaseModel):
    """Request body of ``POST /ask``."""

    # Identifier used to look up and store the session's chat history.
    session_id: str
    # The user's question text.
    question: str
265
+
266
+
267
class QuestionResponse(BaseModel):
    """Response body of ``POST /ask``."""

    # The assistant's reply text.
    answer: str
269
+
270
+
271
class SessionHistoryRequest(BaseModel):
    """Request body of ``POST /history``."""

    # Identifier of the session whose history is requested.
    session_id: str
273
+
274
+
275
class SessionHistoryResponse(BaseModel):
    """Response body of ``POST /history``."""

    # Session the messages belong to.
    session_id: str
    # Number of entries in ``messages``.
    message_count: int
    # One dict per stored message.
    messages: List[dict]
279
+
280
+
281
@app.post(
    "/ask",
    response_model=QuestionResponse,
    summary="Ask the GreenStep education assistant",
    description="Submit a question to learn about reforestation, trees, forests, and environmental conservation."
)
async def ask_question(request: QuestionRequest):
    """Handle question and maintain chat history in MongoDB.

    The last six stored messages of the session are passed as chat history to
    a history-aware retrieval chain. The model's answer (with any
    ``<think>...</think>`` blocks removed) is returned, and the question and
    answer are appended to the session history.

    Args:
        request: The session id and the question text.

    Returns:
        QuestionResponse containing the cleaned answer.

    Raises:
        HTTPException: 400 when the question is blank; 500 when retrieval,
            generation or history persistence fails.
    """
    session_id = request.session_id
    question = request.question
    logger.info(f"Received question for session {session_id}: {question}")

    # Reject blank input up front instead of sending an empty prompt to the
    # model and storing an empty message. Raised outside the try block so the
    # generic handler below does not turn it into a 500.
    if not question.strip():
        raise HTTPException(status_code=400, detail="Question must not be empty")

    try:
        # Get MongoDB chat history; slicing already copes with short lists
        history = get_session_history(session_id)
        last_messages = history.messages[-6:]

        # Contextualize question
        contextualize_q_prompt = ChatPromptTemplate.from_messages([
            ("system", "Rephrase the user's question considering the chat history to provide better context."),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}")
        ])

        history_aware_retriever = create_history_aware_retriever(
            llm, retriever, contextualize_q_prompt
        )

        # System prompt for GreenStep
        system_prompt = """You are the GreenStep Education Assistant, a friendly, knowledgeable, and inspiring chatbot
designed to educate users about reforestation, tree planting, forest conservation, and environmental
sustainability within the GreenStep app's Education tab.

Your primary mission is to empower users with accurate, actionable knowledge about forests and their
role in combating climate change, while fostering a deep appreciation for nature and encouraging
environmental action.

Use the following verified educational content to answer questions:

{context}

Your responses should be:
1. Educational and engaging about reforestation, tree species, planting techniques, and environmental benefits
2. Scientifically accurate based on the provided educational content
3. Inspiring and action-oriented, motivating users to participate in reforestation
4. Accessible to diverse audiences with clear, jargon-free language
5. Balanced and honest about both opportunities and challenges
6. Interactive and conversational, building on previous discussions
7. Positive and solutions-focused, emphasizing hope and agency

Remember: Transform users from passive learners into informed environmental advocates who
understand reforestation science and feel empowered to contribute through GreenStep.
"""

        qa_prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}")
        ])

        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

        # Get response. The async entry point is awaited so the event loop can
        # serve other requests while retrieval and generation run; the
        # synchronous invoke() blocked the loop for the whole model call.
        result = await rag_chain.ainvoke({
            "input": question,
            "chat_history": last_messages
        })
        raw_answer = result["answer"]

        # Remove <think>...</think> blocks
        cleaned_answer = re.sub(r"<think>.*?</think>\s*", "", raw_answer, flags=re.DOTALL).strip()

        # Update history
        history.add_user_message(question)
        history.add_ai_message(cleaned_answer)

        logger.info(f"Response saved to MongoDB for session {session_id}")
        return QuestionResponse(answer=cleaned_answer)

    except Exception as e:
        # exc_info keeps the traceback in the server log for diagnosis.
        logger.error(f"Error processing question: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e
365
+
366
+
367
@app.post("/history", response_model=SessionHistoryResponse)
async def get_history(request: SessionHistoryRequest):
    """Return every stored message of the requested session.

    Each message is reduced to its ``type`` and ``content``. Any failure is
    logged and reported as HTTP 500.
    """
    requested_session = request.session_id
    try:
        stored_messages = get_session_history(requested_session).messages
        serialized = []
        for entry in stored_messages:
            serialized.append({"type": entry.type, "content": entry.content})
        return SessionHistoryResponse(
            session_id=requested_session,
            message_count=len(stored_messages),
            messages=serialized,
        )
    except Exception as error:
        logger.error(f"Error retrieving history: {str(error)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve history: {str(error)}")
382
+
383
+
384
@app.delete("/history/{session_id}")
async def clear_history(session_id: str):
    """Delete all stored messages of ``session_id`` and confirm the deletion.

    Any failure is logged and reported as HTTP 500.
    """
    try:
        get_session_history(session_id).clear()
    except Exception as error:
        logger.error(f"Error clearing history: {str(error)}")
        raise HTTPException(status_code=500, detail=f"Failed to clear history: {str(error)}")

    logger.info(f"Cleared history for session {session_id}")
    confirmation = f"History cleared for session {session_id}"
    return {"message": confirmation}
395
+
396
+
397
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Pings MongoDB and reports the outcome together with the vectorstore state
    and the availability of the optional PDF libraries.

    Returns:
        A dict whose ``status`` is ``"healthy"`` when the MongoDB ping
        succeeds and ``"degraded"`` otherwise. The remaining keys are
        unchanged: ``app``, ``mongodb``, ``vectorstore``, ``pdf_libraries``.
    """
    try:
        mongo_client.admin.command('ping')
    except Exception as e:
        mongo_ok = False
        mongo_status = f"disconnected: {str(e)}"
    else:
        mongo_ok = True
        mongo_status = "connected"

    return {
        # Chat history depends on MongoDB, so an unreachable database must not
        # be reported as healthy.
        "status": "healthy" if mongo_ok else "degraded",
        "app": "GreenStep Education Assistant",
        "mongodb": mongo_status,
        "vectorstore": "initialized" if vectorstore else "not initialized",
        "pdf_libraries": {
            "pypdf": PYPDF_AVAILABLE,
            "pymupdf": PYMUPDF_AVAILABLE
        }
    }
416
+
417
+
418
@app.get("/")
async def root():
    """Describe the API and list its endpoints."""
    endpoint_index = {
        "ask_question": "/ask",
        "get_history": "/history",
        "clear_history": "/history/{session_id}",
        "health_check": "/health",
        "documentation": "/docs",
    }
    overview = {
        "message": "Welcome to GreenStep Education API",
        "description": "Learn about reforestation, tree planting, and environmental conservation",
        "endpoints": endpoint_index,
    }
    return overview
431
+
432
+
433
@app.on_event("shutdown")
async def shutdown_event():
    """Release the shared MongoDB client when the application stops."""
    mongo_client.close()
    closed_notice = "MongoDB connection closed"
    logger.info(closed_notice)
438
+
439
+
440
# Start a development server when the module is executed directly.
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host=HOST, port=PORT)
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def main():
    """Print the project greeting to standard output."""
    greeting = "Hello from greenstepchatbot!"
    print(greeting)


if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "greenstepchatbot"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "dotenv>=0.9.9",
9
+ "fastapi>=0.119.0",
10
+ "langchain>=0.3.27",
11
+ "langchain-chroma>=0.2.6",
12
+ "langchain-community>=0.3.31",
13
+ "langchain-core>=0.3.79",
14
+ "langchain-groq>=0.3.8",
15
+ "langchain-huggingface>=0.3.1",
16
+ "langchain-mongodb>=0.7.1",
17
+ "pillow>=11.3.0",
18
+ "pydantic>=2.12.1",
19
+ "pypdf>=6.1.1",
20
+ "sentence-transformers>=5.1.1",
21
+ "streamlit>=1.50.0",
22
+ "uvicorn>=0.37.0",
23
+ ]
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ langchain
4
+ langchain_groq
5
+ langchain_core
6
+ langchain_community
7
+ langchain_chroma
8
+ langchain_huggingface
9
+ dotenv
10
+ pydantic
11
+ pillow
12
+ sentence-transformers
13
+ pypdf
14
+ streamlit
15
+ langchain_mongodb
uv.lock ADDED
The diff for this file is too large to render. See raw diff