Tahasaif3 commited on
Commit
fe10c91
·
1 Parent(s): a9e8205

'changes'

Browse files
.env ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # SECURITY: live credentials were committed in this file. Rotate/revoke the
+ # OpenAI key, Neon database password, Qdrant key, and Gemini key immediately,
+ # add .env to .gitignore, and purge this commit from history.
+ OPENAI_API_KEY=<set-locally-do-not-commit>
+ NEON_DATABASE_URL=<set-locally-do-not-commit>
+ QDRANT_URL=<set-locally-do-not-commit>
+ QDRANT_API_KEY=<set-locally-do-not-commit>
+ GEMINI_API_KEY=<set-locally-do-not-commit>
DockerFile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Python 3.11 keeps the final image small
FROM python:3.11-slim

# Set work directory
WORKDIR /app

# Install dependencies first so Docker layer caching skips the reinstall
# when only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files
COPY . .

# Expose the port Hugging Face expects
EXPOSE 7860

# Command to run FastAPI with uvicorn
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/config.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application configuration, loaded from the environment and ``.env``.

    pydantic-settings reads every declared field from the process environment
    (and the ``env_file``) automatically, so the previous explicit
    ``os.getenv(...)`` defaults were redundant — plain literals behave the
    same and keep the class declarative.
    """

    # Required credentials: Settings() raises at import if these are unset.
    OPENAI_API_KEY: str
    GEMINI_API_KEY: str
    # Set to "openai" or "gemini" to choose the AI provider
    AI_PROVIDER: str = "openai"
    # Database URLs; app/database.py prefers NEON_DATABASE_URL.
    DATABASE_URL: str = ""
    NEON_DATABASE_URL: str = ""
    # Vector store connection; defaults to a local Qdrant instance.
    QDRANT_URL: str = "http://localhost:6333"
    QDRANT_API_KEY: str = ""
    # Model names used by the service layer.
    OPENAI_MODEL_CHAT: str = "gpt-4o-mini"
    OPENAI_MODEL_EMBEDDING: str = "text-embedding-3-small"
    GEMINI_MODEL_CHAT: str = "gemini-2.0-flash"
    GEMINI_MODEL_EMBEDDING: str = "embedding-001"

    class Config:
        env_file = ".env"
        extra = "ignore"  # tolerate unrelated variables in .env


# Singleton settings object imported throughout the app.
settings = Settings()
app/database.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from app.config import settings

# Use NEON_DATABASE_URL if available, otherwise fall back to DATABASE_URL,
# and finally to a local SQLite file for development.
SQLALCHEMY_DATABASE_URL = settings.NEON_DATABASE_URL or settings.DATABASE_URL or "sqlite:///./test.db"

# SQLite connections are thread-bound by default; FastAPI may serve a request
# on a different thread than the one that created the connection.
_connect_args = (
    {"check_same_thread": False}
    if SQLALCHEMY_DATABASE_URL.startswith("sqlite") else {}
)

# pool_pre_ping validates pooled connections before use — serverless Postgres
# (Neon) closes idle connections, which would otherwise error on first query.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL,
    pool_pre_ping=True,
    connect_args=_connect_args,
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()


def get_db():
    """FastAPI dependency: yield a DB session and always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
app/main.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.routes import chat
from app.database import engine, Base
from app.qdrant_client import init_qdrant_collection


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup hook (lifespan replaces the deprecated @app.on_event API)."""
    # Create database tables
    Base.metadata.create_all(bind=engine)
    # Initialize Qdrant collection
    init_qdrant_collection()
    yield  # application serves requests; nothing to clean up on shutdown


app = FastAPI(title="RAG Chatbot API", lifespan=lifespan)

# CORS Configuration - Allow the local frontend dev server to connect
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(chat.router)


@app.get("/")
async def root():
    """Liveness message for the service root."""
    return {"message": "RAG Chatbot API"}


@app.get("/api/health")
async def health():
    """Simple health-check endpoint for deployments/monitoring."""
    return {"status": "ok"}
app/models/__pycache__/chat.cpython-311.pyc ADDED
Binary file (1.17 kB). View file
 
app/models/__pycache__/user.cpython-311.pyc ADDED
Binary file (794 Bytes). View file
 
app/models/chat.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime, func
from sqlalchemy.orm import relationship
from app.database import Base


class ChatHistory(Base):
    """One question/answer exchange, optionally linked to a user."""

    __tablename__ = "chat_history"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))  # links to users table (app/models/user.py)
    message = Column(String)   # the user's question
    response = Column(String)  # the generated answer
    timestamp = Column(DateTime, default=func.now())  # DB-side now() at insert

    # One-directional relationship; User has no back-reference here.
    user = relationship("User")
app/models/user.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import Column, Integer, String
from app.database import Base


class User(Base):
    """Application user; referenced by ChatHistory.user_id."""

    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    username = Column(String, unique=True, index=True)  # unique login name
    email = Column(String, unique=True, index=True)     # unique contact address
app/qdrant_client.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from app.config import settings

# Single client instance shared by the whole application.
qdrant_client = QdrantClient(
    url=settings.QDRANT_URL,
    api_key=settings.QDRANT_API_KEY,
)

COLLECTION_NAME = "book_embeddings"


def init_qdrant_collection():
    """Initialize Qdrant collection if it doesn't exist"""
    try:
        existing = {c.name for c in qdrant_client.get_collections().collections}
        if COLLECTION_NAME in existing:
            print(f"✅ Qdrant collection already exists: {COLLECTION_NAME}")
        else:
            # 1536-dim cosine vectors — matches OpenAI text-embedding-3-small,
            # the model used at ingestion time.
            qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(
                    size=1536,  # OpenAI text-embedding-3-small dimension
                    distance=Distance.COSINE,
                ),
            )
            print(f"✅ Created Qdrant collection: {COLLECTION_NAME}")
    except Exception as e:
        # Best-effort: startup continues even if Qdrant is unreachable.
        print(f"⚠️ Warning: Could not initialize Qdrant collection: {e}")


def get_qdrant_client():
    """Dependency to get Qdrant client"""
    return qdrant_client
app/routes/__pycache__/chat.cpython-311.pyc ADDED
Binary file (3.74 kB). View file
 
app/routes/chat.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, Depends, HTTPException
from qdrant_client import QdrantClient
from app.qdrant_client import get_qdrant_client
from app.schemas.chat import ChatRequest, ChatResponse, ChatSelectionRequest
from app.services.rag_service import RAGService
from app.services.embeddings_service import EmbeddingsService, GeminiEmbeddingsService
from app.services.openai_service import OpenAIService, GeminiService
from app.config import settings
import logging

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api", tags=["chat"])


def get_rag_service(
    qdrant_client: QdrantClient = Depends(get_qdrant_client)
):
    """Build a RAGService wired for the provider named by settings.AI_PROVIDER."""
    use_gemini = settings.AI_PROVIDER.lower() == "gemini"
    embeddings_service = GeminiEmbeddingsService() if use_gemini else EmbeddingsService()
    ai_service = GeminiService() if use_gemini else OpenAIService()
    return RAGService(qdrant_client, embeddings_service, ai_service)


@router.post("/chat", response_model=ChatResponse)
async def chat(
    request: ChatRequest,
    rag_service: RAGService = Depends(get_rag_service)
):
    """Answer a question grounded in the top-3 chunks from the vector store."""
    try:
        context = await rag_service.retrieve_context(request.question, top_k=3)
        answer = await rag_service.generate_response(request.question, context)
        # One synthetic label per retrieved chunk.
        sources = [f"Source {i+1}" for i in range(len(context))]
        return ChatResponse(answer=answer, sources=sources)
    except Exception as e:
        logger.error(f"Error in chat endpoint: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/chat-selection", response_model=ChatResponse)
async def chat_selection(
    request: ChatSelectionRequest,
    rag_service: RAGService = Depends(get_rag_service)
):
    """Answer a question using only the user's selected text as context."""
    try:
        answer = await rag_service.generate_response(
            request.question, [request.selected_text]
        )
        return ChatResponse(answer=answer, sources=["Selected Text"])
    except Exception as e:
        logger.error(f"Error in chat_selection endpoint: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
app/schemas/__pycache__/chat.cpython-311.pyc ADDED
Binary file (1.84 kB). View file
 
app/schemas/chat.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime


class Message(BaseModel):
    """A single chat message with its speaker role."""
    content: str
    role: str


class ChatRequest(BaseModel):
    """Request body for POST /api/chat."""
    question: str
    user_id: Optional[int] = None  # optional: ties the exchange to a user


class ChatResponse(BaseModel):
    """Answer plus labels of the context chunks used to produce it."""
    answer: str
    sources: List[str] = []


class ChatSelectionRequest(BaseModel):
    """Request body for POST /api/chat-selection: a question about selected text."""
    question: str
    selected_text: str
    user_id: Optional[int] = None


class ChatSelectionResponse(BaseModel):
    # NOTE(review): app/routes/chat.py responds with ChatResponse instead of
    # this model — confirm whether this schema is still needed.
    response: str
app/services/__pycache__/embeddings_service.cpython-311.pyc ADDED
Binary file (1.57 kB). View file
 
app/services/__pycache__/openai_service.cpython-311.pyc ADDED
Binary file (1.84 kB). View file
 
app/services/__pycache__/rag_service.cpython-311.pyc ADDED
Binary file (2.75 kB). View file
 
app/services/embeddings_service.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from openai import OpenAI
from app.config import settings
import httpx
import asyncio
import google.generativeai as genai


class EmbeddingsService:
    """Creates embeddings with the OpenAI embeddings API."""

    def __init__(self):
        # Explicit httpx client avoids passing unsupported kwargs through the
        # OpenAI SDK's default transport construction.
        http_client = httpx.Client()
        self.client = OpenAI(api_key=settings.OPENAI_API_KEY, http_client=http_client)
        # Read the model from settings so config.py is the single source of
        # truth (GeminiEmbeddingsService below already works this way; the
        # previous hard-coded literal could silently drift from config).
        self.model = settings.OPENAI_MODEL_EMBEDDING

    async def create_embedding(self, text: str):
        """Return the embedding vector (list of floats) for *text*."""
        # Collapse newlines — they can degrade embedding quality.
        text = text.replace("\n", " ")
        # The SDK call is blocking; run it in a thread pool.
        response = await asyncio.to_thread(
            self.client.embeddings.create,
            input=[text],
            model=self.model
        )
        return response.data[0].embedding


class GeminiEmbeddingsService:
    """Creates embeddings with Google's Gemini embedding API."""

    def __init__(self):
        genai.configure(api_key=settings.GEMINI_API_KEY)
        # Gemini expects the "models/" prefix on model names.
        self.model = "models/" + settings.GEMINI_MODEL_EMBEDDING

    async def create_embedding(self, text: str):
        """Return the embedding vector for *text*."""
        text = text.replace("\n", " ")
        # The SDK call is blocking; run it in a thread pool.
        response = await asyncio.to_thread(
            genai.embed_content,
            model=self.model,
            content=text
        )
        return response['embedding']
app/services/openai_service.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from openai import OpenAI
from app.config import settings
from typing import List
import httpx
import asyncio
import google.generativeai as genai


class OpenAIService:
    """Chat completions via the OpenAI API."""

    def __init__(self):
        # Explicit httpx client avoids passing unsupported kwargs through the
        # OpenAI SDK's default transport construction.
        http_client = httpx.Client()
        self.client = OpenAI(api_key=settings.OPENAI_API_KEY, http_client=http_client)
        # Read the model from settings so config.py is the single source of
        # truth (GeminiService below already works this way; the previous
        # hard-coded "gpt-4o-mini" ignored settings.OPENAI_MODEL_CHAT).
        self.model = settings.OPENAI_MODEL_CHAT

    async def get_chat_response(self, prompt: str, history: List[dict] = None) -> str:
        """Send *prompt* (after optional *history* messages) and return the reply text."""
        messages = []
        if history:
            messages.extend(history)
        messages.append({"role": "user", "content": prompt})

        # The SDK call is blocking; run it in a thread pool.
        response = await asyncio.to_thread(
            self.client.chat.completions.create,
            model=self.model,
            messages=messages
        )
        return response.choices[0].message.content


class GeminiService:
    """Chat completions via Google's Gemini API."""

    def __init__(self):
        genai.configure(api_key=settings.GEMINI_API_KEY)
        self.model = genai.GenerativeModel(settings.GEMINI_MODEL_CHAT)

    async def get_chat_response(self, prompt: str, history: List[dict] = None) -> str:
        """Send *prompt* with OpenAI-style *history* converted to Gemini's format."""
        # Gemini uses "model" where OpenAI uses "assistant"; all else is "user".
        chat_history = []
        if history:
            for msg in history:
                role = "model" if msg["role"] == "assistant" else "user"
                chat_history.append({"role": role, "parts": [msg["content"]]})

        # Create a chat session seeded with the converted history.
        chat = self.model.start_chat(history=chat_history)

        # The SDK call is blocking; run it in a thread pool.
        response = await asyncio.to_thread(chat.send_message, prompt)
        return response.text
app/services/rag_service.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from qdrant_client import QdrantClient
3
+ from qdrant_client.models import NamedVector
4
+ from typing import List
5
+
6
+ from app.services.openai_service import OpenAIService, GeminiService
7
+ from app.services.embeddings_service import EmbeddingsService, GeminiEmbeddingsService
8
+
9
class RAGService:
    """Retrieval-augmented generation: fetch context from Qdrant, then ask the chat model.

    Both provider pairs (OpenAI and Gemini) expose the same async interface
    (``create_embedding`` / ``get_chat_response``), so no provider-specific
    branching is needed here — the previous ``isinstance`` checks executed
    identical code in both branches and have been removed.
    """

    def __init__(self, qdrant_client: "QdrantClient", embeddings_service: "EmbeddingsService", ai_service: "OpenAIService"):
        self.qdrant_client = qdrant_client
        self.embeddings_service = embeddings_service
        self.ai_service = ai_service
        # Collection written by scripts/ingest_content.py; override via env var.
        self.collection_name = os.getenv("QDRANT_COLLECTION_NAME", "book_embeddings")

    async def retrieve_context(self, query: str, top_k: int = 3) -> List[str]:
        """Embed *query* and return the payload text of the top_k nearest chunks."""
        query_vector = await self.embeddings_service.create_embedding(query)

        search_result = self.qdrant_client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=top_k,
            with_payload=True,
        )

        # Points without a payload are skipped; missing "content" maps to "".
        return [point.payload.get("content", "") for point in search_result if point.payload]

    async def generate_response(self, query: str, context: List[str]) -> str:
        """Build a context-grounded prompt and return the model's answer."""
        full_prompt = f"""Context: {' '.join(context)}

Question: {query}

Answer:"""
        return await self.ai_service.get_chat_response(full_prompt)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn==0.30.1
3
+ openai==1.35.13
4
+ qdrant-client==1.9.0
5
+ psycopg2-binary==2.9.9
6
+ sqlalchemy==2.0.31
7
+ python-dotenv==1.0.1
8
+ pydantic==2.8.2
9
+ pydantic-settings==2.3.4
10
+ asyncpg==0.29.0
11
+ markdown==3.6
12
+ beautifulsoup4==4.12.3
13
+ google-generativeai==0.8.3
run.bat ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM Development launcher: activates the backend venv and starts the API with
REM auto-reload on port 8000 (expects setup.bat to have been run once first).
setlocal

REM Navigate to the backend directory
cd backend

REM Activate virtual environment
echo Activating virtual environment...
call venv\Scripts\activate

REM Start the uvicorn server (--reload restarts on code changes; dev only)
echo Starting FastAPI application with uvicorn...
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload

endlocal
scripts/ingest_content.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ from pathlib import Path
4
+ import markdown
5
+ from bs4 import BeautifulSoup
6
+ from qdrant_client import QdrantClient
7
+ from qdrant_client.models import Distance, VectorParams, PointStruct
8
+ from dotenv import load_dotenv
9
+
10
+ # Add these to enable relative imports
11
+ import sys
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
13
+
14
+ from app.services.embeddings_service import EmbeddingsService, GeminiEmbeddingsService
15
+ from app.qdrant_client import get_qdrant_client
16
+ from app.config import settings
17
+
18
+ load_dotenv(dotenv_path=Path(__file__).resolve().parent.parent / ".env")
19
+
20
+ QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "docs_collection")
21
+
22
+ def load_mdx_content(filepath: Path) -> str:
23
+ with open(filepath, 'r', encoding='utf-8') as f:
24
+ content = f.read()
25
+ # MDX is essentially Markdown, so we can convert to HTML then extract text
26
+ html = markdown.markdown(content)
27
+ soup = BeautifulSoup(html, 'html.parser')
28
+ return soup.get_text()
29
+
30
+ def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
31
+ chunks = []
32
+ for i in range(0, len(text), chunk_size - overlap):
33
+ chunks.append(text[i:i + chunk_size])
34
+ return chunks
35
+
36
+ async def ingest_content(
37
+ docs_path: Path,
38
+ qdrant_client: QdrantClient,
39
+ embeddings_service: EmbeddingsService,
40
+ collection_name: str,
41
+ ):
42
+ # Determine vector size based on the embedding service
43
+ if isinstance(embeddings_service, GeminiEmbeddingsService):
44
+ vector_size = 768 # Gemini embedding size
45
+ else:
46
+ vector_size = 1536 # OpenAI embedding size
47
+
48
+ qdrant_client.recreate_collection(
49
+ collection_name=collection_name,
50
+ vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
51
+ )
52
+
53
+ points = []
54
+ point_id = 0
55
+ for mdx_file in docs_path.rglob("*.mdx"):
56
+ print(f"Processing {mdx_file}")
57
+ content = load_mdx_content(mdx_file)
58
+ chunks = chunk_text(content)
59
+
60
+ for chunk in chunks:
61
+ embedding = await embeddings_service.create_embedding(chunk)
62
+ points.append(
63
+ PointStruct(
64
+ id=point_id,
65
+ vector=embedding,
66
+ payload={
67
+ "content": chunk,
68
+ "source": str(mdx_file.relative_to(docs_path))
69
+ }
70
+ )
71
+ )
72
+ point_id += 1
73
+
74
+ if len(points) >= 100: # Batch upsert
75
+ qdrant_client.upsert(
76
+ collection_name=collection_name,
77
+ points=points,
78
+ wait=True,
79
+ )
80
+ points = []
81
+
82
+ if points: # Upsert remaining points
83
+ qdrant_client.upsert(
84
+ collection_name=collection_name,
85
+ points=points,
86
+ wait=True,
87
+ )
88
+
89
+ print(f"Ingestion complete. Total points: {point_id}")
90
+
91
+ if __name__ == "__main__":
92
+ parser = argparse.ArgumentParser(description="Ingest MDX content into Qdrant.")
93
+ parser.add_argument(
94
+ "--docs_path",
95
+ type=str,
96
+ default="../physical-ai-humanoid-robotics/docs/",
97
+ help="Path to the directory containing MDX documentation files."
98
+ )
99
+ args = parser.parse_args()
100
+
101
+ qdrant_client = get_qdrant_client()
102
+
103
+ # Choose the appropriate embedding service based on AI_PROVIDER setting
104
+ if settings.AI_PROVIDER.lower() == "gemini":
105
+ embeddings_service = GeminiEmbeddingsService()
106
+ else:
107
+ embeddings_service = EmbeddingsService()
108
+
109
+ # Run the async ingestion
110
+ import asyncio
111
+ asyncio.run(ingest_content(
112
+ docs_path=Path(args.docs_path),
113
+ qdrant_client=qdrant_client,
114
+ embeddings_service=embeddings_service,
115
+ collection_name=QDRANT_COLLECTION_NAME
116
+ ))
setup.bat ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM One-time project setup: create the venv, install dependencies, and seed
REM a .env from the example file. Run from the repository root.
setlocal

REM Navigate to the backend directory
cd backend

REM Check if virtual environment exists, if not, create it
if not exist venv (
    echo Creating virtual environment...
    python -m venv venv
)

REM Activate virtual environment
echo Activating virtual environment...
call venv\Scripts\activate

REM Install dependencies
echo Installing dependencies from requirements.txt...
pip install -r requirements.txt

REM Check if .env file exists, if not, create it from .env.example
REM NOTE(review): no .env.example appears in this commit - confirm it exists,
REM otherwise this copy step fails silently.
if not exist .env (
    echo Creating .env from .env.example...
    copy .env.example .env
)

echo Setup complete.
endlocal
pause