Spaces:

Arnavkumar01
/

business_orbit

Sleeping

App Files Files Community

Arnavkumar01 committed on Oct 2, 2025

Commit

9b454aa

1 Parent(s): 749195f

Initial Commit

Browse files

Files changed (23) hide show

Dockerfile +20 -0
app/__init__.py +0 -0
app/__pycache__/__init__.cpython-310.pyc +0 -0
app/__pycache__/__init__.cpython-312.pyc +0 -0
app/__pycache__/main.cpython-310.pyc +0 -0
app/__pycache__/main.cpython-312.pyc +0 -0
app/api/__init__.py +0 -0
app/api/__pycache__/__init__.cpython-310.pyc +0 -0
app/api/__pycache__/__init__.cpython-312.pyc +0 -0
app/api/__pycache__/navigator.cpython-310.pyc +0 -0
app/api/__pycache__/navigator.cpython-312.pyc +0 -0
app/api/navigator.py +46 -0
app/core/__init__.py +0 -0
app/core/__pycache__/__init__.cpython-310.pyc +0 -0
app/core/__pycache__/security.cpython-310.pyc +0 -0
app/core/security.py +19 -0
app/main.py +29 -0
app/models.py +45 -0
app/services/__pycache__/rag_service.cpython-310.pyc +0 -0
app/services/__pycache__/rag_service.cpython-312.pyc +0 -0
app/services/rag_service.py +111 -0
app/services/rag_service_old.py +115 -0
requirements.txt +24 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# 1. Start with a lean and official Python base image
+FROM python:3.10-slim
+# 2. Set the working directory inside the container
+WORKDIR /app
+# 3. Copy only the requirements file first to leverage Docker's caching
+COPY requirements.txt .
+# 4. Install all Python dependencies from requirements.txt
+# requirements.txt adds the PyTorch CPU wheel index through its --extra-index-url line.
+RUN pip install --no-cache-dir -r requirements.txt
+# 5. Now, copy your application code into the container
+# This assumes your code is in a folder named 'app'
+COPY ./app /app/app
+# 6. Define the command to run your application using Gunicorn
+# This command points to the 'app' object inside your 'main.py' file within the 'app' module.
+# It binds to port 7860 on all interfaces and starts a single Uvicorn worker.
+# NOTE(review): "--timeout 0" presumably disables Gunicorn's worker timeout so a slow start-up or request is not killed - confirm this is intended.
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--timeout", "0", "app.main:app"]

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (113 Bytes). View file

app/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (143 Bytes). View file

app/__pycache__/main.cpython-310.pyc ADDED Viewed

Binary file (672 Bytes). View file

app/__pycache__/main.cpython-312.pyc ADDED Viewed

Binary file (624 Bytes). View file

app/api/__init__.py ADDED Viewed

File without changes

app/api/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (117 Bytes). View file

app/api/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (147 Bytes). View file

app/api/__pycache__/navigator.cpython-310.pyc ADDED Viewed

Binary file (1.57 kB). View file

app/api/__pycache__/navigator.cpython-312.pyc ADDED Viewed

Binary file (2.01 kB). View file

app/api/navigator.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from fastapi import APIRouter, HTTPException, Depends
+from pydantic import BaseModel
+# This is the correct line
+from app.services import rag_service
+from app.core.security import get_api_key
+# Router that collects the navigator endpoints defined in this module.
+router = APIRouter()
+# Request body accepted by POST /navigator/query.
+class QueryRequest(BaseModel):
+    # Free-text search query supplied by the caller.
+    query: str
+    # Number of results requested; forwarded to hybrid_search as `k` (defaults to 10).
+    top_k: int = 10
+@router.post("/navigator/query", dependencies=[Depends(get_api_key)])
+def navigator_query(request: QueryRequest):
+    """
+    Accepts a user query, performs a hybrid search to find relevant professionals,
+    generates a summary, and returns a comprehensive response.
+    Returns a dict with "summary" and "professionals", the latter being a list of
+    {"professional_id", "content"} dicts.
+    Raises HTTPException 400 for a blank query or a non-positive top_k, and
+    HTTPException 500 when the search, summary or formatting step fails.
+    """
+    # A whitespace-only query carries no search terms, so reject it like an empty one.
+    if not request.query or not request.query.strip():
+        raise HTTPException(status_code=400, detail="Query cannot be empty.")
+    # top_k is used downstream as a slice bound and result count; zero or negative values make no sense there.
+    if request.top_k < 1:
+        raise HTTPException(status_code=400, detail="top_k must be a positive integer.")
+    try:
+        # Step 1: Retrieve relevant documents using hybrid search
+        retrieved_docs = rag_service.hybrid_search(request.query, k=request.top_k)
+        # Step 2: Generate a summary response based on the retrieved docs
+        summary = rag_service.generate_summary_response(request.query, retrieved_docs)
+        # Step 3: Format the retrieved docs for a clean JSON response
+        results = [
+            {
+                # Convert the numpy.int64 to a standard Python int
+                "professional_id": int(doc.metadata.get("professional_id")),
+                "content": doc.page_content
+            }
+            for doc in retrieved_docs
+        ]
+    except Exception as e:
+        # The full error goes to the server log only; the message may contain
+        # database or provider details that must not be echoed to API clients.
+        print(f"🔥🔥🔥 An error occurred: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error.") from e
+    return {
+        "summary": summary,
+        "professionals": results
+    }

app/core/__init__.py ADDED Viewed

File without changes

app/core/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (118 Bytes). View file

app/core/__pycache__/security.cpython-310.pyc ADDED Viewed

Binary file (662 Bytes). View file

app/core/security.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import os
+from fastapi import Security, HTTPException, status
+from fastapi.security import APIKeyHeader
+# Define the name of the header we expect
+api_key_header = APIKeyHeader(name="X-API-Key")
+# Get the secret key from the environment variables
+SECRET_KEY = os.getenv("INTERNAL_API_KEY")
+async def get_api_key(api_key: str = Security(api_key_header)):
+    """
+    Checks if the provided API key is valid.
+    Returns the key when it matches INTERNAL_API_KEY. Raises HTTPException 401
+    otherwise, including when no server-side key is configured.
+    """
+    # Imported here so this block does not rely on the module's import list.
+    import hmac
+    # Fail closed: with INTERNAL_API_KEY unset or empty, no client-supplied value may match.
+    # The comparison runs on bytes in constant time so response timing does not
+    # reveal how much of the key was correct.
+    if api_key and SECRET_KEY and hmac.compare_digest(
+        api_key.encode("utf-8"), SECRET_KEY.encode("utf-8")
+    ):
+        return api_key
+    raise HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Invalid or missing API Key",
+    )

app/main.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# app/main.py
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from .api import navigator
+# This is your main FastAPI application instance
+app = FastAPI(title="Navigator AI API")
+# Origins that browsers are allowed to call this API from (CORS allow-list).
+origins = [
+    "http://localhost:3000", # For local development of your frontend
+    "https://business-orbit.onrender.com", # Your production frontend URL
+]
+# Attach the CORS middleware using the allow-list above; credentials are allowed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],  # Allows all methods (GET, POST, etc.)
+    allow_headers=["*"],  # Allows all headers
+)
+# This line includes the API routes from navigator.py under the "/api" prefix
+app.include_router(navigator.router, prefix="/api")
+# Root endpoint: returns a fixed status payload.
+@app.get("/")
+def read_root():
+    return {"Status": "API is running"}

app/models.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# app/models.py
+from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.sql import func
+# In newer versions of SQLAlchemy, declarative_base is imported from sqlalchemy.orm
+Base = declarative_base()
+class Professional(Base):
+    """ORM mapping for the 'professionals_denormalized' table."""
+    __tablename__ = 'professionals_denormalized'
+    id = Column(Integer, primary_key=True) # SERIAL PRIMARY KEY is handled
+    name = Column(String(255), nullable=False)
+    professional_role = Column(String(255))
+    email = Column(String(255), unique=True, index=True, nullable=False)
+    phone = Column(String(20), unique=True) # Added unique constraint based on common usage
+    password_hash = Column(String(255), nullable=False)
+    # New and updated text/varchar fields
+    profile_photo_url = Column(Text)
+    profile_photo_id = Column(String(255))
+    banner_url = Column(Text)
+    banner_id = Column(String(255))
+    skills = Column(Text) # Changed from JSON to Text to match your SQL
+    description = Column(Text)
+    google_id = Column(String(255))
+    linkedin_id = Column(String(255))
+    role = Column(String(50))
+    avatar = Column(String(5)) # Added length limit
+    # Boolean and Integer fields
+    is_admin = Column(Boolean, default=False)
+    rewardScore = Column("rewardScore", Integer, default=0) # Explicitly name to handle case-sensitivity
+    matchScore = Column("matchScore", Integer, default=0)
+    # New location fields
+    location1 = Column(String(255))
+    location2 = Column(String(255))
+    location3 = Column(String(255))
+    location4 = Column(String(255))
+    location5 = Column(String(255))
+    # Kept the automatic timestamping from your original model as it's a best practice
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    # NOTE(review): only `onupdate` is set here (no default/server_default), so this
+    # presumably stays NULL until the first UPDATE - confirm that is intended.
+    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

app/services/__pycache__/rag_service.cpython-310.pyc ADDED Viewed

Binary file (4.29 kB). View file

app/services/__pycache__/rag_service.cpython-312.pyc ADDED Viewed

Binary file (6.04 kB). View file

app/services/rag_service.py ADDED Viewed

	@@ -0,0 +1,111 @@

+# file: rag_service.py
+import os
+import pandas as pd
+from sqlalchemy import create_engine
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores.pgvector import PGVector # Replaces FAISS
+from rank_bm25 import BM25Okapi
+import google.generativeai as genai
+from dotenv import load_dotenv
+# --- CONFIGURATION ---
+load_dotenv()
+MODEL_NAME = "BAAI/bge-large-en-v1.5"
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+DB_URL = os.getenv("DATABASE_URL")
+COLLECTION_NAME = "professionals_embeddings_hybrid" # Must match the ingestion script
+# --- INITIALIZATION ---
+# Initialize Gemini
+genai.configure(api_key=GEMINI_API_KEY)
+# NOTE(review): rag_service_old.py uses 'gemini-2.5-pro'; confirm which model name is intended here.
+llm = genai.GenerativeModel('gemini-1.5-pro')
+# Initialize the embedding model (CPU, normalized embeddings)
+model_kwargs = {'device': 'cpu'}
+encode_kwargs = {'normalize_embeddings': True}
+embeddings = HuggingFaceEmbeddings(
+    model_name=MODEL_NAME,
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs
+)
+# Load the profile text once at import time and build the BM25 keyword index from it
+print("Initializing BM25 Keyword Search...")
+engine = create_engine(DB_URL)
+df = pd.read_sql('SELECT id, name, professional_role, skills, description FROM professionals_denormalized', engine)
+# Blank out NULLs column by column *before* concatenating: `str + NaN` is NaN, so a
+# single missing field used to erase the whole profile text once fillna("") ran on the result.
+_profile_text = df[['name', 'professional_role', 'skills', 'description']].fillna("")
+corpus = (
+    _profile_text['name'] + ". " + _profile_text['professional_role'] + ". "
+    + _profile_text['skills'] + ". " + _profile_text['description']
+).tolist()
+tokenized_corpus = [doc.lower().split(" ") for doc in corpus]
+bm25 = BM25Okapi(tokenized_corpus)
+print("BM25 Initialized.")
+# --- HYBRID SEARCH LOGIC ---
+def reciprocal_rank_fusion(ranked_lists, k=60):
+    """
+    Fuse several ranked document lists into one list of (professional_id, score) pairs, best first.
+    Every appearance of a document at zero-based position p adds 1 / (p + k) to its score.
+    """
+    totals = {}
+    for ranking in ranked_lists:
+        for position, entry in enumerate(ranking):
+            key = entry.metadata['professional_id']
+            totals[key] = totals.get(key, 0) + 1 / (position + k)
+    return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
+def hybrid_search(query: str, k: int = 10):
+    """
+    Return up to `k` documents for `query` by fusing BM25 keyword hits with PGVector semantic hits.
+    Each result exposes `page_content` and `metadata['professional_id']`. When both searches
+    find the same professional, the semantic-search document is the one returned.
+    Returns an empty list when `k` is not positive or the query contains no tokens.
+    """
+    tokenized_query = query.lower().split()
+    if k <= 0 or not tokenized_query:
+        return []
+    # 1. Keyword Search (BM25)
+    bm25_scores = bm25.get_scores(tokenized_query)
+    # Keep only profiles with a positive score. Ranking every index meant profiles that
+    # share no term with the query were still fused in as if they were top keyword hits.
+    matching_indices = [i for i, score in enumerate(bm25_scores) if score > 0]
+    top_n_bm25_indices = sorted(matching_indices, key=lambda i: bm25_scores[i], reverse=True)[:k]
+    bm25_results = []
+    for i in top_n_bm25_indices:
+        metadata = {"professional_id": df.iloc[i]['id']}
+        # Lightweight stand-in exposing the same two attributes the vector-store documents are read through.
+        bm25_results.append(type('Document', (), {'page_content': corpus[i], 'metadata': metadata})())
+    # 2. Semantic Search against the live database collection
+    # NOTE(review): a PGVector client is built on every call; consider reusing one if this shows up in latency.
+    vector_store = PGVector(
+        connection_string=DB_URL,
+        embedding_function=embeddings,
+        collection_name=COLLECTION_NAME
+    )
+    semantic_results = vector_store.similarity_search(query, k=k)
+    # 3. Fuse the results
+    fused_ranks = reciprocal_rank_fusion([semantic_results, bm25_results])
+    # 4. Map the fused ids back to documents with one dictionary lookup per id.
+    # Semantic documents are registered first so they take precedence over BM25 stand-ins.
+    docs_by_id = {}
+    for doc in semantic_results:
+        docs_by_id.setdefault(doc.metadata['professional_id'], doc)
+    for doc in bm25_results:
+        docs_by_id.setdefault(doc.metadata['professional_id'], doc)
+    return [docs_by_id[doc_id] for doc_id, _score in fused_ranks[:k]]
+# --- GENERATION LOGIC ---
+def generate_summary_response(query: str, retrieved_docs: list):
+    """
+    Ask the Gemini model for a short natural-language summary of the retrieved profiles.
+    Returns a fixed message when there are no documents, and another fixed message
+    when the model call fails.
+    """
+    # Nothing retrieved: answer without calling the model.
+    if not retrieved_docs:
+        return "I could not find any professionals that match your query."
+    profiles_block = "\n\n---\n\n".join(doc.page_content for doc in retrieved_docs)
+    prompt = f"""
+    You are the Navigator AI assistant. Your task is to provide a helpful and concise summary based on the provided professional profiles.
+    Based on the following retrieved profiles:
+    ---
+    {profiles_block}
+    ---
+    Please answer the user's original query: "{query}"
+    Your response should be a brief, natural language summary that synthesizes the information and highlights why these professionals are a good match.
+    """
+    try:
+        # Reading `.text` stays inside the try so a failure there is handled like a failed call.
+        summary_text = llm.generate_content(prompt).text
+    except Exception as err:
+        print(f"🔥 Gemini API call failed: {err}")
+        return "I was unable to generate a summary at this time."
+    return summary_text

app/services/rag_service_old.py ADDED Viewed

	@@ -0,0 +1,115 @@

+# app/services/rag_service.py
+import os
+import pandas as pd
+from sqlalchemy import create_engine
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from rank_bm25 import BM25Okapi
+import google.generativeai as genai
+from dotenv import load_dotenv
+# --- CONFIGURATION ---
+load_dotenv()
+VECTOR_STORE_PATH = "vector_store"
+MODEL_NAME = "BAAI/bge-large-en-v1.5"
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+# --- INITIALIZATION ---
+# Initialize Gemini
+genai.configure(api_key=GEMINI_API_KEY)
+llm = genai.GenerativeModel('gemini-2.5-pro')
+# Initialize Embedding Model
+model_kwargs = {'device': 'cpu'}
+encode_kwargs = {'normalize_embeddings': True}
+embeddings = HuggingFaceEmbeddings(
+    model_name=MODEL_NAME,
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs
+)
+# Load the vector store on startup
+# SECURITY: allow_dangerous_deserialization=True opts in to unsafe deserialization of the
+# saved index; only point VECTOR_STORE_PATH at an index this project produced itself.
+vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
+# Load data for BM25 Keyword Search
+db_url = os.getenv("DATABASE_URL")
+engine = create_engine(db_url)
+df = pd.read_sql('SELECT id, name, professional_role, skills, description FROM professionals_denormalized', engine)
+# Create the text corpus for BM25
+# NULLs are blanked per column *before* concatenation: `str + NaN` is NaN, so one missing
+# field used to erase the whole profile text once fillna("") ran on the result.
+_profile_text = df[['name', 'professional_role', 'skills', 'description']].fillna("")
+corpus = (
+    _profile_text['name'] + ". " + _profile_text['professional_role'] + ". "
+    + _profile_text['skills'] + ". " + _profile_text['description']
+).tolist()
+tokenized_corpus = [doc.lower().split(" ") for doc in corpus]
+bm25 = BM25Okapi(tokenized_corpus)
+# --- HYBRID SEARCH LOGIC ---
+def reciprocal_rank_fusion(ranked_lists, k=60):
+    """
+    Combine ranked document lists into (professional_id, score) pairs sorted by descending score.
+    A document at zero-based offset o in a list contributes 1 / (o + k) to its total.
+    """
+    scores_by_id = {}
+    for ranked in ranked_lists:
+        for offset, item in enumerate(ranked):
+            pid = item.metadata['professional_id']
+            scores_by_id.setdefault(pid, 0)
+            scores_by_id[pid] += 1 / (offset + k)
+    ordered = list(scores_by_id.items())
+    ordered.sort(key=lambda pair: pair[1], reverse=True)
+    return ordered
+def hybrid_search(query: str, k: int = 10):
+    """
+    Return up to `k` documents for `query` by fusing BM25 keyword hits with FAISS semantic hits.
+    Each result exposes `page_content` and `metadata['professional_id']`. When both searches
+    find the same professional, the FAISS document is the one returned.
+    Returns an empty list when `k` is not positive or the query contains no tokens.
+    """
+    tokenized_query = query.lower().split()
+    if k <= 0 or not tokenized_query:
+        return []
+    # 1. Keyword Search (BM25)
+    bm25_scores = bm25.get_scores(tokenized_query)
+    # Only profiles with a positive score are real keyword matches; ranking every index
+    # let profiles with no term in common with the query into the fusion step.
+    matching_indices = [i for i, score in enumerate(bm25_scores) if score > 0]
+    top_n_bm25_indices = sorted(matching_indices, key=lambda i: bm25_scores[i], reverse=True)[:k]
+    bm25_results = []
+    for i in top_n_bm25_indices:
+        metadata = {"professional_id": df.iloc[i]['id']}
+        # Lightweight stand-in exposing the same two attributes the FAISS documents are read through.
+        bm25_results.append(type('Document', (), {'page_content': corpus[i], 'metadata': metadata})())
+    # 2. Semantic Search (FAISS)
+    semantic_results = vector_store.similarity_search(query, k=k)
+    # 3. Fuse the results
+    fused_ranks = reciprocal_rank_fusion([semantic_results, bm25_results])
+    # 4. Retrieve the final documents with one dictionary lookup per fused id.
+    # FAISS documents are registered first so they take precedence over BM25 stand-ins.
+    docs_by_id = {}
+    for doc in semantic_results:
+        docs_by_id.setdefault(doc.metadata['professional_id'], doc)
+    for doc in bm25_results:
+        docs_by_id.setdefault(doc.metadata['professional_id'], doc)
+    return [docs_by_id[doc_id] for doc_id, _score in fused_ranks[:k]]
+# --- GENERATION LOGIC ---
+def generate_summary_response(query: str, retrieved_docs: list):
+    """
+    Generates a natural language summary using the retrieved documents.
+    Always returns a string: the model's summary, a fixed message when no documents
+    were retrieved, or a fixed message when the model call fails.
+    """
+    if not retrieved_docs:
+        # Return a plain string here as well; this branch used to return a (str, list)
+        # tuple while every other path returned a str.
+        return "I could not find any professionals that match your query."
+    # Prepare context from retrieved docs
+    context = "\n\n---\n\n".join(doc.page_content for doc in retrieved_docs)
+    prompt = f"""
+    You are the Navigator AI assistant. Your task is to provide a helpful and concise summary based on the provided professional profiles.
+    Based on the following retrieved profiles:
+    ---
+    {context}
+    ---
+    Please answer the user's original query: "{query}"
+    Your response should be a brief, natural language summary that synthesizes the information and highlights why these professionals are a good match.
+    """
+    try:
+        response = llm.generate_content(prompt)
+        return response.text
+    except Exception as e:
+        # Best effort: a failed model call degrades to a fixed message instead of an error.
+        print(f"🔥 Gemini API call failed: {e}")
+        return "I was unable to generate a summary at this time."

requirements.txt ADDED Viewed

	@@ -0,0 +1,24 @@

+# This line tells pip to use the main repository AND the PyTorch CPU repository
+--extra-index-url https://download.pytorch.org/whl/cpu
+# Core API Framework
+fastapi==0.115.13
+uvicorn==0.34.3
+gunicorn==23.0.0
+# Database & Vector Store
+psycopg2-binary==2.9.10
+SQLAlchemy==2.0.40
+pgvector==0.4.1
+# RAG & Machine Learning
+langchain-huggingface==0.3.1
+langchain-community==0.3.30
+sentence-transformers==5.1.1
+rank-bm25==0.2.2
+google-generativeai==0.8.5
+pandas==2.2.3
+# NOTE(review): torch is the only unpinned package in this file; consider pinning it for reproducible builds.
+torch
+# Utilities
+python-dotenv==1.1.0