Gaykar committed on
Commit
9bc7a47
·
1 Parent(s): 4ec07e1

add to ig

Browse files
.gitignore CHANGED
@@ -5,6 +5,8 @@ google-credentials.json
5
  !composer.json # If you use composer (optional)
6
  /venv
7
 
 
 
8
  # --- Python Environment ---
9
  __pycache__/
10
  *.py[cod]
 
5
  !composer.json # If you use composer (optional)
6
  /venv
7
 
8
+
9
+ /Notebooks
10
  # --- Python Environment ---
11
  __pycache__/
12
  *.py[cod]
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM python:3.12-slim
3
+
4
+ # Set the working directory to /code
5
+ WORKDIR /code
6
+
7
+ # Copy the requirements.txt file into the container
8
+ COPY requirements.txt .
9
+
10
+ # Install the dependencies
11
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
+
13
+ # Set up a new user named "user" with user ID 1000
14
+ RUN useradd -m -u 1000 user
15
+
16
+ # Switch to the "user" user
17
+ USER user
18
+
19
+ # Set home to the user's home directory
20
+ ENV HOME=/home/user \
21
+ PATH=/home/user/.local/bin:$PATH
22
+
23
+ # Set the working directory to the app directory inside the user's home
24
+ WORKDIR $HOME/app
25
+
26
+ # Copy the current directory contents into the container at $HOME/app
27
+ COPY --chown=user . $HOME/app
28
+
29
+ EXPOSE 7860
30
+
31
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
Notebooks/CodeForge.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
app/core/config.py CHANGED
@@ -1,16 +1,16 @@
1
  from pathlib import Path
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
 
4
- BASE_DIR = Path(__file__).resolve().parent.parent
5
 
6
  class Settings(BaseSettings):
7
  PROJECT_NAME: str = "Adaptive Onboarding Engine"
8
 
9
  GROQ_API_KEY: str
10
  PINECONE_API_KEY: str
11
- CLOUDINARY_CLOUD_NAME: str
12
- CLOUDINARY_API_KEY: str
13
- CLOUDINARY_API_SECRET: str
14
 
15
  model_config = SettingsConfigDict(
16
  env_file=str(BASE_DIR / ".env"),
 
1
  from pathlib import Path
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
 
4
+ BASE_DIR = Path(__file__).resolve().parent.parent.parent
5
 
6
  class Settings(BaseSettings):
7
  PROJECT_NAME: str = "Adaptive Onboarding Engine"
8
 
9
  GROQ_API_KEY: str
10
  PINECONE_API_KEY: str
11
+ # CLOUDINARY_CLOUD_NAME: str
12
+ # CLOUDINARY_API_KEY: str
13
+ # CLOUDINARY_API_SECRET: str
14
 
15
  model_config = SettingsConfigDict(
16
  env_file=str(BASE_DIR / ".env"),
app/graph.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  from app.state.state import OnboardingState
3
  from app.nodes.graphnodes import *
4
  from langgraph.prebuilt import ToolNode ,tools_condition
 
 
1
  from app.state.state import OnboardingState
2
  from app.nodes.graphnodes import *
3
  from langgraph.prebuilt import ToolNode ,tools_condition
app/main.py CHANGED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import tempfile
3
+ import os
4
+ from pathlib import Path
5
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from langgraph.checkpoint.memory import MemorySaver
8
+
9
+ from app.graph import graph
10
+
11
+ app = FastAPI(title="Adaptive Onboarding Engine")
12
+
13
+ app.add_middleware(
14
+ CORSMiddleware,
15
+ allow_origins=["*"], # tighten this in production
16
+ allow_methods=["*"],
17
+ allow_headers=["*"],
18
+ )
19
+
20
+ checkpointer = MemorySaver()
21
+
22
+ # -----------------------------
23
+ # Payload Builder
24
+ # (inline from your export_ui_payload logic)
25
+ # -----------------------------
26
+
27
# State keys that are forwarded to the UI, in the order they appear in the payload.
REQUIRED_KEYS = ["candidate_name", "skill_gap_analysis_data", "mermaid_code", "final_roadmap"]


def build_ui_payload(state: dict) -> dict:
    """Project a graph state onto the keys listed in ``REQUIRED_KEYS``.

    Keys that are absent from ``state`` or whose value is ``None`` are left
    out of the result. Any value exposing a ``model_dump`` attribute is
    replaced by the result of calling it; every other value is passed
    through unchanged.

    Args:
        state: Mapping-like object supporting ``.get(key)``.

    Returns:
        A new dict containing only the populated required keys.
    """
    payload = {}
    for field in REQUIRED_KEYS:
        value = state.get(field)
        if value is not None:
            # Objects with ``model_dump`` are converted via that method.
            payload[field] = value.model_dump() if hasattr(value, "model_dump") else value
    return payload
40
+
41
+
42
+ # -----------------------------
43
+ # POST /analyze
44
+ # Accepts: resume PDF (file upload) + job description (form field)
45
+ # Returns: UI payload JSON
46
+ # -----------------------------
47
+
48
@app.post("/analyze")
async def analyze(
    resume: UploadFile = File(..., description="Resume PDF file"),
    job_description: str = Form(..., description="Job description text"),
    candidate_name: str = Form(default="Candidate"),
):
    """Run the onboarding graph on an uploaded resume and job description.

    The upload is written to a temporary file whose path is handed to the
    graph as ``file_path``; the temporary file is always removed before the
    request finishes.

    Args:
        resume: Uploaded resume file (multipart form upload).
        job_description: Job description text (form field).
        candidate_name: Display name for the candidate (form field).

    Returns:
        The UI payload produced by ``build_ui_payload`` from the final state.

    Raises:
        HTTPException: 400 if the upload is empty, 422 if the final state
            carries an ``extraction_error``, 500 for any other failure.
    """
    # Local import keeps this block self-contained (stdlib only).
    import asyncio

    tmp_path = None
    try:
        # 1. Save uploaded PDF to a temp file
        content = await resume.read()
        if not content:
            raise HTTPException(status_code=400, detail="Uploaded resume file is empty")

        # ``filename`` may be None for some clients; fall back to ".pdf".
        suffix = Path(resume.filename or "").suffix or ".pdf"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before writing so cleanup still runs if write fails.
            tmp_path = tmp.name
            tmp.write(content)

        # 2. Build initial graph state
        initial_input = {
            "candidate_name": candidate_name,
            "resume_text": None,
            "job_description": job_description,
            "file_path": tmp_path,  # local temp path for PyMuPDF
            "resume_data": None,
            "extraction_error": None,
            "JobDescriptionExtract_data": None,
            "skill_gap_analysis_data": None,
            "messages": [],
            "mermaid_code": None,
            "final_roadmap": None,
        }

        # 3. Run the graph
        thread_id = str(uuid.uuid4())
        config = {"configurable": {"thread_id": thread_id}}

        # ``graph.invoke`` is synchronous; run it in a worker thread so the
        # event loop stays free to serve other requests.
        final_state = await asyncio.to_thread(graph.invoke, initial_input, config=config)

        # 4. Check for extraction errors
        if final_state.get("extraction_error"):
            raise HTTPException(
                status_code=422,
                detail=f"Extraction failed: {final_state['extraction_error']}",
            )

        # 5. Build and return UI payload
        return build_ui_payload(final_state)

    except HTTPException:
        # Preserve deliberately chosen status codes.
        raise

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        # 6. Clean up temp file
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
104
+
105
+
106
+ # -----------------------------
107
+ # GET /health
108
+ # -----------------------------
109
+
110
@app.get("/health")
def health():
    """Health-check endpoint: return a static status document."""
    return dict(status="ok", service="Adaptive Onboarding Engine")
113
+
114
+
115
+
116
+ if __name__ == "__main__":
117
+ import uvicorn
118
+ uvicorn.run(app, host="127.0.0.1", port=8000)
app/tools/tools.py CHANGED
@@ -4,6 +4,8 @@ from app.utils.vectordatabase import retriever
4
  from app.schemas.pydanticschema import LearningRoadmap
5
  import json
6
  from typing import Dict, List,Any
 
 
7
 
8
  @tool
9
  def search_courses(query: str, level: str, category: str):
@@ -111,8 +113,16 @@ class CourseLookup:
111
  """Retrieves full details of a course by its ID."""
112
  return self.courses_map.get(course_id)
113
 
 
 
 
 
 
 
 
 
 
114
 
115
- lookup_service = CourseLookup("Catalog.json")
116
 
117
  @tool
118
  def get_course_by_id(course_id: str) -> str:
 
4
  from app.schemas.pydanticschema import LearningRoadmap
5
  import json
6
  from typing import Dict, List,Any
7
+ from pathlib import Path
8
+ BASE_DIR = Path(__file__).resolve().parent
9
 
10
  @tool
11
  def search_courses(query: str, level: str, category: str):
 
113
  """Retrieves full details of a course by its ID."""
114
  return self.courses_map.get(course_id)
115
 
116
# Course catalog file, located in the same directory as this module.
DATA_PATH = BASE_DIR / "Catalog.json"

# Fail at import time with an explicit message when the catalog is missing.
if not DATA_PATH.exists():
    raise FileNotFoundError(f"Catalog file not found: {DATA_PATH}")

# Module-level lookup instance built from the catalog file.
lookup_service = CourseLookup(DATA_PATH)
123
+
124
+
125
 
 
126
 
127
  @tool
128
  def get_course_by_id(course_id: str) -> str:
app/utils/bm25.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ee33742ce995054481aaba1f309536b1b8991754c1a1627743076d64aba478
3
+ size 5752
app/utils/vectordatabase.py CHANGED
@@ -1,14 +1,14 @@
1
  import json
2
  import pickle
3
- import torch
4
  from pathlib import Path
5
  from typing import List
6
 
7
  from pinecone import Pinecone, ServerlessSpec
8
  from pinecone_text.sparse import BM25Encoder
9
- from langchain_huggingface import HuggingFaceEmbeddings
10
  from langchain_community.retrievers import PineconeHybridSearchRetriever
11
  from langchain_core.documents import Document
 
12
 
13
  from app.core.config import settings
14
 
@@ -23,25 +23,38 @@ BM25_PKL_PATH = BASE_DIR / "bm25.pkl"
23
 
24
 
25
  # -----------------------------
26
- # Device
 
27
  # -----------------------------
28
 
29
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
- print(f"Using device: {device}")
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # -----------------------------
34
- # Embeddings
35
- # -----------------------------
36
 
37
- embeddings = HuggingFaceEmbeddings(
38
- model_name="sentence-transformers/all-MiniLM-L6-v2",
39
- model_kwargs={"device": str(device)}
40
  )
41
 
42
 
43
  # -----------------------------
44
- # Load Documents from JSON
45
  # -----------------------------
46
 
47
  def load_documents(data_path: Path) -> List[Document]:
 
1
  import json
2
  import pickle
3
+ import requests
4
  from pathlib import Path
5
  from typing import List
6
 
7
  from pinecone import Pinecone, ServerlessSpec
8
  from pinecone_text.sparse import BM25Encoder
 
9
  from langchain_community.retrievers import PineconeHybridSearchRetriever
10
  from langchain_core.documents import Document
11
+ from langchain_core.embeddings import Embeddings
12
 
13
  from app.core.config import settings
14
 
 
23
 
24
 
25
  # -----------------------------
26
+ # General Remote Embeddings
27
+ # avoids cold starts
28
  # -----------------------------
29
 
30
class GeneralRemoteEmbeddings(Embeddings):
    """``Embeddings`` implementation that delegates to a remote HTTP service.

    Documents are embedded by POSTing ``{"texts": [...]}`` to
    ``{endpoint}/embed_docs`` and reading the ``"embeddings"`` field of the
    JSON response. A query is embedded by POSTing ``{"text": ...}`` to
    ``{endpoint}/embed_query`` and reading the ``"embedding"`` field.

    Args:
        endpoint: Base URL of the embedding service. A trailing slash is
            stripped so route URLs never contain ``//``.
        timeout: Seconds to wait for the service before ``requests`` raises
            a timeout error. Without a timeout a stalled service would
            block the caller indefinitely.
    """

    def __init__(self, endpoint: str, timeout: float = 60.0):
        self.endpoint = endpoint.rstrip("/")
        self.timeout = timeout

    def _post(self, route: str, payload: dict) -> dict:
        """POST ``payload`` as JSON to ``route`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        response = requests.post(
            f"{self.endpoint}/{route}",
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per entry in ``texts``."""
        if not texts:
            # Nothing to embed; skip the network round trip.
            return []
        return self._post("embed_docs", {"texts": texts})["embeddings"]

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding vector for a single query string."""
        return self._post("embed_query", {"text": text})["embedding"]
49
 
 
 
 
50
 
51
+ embeddings = GeneralRemoteEmbeddings(
52
+ endpoint="https://gaykar-generalembeddings.hf.space"
 
53
  )
54
 
55
 
56
  # -----------------------------
57
+ # Load Documents
58
  # -----------------------------
59
 
60
  def load_documents(data_path: Path) -> List[Document]:
requirements.txt CHANGED
@@ -7,3 +7,4 @@ langchain_community==0.4.1
7
  fastapi==0.118.1
8
  uvicorn
9
  pinecone-text
 
 
7
  fastapi==0.118.1
8
  uvicorn
9
  pinecone-text
10
+ sentence-transformers