Rishabh2095 committed on
Commit
c7be661
·
1 Parent(s): 8349858

Modified Dockerfile and docker-compose for HF Deployment

Browse files
Dockerfile CHANGED
@@ -1,32 +1,83 @@
1
  # syntax=docker/dockerfile:1.4
2
  FROM langchain/langgraph-api:3.12
3
 
4
- # HuggingFace Spaces requires port 7860
5
- ENV PORT=7860
6
- ENV LANGGRAPH_PORT=7860
 
 
 
 
 
7
 
8
  ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", "research_workflow": "/deps/job_writer/src/job_writing_agent/nodes/research_workflow.py:research_workflow", "data_loading_workflow": "/deps/job_writer/src/job_writing_agent/nodes/data_loading_workflow.py:data_loading_workflow"}'
9
 
10
- COPY pyproject.toml langgraph.json /deps/job_writer/
11
- COPY src/ /deps/job_writer/src/
 
 
 
 
 
 
12
 
13
- RUN for dep in /deps/*; do \
 
 
 
14
  if [ -d "$dep" ]; then \
15
  echo "Installing $dep"; \
16
- (cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .); \
17
  fi; \
18
  done
19
 
20
- # Use cache mount for Playwright - browsers persist between builds!
 
 
 
21
  RUN --mount=type=cache,target=/root/.cache/ms-playwright \
22
- playwright install chromium && \
23
- playwright install-deps
24
 
 
25
  RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && \
26
  touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py && \
27
- PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  WORKDIR /deps/job_writer
30
 
31
  # Expose port for HuggingFace Spaces
32
- EXPOSE 7860
 
 
 
 
 
1
  # syntax=docker/dockerfile:1.4
2
  FROM langchain/langgraph-api:3.12
3
 
4
+ # Set Python environment variables (best practice)
5
+ ENV PYTHONUNBUFFERED=1 \
6
+ PYTHONDONTWRITEBYTECODE=1 \
7
+ PORT=7860 \
8
+ LANGGRAPH_PORT=7860
9
+
10
+ # Create user with UID 1000 for HuggingFace Spaces compatibility
11
+ RUN useradd -m -u 1000 hf_user
12
 
13
  ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", "research_workflow": "/deps/job_writer/src/job_writing_agent/nodes/research_workflow.py:research_workflow", "data_loading_workflow": "/deps/job_writer/src/job_writing_agent/nodes/data_loading_workflow.py:data_loading_workflow"}'
14
 
15
+ # Copy package metadata and structure files (needed for editable install)
16
+ COPY --chown=hf_user:hf_user pyproject.toml langgraph.json README.md /deps/job_writer/
17
+
18
+ # Create src directory structure (needed for setuptools to find packages)
19
+ RUN mkdir -p /deps/job_writer/src
20
+
21
+ # Copy source code (required for editable install)
22
+ COPY --chown=hf_user:hf_user src/ /deps/job_writer/src/
23
 
24
+ # Install Python dependencies as ROOT using --system flag
25
+ # Using cache mount for faster rebuilds
26
+ RUN --mount=type=cache,target=/root/.cache/uv \
27
+ for dep in /deps/*; do \
28
  if [ -d "$dep" ]; then \
29
  echo "Installing $dep"; \
30
+ (cd "$dep" && uv pip install --system --no-cache-dir -c /api/constraints.txt -e .); \
31
  fi; \
32
  done
33
 
34
+ # Install Playwright system dependencies (after playwright package is installed)
35
+ RUN playwright install-deps chromium
36
+
37
+ # Install Playwright browser binaries (with cache mount)
38
  RUN --mount=type=cache,target=/root/.cache/ms-playwright \
39
+ playwright install chromium
 
40
 
41
+ # Create API directories and install langgraph-api as ROOT
42
  RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && \
43
  touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py && \
44
+ uv pip install --system --no-cache-dir --no-deps -e /api
45
+
46
+ # Fix permissions for packages that write to their own directories
47
+ # Make ONLY the specific directories writable (not entire site-packages)
48
+ RUN mkdir -p /usr/local/lib/python3.12/site-packages/litellm/litellm_core_utils/tokenizers && \
49
+ chown -R hf_user:hf_user /usr/local/lib/python3.12/site-packages/litellm/litellm_core_utils/tokenizers && \
50
+ chmod -R u+w /usr/local/lib/python3.12/site-packages/litellm/litellm_core_utils/tokenizers
51
+
52
+ # Create user cache directories with proper permissions (BEFORE switching user)
53
+ # Following XDG Base Directory Specification: https://specifications.freedesktop.org/basedir-spec/
54
+ RUN mkdir -p /home/hf_user/.cache/tiktoken \
55
+ /home/hf_user/.cache/litellm \
56
+ /home/hf_user/.cache/huggingface \
57
+ /home/hf_user/.cache/torch \
58
+ /home/hf_user/.local/share && \
59
+ chown -R hf_user:hf_user /home/hf_user/.cache /home/hf_user/.local
60
+
61
+ # Switch to hf_user for runtime (after all root operations)
62
+ USER hf_user
63
+
64
+ # Set environment variables following XDG Base Directory Specification
65
+ # This ensures all packages respect standard cache locations
66
+ ENV HOME=/home/hf_user \
67
+ PATH="/home/hf_user/.local/bin:$PATH" \
68
+ XDG_CACHE_HOME=/home/hf_user/.cache \
69
+ XDG_DATA_HOME=/home/hf_user/.local/share \
70
+ XDG_CONFIG_HOME=/home/hf_user/.config \
71
+ # Package-specific cache directories (for packages that don't fully respect XDG)
72
+ TIKTOKEN_CACHE_DIR=/home/hf_user/.cache/tiktoken \
73
+ HF_HOME=/home/hf_user/.cache/huggingface \
74
+ TORCH_HOME=/home/hf_user/.cache/torch
75
 
76
  WORKDIR /deps/job_writer
77
 
78
  # Expose port for HuggingFace Spaces
79
+ EXPOSE 7860
80
+
81
+ # Healthcheck (LangGraph API typically has /ok endpoint)
82
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
83
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/ok')" || exit 1
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: purple
6
  sdk: docker
7
  app_port: 7860
8
  pinned: false
 
9
  ---
10
 
11
  # Job Writer Module
 
6
  sdk: docker
7
  app_port: 7860
8
  pinned: false
9
+ python_version: 3.12.8
10
  ---
11
 
12
  # Job Writer Module
docker-compose.yml CHANGED
@@ -9,6 +9,8 @@ services:
9
  interval: 5s
10
  timeout: 3s
11
  retries: 5
 
 
12
  networks:
13
  - job-app-network
14
 
@@ -26,17 +28,18 @@ services:
26
  interval: 5s
27
  timeout: 5s
28
  retries: 5
 
29
  volumes:
30
  - pg_data_local:/var/lib/postgresql/data
 
31
  networks:
32
  - job-app-network
33
 
34
- # Optional: Uncomment to run your agent container alongside Redis/Postgres
35
  agent:
36
  build:
37
  context: .
38
  dockerfile: Dockerfile
39
- image: job-app-workflow:latest
40
  container_name: job-app-agent
41
  ports:
42
  - "7860:7860"
@@ -47,6 +50,13 @@ services:
47
  - DATABASE_URI=postgresql://postgres:postgres@postgres:5432/postgres
48
  env_file:
49
  - .docker_env
 
 
 
 
 
 
 
50
  depends_on:
51
  redis:
52
  condition: service_healthy
@@ -54,10 +64,19 @@ services:
54
  condition: service_healthy
55
  networks:
56
  - job-app-network
 
 
 
 
 
 
 
 
 
57
 
58
  networks:
59
  job-app-network:
60
  driver: bridge
61
 
62
  volumes:
63
- pg_data_local:
 
9
  interval: 5s
10
  timeout: 3s
11
  retries: 5
12
+ start_period: 10s
13
+ restart: unless-stopped
14
  networks:
15
  - job-app-network
16
 
 
28
  interval: 5s
29
  timeout: 5s
30
  retries: 5
31
+ start_period: 10s
32
  volumes:
33
  - pg_data_local:/var/lib/postgresql/data
34
+ restart: unless-stopped
35
  networks:
36
  - job-app-network
37
 
 
38
  agent:
39
  build:
40
  context: .
41
  dockerfile: Dockerfile
42
+ image: job-app-workflow:latest # Consider versioned tag in production
43
  container_name: job-app-agent
44
  ports:
45
  - "7860:7860"
 
50
  - DATABASE_URI=postgresql://postgres:postgres@postgres:5432/postgres
51
  env_file:
52
  - .docker_env
53
+ healthcheck:
54
+ test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:7860/ok')"]
55
+ interval: 30s
56
+ timeout: 10s
57
+ retries: 3
58
+ start_period: 40s
59
+ restart: unless-stopped
60
  depends_on:
61
  redis:
62
  condition: service_healthy
 
64
  condition: service_healthy
65
  networks:
66
  - job-app-network
67
+ # Optional: Resource limits (uncomment for production)
68
+ # deploy:
69
+ # resources:
70
+ # limits:
71
+ # cpus: '2'
72
+ # memory: 4G
73
+ # reservations:
74
+ # cpus: '1'
75
+ # memory: 2G
76
 
77
  networks:
78
  job-app-network:
79
  driver: bridge
80
 
81
  volumes:
82
+ pg_data_local:
src/job_writing_agent/agents/nodes.py CHANGED
@@ -94,11 +94,13 @@ def create_draft(state: ResearchState) -> ResultState:
94
  logger.info(f"Draft has been created: {response.content}")
95
 
96
  app_state = ResultState(
97
- draft=response.content,
98
  feedback="",
99
  critique_feedback="",
100
  current_node="create_draft",
101
- output_data={},
 
 
102
  )
103
 
104
  return app_state
@@ -116,9 +118,6 @@ def critique_draft(state: ResultState) -> ResultState:
116
  company_research_data = state.get("company_research_data", {})
117
  job_description = str(company_research_data.get("job_description", ""))
118
  draft_content = str(state.get("draft", ""))
119
- feedback = state.get("feedback", "")
120
- output_data = state.get("output_data", "")
121
- current_node = state.get("current_node", "")
122
 
123
  # Debug logging to verify values
124
  logger.debug(f"Job description length: {len(job_description)}")
@@ -126,8 +125,16 @@ def critique_draft(state: ResultState) -> ResultState:
126
 
127
  # Early return if required fields are missing
128
  if not job_description or not draft_content:
129
- logger.warning("Missing job_description or draft in state")
130
- return ResultState(**state, current_node=current_node)
 
 
 
 
 
 
 
 
131
 
132
  # Create LLM inside function (lazy initialization)
133
  llm_provider = LLMFactory()
@@ -198,7 +205,13 @@ def critique_draft(state: ResultState) -> ResultState:
198
 
199
  # Store the critique - using validated variables from top of function
200
  return ResultState(
201
- **state, critique_feedback=critique_content, current_node=current_node
 
 
 
 
 
 
202
  )
203
 
204
  except Exception as e:
@@ -232,7 +245,15 @@ def human_approval(state: ResultState) -> ResultState:
232
 
233
  print(f"Human feedback: {human_feedback}")
234
 
235
- return ResultState(**state, feedback=human_feedback, current_node="human_approval")
 
 
 
 
 
 
 
 
236
 
237
 
238
  def finalize_document(state: ResultState) -> ResultState:
@@ -278,16 +299,20 @@ def finalize_document(state: ResultState) -> ResultState:
278
  )
279
 
280
  # Return final state using validated variables
 
 
281
  return ResultState(
282
  draft=draft_content,
283
  feedback=feedback_content,
284
  critique_feedback=critique_feedback_content,
285
  current_node="finalize",
286
  output_data=(
287
- final_content.content
288
  if hasattr(final_content, "content")
289
- else final_content
290
  ),
 
 
291
  )
292
 
293
 
 
94
  logger.info(f"Draft has been created: {response.content}")
95
 
96
  app_state = ResultState(
97
+ draft=str(response.content),
98
  feedback="",
99
  critique_feedback="",
100
  current_node="create_draft",
101
+ output_data="",
102
+ company_research_data=state.get("company_research_data", {}),
103
+ messages=state.get("messages", []),
104
  )
105
 
106
  return app_state
 
118
  company_research_data = state.get("company_research_data", {})
119
  job_description = str(company_research_data.get("job_description", ""))
120
  draft_content = str(state.get("draft", ""))
 
 
 
121
 
122
  # Debug logging to verify values
123
  logger.debug(f"Job description length: {len(job_description)}")
 
125
 
126
  # Early return if required fields are missing
127
  if not job_description or not draft_content:
128
+ logger.warning("Missing content for critique in state")
129
+ return ResultState(
130
+ draft=state.get("draft", ""),
131
+ feedback=state.get("feedback", ""),
132
+ critique_feedback="",
133
+ current_node="critique",
134
+ output_data="",
135
+ company_research_data=state.get("company_research_data", {}),
136
+ messages=state.get("messages", []),
137
+ )
138
 
139
  # Create LLM inside function (lazy initialization)
140
  llm_provider = LLMFactory()
 
205
 
206
  # Store the critique - using validated variables from top of function
207
  return ResultState(
208
+ draft=state.get("draft", ""),
209
+ feedback=state.get("feedback", ""),
210
+ critique_feedback=str(critique_content),
211
+ current_node="critique",
212
+ output_data="",
213
+ company_research_data=state.get("company_research_data", {}),
214
+ messages=state.get("messages", []),
215
  )
216
 
217
  except Exception as e:
 
245
 
246
  print(f"Human feedback: {human_feedback}")
247
 
248
+ return ResultState(
249
+ draft=state.get("draft", ""),
250
+ feedback=human_feedback,
251
+ critique_feedback=state.get("critique_feedback", ""),
252
+ current_node="human_approval",
253
+ output_data="",
254
+ company_research_data=state.get("company_research_data", {}),
255
+ messages=state.get("messages", []),
256
+ )
257
 
258
 
259
  def finalize_document(state: ResultState) -> ResultState:
 
299
  )
300
 
301
  # Return final state using validated variables
302
+ # Current (INCOMPLETE):
303
+
304
  return ResultState(
305
  draft=draft_content,
306
  feedback=feedback_content,
307
  critique_feedback=critique_feedback_content,
308
  current_node="finalize",
309
  output_data=(
310
+ str(final_content.content)
311
  if hasattr(final_content, "content")
312
+ else str(final_content)
313
  ),
314
+ company_research_data=state.get("company_research_data", {}),
315
+ messages=state.get("messages", []),
316
  )
317
 
318
 
src/job_writing_agent/classes/__init__.py CHANGED
@@ -1,17 +1,3 @@
1
- from .classes import (
2
- AppState,
3
- ResearchState,
4
- DataLoadState,
5
- ResultState,
6
- FormField,
7
- FormFieldsExtraction,
8
- )
9
 
10
- __all__ = [
11
- "AppState",
12
- "ResearchState",
13
- "DataLoadState",
14
- "ResultState",
15
- "FormField",
16
- "FormFieldsExtraction",
17
- ]
 
1
+ from .classes import AppState, ResearchState, DataLoadState, ResultState
 
 
 
 
 
 
 
2
 
3
+ __all__ = ["AppState", "ResearchState", "DataLoadState", "ResultState"]
 
 
 
 
 
 
 
src/job_writing_agent/utils/document_processing.py CHANGED
@@ -416,17 +416,17 @@ async def parse_job_description_from_url(url: str) -> Document:
416
  if not cerebras_api_key:
417
  raise ValueError("CEREBRAS_API_KEY environment variable not set")
418
 
419
- dspy.configure(
 
420
  lm=dspy.LM(
421
  "cerebras/qwen-3-32b",
422
  api_key=cerebras_api_key,
423
  temperature=0.1,
424
  max_tokens=60000, # Note: This max_tokens is unusually high
425
  )
426
- )
427
-
428
- job_extract_fn = dspy.Predict(ExtractJobDescription)
429
- result = job_extract_fn(job_description_html_content=raw_content)
430
  logger.info("Successfully processed job description with LLM.")
431
 
432
  # 4. Create the final Document with structured data
 
416
  if not cerebras_api_key:
417
  raise ValueError("CEREBRAS_API_KEY environment variable not set")
418
 
419
+ # Use dspy.context() for async tasks instead of dspy.configure()
420
+ with dspy.context(
421
  lm=dspy.LM(
422
  "cerebras/qwen-3-32b",
423
  api_key=cerebras_api_key,
424
  temperature=0.1,
425
  max_tokens=60000, # Note: This max_tokens is unusually high
426
  )
427
+ ):
428
+ job_extract_fn = dspy.Predict(ExtractJobDescription)
429
+ result = job_extract_fn(job_description_html_content=raw_content)
 
430
  logger.info("Successfully processed job description with LLM.")
431
 
432
  # 4. Create the final Document with structured data