Spaces:
Running
Running
Initial commit
Browse files- .gitignore +30 -0
- README.md +121 -0
- app.py +49 -0
- core/config.py +0 -0
- core/ingestion/docling_loader.py +34 -0
- core/parsing/extractor.py +37 -0
- core/parsing/schema.py +65 -0
- core/processing/dataframe.py +64 -0
- core/utils/helpers.py +0 -0
- requirements.txt +19 -0
.gitignore
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables
|
| 2 |
+
.env
|
| 3 |
+
|
| 4 |
+
# Python cache
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.pyc
|
| 7 |
+
|
| 8 |
+
# Virtual environment
|
| 9 |
+
.venv/
|
| 10 |
+
|
| 11 |
+
# OS files
|
| 12 |
+
.DS_Store
|
| 13 |
+
Thumbs.db
|
| 14 |
+
|
| 15 |
+
# Logs
|
| 16 |
+
*.log
|
| 17 |
+
|
| 18 |
+
# Data (optional)
|
| 19 |
+
data/
|
| 20 |
+
|
| 21 |
+
# Jupyter
|
| 22 |
+
.ipynb_checkpoints/
|
| 23 |
+
|
| 24 |
+
# Build
|
| 25 |
+
dist/
|
| 26 |
+
build/
|
| 27 |
+
|
| 28 |
+
# IDE
|
| 29 |
+
.vscode/
|
| 30 |
+
.idea/
|
README.md
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
title: Caption Gen
|
| 2 |
+
emoji: 📸
|
| 3 |
+
sdk: streamlit
|
| 4 |
+
sdk_version: 1.43.0
|
| 5 |
+
app_file: app.py
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# CV Analyzer (AI-Powered Resume Parser)
|
| 9 |
+
|
| 10 |
+
A Streamlit-based app that extracts structured data from CVs (PDF) using **Docling + Agentic AI + Pydantic schema**, and converts it into a clean, downloadable CSV.
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## Features
|
| 15 |
+
|
| 16 |
+
- Upload CV (PDF)
|
| 17 |
+
- Parse document using Docling
|
| 18 |
+
- Extract structured data using LLM agent
|
| 19 |
+
- Validate with Pydantic schema
|
| 20 |
+
- Convert to Pandas DataFrame
|
| 21 |
+
- View extracted data in UI
|
| 22 |
+
- Download as CSV
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## Tech Stack
|
| 27 |
+
|
| 28 |
+
- **Streamlit** – UI
|
| 29 |
+
- **Docling** – PDF parsing
|
| 30 |
+
- **Pydantic / pydantic-ai** – structured extraction
|
| 31 |
+
- **Hugging Face / LLM** – inference
|
| 32 |
+
- **Pandas** – data processing
|
| 33 |
+
|
| 34 |
+
---
|
| 35 |
+
|
| 36 |
+
## Setup
|
| 37 |
+
|
| 38 |
+
### 1. Clone repo
|
| 39 |
+
```bash
|
| 40 |
+
git clone https://github.com/your-username/cv-analyzer.git
|
| 41 |
+
cd cv-analyzer
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
### 2. Create virtual environment
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
python -m venv .venv
|
| 48 |
+
source .venv/bin/activate # Linux/macOS
|
| 49 |
+
.venv\Scripts\activate # Windows
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### 3. Install dependencies
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
pip install -r requirements.txt
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
### 4. Environment variables
|
| 59 |
+
|
| 60 |
+
Create a `.env` file:
|
| 61 |
+
|
| 62 |
+
```
|
| 63 |
+
HF_TOKEN=your_huggingface_token
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
> `.env` is ignored via `.gitignore`
|
| 67 |
+
|
| 68 |
+
---
|
| 69 |
+
|
| 70 |
+
## Run App
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
streamlit run app.py
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## How it works
|
| 79 |
+
|
| 80 |
+
1. User uploads CV (PDF)
|
| 81 |
+
2. Docling converts PDF → structured text/markdown
|
| 82 |
+
3. LLM agent extracts data using predefined schema
|
| 83 |
+
4. Output is validated via Pydantic
|
| 84 |
+
5. Data is converted into a DataFrame
|
| 85 |
+
6. User can view and download CSV
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
## Notes
|
| 90 |
+
|
| 91 |
+
* Schema is designed for **AI/ML-focused resumes**
|
| 92 |
+
* Missing fields are returned as `null` (no hallucination policy)
|
| 93 |
+
* Dates are stored as strings to avoid parsing errors
|
| 94 |
+
* Validation is relaxed to improve LLM compatibility
|
| 95 |
+
|
| 96 |
+
---
|
| 97 |
+
|
| 98 |
+
## Limitations
|
| 99 |
+
|
| 100 |
+
* LLM may still produce inconsistent outputs for poorly formatted CVs
|
| 101 |
+
* Complex layouts (tables, multi-column PDFs) may affect parsing quality
|
| 102 |
+
* Requires internet access for model inference
|
| 103 |
+
|
| 104 |
+
---
|
| 105 |
+
|
| 106 |
+
## Future Improvements
|
| 107 |
+
|
| 108 |
+
* Multi-CV batch processing
|
| 109 |
+
* Candidate scoring & ranking
|
| 110 |
+
* Semantic search over resumes (FAISS)
|
| 111 |
+
* UI improvements (filters, charts)
|
| 112 |
+
* Export to JSON / Excel
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
## License
|
| 117 |
+
|
| 118 |
+
MIT License
|
| 119 |
+
|
| 120 |
+
```
|
| 121 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import tempfile

import streamlit as st

from core.ingestion.docling_loader import load_and_convert_cv
from core.parsing.extractor import extract_resume
from core.processing.dataframe import resume_to_df

st.title("CV Analyzer")

# ---- session state init ----
# `processed` guards the slow parse/extract pipeline from re-running on every
# Streamlit rerun; `df` caches the extracted table between reruns.
if "processed" not in st.session_state:
    st.session_state.processed = False
if "df" not in st.session_state:
    st.session_state.df = None

uploaded_file = st.file_uploader("Upload CV (PDF)", type=["pdf"])

if st.button("Upload New CV"):
    st.session_state.processed = False
    st.session_state.df = None

# ---- process only once ----
if uploaded_file and not st.session_state.processed:
    # Docling needs a path on disk, so spill the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    try:
        text = load_and_convert_cv(pdf_path)
        data = extract_resume(text)
        df = resume_to_df(data)
    finally:
        # The file was created with delete=False, so it must be removed
        # explicitly; otherwise every upload leaks a file in the temp dir.
        os.unlink(pdf_path)

    st.session_state.df = df
    st.session_state.processed = True

# ---- display from session (no recompute) ----
if st.session_state.processed and st.session_state.df is not None:
    df = st.session_state.df

    st.subheader("Extracted Data")
    st.dataframe(df)

    csv = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download CSV",
        data=csv,
        file_name="cv_data.csv",
        mime="text/csv"
    )
|
core/config.py
ADDED
|
File without changes
|
core/ingestion/docling_loader.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption


@st.cache_resource
def get_converter():
    """
    Initializes and caches the Docling DocumentConverter.

    This ensures models are only loaded once across app reruns.

    OCR is disabled via ``PdfPipelineOptions(do_ocr=False)``. The previous
    ``PdfFormatOption(enable_ocr=False)`` keyword is not a field of
    ``PdfFormatOption`` and was silently ignored, leaving OCR at its
    (enabled) default.
    """
    pipeline_options = PdfPipelineOptions(do_ocr=False)
    return DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
        }
    )


def load_and_convert_cv(file_path: str) -> str:
    """
    Converts a CV file (PDF) to plain text using Docling.

    Note: this exports via ``export_to_text()``, i.e. plain text, not
    Markdown (use ``export_to_markdown()`` if Markdown is wanted).

    Args:
        file_path (str): The local path to the uploaded CV file.

    Returns:
        str: The converted text content.
    """
    converter = get_converter()
    result = converter.convert(file_path)
    text_content = result.document.export_to_text()
    return text_content
|
core/parsing/extractor.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic_ai import Agent
from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.providers.openai import OpenAIProvider
from dotenv import load_dotenv
import os

from core.parsing.schema import Resume


# Load HF_TOKEN from a local .env file; unnecessary if deployed on a
# Hugging Face Space, where secrets are injected into the environment.
load_dotenv()
api_key = os.environ["HF_TOKEN"]  # raises KeyError at import time if missing


# NOTE(review): a HuggingFaceModel is paired here with an OpenAIProvider
# pointed at the HF OpenAI-compatible router — confirm this combination
# against the pinned pydantic-ai version's docs.
model = HuggingFaceModel(
    'Qwen/Qwen2.5-7B-Instruct',
    provider=OpenAIProvider(
        base_url="https://router.huggingface.co/v1",
        api_key=api_key
    )
)


# Agent whose structured output is validated against the Resume schema.
# NOTE(review): the adjacent string literals concatenate WITHOUT spaces
# ("...extractor.Do NOT..."); likely unintended, but left as-is since the
# prompt text is runtime behavior.
agent = Agent(
    model=model,
    system_prompt=(
        'You are an expert resume extractor.'
        'Do NOT infer or hallucinate missing sections.'
        'If a section is not explicitly present, return null or empty list.'
    ),
    output_type=Resume
)


def extract_resume(text: str) -> Resume:
    """Run the extraction agent on raw CV text and return the validated Resume.

    Blocks until the (remote) model call completes; raises if the model
    output fails Resume validation.
    """
    result = agent.run_sync(text)
    return result.output
|
core/parsing/schema.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
from typing import List, Optional


# Nested models for detailed resume sections.
# Dates are kept as plain strings throughout to avoid parse errors on the
# many free-form formats LLMs emit ("2020", "Jan 2020", "present", ...).
class ContactInformation(BaseModel):
    # All contact fields are optional: the no-hallucination policy means the
    # model returns null for anything not explicitly present in the CV.
    # (Previously `email: str = Field(None, ...)` — a non-optional annotation
    # with a None default, which is inconsistent; fixed to Optional[str].)
    email: Optional[str] = Field(None, description="Email address.")
    phone: Optional[str] = Field(None, description='mobile number eg. +92 03011234567')
    linkedin: Optional[str] = None
    github: Optional[str] = None
    hugging_face: Optional[str] = None
    kaggle: Optional[str] = None


class Education(BaseModel):
    institution: str
    degree: str
    start_date: Optional[str] = None
    end_date: Optional[str] = None


class Experience(BaseModel):
    title: str = Field(description="Job role/title.")
    company: str = Field(description="Name of the company or organization.")
    start_date: Optional[str] = None
    end_date: Optional[str] = None


class Project(BaseModel):
    name: str = Field(description="Name of a project.")
    description: str = Field(description="Project Description")
    # Previously `List[str] = None` — non-optional annotation with a None
    # default; fixed to Optional so an explicit null also validates.
    technologies: Optional[List[str]] = None
    url: Optional[str] = None
    difficulty_score: int = Field(
        ...,
        ge=1,
        le=10,
        description=(
            "Strictly evaluate AI engineering complexity. "
            "1-3: Simple 'wrapper' apps, basic prompting, or out-of-the-box RAG with a single data source. "
            "4-6: Production-grade apps with persistent memory, multi-step tool use (agents), "
            "complex data parsing (PDFs/Tables), or basic fine-tuning for style. "
            "7-8: Advanced architectures featuring multi-agent orchestration, self-healing loops, "
            "complex hybrid search (vector + keyword), or custom evaluation frameworks (LLM-as-a-judge). "
            "9-10: Highly complex, mission-critical systems with real-time streaming, "
            "multi-modal integration, or heavy optimization for cost and latency at scale. "
            "If the project only uses a single API call without complex logic, it must not exceed 3."
        )
    )


# Main AI Developer Resume Schema
class Resume(BaseModel):
    full_name: str = Field(..., description="Full name of the applicant.")
    contact: ContactInformation
    summary: str = Field(..., description="Professional summary focusing on AI/ML.")
    education: Optional[List[Education]] = Field(
        ..., description="List of educational degrees. Return null if not explicitly present."
    )
    experience: Optional[List[Experience]] = Field(
        ..., description="List of experiences. Return null if not explicitly present."
    )
    ai_ml_skills: List[str] = Field(..., description="Specific AI/ML skills (e.g., LLMs, Computer Vision).")
    technical_skills: List[str] = Field(..., description="Programming languages and tools.")
    projects: Optional[List[Project]] = None
    certifications: Optional[List[str]] = None
|
core/processing/dataframe.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd


def resume_to_df(resume):
    """
    Flatten a ``Resume`` model into a single pandas DataFrame.

    Scalar fields (name, summary, contact, skill lists) are repeated on every
    row; the list sections (education, experience, projects) are zipped
    row-by-row, so the frame has ``max(len(section), 1)`` rows. Cells for
    exhausted sections are simply absent from that row (NaN in the frame).

    Args:
        resume: A pydantic Resume instance (anything exposing
            ``model_dump()`` or ``dict()``).

    Returns:
        pd.DataFrame: one row per index across the list sections.
    """
    # Prefer pydantic v2's model_dump(); fall back to the v1/deprecated dict().
    r = resume.model_dump() if hasattr(resume, "model_dump") else resume.dict()

    # Scalar/joined fields repeated on every row. The `or []` guards handle
    # None values (the schema's no-hallucination policy returns null).
    base = {
        "full_name": r["full_name"],
        "summary": r["summary"],
        **{f"contact_{k}": v for k, v in r["contact"].items()},
        "ai_ml_skills": ", ".join(r.get("ai_ml_skills", []) or []),
        "technical_skills": ", ".join(r.get("technical_skills", []) or []),
        "certifications": ", ".join(r.get("certifications", []) or [])
    }

    educations = r.get("education") or []
    experiences = r.get("experience") or []
    projects = r.get("projects") or []

    # At least one row, even when every list section is empty/null.
    max_len = max(len(educations), len(experiences), len(projects), 1)

    rows = []
    for i in range(max_len):
        row = base.copy()

        if i < len(educations):
            e = educations[i]
            row.update({
                "edu_institution": e["institution"],
                "edu_degree": e["degree"],
                "edu_start": e["start_date"],
                "edu_end": e["end_date"],
            })

        if i < len(experiences):
            ex = experiences[i]
            row.update({
                "exp_title": ex["title"],
                "exp_company": ex["company"],
                "exp_start": ex["start_date"],
                "exp_end": ex["end_date"],
            })

        if i < len(projects):
            p = projects[i]
            row.update({
                "proj_name": p["name"],
                "proj_desc": p["description"],
                # `technologies` defaults to None in the schema — guard the
                # join (previously this raised TypeError on null).
                "proj_tech": ", ".join(p["technologies"] or []),
                "proj_score": p["difficulty_score"],
            })

        rows.append(row)

    return pd.DataFrame(rows)
|
core/utils/helpers.py
ADDED
|
File without changes
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.37.1
|
| 2 |
+
pandas==2.2.2
|
| 3 |
+
python-dotenv==1.0.1
|
| 4 |
+
|
| 5 |
+
# Pydantic AI stack
|
| 6 |
+
pydantic==2.7.4
|
| 7 |
+
pydantic-ai==0.0.14
|
| 8 |
+
|
| 9 |
+
# HF ecosystem (important: keep older compatible versions)
|
| 10 |
+
transformers==4.41.2
|
| 11 |
+
huggingface-hub==0.34.0
|
| 12 |
+
tokenizers==0.19.1
|
| 13 |
+
|
| 14 |
+
# Docling
|
| 15 |
+
docling==2.28.0
|
| 16 |
+
|
| 17 |
+
# Optional but often required by docling
|
| 18 |
+
torch==2.3.1
|
| 19 |
+
numpy==1.26.4
|