Spaces:
Paused
Paused
Commit
·
29cfacc
1
Parent(s):
89511c3
updated
Browse files- app.py +158 -161
- requirements.txt +7 -10
app.py
CHANGED
|
@@ -54,23 +54,24 @@ import shutil
|
|
| 54 |
shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
|
| 55 |
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
|
| 56 |
CHATBOT_DB_DIR = "/tmp/chroma_db"
|
|
|
|
| 57 |
# -----------------------------------------------------------------------------
|
| 58 |
# Hugging Face model configuration
|
| 59 |
#
|
| 60 |
# The chatbot uses a small conversational model hosted on Hugging Face. To
|
| 61 |
# allow easy experimentation, the model name can be overridden via the
|
| 62 |
# ``HF_CHATBOT_MODEL`` environment variable. If unset, we fall back to
|
| 63 |
-
# ``
|
| 64 |
-
#
|
| 65 |
-
HF_MODEL_NAME = os.getenv("HF_CHATBOT_MODEL", "
|
| 66 |
|
| 67 |
# Global Hugging Face model and tokenizer. These variables remain ``None``
|
| 68 |
# until ``init_hf_model()`` is called. They are reused across all chatbot
|
| 69 |
# requests to prevent repeatedly loading the large model into memory.
|
| 70 |
-
_hf_model
|
| 71 |
-
_hf_tokenizer
|
| 72 |
|
| 73 |
-
def init_hf_model()
|
| 74 |
"""
|
| 75 |
Initialise the Hugging Face conversational model and tokenizer.
|
| 76 |
|
|
@@ -83,25 +84,30 @@ def init_hf_model() -> None:
|
|
| 83 |
if _hf_model is not None and _hf_tokenizer is not None:
|
| 84 |
return
|
| 85 |
|
| 86 |
-
from transformers import
|
| 87 |
import torch
|
| 88 |
|
| 89 |
model_name = HF_MODEL_NAME
|
| 90 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
# Load tokenizer and model from Hugging Face
|
| 93 |
-
# specifying ``use_auth_token`` here since the default models are
|
| 94 |
-
# publicly accessible. Should you wish to use a private model, set
|
| 95 |
-
# HF_HOME/HF_TOKEN environment variables accordingly.
|
| 96 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 97 |
-
model =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
_hf_model = model
|
| 100 |
_hf_tokenizer = tokenizer
|
|
|
|
|
|
|
| 101 |
_chatbot_embedder = None
|
| 102 |
_chatbot_collection = None
|
| 103 |
|
| 104 |
-
def init_chatbot()
|
| 105 |
"""Initialise the Chroma vector DB with chatbot.txt content."""
|
| 106 |
global _chatbot_embedder, _chatbot_collection
|
| 107 |
if _chatbot_embedder is not None and _chatbot_collection is not None:
|
|
@@ -115,81 +121,130 @@ def init_chatbot() -> None:
|
|
| 115 |
|
| 116 |
os.makedirs(CHATBOT_DB_DIR, exist_ok=True)
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
|
| 121 |
-
docs = [doc.strip() for doc in splitter.split_text(text)]
|
| 122 |
|
|
|
|
| 123 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 124 |
embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
|
| 125 |
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
collection = client.get_or_create_collection("chatbot")
|
| 128 |
-
|
|
|
|
| 129 |
try:
|
| 130 |
-
existing = collection.get(
|
| 131 |
if not existing.get("documents"):
|
| 132 |
raise ValueError("Empty Chroma DB")
|
| 133 |
except Exception:
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
_chatbot_embedder = embedder
|
| 137 |
_chatbot_collection = collection
|
| 138 |
|
| 139 |
-
|
| 140 |
def get_chatbot_response(query: str) -> str:
|
| 141 |
"""Generate a reply to the user's query using Chroma + Hugging Face model."""
|
| 142 |
-
init_chatbot()
|
| 143 |
-
init_hf_model()
|
| 144 |
-
|
| 145 |
-
# Safety: prevent empty input
|
| 146 |
-
if not query or not query.strip():
|
| 147 |
-
return "Please type a question about the Codingo platform."
|
| 148 |
-
|
| 149 |
-
embedder = _chatbot_embedder
|
| 150 |
-
collection = _chatbot_collection
|
| 151 |
-
model = _hf_model
|
| 152 |
-
tokenizer = _hf_tokenizer
|
| 153 |
-
device = model.device
|
| 154 |
-
|
| 155 |
-
# Retrieve context from Chroma
|
| 156 |
-
query_embedding = embedder.encode([query])[0]
|
| 157 |
-
results = collection.query(query_embeddings=[query_embedding], n_results=3)
|
| 158 |
-
retrieved_docs = results.get("documents", [[]])[0] if results else []
|
| 159 |
-
context = "\n".join(retrieved_docs)
|
| 160 |
-
|
| 161 |
-
# System instruction
|
| 162 |
-
system_prompt = (
|
| 163 |
-
"You are a helpful assistant for the Codingo website. "
|
| 164 |
-
"Only answer questions relevant to the context provided. "
|
| 165 |
-
"If unrelated, reply: 'I'm only trained to answer questions about the Codingo platform.'"
|
| 166 |
-
)
|
| 167 |
-
|
| 168 |
-
prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"
|
| 169 |
-
|
| 170 |
-
# ✅ Safe tokenization with truncation to avoid CUDA indexing issues
|
| 171 |
-
inputs = tokenizer(
|
| 172 |
-
prompt,
|
| 173 |
-
return_tensors="pt",
|
| 174 |
-
truncation=True,
|
| 175 |
-
max_length=256, # Prevents long inputs
|
| 176 |
-
padding=True
|
| 177 |
-
).to(device)
|
| 178 |
-
|
| 179 |
try:
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
)
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
except Exception as e:
|
| 192 |
-
|
|
|
|
| 193 |
|
| 194 |
# Initialize Flask app
|
| 195 |
app = Flask(
|
|
@@ -197,37 +252,20 @@ app = Flask(
|
|
| 197 |
static_folder='backend/static',
|
| 198 |
static_url_path='/static',
|
| 199 |
template_folder='backend/templates',
|
| 200 |
-
instance_path=safe_instance_path
|
| 201 |
)
|
| 202 |
|
| 203 |
app.config['SECRET_KEY'] = 'saadi'
|
| 204 |
|
| 205 |
-
# -----------------------------------------------------------------------------
|
| 206 |
# Cookie configuration for Hugging Face Spaces
|
| 207 |
-
#
|
| 208 |
-
# When running this app inside an iframe (as is typical on Hugging Face Spaces),
|
| 209 |
-
# browsers will drop cookies that have the default SameSite policy of ``Lax``.
|
| 210 |
-
# This prevents the Flask session cookie from being stored and means that
|
| 211 |
-
# ``login_user()`` will appear to have no effect – the user will be redirected
|
| 212 |
-
# back to the home page but remain anonymous. By explicitly setting the
|
| 213 |
-
# SameSite policy to ``None`` and enabling the ``Secure`` flag, we allow the
|
| 214 |
-
# session and remember cookies to be sent even when the app is embedded in an
|
| 215 |
-
# iframe. Without these settings the sign‑up and login flows work locally
|
| 216 |
-
# but silently fail in Spaces, causing the "redirect to home page without
|
| 217 |
-
# anything" behaviour reported by users.
|
| 218 |
app.config['SESSION_COOKIE_SAMESITE'] = 'None'
|
| 219 |
app.config['SESSION_COOKIE_SECURE'] = True
|
| 220 |
app.config['REMEMBER_COOKIE_SAMESITE'] = 'None'
|
| 221 |
app.config['REMEMBER_COOKIE_SECURE'] = True
|
| 222 |
|
| 223 |
# Configure the database connection
|
| 224 |
-
# Use /tmp directory for database in Hugging Face Spaces
|
| 225 |
-
# Note: Data will be lost when the space restarts
|
| 226 |
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:////tmp/codingo.db'
|
| 227 |
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
|
| 228 |
-
from flask_wtf.csrf import CSRFProtect
|
| 229 |
-
|
| 230 |
-
# csrf = CSRFProtect(app)
|
| 231 |
|
| 232 |
# Create necessary directories in writable locations
|
| 233 |
os.makedirs('/tmp/static/audio', exist_ok=True)
|
|
@@ -249,7 +287,7 @@ def load_user(user_id):
|
|
| 249 |
app.register_blueprint(auth_bp)
|
| 250 |
app.register_blueprint(interview_api, url_prefix="/api")
|
| 251 |
|
| 252 |
-
# Routes
|
| 253 |
@app.route('/')
|
| 254 |
def index():
|
| 255 |
return render_template('index.html')
|
|
@@ -269,30 +307,17 @@ def job_detail(job_id):
|
|
| 269 |
def apply(job_id):
|
| 270 |
job = Job.query.get_or_404(job_id)
|
| 271 |
if request.method == 'POST':
|
| 272 |
-
# Retrieve the uploaded resume file from the request. The ``name``
|
| 273 |
-
# attribute in the HTML form is ``resume``.
|
| 274 |
file = request.files.get('resume')
|
| 275 |
-
# Use our safe upload helper to store the resume. ``filepath``
|
| 276 |
-
# contains the location where the file was saved so that recruiters
|
| 277 |
-
# can download it later. Resume parsing has been disabled, so
|
| 278 |
-
# ``features`` will always be an empty dictionary.
|
| 279 |
features, error, filepath = handle_resume_upload(file)
|
| 280 |
|
| 281 |
-
# If there was an error saving the resume, notify the user. We no
|
| 282 |
-
# longer attempt to parse the resume contents, so the manual fields
|
| 283 |
-
# collected below will form the entire feature set.
|
| 284 |
if error:
|
| 285 |
flash("Resume upload failed. Please try again.", "danger")
|
| 286 |
return render_template('apply.html', job=job)
|
| 287 |
|
| 288 |
-
# Collect the manually entered fields for skills, experience and education.
|
| 289 |
-
# Users can separate entries with commas, semicolons or newlines; we
|
| 290 |
-
# normalise the input into lists of trimmed strings.
|
| 291 |
def parse_entries(raw_value: str):
|
| 292 |
import re
|
| 293 |
entries = []
|
| 294 |
if raw_value:
|
| 295 |
-
# Split on commas, semicolons or newlines
|
| 296 |
for item in re.split(r'[\n,;]+', raw_value):
|
| 297 |
item = item.strip()
|
| 298 |
if item:
|
|
@@ -309,10 +334,6 @@ def apply(job_id):
|
|
| 309 |
"education": parse_entries(education_input)
|
| 310 |
}
|
| 311 |
|
| 312 |
-
# Prepare the application record. We ignore the empty ``features``
|
| 313 |
-
# returned by ``handle_resume_upload`` and instead persist the
|
| 314 |
-
# manually collected attributes. The extracted_features column
|
| 315 |
-
# expects a JSON string; json.dumps handles proper serialization.
|
| 316 |
application = Application(
|
| 317 |
job_id=job_id,
|
| 318 |
user_id=current_user.id,
|
|
@@ -338,43 +359,33 @@ def my_applications():
|
|
| 338 |
).order_by(Application.date_applied.desc()).all()
|
| 339 |
return render_template('my_applications.html', applications=applications)
|
| 340 |
|
| 341 |
-
# -----------------------------------------------------------------------------
|
| 342 |
# Chatbot API endpoint
|
| 343 |
-
#
|
| 344 |
-
# This route receives a JSON payload containing a ``message`` field from the
|
| 345 |
-
# front‑end chat widget. It validates the input, invokes the chatbot
|
| 346 |
-
# response function and returns a JSON response. Any errors are surfaced
|
| 347 |
-
# as a 400 or 500 response with an ``error`` message field.
|
| 348 |
@app.route('/chatbot', methods=['POST'])
|
| 349 |
def chatbot_endpoint():
|
| 350 |
-
|
| 351 |
-
user_input = str(data.get('message', '')).strip()
|
| 352 |
-
if not user_input:
|
| 353 |
-
return jsonify({"error": "Empty message"}), 400
|
| 354 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
reply = get_chatbot_response(user_input)
|
| 356 |
return jsonify({"response": reply})
|
|
|
|
| 357 |
except Exception as exc:
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
# facility instead.
|
| 361 |
-
print(f"Chatbot error: {exc}", file=sys.stderr)
|
| 362 |
-
return jsonify({"error": str(exc)}), 500
|
| 363 |
|
| 364 |
@app.route('/parse_resume', methods=['POST'])
|
| 365 |
def parse_resume():
|
| 366 |
file = request.files.get('resume')
|
| 367 |
features, error, filepath = handle_resume_upload(file)
|
| 368 |
|
| 369 |
-
# If the upload failed, return an error. Parsing is no longer
|
| 370 |
-
# supported, so we do not attempt to inspect the resume contents.
|
| 371 |
if error:
|
| 372 |
return {"error": "Error processing resume. Please try again."}, 400
|
| 373 |
|
| 374 |
-
# If no features were extracted (the normal case now), respond with
|
| 375 |
-
# empty fields rather than an error. This preserves the API
|
| 376 |
-
# contract expected by any front‑end code that might call this
|
| 377 |
-
# endpoint.
|
| 378 |
if not features:
|
| 379 |
return {
|
| 380 |
"name": "",
|
|
@@ -386,8 +397,6 @@ def parse_resume():
|
|
| 386 |
"summary": ""
|
| 387 |
}, 200
|
| 388 |
|
| 389 |
-
# Should features contain values (unlikely in the new implementation),
|
| 390 |
-
# pass them through to the client.
|
| 391 |
response = {
|
| 392 |
"name": features.get('name', ''),
|
| 393 |
"email": features.get('email', ''),
|
|
@@ -415,30 +424,20 @@ def interview_page(job_id):
|
|
| 415 |
cv_data = json.loads(application.extracted_features)
|
| 416 |
return render_template("interview.html", job=job, cv=cv_data)
|
| 417 |
|
| 418 |
-
|
| 419 |
-
# -----------------------------------------------------------------------------
|
| 420 |
-
# Recruiter job posting route
|
| 421 |
-
#
|
| 422 |
-
# Authenticated users with a recruiter or admin role can access this page to
|
| 423 |
-
# create new job listings. Posted jobs are associated with the current
|
| 424 |
-
# recruiter via the ``recruiter_id`` foreign key on the ``Job`` model.
|
| 425 |
@app.route('/post_job', methods=['GET', 'POST'])
|
| 426 |
@login_required
|
| 427 |
def post_job():
|
| 428 |
-
# Only allow recruiters and admins to post jobs
|
| 429 |
if current_user.role not in ('recruiter', 'admin'):
|
| 430 |
flash('You do not have permission to post jobs.', 'warning')
|
| 431 |
return redirect(url_for('jobs'))
|
| 432 |
|
| 433 |
if request.method == 'POST':
|
| 434 |
-
# Extract fields from the form
|
| 435 |
role_title = request.form.get('role', '').strip()
|
| 436 |
description = request.form.get('description', '').strip()
|
| 437 |
seniority = request.form.get('seniority', '').strip()
|
| 438 |
skills_input = request.form.get('skills', '').strip()
|
| 439 |
company = request.form.get('company', '').strip()
|
| 440 |
|
| 441 |
-
# Validate required fields
|
| 442 |
errors = []
|
| 443 |
if not role_title:
|
| 444 |
errors.append('Job title is required.')
|
|
@@ -456,12 +455,9 @@ def post_job():
|
|
| 456 |
flash(err, 'danger')
|
| 457 |
return render_template('post_job.html')
|
| 458 |
|
| 459 |
-
# Normalise the skills input into a JSON encoded list. Users can
|
| 460 |
-
# separate entries with commas, semicolons or newlines.
|
| 461 |
skills_list = [s.strip() for s in re.split(r'[\n,;]+', skills_input) if s.strip()]
|
| 462 |
skills_json = json.dumps(skills_list)
|
| 463 |
|
| 464 |
-
# Create and persist the new job
|
| 465 |
new_job = Job(
|
| 466 |
role=role_title,
|
| 467 |
description=description,
|
|
@@ -476,52 +472,35 @@ def post_job():
|
|
| 476 |
flash('Job posted successfully!', 'success')
|
| 477 |
return redirect(url_for('jobs'))
|
| 478 |
|
| 479 |
-
# GET request returns the form
|
| 480 |
return render_template('post_job.html')
|
| 481 |
|
| 482 |
-
|
| 483 |
-
# -----------------------------------------------------------------------------
|
| 484 |
-
# Recruiter dashboard route
|
| 485 |
-
#
|
| 486 |
-
# Displays a list of candidates who applied to jobs posted by the current
|
| 487 |
-
# recruiter. Candidates are sorted by a simple skill match score computed
|
| 488 |
-
# against the job requirements. A placeholder download button is provided
|
| 489 |
-
# for future PDF report functionality.
|
| 490 |
@app.route('/dashboard')
|
| 491 |
@login_required
|
| 492 |
def dashboard():
|
| 493 |
-
# Only recruiters and admins can view the dashboard
|
| 494 |
if current_user.role not in ('recruiter', 'admin'):
|
| 495 |
flash('You do not have permission to access the dashboard.', 'warning')
|
| 496 |
return redirect(url_for('index'))
|
| 497 |
|
| 498 |
-
# Fetch jobs posted by the current recruiter
|
| 499 |
posted_jobs = Job.query.filter_by(recruiter_id=current_user.id).all()
|
| 500 |
job_ids = [job.id for job in posted_jobs]
|
| 501 |
|
| 502 |
candidates_with_scores = []
|
| 503 |
if job_ids:
|
| 504 |
-
# Fetch applications associated with these job IDs
|
| 505 |
candidate_apps = Application.query.filter(Application.job_id.in_(job_ids)).all()
|
| 506 |
|
| 507 |
-
# Helper to compute a match score based on skills overlap
|
| 508 |
def compute_score(application):
|
| 509 |
try:
|
| 510 |
-
# Extract candidate skills from stored JSON
|
| 511 |
candidate_features = json.loads(application.extracted_features) if application.extracted_features else {}
|
| 512 |
candidate_skills = candidate_features.get('skills', [])
|
| 513 |
-
# Retrieve the job's required skills and parse from JSON
|
| 514 |
job_skills = json.loads(application.job.skills) if application.job and application.job.skills else []
|
| 515 |
if not job_skills:
|
| 516 |
-
return ('Medium', 2)
|
| 517 |
|
| 518 |
-
# Compute case‑insensitive intersection
|
| 519 |
candidate_set = {s.lower() for s in candidate_skills}
|
| 520 |
job_set = {s.lower() for s in job_skills}
|
| 521 |
common = candidate_set & job_set
|
| 522 |
ratio = len(common) / len(job_set) if job_set else 0
|
| 523 |
|
| 524 |
-
# Map ratio to qualitative score
|
| 525 |
if ratio >= 0.75:
|
| 526 |
return ('Excellent', 4)
|
| 527 |
elif ratio >= 0.5:
|
|
@@ -533,7 +512,6 @@ def dashboard():
|
|
| 533 |
except Exception:
|
| 534 |
return ('Medium', 2)
|
| 535 |
|
| 536 |
-
# Build a list of candidate applications with computed scores
|
| 537 |
for app_record in candidate_apps:
|
| 538 |
score_label, score_value = compute_score(app_record)
|
| 539 |
candidates_with_scores.append({
|
|
@@ -542,15 +520,34 @@ def dashboard():
|
|
| 542 |
'score_value': score_value
|
| 543 |
})
|
| 544 |
|
| 545 |
-
# Sort candidates from highest to lowest score
|
| 546 |
candidates_with_scores.sort(key=lambda item: item['score_value'], reverse=True)
|
| 547 |
|
| 548 |
return render_template('dashboard.html', candidates=candidates_with_scores)
|
| 549 |
|
| 550 |
if __name__ == '__main__':
|
| 551 |
print("Starting Codingo application...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
with app.app_context():
|
| 553 |
db.create_all()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
# Use port from environment or default to 7860
|
| 556 |
port = int(os.environ.get('PORT', 7860))
|
|
|
|
| 54 |
shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
|
| 55 |
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
|
| 56 |
CHATBOT_DB_DIR = "/tmp/chroma_db"
|
| 57 |
+
|
| 58 |
# -----------------------------------------------------------------------------
|
| 59 |
# Hugging Face model configuration
|
| 60 |
#
|
| 61 |
# The chatbot uses a small conversational model hosted on Hugging Face. To
|
| 62 |
# allow easy experimentation, the model name can be overridden via the
|
| 63 |
# ``HF_CHATBOT_MODEL`` environment variable. If unset, we fall back to
|
| 64 |
+
# ``microsoft/DialoGPT-medium`` which provides better conversational quality
|
| 65 |
+
# than blenderbot for our use case.
|
| 66 |
+
HF_MODEL_NAME = os.getenv("HF_CHATBOT_MODEL", "microsoft/DialoGPT-medium")
|
| 67 |
|
| 68 |
# Global Hugging Face model and tokenizer. These variables remain ``None``
|
| 69 |
# until ``init_hf_model()`` is called. They are reused across all chatbot
|
| 70 |
# requests to prevent repeatedly loading the large model into memory.
|
| 71 |
+
_hf_model = None
|
| 72 |
+
_hf_tokenizer = None
|
| 73 |
|
| 74 |
+
def init_hf_model():
|
| 75 |
"""
|
| 76 |
Initialise the Hugging Face conversational model and tokenizer.
|
| 77 |
|
|
|
|
| 84 |
if _hf_model is not None and _hf_tokenizer is not None:
|
| 85 |
return
|
| 86 |
|
| 87 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 88 |
import torch
|
| 89 |
|
| 90 |
model_name = HF_MODEL_NAME
|
| 91 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 92 |
+
|
| 93 |
+
print(f"Loading model {model_name} on device {device}")
|
| 94 |
|
| 95 |
+
# Load tokenizer and model from Hugging Face
|
|
|
|
|
|
|
|
|
|
| 96 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 97 |
+
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
|
| 98 |
+
|
| 99 |
+
# Set pad token to eos token if not set
|
| 100 |
+
if tokenizer.pad_token is None:
|
| 101 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 102 |
|
| 103 |
_hf_model = model
|
| 104 |
_hf_tokenizer = tokenizer
|
| 105 |
+
print(f"Model loaded successfully on {device}")
|
| 106 |
+
|
| 107 |
_chatbot_embedder = None
|
| 108 |
_chatbot_collection = None
|
| 109 |
|
| 110 |
+
def init_chatbot():
|
| 111 |
"""Initialise the Chroma vector DB with chatbot.txt content."""
|
| 112 |
global _chatbot_embedder, _chatbot_collection
|
| 113 |
if _chatbot_embedder is not None and _chatbot_collection is not None:
|
|
|
|
| 121 |
|
| 122 |
os.makedirs(CHATBOT_DB_DIR, exist_ok=True)
|
| 123 |
|
| 124 |
+
# Read and parse the chatbot knowledge base
|
| 125 |
+
try:
|
| 126 |
+
with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
|
| 127 |
+
text = f.read()
|
| 128 |
+
except FileNotFoundError:
|
| 129 |
+
print(f"Warning: {CHATBOT_TXT_PATH} not found, using default content")
|
| 130 |
+
text = """
|
| 131 |
+
Codingo is an AI-powered recruitment platform designed to streamline job applications,
|
| 132 |
+
candidate screening, and hiring. We make hiring smarter, faster, and fairer through
|
| 133 |
+
automation and intelligent recommendations.
|
| 134 |
+
"""
|
| 135 |
+
|
| 136 |
+
# Split text into chunks for vector search
|
| 137 |
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
|
| 138 |
+
docs = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
|
| 139 |
|
| 140 |
+
# Initialize embedder
|
| 141 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 142 |
embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
|
| 143 |
|
| 144 |
+
# Initialize Chroma client
|
| 145 |
+
client = chromadb.Client(Settings(
|
| 146 |
+
persist_directory=CHATBOT_DB_DIR,
|
| 147 |
+
anonymized_telemetry=False,
|
| 148 |
+
is_persistent=True
|
| 149 |
+
))
|
| 150 |
+
|
| 151 |
+
# Get or create collection
|
| 152 |
collection = client.get_or_create_collection("chatbot")
|
| 153 |
+
|
| 154 |
+
# Check if collection is empty and populate if needed
|
| 155 |
try:
|
| 156 |
+
existing = collection.get(limit=1)
|
| 157 |
if not existing.get("documents"):
|
| 158 |
raise ValueError("Empty Chroma DB")
|
| 159 |
except Exception:
|
| 160 |
+
# Add documents to collection
|
| 161 |
+
ids = [f"doc_{i}" for i in range(len(docs))]
|
| 162 |
+
collection.add(
|
| 163 |
+
documents=docs,
|
| 164 |
+
embeddings=embeddings.tolist(),
|
| 165 |
+
ids=ids
|
| 166 |
+
)
|
| 167 |
+
print(f"Added {len(docs)} documents to Chroma DB")
|
| 168 |
|
| 169 |
_chatbot_embedder = embedder
|
| 170 |
_chatbot_collection = collection
|
| 171 |
|
|
|
|
| 172 |
def get_chatbot_response(query: str) -> str:
    """Generate a reply to the user's query using Chroma + Hugging Face model.

    The query is embedded, the closest chunks are retrieved from the Chroma
    collection, and a prompt built from those chunks is passed to the
    causal language model loaded by ``init_hf_model()``.

    Args:
        query: Raw user message.

    Returns:
        The generated reply. A fixed prompt-for-input message is returned for
        blank queries, and a fixed apology message is returned if anything
        in the retrieval/generation pipeline raises.
    """
    # Reject blank input first so we never pay for model / vector-store
    # initialisation on a request that cannot be answered anyway.
    if not query or not query.strip():
        return "Please type a question about the Codingo platform."

    try:
        # Imported locally so ``torch.no_grad()`` below cannot hit a NameError:
        # in the code visible here torch is only imported inside
        # ``init_hf_model()`` and under the ``__main__`` guard.
        # NOTE(review): a module-level import may exist outside this view.
        import torch

        init_chatbot()
        init_hf_model()

        embedder = _chatbot_embedder
        collection = _chatbot_collection
        model = _hf_model
        tokenizer = _hf_tokenizer
        device = model.device

        # Retrieve context from Chroma
        query_embedding = embedder.encode([query])[0]
        results = collection.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=3
        )
        # Tolerate a missing, ``None`` or empty "documents" entry instead of
        # raising on the ``[0]`` lookup.
        documents = (results or {}).get("documents") or [[]]
        retrieved_docs = documents[0] or []
        context = "\n".join(retrieved_docs[:3])  # Limit context to top 3 results

        # Build conversational prompt
        system_instruction = (
            "You are LUNA AI, a helpful assistant for the Codingo recruitment platform. "
            "Use the provided context to answer questions about Codingo. "
            "If the question is not related to Codingo, politely redirect the conversation. "
            "Keep responses concise and friendly."
        )
        prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"

        # Tokenize with truncation so the prompt never exceeds 512 tokens
        inputs = tokenizer.encode(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        ).to(device)
        prompt_length = inputs.shape[1]

        # Generate response
        with torch.no_grad():
            output_ids = model.generate(
                inputs,
                max_length=prompt_length + 150,
                num_beams=3,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                early_stopping=True
            )

        # A causal LM returns prompt + continuation. Decode only the newly
        # generated tokens so the reply is isolated even when truncation has
        # cut the trailing "LUNA AI:" marker out of the prompt.
        generated_ids = output_ids[0][prompt_length:]
        response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        # If the model repeated the speaker tag, keep only the last turn
        if "LUNA AI:" in response:
            response = response.split("LUNA AI:")[-1].strip()

        # Fallback if response is empty
        if not response:
            response = "I'm here to help you with questions about the Codingo platform. What would you like to know?"

        return response

    except Exception as e:
        # Deliberate best-effort boundary: the chat widget always gets a
        # friendly message; the underlying error is printed for the logs.
        print(f"Chatbot error: {str(e)}")
        return "I'm having trouble processing your request. Please try again or ask about Codingo's features, job matching, or how to use the platform."
|
| 248 |
|
| 249 |
# Initialize Flask app
|
| 250 |
app = Flask(
|
|
|
|
| 252 |
static_folder='backend/static',
|
| 253 |
static_url_path='/static',
|
| 254 |
template_folder='backend/templates',
|
| 255 |
+
instance_path=safe_instance_path
|
| 256 |
)
|
| 257 |
|
| 258 |
# The secret key signs session cookies, so prefer a value injected through the
# environment. The previous literal stays as the fallback so existing
# deployments keep working.
# NOTE(review): the fallback is committed to the repository; set SECRET_KEY in
# any real deployment.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'saadi')
|
| 259 |
|
|
|
|
| 260 |
# Cookie configuration for Hugging Face Spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
app.config['SESSION_COOKIE_SAMESITE'] = 'None'
|
| 262 |
app.config['SESSION_COOKIE_SECURE'] = True
|
| 263 |
app.config['REMEMBER_COOKIE_SAMESITE'] = 'None'
|
| 264 |
app.config['REMEMBER_COOKIE_SECURE'] = True
|
| 265 |
|
| 266 |
# Configure the database connection
|
|
|
|
|
|
|
| 267 |
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:////tmp/codingo.db'
|
| 268 |
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
# Create necessary directories in writable locations
|
| 271 |
os.makedirs('/tmp/static/audio', exist_ok=True)
|
|
|
|
| 287 |
app.register_blueprint(auth_bp)
|
| 288 |
app.register_blueprint(interview_api, url_prefix="/api")
|
| 289 |
|
| 290 |
+
# Routes
|
| 291 |
@app.route('/')
|
| 292 |
def index():
|
| 293 |
return render_template('index.html')
|
|
|
|
| 307 |
def apply(job_id):
|
| 308 |
job = Job.query.get_or_404(job_id)
|
| 309 |
if request.method == 'POST':
|
|
|
|
|
|
|
| 310 |
file = request.files.get('resume')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
features, error, filepath = handle_resume_upload(file)
|
| 312 |
|
|
|
|
|
|
|
|
|
|
| 313 |
if error:
|
| 314 |
flash("Resume upload failed. Please try again.", "danger")
|
| 315 |
return render_template('apply.html', job=job)
|
| 316 |
|
|
|
|
|
|
|
|
|
|
| 317 |
def parse_entries(raw_value: str):
|
| 318 |
import re
|
| 319 |
entries = []
|
| 320 |
if raw_value:
|
|
|
|
| 321 |
for item in re.split(r'[\n,;]+', raw_value):
|
| 322 |
item = item.strip()
|
| 323 |
if item:
|
|
|
|
| 334 |
"education": parse_entries(education_input)
|
| 335 |
}
|
| 336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
application = Application(
|
| 338 |
job_id=job_id,
|
| 339 |
user_id=current_user.id,
|
|
|
|
| 359 |
).order_by(Application.date_applied.desc()).all()
|
| 360 |
return render_template('my_applications.html', applications=applications)
|
| 361 |
|
|
|
|
| 362 |
# Chatbot API endpoint
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    """Handle chatbot queries from the frontend.

    Expects a JSON object with a ``message`` field.

    Returns:
        ``{"response": <reply>}`` on success; ``{"error": ...}`` with status
        400 when the message is missing or blank, or with status 500 when
        producing the reply raises.
    """
    data = request.get_json(silent=True)

    # A valid JSON body need not be an object (it may be a list, string or
    # number). Those have no ``get``, so treat them as "no message" and answer
    # 400 rather than letting an AttributeError surface as a 500.
    message = data.get('message') if isinstance(data, dict) else None

    # JSON ``null`` must not be stringified into the literal query "None".
    user_input = str(message).strip() if message is not None else ''

    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    try:
        # Get chatbot response
        reply = get_chatbot_response(user_input)
        return jsonify({"response": reply})
    except Exception as exc:
        # Log the detail server-side; the client only gets a generic message.
        print(f"Chatbot endpoint error: {exc}", file=sys.stderr)
        return jsonify({"error": "I'm having trouble right now. Please try again."}), 500
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
@app.route('/parse_resume', methods=['POST'])
|
| 382 |
def parse_resume():
|
| 383 |
file = request.files.get('resume')
|
| 384 |
features, error, filepath = handle_resume_upload(file)
|
| 385 |
|
|
|
|
|
|
|
| 386 |
if error:
|
| 387 |
return {"error": "Error processing resume. Please try again."}, 400
|
| 388 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
if not features:
|
| 390 |
return {
|
| 391 |
"name": "",
|
|
|
|
| 397 |
"summary": ""
|
| 398 |
}, 200
|
| 399 |
|
|
|
|
|
|
|
| 400 |
response = {
|
| 401 |
"name": features.get('name', ''),
|
| 402 |
"email": features.get('email', ''),
|
|
|
|
| 424 |
cv_data = json.loads(application.extracted_features)
|
| 425 |
return render_template("interview.html", job=job, cv=cv_data)
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
@app.route('/post_job', methods=['GET', 'POST'])
|
| 428 |
@login_required
|
| 429 |
def post_job():
|
|
|
|
| 430 |
if current_user.role not in ('recruiter', 'admin'):
|
| 431 |
flash('You do not have permission to post jobs.', 'warning')
|
| 432 |
return redirect(url_for('jobs'))
|
| 433 |
|
| 434 |
if request.method == 'POST':
|
|
|
|
| 435 |
role_title = request.form.get('role', '').strip()
|
| 436 |
description = request.form.get('description', '').strip()
|
| 437 |
seniority = request.form.get('seniority', '').strip()
|
| 438 |
skills_input = request.form.get('skills', '').strip()
|
| 439 |
company = request.form.get('company', '').strip()
|
| 440 |
|
|
|
|
| 441 |
errors = []
|
| 442 |
if not role_title:
|
| 443 |
errors.append('Job title is required.')
|
|
|
|
| 455 |
flash(err, 'danger')
|
| 456 |
return render_template('post_job.html')
|
| 457 |
|
|
|
|
|
|
|
| 458 |
skills_list = [s.strip() for s in re.split(r'[\n,;]+', skills_input) if s.strip()]
|
| 459 |
skills_json = json.dumps(skills_list)
|
| 460 |
|
|
|
|
| 461 |
new_job = Job(
|
| 462 |
role=role_title,
|
| 463 |
description=description,
|
|
|
|
| 472 |
flash('Job posted successfully!', 'success')
|
| 473 |
return redirect(url_for('jobs'))
|
| 474 |
|
|
|
|
| 475 |
return render_template('post_job.html')
|
| 476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
@app.route('/dashboard')
|
| 478 |
@login_required
|
| 479 |
def dashboard():
|
|
|
|
| 480 |
if current_user.role not in ('recruiter', 'admin'):
|
| 481 |
flash('You do not have permission to access the dashboard.', 'warning')
|
| 482 |
return redirect(url_for('index'))
|
| 483 |
|
|
|
|
| 484 |
posted_jobs = Job.query.filter_by(recruiter_id=current_user.id).all()
|
| 485 |
job_ids = [job.id for job in posted_jobs]
|
| 486 |
|
| 487 |
candidates_with_scores = []
|
| 488 |
if job_ids:
|
|
|
|
| 489 |
candidate_apps = Application.query.filter(Application.job_id.in_(job_ids)).all()
|
| 490 |
|
|
|
|
| 491 |
def compute_score(application):
|
| 492 |
try:
|
|
|
|
| 493 |
candidate_features = json.loads(application.extracted_features) if application.extracted_features else {}
|
| 494 |
candidate_skills = candidate_features.get('skills', [])
|
|
|
|
| 495 |
job_skills = json.loads(application.job.skills) if application.job and application.job.skills else []
|
| 496 |
if not job_skills:
|
| 497 |
+
return ('Medium', 2)
|
| 498 |
|
|
|
|
| 499 |
candidate_set = {s.lower() for s in candidate_skills}
|
| 500 |
job_set = {s.lower() for s in job_skills}
|
| 501 |
common = candidate_set & job_set
|
| 502 |
ratio = len(common) / len(job_set) if job_set else 0
|
| 503 |
|
|
|
|
| 504 |
if ratio >= 0.75:
|
| 505 |
return ('Excellent', 4)
|
| 506 |
elif ratio >= 0.5:
|
|
|
|
| 512 |
except Exception:
|
| 513 |
return ('Medium', 2)
|
| 514 |
|
|
|
|
| 515 |
for app_record in candidate_apps:
|
| 516 |
score_label, score_value = compute_score(app_record)
|
| 517 |
candidates_with_scores.append({
|
|
|
|
| 520 |
'score_value': score_value
|
| 521 |
})
|
| 522 |
|
|
|
|
| 523 |
candidates_with_scores.sort(key=lambda item: item['score_value'], reverse=True)
|
| 524 |
|
| 525 |
return render_template('dashboard.html', candidates=candidates_with_scores)
|
| 526 |
|
| 527 |
if __name__ == '__main__':
|
| 528 |
print("Starting Codingo application...")
|
| 529 |
+
|
| 530 |
+
# Import torch to check GPU availability
|
| 531 |
+
try:
|
| 532 |
+
import torch
|
| 533 |
+
if torch.cuda.is_available():
|
| 534 |
+
print(f"GPU Available: {torch.cuda.get_device_name(0)}")
|
| 535 |
+
print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
|
| 536 |
+
else:
|
| 537 |
+
print("No GPU available, using CPU")
|
| 538 |
+
except ImportError:
|
| 539 |
+
print("PyTorch not installed, chatbot will use CPU")
|
| 540 |
+
|
| 541 |
with app.app_context():
|
| 542 |
db.create_all()
|
| 543 |
+
# Pre-initialize chatbot on startup for faster first response
|
| 544 |
+
print("Initializing chatbot...")
|
| 545 |
+
try:
|
| 546 |
+
init_chatbot()
|
| 547 |
+
init_hf_model()
|
| 548 |
+
print("Chatbot initialized successfully")
|
| 549 |
+
except Exception as e:
|
| 550 |
+
print(f"Chatbot initialization warning: {e}")
|
| 551 |
|
| 552 |
# Use port from environment or default to 7860
|
| 553 |
port = int(os.environ.get('PORT', 7860))
|
requirements.txt
CHANGED
|
@@ -28,15 +28,12 @@ cohere==5.16.1
|
|
| 28 |
# Vector DB
|
| 29 |
qdrant-client==1.14.3
|
| 30 |
|
| 31 |
-
# PDF & DOCX parsing (removed; resume parsing is no longer supported)
|
| 32 |
-
|
| 33 |
# Audio processing
|
| 34 |
ffmpeg-python==0.2.0
|
| 35 |
inputimeout==1.0.4
|
| 36 |
evaluate==0.4.5
|
| 37 |
accelerate==0.29.3
|
| 38 |
huggingface_hub==0.20.3
|
| 39 |
-
# textract removed; no resume parsing
|
| 40 |
bitsandbytes
|
| 41 |
faster-whisper==0.10.0
|
| 42 |
edge-tts==6.1.2
|
|
@@ -46,17 +43,17 @@ gunicorn
|
|
| 46 |
python-dotenv
|
| 47 |
|
| 48 |
# --- Chatbot Dependencies ---
|
| 49 |
-
#
|
| 50 |
-
# the knowledge base stored in ``chatbot/chatbot.txt``. ``chromadb`` provides
|
| 51 |
-
# this capability. We removed the OpenAI dependency in favour of a local
|
| 52 |
-
# Hugging Face model, so no openai package is required. ``flask-cors`` is
|
| 53 |
-
# retained to allow cross‑origin requests should the chat UI be decoupled in
|
| 54 |
-
# the future.
|
| 55 |
chromadb>=0.4.0
|
|
|
|
| 56 |
flask-cors>=4.0.0
|
| 57 |
|
| 58 |
# Audio format conversion (critical for WebM/WAV handling)
|
| 59 |
pydub>=0.25.1
|
| 60 |
|
| 61 |
# HTTP client for API calls, with better error handling
|
| 62 |
-
requests>=2.31.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Vector DB
|
| 29 |
qdrant-client==1.14.3
|
| 30 |
|
|
|
|
|
|
|
| 31 |
# Audio processing
|
| 32 |
ffmpeg-python==0.2.0
|
| 33 |
inputimeout==1.0.4
|
| 34 |
evaluate==0.4.5
|
| 35 |
accelerate==0.29.3
|
| 36 |
huggingface_hub==0.20.3
|
|
|
|
| 37 |
bitsandbytes
|
| 38 |
faster-whisper==0.10.0
|
| 39 |
edge-tts==6.1.2
|
|
|
|
| 43 |
python-dotenv
|
| 44 |
|
| 45 |
# --- Chatbot Dependencies ---
|
| 46 |
+
# Vector database for semantic search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
chromadb>=0.4.0
|
| 48 |
+
# CORS support, so cross-origin requests work if the chat UI is decoupled in the future
|
| 49 |
flask-cors>=4.0.0
|
| 50 |
|
| 51 |
# Audio format conversion (critical for WebM/WAV handling)
|
| 52 |
pydub>=0.25.1
|
| 53 |
|
| 54 |
# Better error handling for API calls
|
| 55 |
+
requests>=2.31.0
|
| 56 |
+
|
| 57 |
+
# Additional dependencies for improved chatbot functionality
|
| 58 |
+
# Note: We're using DialoGPT which requires transformers (already included above)
|
| 59 |
+
# No OpenAI dependency needed - using Hugging Face models instead
|