Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,121 +1,190 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
from datasets import Dataset
|
| 3 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
|
| 4 |
-
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
|
| 5 |
-
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
| 6 |
-
|
| 7 |
-
bnb_config = BitsAndBytesConfig(
|
| 8 |
-
load_in_4bit=True,
|
| 9 |
-
bnb_4bit_compute_dtype=torch.float16,
|
| 10 |
-
bnb_4bit_use_double_quant=True,
|
| 11 |
-
bnb_4bit_quant_type="nf4",
|
| 12 |
-
)
|
| 13 |
-
|
| 14 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 15 |
-
model_name,
|
| 16 |
-
quantization_config=bnb_config,
|
| 17 |
-
device_map="auto"
|
| 18 |
-
)
|
| 19 |
-
|
| 20 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
| 21 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 22 |
-
model.config.use_cache = False
|
| 23 |
-
model.gradient_checkpointing_enable()
|
| 24 |
-
model = prepare_model_for_kbit_training(model)
|
| 25 |
-
|
| 26 |
-
lora_config = LoraConfig(
|
| 27 |
-
r=8,
|
| 28 |
-
lora_alpha=32,
|
| 29 |
-
target_modules=["q_proj", "v_proj"],
|
| 30 |
-
lora_dropout=0.05,
|
| 31 |
-
bias="none",
|
| 32 |
-
task_type="CAUSAL_LM"
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
model = get_peft_model(model, lora_config)
|
| 36 |
-
import pandas as pd
|
| 37 |
-
from datasets import Dataset
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
# Load data from CSV
|
| 41 |
-
df = pd.read_csv("Customer-Support.csv")
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
# Rename columns to match expected keys
|
| 45 |
-
df = df.rename(columns={"query": "instruction", "response": "output"})
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
# Select required columns
|
| 49 |
-
data = df[["instruction", "output"]].fillna("")
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
# Convert DataFrame to list of dictionaries
|
| 53 |
-
data = data.to_dict(orient="records")
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
# Create Hugging Face Dataset
|
| 57 |
-
dataset = Dataset.from_list(data)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
# Format each example
|
| 61 |
-
def format_instruction(example):
|
| 62 |
-
return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
# Map formatted text
|
| 66 |
-
dataset = dataset.map(lambda x: {"text": format_instruction(x)})
|
| 67 |
-
|
| 68 |
-
def tokenize_function(example):
|
| 69 |
-
tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
|
| 70 |
-
tokenized["labels"] = tokenized["input_ids"].copy()
|
| 71 |
-
return tokenized
|
| 72 |
-
|
| 73 |
-
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
| 74 |
-
training_args = TrainingArguments(
|
| 75 |
-
output_dir="./tinyllama-qlora-support-bot",
|
| 76 |
-
per_device_train_batch_size=2,
|
| 77 |
-
gradient_accumulation_steps=4,
|
| 78 |
-
learning_rate=2e-4,
|
| 79 |
-
logging_dir="./logs",
|
| 80 |
-
num_train_epochs=3,
|
| 81 |
-
logging_steps=10,
|
| 82 |
-
save_total_limit=2,
|
| 83 |
-
save_strategy="epoch",
|
| 84 |
-
bf16=True,
|
| 85 |
-
optim="paged_adamw_8bit"
|
| 86 |
-
)
|
| 87 |
-
trainer = Trainer(
|
| 88 |
-
model=model,
|
| 89 |
-
args=training_args,
|
| 90 |
-
train_dataset=tokenized_dataset,
|
| 91 |
-
tokenizer=tokenizer
|
| 92 |
-
)
|
| 93 |
-
|
| 94 |
-
trainer.train()
|
| 95 |
-
model.save_pretrained("tinyllama-qlora-support-bot")
|
| 96 |
-
tokenizer.save_pretrained("tinyllama-qlora-support-bot")
|
| 97 |
-
from transformers import pipeline
|
| 98 |
-
|
| 99 |
-
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 100 |
-
|
| 101 |
-
instruction = "How do I update the app?"
|
| 102 |
-
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
|
| 103 |
-
|
| 104 |
-
output = pipe(prompt, max_new_tokens=100)
|
| 105 |
-
print(output[0]['generated_text'])
|
| 106 |
import gradio as gr
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 5 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 6 |
+
from langchain_community.vectorstores import FAISS
|
| 7 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 8 |
+
from groq import Groq
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
from faster_whisper import WhisperModel
|
| 11 |
+
from elevenlabs.client import ElevenLabs
|
| 12 |
+
from gtts import gTTS
|
| 13 |
+
import tempfile
|
| 14 |
+
|
| 15 |
+
# Pull settings from a local .env file into the process environment.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# Fail fast at import time: no client is built unless both keys are present.
if not (GROQ_API_KEY and ELEVENLABS_API_KEY):
    raise EnvironmentError("Missing API keys. Please create a .env file with GROQ_API_KEY and ELEVENLABS_API_KEY.")

# Service clients shared by every handler below.
groq_client = Groq(api_key=GROQ_API_KEY)
# NOTE(review): elevenlabs_client is not referenced by any function visible in
# this file (speech output goes through gTTS) -- confirm it is still needed.
elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
# Speech-to-text model: "small" checkpoint, CPU inference, int8 compute.
whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
|
| 28 |
+
|
| 29 |
+
def clean_markdown(text):
    """Strip Markdown marker characters from *text*.

    Every run of ``*``, ``_``, ``#`` or backtick characters is deleted; all
    other characters (including the whitespace around the markers) are kept.

    Args:
        text: The string to clean.

    Returns:
        The input with the marker characters removed.
    """
    return re.sub(r'[*_#`]+', '', text)
|
| 31 |
+
|
| 32 |
+
def summarize_resume(resume_text):
    """Ask the Groq chat model for a concise summary of a resume.

    Only the first 3000 characters of the resume are sent. The
    "... [truncated]" marker is appended only when text was actually cut, so
    the model is not told that a short resume is incomplete.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        The model's summary with Markdown marker characters removed.
    """
    max_chars = 3000
    excerpt = resume_text[:max_chars]
    if len(resume_text) > max_chars:
        excerpt += "... [truncated]"

    prompt = (
        "Create a concise summary of this resume highlighting:\n"
        "1. Professional title/role\n"
        "2. Years of experience\n"
        "3. Core skills/competencies\n"
        "4. Education background\n"
        "5. Notable achievements\n"
        "\n"
        "Resume:\n"
        f"{excerpt}"
    )
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
        temperature=0.3,
    )
    # Treat a missing message body as an empty summary instead of crashing.
    return clean_markdown(response.choices[0].message.content or "")
|
| 48 |
+
|
| 49 |
+
def calculate_ats_score(resume_text):
    """Ask the Groq chat model for an ATS score and parse it to an int.

    Only the first 3000 characters of the resume are sent; the
    "... [truncated]" marker is appended only when text was actually cut.

    The model is told to answer with a bare number, but replies such as
    "85/100" or "Score: 85" still occur. The first run of digits in the reply
    is therefore used and clamped to the 0-100 range. If the reply contains
    no digits at all, the neutral fallback 50 is returned.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        An integer score between 0 and 100 inclusive.
    """
    max_chars = 3000
    excerpt = resume_text[:max_chars]
    if len(resume_text) > max_chars:
        excerpt += "... [truncated]"

    prompt = (
        "Analyze this resume and calculate an ATS score (0-100) considering:\n"
        "1. Keyword optimization (20 pts)\n"
        "2. Section organization (20 pts)\n"
        "3. Experience quality (20 pts)\n"
        "4. Education completeness (20 pts)\n"
        "5. Readability (20 pts)\n"
        "\n"
        "Return ONLY the numerical score and nothing else.\n"
        "\n"
        "Resume:\n"
        f"{excerpt}"
    )
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
        temperature=0,
    )

    reply = response.choices[0].message.content or ""
    found = re.search(r"\d+", reply)
    if found is None:
        # Nothing numeric in the reply: keep the original neutral default.
        return 50
    return max(0, min(100, int(found.group())))
|
| 70 |
+
|
| 71 |
+
def process_resume(file):
    """Index an uploaded PDF resume and return its summary and ATS score.

    The PDF is loaded page by page, split into overlapping chunks, embedded
    and saved as a FAISS index under "resume_index". The summary and the ATS
    score are computed from the page text rather than from the chunks,
    because the chunks overlap by 200 characters and would duplicate text.

    Args:
        file: The value delivered by the upload component -- either an object
            exposing the path as ``.name`` or a plain path string. ``None``
            when nothing was uploaded.

    Returns:
        A ``(summary, score_label)`` tuple of strings. On any failure the
        first element is ``"Error: ..."`` and the second ``"ATS Score: N/A"``.
    """
    if file is None:
        # Report a missing upload explicitly instead of leaking the
        # AttributeError text that ``file.name`` would produce.
        gr.Warning("❌ Error: no resume uploaded")
        return "Error: no resume uploaded", "ATS Score: N/A"

    try:
        pdf_path = getattr(file, "name", file)
        pages = PyPDFLoader(pdf_path).load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""],
        )
        docs = splitter.split_documents(pages)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        FAISS.from_documents(docs, embeddings).save_local("resume_index")

        full_text = "\n".join(page.page_content for page in pages)
        gr.Info("✅ Resume processed successfully!")
        return summarize_resume(full_text), f"ATS Score: {calculate_ats_score(full_text)}/100"
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        gr.Warning(f"❌ Error: {e}")
        return f"Error: {e}", "ATS Score: N/A"
|
| 87 |
+
|
| 88 |
+
def transcribe_audio(audio_path):
    """Transcribe a recorded audio file with the shared Whisper model.

    Args:
        audio_path: Path of the recording, or a falsy value when nothing was
            recorded.

    Returns:
        The transcript as a single string, or ``"No audio recorded"`` when
        no path was given.
    """
    if not audio_path:
        return "No audio recorded"

    segments, _ = whisper_model.transcribe(audio_path)
    # Strip each piece so that whitespace already present at a segment edge
    # is not doubled by the joining space.
    return " ".join(segment.text.strip() for segment in segments)
|
| 93 |
+
|
| 94 |
+
def generate_question(resume_text):
    """Ask the Groq chat model for one open-ended interview question.

    Only the first 2000 characters of the given text are sent as context.
    The "... [truncated]" marker is appended only when text was actually cut.

    Args:
        resume_text: Resume text (or a summary of it) used as context.

    Returns:
        The generated question with Markdown marker characters removed.
    """
    max_chars = 2000
    excerpt = resume_text[:max_chars]
    if len(resume_text) > max_chars:
        excerpt += "... [truncated]"

    prompt = (
        "Generate one general interview question focusing on:\n"
        "- Teamwork experiences\n"
        "- Challenges overcome\n"
        "- Learning experiences\n"
        "- Career motivations\n"
        "- Problem-solving examples\n"
        "\n"
        "Make it conversational and open-ended.\n"
        "\n"
        "Resume Excerpt:\n"
        f"{excerpt}"
    )
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
        temperature=0.7,
    )
    # Treat a missing message body as an empty question instead of crashing.
    return clean_markdown(response.choices[0].message.content or "")
|
| 112 |
+
|
| 113 |
+
def evaluate_response(question, response_text):
    """Ask the Groq chat model to grade an interview answer.

    The answer is rated for clarity, confidence and relevance (1-5 each) and
    the model is asked for suggested improvements.

    Args:
        question: The interview question that was asked.
        response_text: The candidate's transcribed answer.

    Returns:
        The model's feedback with Markdown marker characters removed, or
        ``"No response to evaluate."`` when there is no real answer to grade.
    """
    answer = (response_text or "").strip()
    # "No audio recorded" is the placeholder transcribe_audio returns when
    # nothing was captured; grading it (or an empty box) would only waste an
    # API call and produce meaningless feedback.
    if not answer or answer == "No audio recorded":
        return "No response to evaluate."

    prompt = (
        "Evaluate this interview response on:\n"
        "1. Clarity (1-5)\n"
        "2. Confidence (1-5)\n"
        "3. Relevance (1-5)\n"
        "4. Suggested improvements\n"
        "\n"
        f"Question: {question}\n"
        f"Response: {response_text}"
    )
    evaluation = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
        temperature=0.2,
    )
    return clean_markdown(evaluation.choices[0].message.content or "")
|
| 128 |
+
|
| 129 |
+
def gtts_speak(text):
    """Convert *text* to speech with gTTS and return the MP3 file path.

    The audio is written to a temporary ".mp3" file that is deliberately not
    deleted here, because the caller needs the path to stay valid after this
    function returns.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only values
            are rejected.

    Returns:
        Path of the generated MP3 file, or ``None`` if synthesis failed (a
        warning is shown in the UI in that case).
    """
    try:
        if not text or not text.strip():
            raise ValueError("Empty text")

        tts = gTTS(text, lang="en", tld="com")

        # Reserve a unique path, then close our own handle before gTTS
        # writes to it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name

        try:
            tts.save(audio_path)
        except Exception:
            # Do not leave an empty or partial file behind when synthesis fails.
            try:
                os.remove(audio_path)
            except OSError:
                pass
            raise
        return audio_path
    except Exception as e:
        # UI boundary: report the problem and return "no audio".
        gr.Warning(f"gTTS Error: {e}")
        return None
|
| 140 |
+
|
| 141 |
+
# Gradio UI: one tab for resume analysis, one for the mock interview.
# NOTE(review): the nesting below was reconstructed from statement order
# because the original indentation is not available -- confirm the layout.
# NOTE(review): the "π" / "β" prefixes in the labels look like mis-decoded
# emoji; they are reproduced unchanged -- restore them from the original file.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1 style='font-size: 3em; text-align: center;'>π Ready Set Hire</h1>")

    with gr.Tab("π Resume Analysis"):
        with gr.Row():
            with gr.Column():
                resume_upload = gr.File(label="π Upload Resume (PDF)", file_types=[".pdf"])
                process_btn = gr.Button("π Analyze Resume", variant="primary")
            with gr.Column():
                resume_summary = gr.Textbox(label="π Resume Summary", lines=10)
                hear_summary_btn = gr.Button("π Hear Summary")
                summary_audio = gr.Audio(visible=True)
                ats_score = gr.Textbox(label="π ATS Compatibility Score", interactive=False)

        # Analyse the uploaded PDF; read the summary aloud on request.
        process_btn.click(fn=process_resume, inputs=resume_upload, outputs=[resume_summary, ats_score])
        hear_summary_btn.click(fn=gtts_speak, inputs=resume_summary, outputs=summary_audio)

    with gr.Tab("π€ Mock Interview"):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    label="π€ Record Your Response",
                    sources=["microphone"],
                    type="filepath",
                    interactive=True,
                )
                transcribe_btn = gr.Button("π Transcribe Response")
                question_box = gr.Textbox(label="β Current Question")
                generate_btn = gr.Button("π€ Generate New Question")
                gtts_question_btn = gr.Button("π Hear Question")
                question_audio = gr.Audio(visible=True)
            with gr.Column():
                transcription = gr.Textbox(label="π¬ Your Response")
                evaluation = gr.Textbox(label="π Feedback", lines=8)
                gtts_feedback_btn = gr.Button("π Hear Feedback")
                feedback_audio = gr.Audio(visible=True)

        transcribe_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=transcription)
        # Questions are generated from the summary produced on the first tab.
        generate_btn.click(fn=generate_question, inputs=resume_summary, outputs=question_box)
        # NOTE(review): feedback is requested on every change of the
        # transcription box, not only after a transcription finishes.
        transcription.change(fn=evaluate_response, inputs=[question_box, transcription], outputs=evaluation)
        gtts_question_btn.click(fn=gtts_speak, inputs=question_box, outputs=question_audio)
        gtts_feedback_btn.click(fn=gtts_speak, inputs=evaluation, outputs=feedback_audio)

    gr.Markdown(
        "\n"
        "<div style='text-align:center; margin-top:2em; color:gray'>\n"
        "π Built by Cognify.AI\n"
        "</div>\n"
    )

if __name__ == "__main__":
    demo.launch()
|