AnnaMathews committed on
Commit
88e7f08
Β·
verified Β·
1 Parent(s): 99f19f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -120
app.py CHANGED
@@ -1,121 +1,190 @@
1
- import torch
2
- from datasets import Dataset
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
4
- from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
5
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
6
-
7
- bnb_config = BitsAndBytesConfig(
8
- load_in_4bit=True,
9
- bnb_4bit_compute_dtype=torch.float16,
10
- bnb_4bit_use_double_quant=True,
11
- bnb_4bit_quant_type="nf4",
12
- )
13
-
14
- model = AutoModelForCausalLM.from_pretrained(
15
- model_name,
16
- quantization_config=bnb_config,
17
- device_map="auto"
18
- )
19
-
20
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
21
- tokenizer.pad_token = tokenizer.eos_token
22
- model.config.use_cache = False
23
- model.gradient_checkpointing_enable()
24
- model = prepare_model_for_kbit_training(model)
25
-
26
- lora_config = LoraConfig(
27
- r=8,
28
- lora_alpha=32,
29
- target_modules=["q_proj", "v_proj"],
30
- lora_dropout=0.05,
31
- bias="none",
32
- task_type="CAUSAL_LM"
33
- )
34
-
35
- model = get_peft_model(model, lora_config)
36
- import pandas as pd
37
- from datasets import Dataset
38
-
39
-
40
- # Load data from CSV
41
- df = pd.read_csv("Customer-Support.csv")
42
-
43
-
44
- # Rename columns to match expected keys
45
- df = df.rename(columns={"query": "instruction", "response": "output"})
46
-
47
-
48
- # Select required columns
49
- data = df[["instruction", "output"]].fillna("")
50
-
51
-
52
- # Convert DataFrame to list of dictionaries
53
- data = data.to_dict(orient="records")
54
-
55
-
56
- # Create Hugging Face Dataset
57
- dataset = Dataset.from_list(data)
58
-
59
-
60
- # Format each example
61
- def format_instruction(example):
62
- return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
63
-
64
-
65
- # Map formatted text
66
- dataset = dataset.map(lambda x: {"text": format_instruction(x)})
67
-
68
- def tokenize_function(example):
69
- tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
70
- tokenized["labels"] = tokenized["input_ids"].copy()
71
- return tokenized
72
-
73
- tokenized_dataset = dataset.map(tokenize_function, batched=True)
74
- training_args = TrainingArguments(
75
- output_dir="./tinyllama-qlora-support-bot",
76
- per_device_train_batch_size=2,
77
- gradient_accumulation_steps=4,
78
- learning_rate=2e-4,
79
- logging_dir="./logs",
80
- num_train_epochs=3,
81
- logging_steps=10,
82
- save_total_limit=2,
83
- save_strategy="epoch",
84
- bf16=True,
85
- optim="paged_adamw_8bit"
86
- )
87
- trainer = Trainer(
88
- model=model,
89
- args=training_args,
90
- train_dataset=tokenized_dataset,
91
- tokenizer=tokenizer
92
- )
93
-
94
- trainer.train()
95
- model.save_pretrained("tinyllama-qlora-support-bot")
96
- tokenizer.save_pretrained("tinyllama-qlora-support-bot")
97
- from transformers import pipeline
98
-
99
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
100
-
101
- instruction = "How do I update the app?"
102
- prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
103
-
104
- output = pipe(prompt, max_new_tokens=100)
105
- print(output[0]['generated_text'])
106
  import gradio as gr
107
-
108
- def generate_response(instruction):
109
- prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
110
- output = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
111
- # Extract only the response part
112
- response = output[0]["generated_text"].split("### Response:\n")[-1].strip()
113
- return response
114
-
115
- gr.Interface(
116
- fn=generate_response,
117
- inputs=gr.Textbox(lines=3, label="Enter your question"),
118
- outputs=gr.Textbox(lines=5, label="Support Bot's Response"),
119
- title="πŸ“ž Customer Support Chatbot",
120
- description="Ask a question and get a response from your fine-tuned TinyLLaMA model.",
121
- ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ import re
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from groq import Groq
9
+ from dotenv import load_dotenv
10
+ from faster_whisper import WhisperModel
11
+ from elevenlabs.client import ElevenLabs
12
+ from gtts import gTTS
13
+ import tempfile
14
+
15
# Load environment variables from .env
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# Check API keys: fail fast at startup rather than on the first request.
if not GROQ_API_KEY or not ELEVENLABS_API_KEY:
    raise EnvironmentError("Missing API keys. Please create a .env file with GROQ_API_KEY and ELEVENLABS_API_KEY.")

# Initialize clients.
# NOTE(review): elevenlabs_client is created but not used in the visible
# code below — presumably reserved for a premium TTS path; confirm.
groq_client = Groq(api_key=GROQ_API_KEY)
elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
# "small" Whisper on CPU with int8 quantization (low-memory transcription).
whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
28
+
29
def clean_markdown(text):
    """Strip markdown emphasis/heading/code markers from *text*."""
    # Runs of *, _, # and backtick collapse to nothing in one pass.
    return re.sub(r'[*_#`]+', '', text)
31
+
32
def summarize_resume(resume_text):
    """Ask the Groq LLM for a short structured summary of the resume.

    Only the first 3000 characters are sent to keep the prompt small.
    Returns the model reply with markdown markers stripped.
    """
    prompt = f"""Create a concise summary of this resume highlighting:
1. Professional title/role
2. Years of experience
3. Core skills/competencies
4. Education background
5. Notable achievements

Resume:
{resume_text[:3000]}... [truncated]"""
    reply = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        temperature=0.3,
        messages=[{"role": "user", "content": prompt}],
    )
    return clean_markdown(reply.choices[0].message.content)
48
+
49
def calculate_ats_score(resume_text):
    """Ask the Groq LLM for a 0-100 ATS compatibility score.

    Only the first 3000 characters of the resume are sent. Returns a
    neutral fallback of 50 when the reply contains no parseable number.
    """
    prompt = f"""Analyze this resume and calculate an ATS score (0-100) considering:
1. Keyword optimization (20 pts)
2. Section organization (20 pts)
3. Experience quality (20 pts)
4. Education completeness (20 pts)
5. Readability (20 pts)

Return ONLY the numerical score and nothing else.

Resume:
{resume_text[:3000]}... [truncated]"""
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
        temperature=0,
    )
    raw = response.choices[0].message.content.strip()
    # The model sometimes wraps the number in prose despite the prompt;
    # extract the first integer instead of int()-ing the whole reply
    # (the old bare `except:` masked every failure, including SystemExit).
    match = re.search(r'\d+', raw)
    if match is None:
        return 50  # neutral fallback when no number is present
    # Clamp so a hallucinated out-of-range value cannot break the UI.
    return max(0, min(100, int(match.group())))
70
+
71
def process_resume(file):
    """Index an uploaded PDF resume and return (summary, ATS-score string).

    Persists a FAISS index to ./resume_index as a side effect. On any
    failure, surfaces the error in the UI and returns placeholder text.
    """
    try:
        pages = PyPDFLoader(file.name).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""],
        )
        docs = splitter.split_documents(pages)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Save the vector index locally for later retrieval use.
        FAISS.from_documents(docs, embeddings).save_local("resume_index")
        full_text = "\n".join(doc.page_content for doc in docs)
        gr.Info("βœ… Resume processed successfully!")
        summary = summarize_resume(full_text)
        score = calculate_ats_score(full_text)
        return summary, f"ATS Score: {score}/100"
    except Exception as e:
        # Boundary handler: report in the UI instead of crashing the app.
        gr.Warning(f"❌ Error: {e}")
        return f"Error: {e}", "ATS Score: N/A"
87
+
88
def transcribe_audio(audio_path):
    """Transcribe the recorded audio file with Whisper.

    Returns a placeholder string when no recording was provided.
    """
    if not audio_path:
        return "No audio recorded"
    segments, _ = whisper_model.transcribe(audio_path)
    pieces = [seg.text for seg in segments]
    return " ".join(pieces)
93
+
94
def generate_question(resume_text):
    """Generate one open-ended, conversational interview question.

    Sends only the first 2000 characters of the resume as context.
    """
    prompt = f"""Generate one general interview question focusing on:
- Teamwork experiences
- Challenges overcome
- Learning experiences
- Career motivations
- Problem-solving examples

Make it conversational and open-ended.

Resume Excerpt:
{resume_text[:2000]}... [truncated]"""
    chat = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        temperature=0.7,
        messages=[{"role": "user", "content": prompt}],
    )
    return clean_markdown(chat.choices[0].message.content)
112
+
113
def evaluate_response(question, response_text):
    """Score an interview answer (clarity/confidence/relevance) with suggestions."""
    prompt = f"""Evaluate this interview response on:
1. Clarity (1-5)
2. Confidence (1-5)
3. Relevance (1-5)
4. Suggested improvements

Question: {question}
Response: {response_text}"""
    # Low temperature keeps the rubric-style feedback consistent.
    result = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        temperature=0.2,
        messages=[{"role": "user", "content": prompt}],
    )
    return clean_markdown(result.choices[0].message.content)
128
+
129
def gtts_speak(text):
    """Synthesize *text* to an MP3 with gTTS and return the file path.

    Returns None (after surfacing a UI warning) when *text* is empty or
    synthesis fails. The caller is responsible for the temp file's
    lifetime (Gradio serves it from disk).
    """
    try:
        if not text.strip():
            raise ValueError("Empty text")
        tts = gTTS(text, lang="en", tld="com")
        # mkstemp + close before writing: gTTS reopens the path itself,
        # which fails on Windows while a NamedTemporaryFile handle is
        # still holding the file open.
        fd, path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(path)
        return path
    except Exception as e:
        gr.Warning(f"gTTS Error: {e}")
        return None
140
+
141
# ---- Gradio UI: two tabs (resume analysis, mock interview) ----
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1 style='font-size: 3em; text-align: center;'>πŸš€ Ready Set Hire</h1>")

    with gr.Tab("πŸ“„ Resume Analysis"):
        with gr.Row():
            with gr.Column():
                resume_upload = gr.File(label="πŸ“„ Upload Resume (PDF)", file_types=[".pdf"])
                process_btn = gr.Button("πŸ” Analyze Resume", variant="primary")
            with gr.Column():
                resume_summary = gr.Textbox(label="πŸ“ Resume Summary", lines=10)
                hear_summary_btn = gr.Button("πŸ”Š Hear Summary")
                summary_audio = gr.Audio(visible=True)
                ats_score = gr.Textbox(label="πŸ“Š ATS Compatibility Score", interactive=False)
        # Analyze button fills both the summary and the ATS score boxes.
        process_btn.click(fn=process_resume, inputs=resume_upload, outputs=[resume_summary, ats_score])
        hear_summary_btn.click(fn=gtts_speak, inputs=resume_summary, outputs=summary_audio)

    with gr.Tab("🎀 Mock Interview"):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    label="🎀 Record Your Response",
                    sources=["microphone"],
                    type="filepath",
                    interactive=True
                )
                transcribe_btn = gr.Button("πŸ“ Transcribe Response")
                question_box = gr.Textbox(label="❓ Current Question")
                generate_btn = gr.Button("πŸ€– Generate New Question")
                gtts_question_btn = gr.Button("πŸ”Š Hear Question")
                question_audio = gr.Audio(visible=True)
            with gr.Column():
                transcription = gr.Textbox(label="πŸ’¬ Your Response")
                evaluation = gr.Textbox(label="πŸ“ Feedback", lines=8)
                gtts_feedback_btn = gr.Button("πŸ”Š Hear Feedback")
                feedback_audio = gr.Audio(visible=True)

        transcribe_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=transcription)
        # Questions are conditioned on the summary from the other tab, so
        # the user should analyze a resume first.
        generate_btn.click(fn=generate_question, inputs=resume_summary, outputs=question_box)
        # Feedback is produced automatically whenever the transcription
        # text changes (no explicit "evaluate" button).
        transcription.change(fn=evaluate_response, inputs=[question_box, transcription], outputs=evaluation)
        gtts_question_btn.click(fn=gtts_speak, inputs=question_box, outputs=question_audio)
        gtts_feedback_btn.click(fn=gtts_speak, inputs=evaluation, outputs=feedback_audio)

    gr.Markdown("""
<div style='text-align:center; margin-top:2em; color:gray'>
πŸš€ Built by Cognify.AI
</div>
""")

if __name__ == "__main__":
    demo.launch()