# HireFlow Interview Question Generator — Hugging Face Space app
# NOTE(review): the original lines here ("Spaces: / Running / Running") were
# page-scrape residue from the Space's status banner, not source code.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gc
import os
import re

# Cap the CUDA caching-allocator split size to reduce fragmentation on the GPU.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
# Let cuDNN benchmark and pick the fastest kernels; safe here because the
# generation input shapes are effectively stable.
torch.backends.cudnn.benchmark = True

# Lazily-initialized globals shared by load_model_optimized() and the
# Gradio handler; populated once on first request.
model = None
tokenizer = None
def load_model_optimized():
    """Load and cache the HireFlow model and tokenizer (fp16, device_map="auto").

    The loaded pair is stored in the module-level ``model``/``tokenizer``
    globals, so subsequent calls return immediately without reloading.

    Returns:
        tuple: ``(model, tokenizer)`` on success, ``(None, None)`` on failure.
    """
    global model, tokenizer
    if model is not None:
        return model, tokenizer

    model_name = "somriksur/HireFlow-Qwen-Fresh-Pro"
    try:
        # NOTE(review): the status emoji below were mojibake-garbled in the
        # original source; restored to plausible equivalents.
        print("🚀 Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            use_fast=True,
        )
        print("🚀 Loading model with GPU optimization...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,   # half precision: ~50% memory vs fp32
            device_map="auto",           # place layers on GPU(s)/CPU automatically
            low_cpu_mem_usage=True,
            trust_remote_code=True,
            use_safetensors=True,
        )
        model.eval()
        print("✅ Model loaded successfully with GPU optimization!")
        return model, tokenizer
    except Exception as e:
        # Broad catch is deliberate: loading can fail for many reasons
        # (network, OOM, missing weights) and the UI degrades gracefully.
        print(f"❌ Error loading model: {e}")
        return None, None
| def clean_and_extract_questions(text, expected_count=3): | |
| """Extract and clean interview questions from model output""" | |
| # Remove common unwanted patterns | |
| text = re.sub(r'http[s]?://[^\s]+', '', text) # Remove URLs | |
| text = re.sub(r'www\.[^\s]+', '', text) # Remove www links | |
| text = re.sub(r'[\u4e00-\u9fff]+', '', text) # Remove Chinese characters | |
| text = re.sub(r'linkedin\.com[^\s]*', '', text) # Remove LinkedIn | |
| text = re.sub(r'twitter\.com[^\s]*', '', text) # Remove Twitter | |
| # Split into lines and clean | |
| lines = [line.strip() for line in text.split('\n') if line.strip()] | |
| questions = [] | |
| question_patterns = [ | |
| r'^\d+[\.\)]\s*(.+\?)\s*$', # "1. Question?" | |
| r'^[Qq]uestion\s*\d*:?\s*(.+\?)\s*$', # "Question: ..." | |
| r'^(.+\?)\s*$' # Any line ending with ? | |
| ] | |
| for line in lines: | |
| # Skip unwanted lines | |
| if (len(line) < 15 or | |
| 'http' in line.lower() or | |
| 'www.' in line.lower() or | |
| 'linkedin' in line.lower() or | |
| 'twitter' in line.lower() or | |
| 'best practice' in line.lower() and len(line) < 30): | |
| continue | |
| # Try to extract question | |
| for pattern in question_patterns: | |
| match = re.match(pattern, line, re.IGNORECASE) | |
| if match: | |
| question = match.group(1) if len(match.groups()) > 0 else match.group(0) | |
| question = question.strip() | |
| # Validate question quality | |
| if (len(question) > 20 and | |
| question.endswith('?') and | |
| not any(bad in question.lower() for bad in ['http', 'www', 'linkedin', 'twitter'])): | |
| questions.append(question) | |
| break | |
| # If we don't have enough good questions, create fallbacks | |
| if len(questions) < expected_count: | |
| fallback_questions = [ | |
| "What are the key principles you follow when designing scalable applications?", | |
| "How do you approach debugging complex issues in production environments?", | |
| "Describe your experience with modern development practices and tools?" | |
| ] | |
| while len(questions) < expected_count and fallback_questions: | |
| questions.append(fallback_questions.pop(0)) | |
| # Format questions properly | |
| formatted_questions = [] | |
| for i, q in enumerate(questions[:expected_count], 1): | |
| # Clean up any existing numbering | |
| q = re.sub(r'^\(?(\d+)[\.\)]\s*', '', q).strip() | |
| formatted_questions.append(f"{i}. {q}") | |
| return formatted_questions | |
def generate_interface(prompt, max_tokens=300, temperature=0.6):
    """Gradio handler: generate 3 clean interview questions for *prompt*.

    Args:
        prompt: User request describing the role/technology to ask about.
        max_tokens: Requested generation budget; clamped to 250 new tokens.
        temperature: Sampling temperature; clamped into [0.4, 0.7].

    Returns:
        str: Numbered questions (one per line), or an error message.
    """
    current_model, current_tokenizer = load_model_optimized()
    if current_model is None or current_tokenizer is None:
        # NOTE(review): status emoji were mojibake-garbled in the original
        # source; restored to plausible equivalents.
        return "❌ Model failed to load. Please try again."

    try:
        # Free any stale allocations before generating.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Heavily structured system prompt keeps the fine-tuned model on-format.
        system_prompt = """You are a professional technical interviewer. Generate exactly 3 high-quality interview questions.
Rules:
- Each question must be numbered (1., 2., 3.)
- Each question must end with a question mark
- Focus only on the requested technology/role
- Keep questions professional and concise
- No explanations, just questions
- No URLs or links
Request: """
        full_prompt = system_prompt + prompt + "\n\nQuestions:\n"

        inputs = current_tokenizer(
            full_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=False,
        )
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}

        # Conservative sampling settings to suppress rambling and repetition.
        with torch.no_grad():
            outputs = current_model.generate(
                **inputs,
                max_new_tokens=min(max_tokens, 250),
                temperature=max(0.4, min(temperature, 0.7)),
                do_sample=True,
                pad_token_id=current_tokenizer.eos_token_id,
                eos_token_id=current_tokenizer.eos_token_id,
                use_cache=True,
                repetition_penalty=1.5,
                top_p=0.8,
                top_k=25,
                no_repeat_ngram_size=4,
                # NOTE(review): dropped early_stopping=True from the original —
                # it only applies to beam search and triggers a warning when
                # do_sample=True; it had no effect on sampled output.
            )

        generated_text = current_tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Strip the echoed prompt so only the completion remains.
        if full_prompt in generated_text:
            generated_text = generated_text.replace(full_prompt, "").strip()

        questions = clean_and_extract_questions(generated_text, 3)
        if questions:
            result = "\n".join(questions)
        else:
            result = "Unable to generate clean questions. Please try a more specific prompt."

        # Release memory between requests to keep the Space responsive.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        return result
    except Exception as e:
        print(f"Generation error: {e}")
        return f"❌ Generation failed: {str(e)}"
# Gradio UI: one request textbox plus two generation sliders in,
# formatted questions out.
interface = gr.Interface(
    fn=generate_interface,
    inputs=[
        gr.Textbox(
            label="Interview Question Request",
            placeholder="Example: Generate 3 React interview questions for senior developers",
            lines=3,
            max_lines=5,
        ),
        gr.Slider(minimum=100, maximum=400, value=250, label="Max Tokens"),
        gr.Slider(minimum=0.3, maximum=0.8, value=0.6, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Generated Interview Questions", lines=8),
    # NOTE(review): leading emoji was mojibake-garbled in the original title;
    # restored to a plausible equivalent.
    title="🎯 HireFlow Interview Question Generator (GPU Optimized)",
    description="Generate professional, clean interview questions using your custom-trained model. Optimized for quality output.",
    examples=[
        ["Generate 3 React interview questions for senior developers", 250, 0.6],
        ["Create 3 Python backend questions for mid-level engineers", 250, 0.6],
        ["Generate 3 JavaScript questions about async programming", 250, 0.6],
        ["Create 3 Node.js questions for experienced developers", 250, 0.6],
        ["Generate 3 system design questions for senior engineers", 250, 0.6],
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
)
if __name__ == "__main__":
    # Bind on all interfaces so the hosting container can expose the app
    # on the standard Spaces port.
    interface.launch(server_name="0.0.0.0", server_port=7860, share=False)