Instructions to use purvbhor-10/prompt-optimizer-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use purvbhor-10/prompt-optimizer-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("google/gemma-2b")
model = PeftModel.from_pretrained(base_model, "purvbhor-10/prompt-optimizer-lora")
```
- Transformers
How to use purvbhor-10/prompt-optimizer-lora with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="purvbhor-10/prompt-optimizer-lora")
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("purvbhor-10/prompt-optimizer-lora", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use purvbhor-10/prompt-optimizer-lora with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "purvbhor-10/prompt-optimizer-lora"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "purvbhor-10/prompt-optimizer-lora",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/purvbhor-10/prompt-optimizer-lora
- SGLang
How to use purvbhor-10/prompt-optimizer-lora with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "purvbhor-10/prompt-optimizer-lora" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "purvbhor-10/prompt-optimizer-lora",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "purvbhor-10/prompt-optimizer-lora" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "purvbhor-10/prompt-optimizer-lora",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use purvbhor-10/prompt-optimizer-lora with Docker Model Runner:
docker model run hf.co/purvbhor-10/prompt-optimizer-lora
| # ============================================================ | |
| # PHASE 3 — Testing the trained model | |
| # Run: python phase3_test_model.py | |
| # Make sure lora-adapter/ folder is in the same directory | |
| # ============================================================ | |
| import torch | |
| import json | |
| import os | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import PeftModel | |
# Banner announcing this phase of the pipeline.
print("=" * 60)
print(" PHASE 3: Testing Prompt Optimizer")
print("=" * 60)

# Hugging Face id of the base model the LoRA adapter is applied to.
BASE_MODEL = "google/gemma-2b"
# Local directory expected to contain the trained LoRA adapter.
ADAPTER_PATH = "./lora-adapter"

# ── Check adapter exists ────────────────────────────────────
# The adapter must be a directory, so a stray file with the same name is
# rejected here instead of failing later inside the adapter loader.
if not os.path.isdir(ADAPTER_PATH):
    print("\n❌ lora-adapter/ folder not found!")
    print(" → Download it from Colab after training finishes.")
    # Raise SystemExit directly: the bare ``exit`` helper is injected by the
    # ``site`` module and is not guaranteed to be available.
    raise SystemExit(1)

# ── Load model ──────────────────────────────────────────────
print("\n[1/3] Loading model + LoRA adapter (may take 1–2 min)...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Wrap the base model with the adapter weights stored in ADAPTER_PATH.
model = PeftModel.from_pretrained(model, ADAPTER_PATH)
# This script only generates text; no training happens here.
model.eval()
print(" ✅ Model loaded")
| # ── Inference function ────────────────────────────────────── | |
def improve_prompt(weak_prompt: str, max_new_tokens: int = 250) -> str:
    """Generate an improved version of *weak_prompt* with the loaded model.

    The prompt is wrapped in the ``### Weak Prompt:`` / ``### Improved Prompt:``
    template, a continuation is sampled from the module-level ``model``, and
    only that continuation is returned.

    Args:
        weak_prompt: The short prompt to rewrite.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated improved prompt with surrounding whitespace stripped.
    """
    input_text = f"### Weak Prompt:\n{weak_prompt}\n\n### Improved Prompt:\n"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Remember how many tokens belong to the prompt so they can be dropped
    # from the generated sequence below.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Decoding only the
    # new tokens avoids searching the text for the template marker, which
    # picks the wrong segment when the marker shows up more than once.
    new_tokens = output[0][prompt_length:]
    improved = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # If the model runs on into another templated example, keep only the
    # answer to the prompt that was actually asked.
    improved = improved.split("### Weak Prompt:", 1)[0]
    return improved.strip()
# ── 15 test prompts (diverse topics) ───────────────────────
# Deliberately short, underspecified prompts for the model to rewrite.
test_prompts = [
    "write about dogs",
    "explain machine learning",
    "help me code",
    "tell me about space",
    "make a diet plan",
    "write an email",
    "summarize history",
    "explain climate change",
    "how to learn python",
    "write a story",
    "explain blockchain",
    "give me recipe ideas",
    "help with my resume",
    "explain quantum computing",
    "plan a road trip",
]

print(f"\n[2/3] Testing on {len(test_prompts)} prompts...")
print("-" * 60)

# One {"weak", "improved"} record per test prompt, in input order.
results = []
for i, prompt in enumerate(test_prompts, 1):
    print(f"\n[{i}/{len(test_prompts)}] Weak: {prompt}")
    improved = improve_prompt(prompt)
    print(f" Improved:\n{improved}")
    print("-" * 60)
    results.append({"weak": prompt, "improved": improved})

# ── Save results ────────────────────────────────────────────
os.makedirs("data", exist_ok=True)
with open("data/test_results.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(results, f, indent=2, ensure_ascii=False)
print("\n[3/3] Results saved")

print("\n" + "=" * 60)
print(" ✅ PHASE 3 COMPLETE!")
print(" 📁 Results → data/test_results.json")
print(" ➡️ Next: Open dashboard.html to visualize results")
print("=" * 60)