| import gradio as gr |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig |
| import torch |
|
|
| |
# Hugging Face Hub id of the checkpoint this app serves.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# bitsandbytes settings passed to ``from_pretrained``: 4-bit NF4 weights with
# double quantization, and bfloat16 as the compute dtype.
# NOTE(review): confirm bfloat16 compute is appropriate for the target GPU.
nf4_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)
|
|
| |
def load_model():
    """Load the tokenizer and the quantized causal LM named by ``MODEL_NAME``.

    The model is loaded with ``nf4_config`` and ``device_map="auto"``, then
    passed through ``torch.compile``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    print(f"Loading model {MODEL_NAME}...")

    tok = AutoTokenizer.from_pretrained(MODEL_NAME)

    model_kwargs = {"quantization_config": nf4_config, "device_map": "auto"}
    # NOTE(review): confirm torch.compile actually speeds up ``generate()``
    # for this quantized model; the wrapper may not be on the generation path.
    net = torch.compile(
        AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)
    )

    print("Model loaded and compiled!")
    return tok, net
|
|
# Load once at import time; the request handler reads these module globals.
tokenizer, model = load_model()
|
|
# Upper bound on prompt tokens + generated tokens. A tokenizer's
# ``model_max_length`` can be a huge "no limit" sentinel, which is useless
# (and may overflow) as a truncation length, so it is clamped to this value.
# NOTE(review): 4096 is a conservative guess for a small GPU — tune as needed.
_MAX_CONTEXT_TOKENS = 4096

# Tokens held back so re-tokenizing the assembled prompt cannot overshoot the
# budget because of merges at the seams between the prompt pieces.
_TOKEN_SLACK = 8

_PROMPT_HEAD = "The following is content from a file:\n\n"
_PROMPT_TAIL = "\n\nBased on this, and the following instruction:\n\n"


def _read_uploaded_text(file_obj):
    """Return the text of an upload, however the UI layer delivered it.

    Accepts raw bytes, a filesystem path, an object exposing the path as
    ``.name`` (e.g. a temp-file wrapper), or any object with ``.read()``.
    """
    import os

    if isinstance(file_obj, (bytes, bytearray)):
        raw = bytes(file_obj)
    elif isinstance(file_obj, (str, os.PathLike)):
        with open(file_obj, "rb") as handle:
            raw = handle.read()
    else:
        path = getattr(file_obj, "name", None)
        if isinstance(path, str) and os.path.isfile(path):
            # Re-open by path: an already-open handle may be positioned at
            # EOF or opened in text mode.
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = file_obj.read()

    if isinstance(raw, str):
        return raw
    # ``utf-8-sig`` drops a leading BOM; ``replace`` keeps mostly-text files
    # usable instead of failing the whole request on one bad byte.
    return raw.decode("utf-8-sig", errors="replace")


def _fit_content_to_budget(file_content, prompt_text, max_new_tokens):
    """Trim ``file_content`` so the full prompt fits the input-token budget.

    Returns ``(file_content, input_budget)``. The content is returned
    unchanged when it already fits; otherwise only its tail is dropped, so
    the user's instruction at the end of the prompt is never cut off.
    """
    limit = getattr(tokenizer, "model_max_length", _MAX_CONTEXT_TOKENS)
    if not isinstance(limit, int) or not 0 < limit <= _MAX_CONTEXT_TOKENS:
        limit = _MAX_CONTEXT_TOKENS
    input_budget = max(1, limit - max_new_tokens)

    scaffold = f"{_PROMPT_HEAD}{_PROMPT_TAIL}{prompt_text}"
    scaffold_len = len(tokenizer(scaffold)["input_ids"])
    room = max(0, input_budget - scaffold_len - _TOKEN_SLACK)

    content_ids = tokenizer(file_content, add_special_tokens=False)["input_ids"]
    if len(content_ids) > room:
        file_content = tokenizer.decode(
            content_ids[:room], skip_special_tokens=True
        )
    return file_content, input_budget


def generate_text_from_file(file_obj, prompt_text, max_length=200):
    """Generate text from an uploaded file and a user instruction.

    Args:
        file_obj: The upload — ``None``, raw bytes, a path, an object with a
            ``.name`` path, or a readable file-like object.
        prompt_text: Instruction appended after the file content.
        max_length: Maximum number of new tokens to generate.

    Returns:
        The generated continuation (prompt tokens excluded), or a short
        user-facing message when no file was given or the file has no text.
    """
    if file_obj is None:
        return "Please upload a file."

    file_content = _read_uploaded_text(file_obj)
    if not file_content.strip():
        return "The uploaded file is empty."

    # Shorten the file, not the instruction, when the prompt is too long.
    file_content, input_budget = _fit_content_to_budget(
        file_content, prompt_text, max_length
    )
    full_prompt = f"{_PROMPT_HEAD}{file_content}{_PROMPT_TAIL}{prompt_text}"

    # Truncation here is only a safety net; the content was already fitted.
    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=input_budget,
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
        use_cache=True,
    )

    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
|
|
| |
# UI: a file upload and a free-text instruction go in, plain text comes out.
_upload_box = gr.File(label="Upload Input File (.txt, .md, etc.)")
_prompt_box = gr.Textbox(
    placeholder="e.g., Summarize the main points or answer this question about the file.",
    label="Your Prompt",
)

iface = gr.Interface(
    title="Instant LLM Text Generation from Files on Hugging Face Free Space",
    description="Upload a text file and provide a prompt to get instant, accurate text generation. Optimized for Hugging Face's free T4 GPU.",
    fn=generate_text_from_file,
    inputs=[_upload_box, _prompt_box],
    outputs="textbox",
)

# Start serving the interface.
iface.launch()