from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr
# Step 1: Define the model name from the Hugging Face Hub
# (the published 7B instruct-tuned DeepSeek checkpoint is the "chat" variant)
model_name = "deepseek-ai/deepseek-llm-7b-chat"
# Step 2: Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
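# Optional guard (an assumption about this checkpoint, not something the
# original script did): LLaMA-style tokenizers often ship without a pad
# token, which makes generate() emit a warning, so reuse EOS as the pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token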
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # Use float16 for efficiency on GPU
    device_map="auto",          # Automatically places weights on GPU or CPU
)
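# A lower-memory alternative (a sketch, not what this Space runs, assuming the
# bitsandbytes package is available on the host): load the weights in 4-bit
# instead of float16, which roughly quarters GPU memory for a 7B model.
#
# from transformers import BitsAndBytesConfig
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#     device_map="auto",
# )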
# Step 3: Define a simple function to generate model responses
def chat_function(prompt):
    # Tokenize the prompt and move it to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens bounds the generated text itself; max_length would also
    # count the prompt tokens and can cut a long input off mid-answer
    outputs = model.generate(**inputs, max_new_tokens=300, do_sample=True, temperature=0.7)
    # Decode only the newly generated tokens so the reply doesn't echo the prompt
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return response
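# Optional variant (a sketch, assuming the checkpoint ships a chat template,
# as the DeepSeek chat models do; the function name is hypothetical): format
# the prompt with the model's own chat template so generation matches how the
# model was fine-tuned. Swap it in for chat_function in the Interface below.
def chat_function_templated(prompt):
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(input_ids, max_new_tokens=300, do_sample=True, temperature=0.7)
    # Decode only the newly generated tokens, skipping the templated prompt
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)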
# Step 4: Create the Gradio interface
iface = gr.Interface(
    fn=chat_function,
    inputs=gr.Textbox(lines=5, placeholder="Type your question here..."),
    outputs="text",
    title="🦾 DeepSeek LLM Assistant",
    description="Ask me anything! Powered by DeepSeek LLM 7B Chat 🪐",
)
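# Queue requests so long generations don't time out when several users hit
# the Space at once (a reasonable default for a 7B model on shared hardware).
iface.queue()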
# Step 5: Launch the app
if __name__ == "__main__":
    iface.launch()