Spaces:

omkargarud
/

Genius

Sleeping

Genius / app.py

Update app.py

309d735 verified 10 months ago

1.32 kB

	# app.py

	import torch
	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Load the fine-tuned model and its tokenizer from a local directory.
	model_path = "./fine_tuned_model"  # Change this if the model lives in a different directory
	# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	print("Loading model...")
	tokenizer = AutoTokenizer.from_pretrained(model_path)
	model = AutoModelForCausalLM.from_pretrained(
	    model_path,
	    # Half precision on the GPU, full precision on the CPU.
	    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	)

	# Only an explicit move is needed for the GPU case.
	if device == "cuda":
	    model.to(device)

	def generate_response(prompt):
	    """Generate a sampled text completion for *prompt*.

	    Args:
	        prompt: The question text entered by the user.

	    Returns:
	        The first generated sequence decoded to text with special tokens
	        removed, or an empty string when *prompt* is empty, whitespace-only
	        or None.
	    """
	    # Nothing to answer: skip tokenization and generation entirely.
	    if not prompt or not prompt.strip():
	        return ""

	    # Tokenize to PyTorch tensors and place them on the same device string
	    # used when loading the model.
	    inputs = tokenizer(prompt, return_tensors="pt").to(device)
	    outputs = model.generate(
	        **inputs,
	        max_new_tokens=150,
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.9,
	        num_return_sequences=1,
	        # Use the tokenizer's EOS token id as the padding id.
	        pad_token_id=tokenizer.eos_token_id,
	    )
	    # NOTE(review): the decoded sequence presumably still contains the
	    # prompt text ahead of the completion - confirm that echo is intended.
	    return tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Gradio UI: a question box wired to generate_response and a response box.
	question_box = gr.Textbox(lines=3, label="Your Question")
	response_box = gr.Textbox(lines=10, label="Model Response")

	interface = gr.Interface(
	    fn=generate_response,
	    inputs=question_box,
	    outputs=response_box,
	    title="Genius by OG",
	    description="Ask a question and receive a response from Genius model.",
	)

	# Start the Gradio app when this script runs.
	interface.launch()