kartik2627 committed on
Commit
68d8cd7
·
verified ·
1 Parent(s): fdebec8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -2,9 +2,9 @@ import torch
2
  import streamlit as st
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
- # Load the model and tokenizer
6
- model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
7
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
 
10
  # Set up the Streamlit app interface
@@ -20,8 +20,8 @@ if st.button("Generate Response"):
20
  with st.spinner("Generating response..."):
21
  # Tokenize input
22
  tokenized_prompt = tokenizer(prompt, return_tensors="pt")
23
- input_ids = tokenized_prompt['input_ids'].cuda() if torch.cuda.is_available() else tokenized_prompt['input_ids']
24
- attention_mask = tokenized_prompt['attention_mask'].cuda() if torch.cuda.is_available() else tokenized_prompt['attention_mask']
25
 
26
  # Generate the model's response
27
  response_token_ids = model.generate(
 
2
  import streamlit as st
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
+ # Use a smaller model for testing; switch to the larger model when GPU resources are available
6
+ model_name = "gpt2"
7
+ model = AutoModelForCausalLM.from_pretrained(model_name)
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
 
10
  # Set up the Streamlit app interface
 
20
  with st.spinner("Generating response..."):
21
  # Tokenize input
22
  tokenized_prompt = tokenizer(prompt, return_tensors="pt")
23
+ input_ids = tokenized_prompt['input_ids']
24
+ attention_mask = tokenized_prompt['attention_mask']
25
 
26
  # Generate the model's response
27
  response_token_ids = model.generate(