kartik2627 committed
Commit 946d9b6 · verified · 1 Parent(s): 1b2cd08

Create app.py

Files changed (1)
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
+ import torch
+ import streamlit as st
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Load the model and tokenizer
+ model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Set up the Streamlit app interface
+ st.title("LLM Text Classifier")
+ st.write("Enter text below, and the language model will classify or respond to it.")
+
+ # Take user input
+ prompt = st.text_input("Enter your text here:")
+
+ if st.button("Generate Response"):
+     # Check if input is provided
+     if prompt:
+         with st.spinner("Generating response..."):
+             # Tokenize input
+             tokenized_prompt = tokenizer(prompt, return_tensors="pt")
+             input_ids = tokenized_prompt['input_ids'].cuda() if torch.cuda.is_available() else tokenized_prompt['input_ids']
+             attention_mask = tokenized_prompt['attention_mask'].cuda() if torch.cuda.is_available() else tokenized_prompt['attention_mask']
+
+             # Generate the model's response
+             response_token_ids = model.generate(
+                 input_ids,
+                 attention_mask=attention_mask,
+                 max_new_tokens=50,
+                 pad_token_id=tokenizer.eos_token_id
+             )
+
+             # Decode and display the generated text
+             generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens=True)
+             st.write("**Model's Response:**", generated_text)
+     else:
+         st.warning("Please enter some text before generating a response.")
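
A note on this diff: Streamlit re-executes app.py from the top on every widget interaction, so as committed `from_pretrained` would reload the 70B checkpoint on each button press. Below is a minimal sketch of a cached variant, assuming Streamlit's `st.cache_resource` decorator is available (Streamlit 1.18+); the helper name `load_model_and_tokenizer` is ours, not part of the commit:

import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"

@st.cache_resource  # load the weights once per process and reuse them across reruns
def load_model_and_tokenizer():
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()

st.title("LLM Text Classifier")
prompt = st.text_input("Enter your text here:")

if st.button("Generate Response"):
    if prompt:
        with st.spinner("Generating response..."):
            # .to(model.device) follows wherever accelerate placed the model,
            # rather than assuming plain .cuda() is correct under device_map="auto"
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            output_ids = model.generate(
                **inputs,  # passes input_ids and attention_mask together
                max_new_tokens=50,
                pad_token_id=tokenizer.eos_token_id,
            )
            st.write("**Model's Response:**", tokenizer.decode(output_ids[0], skip_special_tokens=True))
    else:
        st.warning("Please enter some text before generating a response.")

Either version runs with `streamlit run app.py`. Bear in mind that a 70B model in bfloat16 needs roughly 140 GB of accelerator memory (about 2 bytes per parameter), so on smaller hosts `device_map="auto"` will shard or offload the layers and generation will be slow; a smaller instruct checkpoint is a common stand-in for local testing.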