Gajendra5490 committed
Commit 56634b9 · verified · 1 Parent(s): ffc6533

Update app.py

Files changed (1): app.py (+17, -31)
app.py CHANGED
@@ -1,44 +1,30 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 import os
+import gradio as gr
+from transformers import pipeline
 
-token = os.getenv("HF_TOKEN")  # Safely fetch the token from environment
-# tokenizer = AutoTokenizer.from_pretrained(
-#     "meta-llama/Meta-Llama-3-70B-Instruct",
-#     token=token  # Use the token when loading the model
-# )
+# Load token from environment
+token = os.getenv("HF_TOKEN")
 
-# Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-70B-Instruct", token=token)
-model = AutoModelForCausalLM.from_pretrained(
-    "meta-llama/Meta-Llama-3-70B-Instruct",
-    torch_dtype=torch.float16,
+# Use a pipeline as a high-level helper
+pipe = pipeline(
+    "text-generation",
+    model="meta-llama/Meta-Llama-3-8B-Instruct",
+    token=token,
+    torch_dtype="auto",
     device_map="auto"
 )
 
 # Inference function
-def generate_response(prompt, max_tokens=256, temperature=0.7):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        do_sample=True,
-        top_p=0.95,
-        eos_token_id=tokenizer.eos_token_id
-    )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+def generate_response(prompt):
+    messages = [{"role": "user", "content": prompt}]
+    response = pipe(messages, max_new_tokens=256, temperature=0.7)
+    return response[0]["generated_text"]
 
 # Gradio interface
 gr.Interface(
     fn=generate_response,
-    inputs=[
-        gr.Textbox(lines=4, label="Prompt"),
-        gr.Slider(50, 1024, step=10, value=256, label="Max Tokens"),
-        gr.Slider(0.1, 1.5, step=0.1, value=0.7, label="Temperature")
-    ],
+    inputs=gr.Textbox(lines=4, label="Prompt"),
     outputs=gr.Textbox(label="Generated Response"),
-    title="Meta LLaMA 3 70B Instruct",
-    description="Gradio demo for Meta-Llama-3-70B-Instruct"
+    title="Meta LLaMA 3 8B Instruct",
+    description="Gradio demo for Meta-Llama-3-8B-Instruct using Hugging Face Transformers pipeline"
 ).launch()
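
A note for anyone adapting this Space: when a transformers text-generation pipeline is called with a list of chat messages, recent releases return the whole conversation under "generated_text" (a list of role/content dicts) rather than a plain string, so the new generate_response hands Gradio a Python list. Below is a minimal sketch of one way to return only the assistant's reply; the last-message indexing and the string fallback are assumptions about the pipeline's output format across versions, not something this commit pins down.

    def generate_response(prompt):
        messages = [{"role": "user", "content": prompt}]
        response = pipe(messages, max_new_tokens=256, temperature=0.7)
        generated = response[0]["generated_text"]
        # Assumption: with chat-style input, recent transformers versions
        # echo the full conversation as [{"role": ..., "content": ...}, ...],
        # where the last entry is the model's reply; older versions may
        # return a plain string, hence the fallback.
        if isinstance(generated, str):
            return generated
        return generated[-1]["content"]

Separately, device_map="auto" relies on the accelerate package being installed in the Space, and the gated meta-llama checkpoints still require an HF_TOKEN whose account has accepted the model license.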