GarGerry committed
Commit 9a7bebe · verified · 1 Parent(s): 4777e40

Update app.py

Files changed (1)
  1. app.py +18 -42
app.py CHANGED
@@ -1,47 +1,23 @@
- import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
-
- # Load the model and tokenizer
- model_name = "meta-llama/Llama-3.3-70B-Instruct"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     device_map="auto",
-     torch_dtype=torch.float16
- )
-
- # Chatbot function
- def chat(input_text):
-     # Tokenize the input
-     inputs = tokenizer(
-         input_text,
-         return_tensors="pt",
-         truncation=True,   # Truncate if the input is too long
-         max_length=512     # Cap the input length
-     ).to("cuda")
-
-     # Generate output with the model
-     outputs = model.generate(
-         **inputs,
-         max_length=200,          # Maximum output length
-         top_p=0.9,               # Nucleus (cumulative-probability) sampling
-         temperature=0.7,         # Controls output creativity
-         num_return_sequences=1   # Number of responses to generate
-     )
-
-     # Decode the output into text
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return response
-
- # Build the Gradio interface
- iface = gr.Interface(
-     fn=chat,
-     inputs="text",
-     outputs="text",
-     title="Llama Chatbot",
-     description="AI chatbot using the Llama-3.3-70B-Instruct model."
- )
-
- # Launch Gradio
- iface.launch()
+ from huggingface_hub import InferenceClient
+ import os
+
+ # Read the token from the repository secrets
+ api_token = os.getenv("LLM")
+
+ # Initialize the Inference Client
+ client = InferenceClient(api_key=api_token)
+
+ # Message from the user
+ messages = [
+     {"role": "user", "content": "What is the capital of France?"}
+ ]
+
+ # Run inference with the model
+ completion = client.chat.completions.create(
+     model="meta-llama/Llama-3.3-70B-Instruct",
+     messages=messages,
+     max_tokens=500
+ )
+
+ # Print the model's answer
+ print(completion.choices[0].message)
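
Note that print(completion.choices[0].message) prints the whole message object; the reply text itself is in its content field. As committed, the script also answers a single hard-coded question and drops the Gradio UI that the old version provided. Below is a minimal sketch, not part of this commit, of how that UI could be wired to the new InferenceClient backend. Like the committed code, it assumes the "LLM" repository secret holds a valid Hugging Face API token; the chat wrapper is illustrative.

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Assumption: the "LLM" repository secret holds a valid HF API token.
client = InferenceClient(api_key=os.getenv("LLM"))

def chat(input_text):
    # Call the hosted model instead of loading the 70B weights locally.
    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=[{"role": "user", "content": input_text}],
        max_tokens=500,
    )
    # The reply text is in the message object's content field.
    return completion.choices[0].message.content

iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    description="AI chatbot using the Llama-3.3-70B-Instruct model.",
)

iface.launch()

Calling the hosted Inference API this way trades the local GPU requirement for a network dependency, which is why the torch and transformers imports disappear from the file.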