stevafernandes committed on
Commit
fc77a06
·
verified ·
1 Parent(s): 78c60ea

Create appy.py

Browse files
Files changed (1) hide show
  1. appy.py +214 -0
appy.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import spaces
5
+ import os
6
+
7
# Available official Llama models (require access approval from Meta):
OFFICIAL_LLAMA_MODELS = {
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
    "Llama-3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Llama-3.1-405B": "meta-llama/Llama-3.1-405B-Instruct",  # Requires massive GPU resources
}

# Select your model (start with smaller ones for testing).
# FIX: the original indexed OFFICIAL_LLAMA_MODELS["Llama-3.2-8B"], which is
# not a key in the dict above, so the script crashed with KeyError before the
# UI ever started. The 8B instruct model in the catalog is the Llama-3.1 one.
MODEL_ID = OFFICIAL_LLAMA_MODELS["Llama-3.1-8B"]

print(f"Loading official Llama model: {MODEL_ID}")
print("Note: This requires approval from Meta. Request access at:")
print(f"https://huggingface.co/{MODEL_ID}")

# Check for Hugging Face token (required for the gated Llama repos).
# A missing token is only warned about here; the actual failure surfaces in
# the load step below, which sets model_loaded accordingly.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("WARNING: HF_TOKEN not found. You need to:")
    print("1. Request access to Llama models from Meta")
    print("2. Create a Hugging Face access token")
    print("3. Add it as a Space secret named 'HF_TOKEN'")

# Prefer GPU when available; falling back to "cpu" keeps the script runnable
# (if slow) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
33
+
34
# Load the tokenizer and model once at import time so requests are fast and
# the UI can show a clear setup message when loading fails. model_loaded is
# the flag the rest of the file consults before running inference.
try:
    # Load tokenizer with authentication
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,  # gated repo: needs an approved HF access token
        trust_remote_code=False  # Security: Don't execute remote code
    )

    # Load model with authentication
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        # fp16 halves GPU memory; use fp32 on CPU where fp16 is poorly supported
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",  # let accelerate place weights on available devices
        trust_remote_code=False,  # Security: Don't execute remote code
        low_cpu_mem_usage=True  # stream weights in to reduce peak RAM during load
    )

    model_loaded = True
    print(f"✅ Successfully loaded {MODEL_ID}")

except Exception as e:
    # Broad catch is deliberate: any failure (missing token, no access grant,
    # OOM) should leave the app running in "not loaded" mode instead of
    # crashing the Space at startup.
    model_loaded = False
    print(f"❌ Failed to load model: {e}")
    print("\nTo fix this:")
    print("1. Request access at: https://huggingface.co/meta-llama")
    print("2. Create token at: https://huggingface.co/settings/tokens")
    print("3. Add token to Space secrets as 'HF_TOKEN'")
62
+
63
@spaces.GPU(duration=60)
def generate_response(
    message,
    history,
    max_tokens=512,
    temperature=0.1,
    top_p=0.95,
):
    """Produce one assistant reply for *message* given the chat *history*.

    history is a list of [user, assistant] pairs (tuple-style Gradio chat
    format); the assistant entry may be None for an unanswered turn. Returns
    the decoded completion text, or a setup warning when no model is loaded.
    """
    # Guard: the module-level load may have failed (no token / no access).
    if not model_loaded:
        return "⚠️ Model not loaded. Please set up HF_TOKEN and request Llama access from Meta."

    # Rebuild the conversation in the role/content structure that the
    # tokenizer's chat template expects, then append the new user turn.
    conversation = []
    for past_user, past_reply in history:
        conversation.append({"role": "user", "content": past_user})
        if past_reply:
            conversation.append({"role": "assistant", "content": past_reply})
    conversation.append({"role": "user", "content": message})

    # Render the Llama prompt (adds the generation header for the assistant).
    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize, capping the prompt length, and move tensors to the device.
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Sample a completion; inference only, so gradients are disabled.
    with torch.no_grad():
        generated = tokenizer_free_generate = model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            # Llama has no dedicated pad token, so reuse EOS for padding.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt tokens so only the newly generated text is returned.
    prompt_length = encoded["input_ids"].shape[1]
    return tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
110
+
111
# Create Gradio interface.
# All components and handlers are declared inside this Blocks context; the
# nested functions close over the component variables defined here.
with gr.Blocks(title="Official Llama Chat") as demo:
    # Header: security notice plus setup steps; shows the model id only when
    # the module-level load above succeeded.
    gr.Markdown("""
    # 🦙 Official Llama Model Chat

    **IMPORTANT SECURITY NOTICE:**
    - This uses ONLY official Llama models from Meta
    - Never download models from unofficial sources
    - Always verify URLs are from trusted domains

    **Model**: {model_name}

    **Setup Required**:
    1. Request access: [Meta Llama on Hugging Face](https://huggingface.co/meta-llama)
    2. Create token: [Hugging Face Settings](https://huggingface.co/settings/tokens)
    3. Add token to Space secrets as 'HF_TOKEN'
    """.format(model_name=MODEL_ID if model_loaded else "Not loaded - see setup instructions"))

    # Extra banner rendered only when the model failed to load at startup.
    if not model_loaded:
        gr.Markdown("""
        ### ⚠️ Model Not Loaded

        The model could not be loaded. This is usually because:
        - You haven't added your HF_TOKEN to the Space secrets
        - You haven't been granted access to Llama models by Meta

        Please follow the setup instructions above.
        """)

    # Chat history component; state is a list of [user, assistant] pairs
    # (tuple-style history — the format generate_response expects).
    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Sampling controls passed straight through to generate_response.
    # NOTE(review): the slider default temperature (0.7) differs from the
    # function default (0.1); the slider value is what actually gets used.
    with gr.Accordion("Generation Settings", open=False):
        max_tokens = gr.Slider(minimum=50, maximum=2048, value=512, label="Max Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top P")

    clear_btn = gr.Button("Clear Chat")

    # Example prompts (clicking one fills the message box).
    gr.Examples(
        examples=[
            "What are the key principles of secure coding?",
            "Explain the importance of using official software sources",
            "How can I verify if a download link is legitimate?",
        ],
        inputs=msg,
    )

    # Event handlers

    def user_submit(message, history):
        # Clear the textbox and append the user's turn with a pending reply.
        return "", history + [[message, None]]

    def bot_response(history, max_tokens, temperature, top_p):
        # Fill in the assistant half of the last turn appended by user_submit.
        if not history:
            return history

        message = history[-1][0]
        bot_message = generate_response(
            message,
            history[:-1],  # prior turns only; the new message is passed separately
            max_tokens,
            temperature,
            top_p
        )
        history[-1][1] = bot_message
        return history

    # Enter key and Send button trigger the same two-step chain:
    # append the user turn, then generate the assistant reply.
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )

    submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )

    # Returning None resets the chatbot component to an empty history.
    clear_btn.click(lambda: None, outputs=chatbot)

    gr.Markdown("""
    ---
    ### 🔒 Security Best Practices

    1. **Only use official model sources** (meta-llama on Hugging Face)
    2. **Never run code from untrusted sources**
    3. **Verify all URLs before downloading**
    4. **Use access tokens securely** (never share them)
    5. **Report suspicious links** to the platform

    ### 📚 Official Resources
    - [Meta AI](https://ai.meta.com/)
    - [Official Llama Page](https://llama.meta.com/)
    - [Hugging Face Meta-Llama](https://huggingface.co/meta-llama)
    """)

# Standard script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()