Euryeth committed on
Commit
d331351
·
verified ·
1 Parent(s): 7b7ead5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -36
app.py CHANGED
@@ -1,52 +1,32 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
4
  import os
5
 
6
- # Login using Hugging Face token from environment variable (set via Secrets)
7
  from huggingface_hub import login
8
- hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
9
- if hf_token is None:
10
- raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set")
11
- login(hf_token)
12
 
13
- # Set Hugging Face cache dir (optional but recommended for Spaces)
14
  os.environ['HF_HOME'] = '/tmp/cache'
15
 
16
- model_name = "tiiuae/falcon-rw-1b-instruct"
17
 
18
- # Load tokenizer and model explicitly (better control)
19
  tokenizer = AutoTokenizer.from_pretrained(model_name)
20
-
21
- # Use bfloat16 if on GPU and supported, else fallback to float32
22
- if torch.cuda.is_available():
23
- torch_dtype = torch.bfloat16
24
- device_map = "auto"
25
- else:
26
- torch_dtype = torch.float32
27
- device_map = None
28
-
29
  model = AutoModelForCausalLM.from_pretrained(
30
  model_name,
31
  torch_dtype=torch_dtype,
32
- device_map=device_map,
33
- trust_remote_code=True
34
  )
35
 
36
- # Create text-generation pipeline with tokenizer and model
37
  generator = pipeline(
38
  "text-generation",
39
  model=model,
40
  tokenizer=tokenizer,
41
- device_map=device_map,
42
- torch_dtype=torch_dtype,
43
  )
44
 
45
  def generate_chat_completion(message, history):
46
- """
47
- Simple chat function using Falcon 1B instruct model.
48
- Formats prompt for chat style and returns response.
49
- """
50
  prompt = f"User: {message}\nAssistant:"
51
  output = generator(
52
  prompt,
@@ -54,20 +34,16 @@ def generate_chat_completion(message, history):
54
  temperature=0.9,
55
  top_p=0.9,
56
  repetition_penalty=1.1,
57
- do_sample=True,
58
- eos_token_id=tokenizer.eos_token_id,
59
  )
60
- # Remove the prompt from the generated text to get clean assistant reply
61
- generated_text = output[0]['generated_text']
62
- response = generated_text[len(prompt):].strip()
63
  return response
64
 
65
- # Launch Gradio Chat Interface
66
  gr.ChatInterface(
67
  fn=generate_chat_completion,
68
- title="Falcon 1B Instruct Chatbot",
69
- description="Roleplay-ready chat using Falcon-1B-Instruct",
70
  retry_btn="Retry",
71
  undo_btn="Undo",
72
- clear_btn="Clear",
73
  ).launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
  import os
5
 
 
6
  from huggingface_hub import login
7
+ login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 
 
 
8
 
9
+ torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
10
  os.environ['HF_HOME'] = '/tmp/cache'
11
 
12
+ model_name = "tiiuae/falcon-rw-1b"
13
 
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
15
  model = AutoModelForCausalLM.from_pretrained(
16
  model_name,
17
  torch_dtype=torch_dtype,
18
+ device_map="auto"
 
19
  )
20
 
 
21
  generator = pipeline(
22
  "text-generation",
23
  model=model,
24
  tokenizer=tokenizer,
25
+ device_map="auto",
26
+ torch_dtype=torch_dtype
27
  )
28
 
29
  def generate_chat_completion(message, history):
 
 
 
 
30
  prompt = f"User: {message}\nAssistant:"
31
  output = generator(
32
  prompt,
 
34
  temperature=0.9,
35
  top_p=0.9,
36
  repetition_penalty=1.1,
37
+ do_sample=True
 
38
  )
39
+ response = output[0]['generated_text'].replace(prompt, "").strip()
 
 
40
  return response
41
 
 
42
  gr.ChatInterface(
43
  fn=generate_chat_completion,
44
+ title="Falcon Chatbot",
45
+ description="Roleplay-ready chat using Falcon-RW-1B",
46
  retry_btn="Retry",
47
  undo_btn="Undo",
48
+ clear_btn="Clear"
49
  ).launch()