amkyawdev committed on
Commit
125cba5
·
verified ·
1 Parent(s): 640cc6d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +17 -14
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
- from ctransformers import AutoModelForCausalLM
3
- import os
4
 
5
  # Model repo and filename
6
  model_repo = "amkyawdev/amkyaw-dev-v1"
@@ -12,26 +12,29 @@ llm = None
12
  def get_llm():
13
  global llm
14
  if llm is None:
15
- # Download and load model from Hugging Face (public model, no token needed)
16
- llm = AutoModelForCausalLM.from_pretrained(
17
- model_repo,
18
- model_file=model_filename,
19
- model_type="llama",
20
- context_length=1024,
21
- threads=2
 
 
 
22
  )
23
  return llm
24
 
25
- def generate(prompt, temperature=0.8, max_tokens=256):
26
  try:
27
  model = get_llm()
28
  response = model(
29
  prompt,
30
- max_new_tokens=max_tokens,
31
  temperature=temperature,
32
- stop=["</s>", "assistant:"]
33
  )
34
- return response
35
  except Exception as e:
36
  return f"Error: {str(e)}"
37
 
@@ -42,7 +45,7 @@ with gr.Blocks(title="amkyaw-coder") as demo:
42
  with gr.Column():
43
  prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Enter your prompt here...")
44
  temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
45
- max_tokens = gr.Slider(64, 1024, value=256, step=64, label="Max Tokens")
46
  submit = gr.Button("Generate", variant="primary")
47
 
48
  with gr.Column():
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
+ from huggingface_hub import hf_hub_download
4
 
5
  # Model repo and filename
6
  model_repo = "amkyawdev/amkyaw-dev-v1"
 
12
  def get_llm():
13
  global llm
14
  if llm is None:
15
+ model_path = hf_hub_download(
16
+ repo_id=model_repo,
17
+ filename=model_filename
18
+ )
19
+ llm = Llama(
20
+ model_path=model_path,
21
+ n_ctx=512,
22
+ n_threads=2,
23
+ n_gpu_layers=0,
24
+ verbose=False
25
  )
26
  return llm
27
 
28
+ def generate(prompt, temperature=0.8, max_tokens=128):
29
  try:
30
  model = get_llm()
31
  response = model(
32
  prompt,
33
+ max_tokens=max_tokens,
34
  temperature=temperature,
35
+ stop=["</s>", "assistant"]
36
  )
37
+ return response["choices"][0]["text"].strip()
38
  except Exception as e:
39
  return f"Error: {str(e)}"
40
 
 
45
  with gr.Column():
46
  prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Enter your prompt here...")
47
  temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
48
+ max_tokens = gr.Slider(32, 512, value=128, step=32, label="Max Tokens")
49
  submit = gr.Button("Generate", variant="primary")
50
 
51
  with gr.Column():
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  gradio>=4.0.0
2
  huggingface_hub>=0.20.0
3
- ctransformers>=0.2.0
 
1
  gradio>=4.0.0
2
  huggingface_hub>=0.20.0
3
+ llama-cpp-python>=0.2.0