amkyawdev committed on
Commit ebb8517 · verified · 1 Parent(s): 4aa3683

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +31 -9
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,20 +1,42 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import os
 
-# Use environment variable for token (set as secret in Space)
-token = os.environ.get("HF_TOKEN", "")
-client = InferenceClient("amkyawdev/amkyaw-dev-v1", token=token)
+# Model repo and filename
+model_repo = "amkyawdev/amkyaw-dev-v1"
+model_filename = "amkyaw-coder-1.5b-instruct.gguf"
+
+# Initialize model (lazy load)
+llm = None
+
+def get_llm():
+    global llm
+    if llm is None:
+        # Download model from Hugging Face
+        model_path = hf_hub_download(
+            repo_id=model_repo,
+            filename=model_filename,
+            token=os.environ.get("HF_TOKEN", "")
+        )
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=2048,
+            n_threads=4,
+            n_gpu_layers=0
+        )
+    return llm
 
 def generate(prompt, temperature=0.8, max_tokens=512):
     try:
-        response = client.text_generation(
-            prompt=prompt,
+        model = get_llm()
+        response = model(
+            prompt,
+            max_tokens=max_tokens,
             temperature=temperature,
-            max_new_tokens=max_tokens,
-            do_sample=True if temperature > 0 else False
+            stop=["</s>", "assistant:"]
         )
-        return response
+        return response['choices'][0]['text']
     except Exception as e:
         return f"Error: {str(e)}"
 
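The hunk ends at the generate helper; the Gradio UI wiring sits below this point in app.py and is not shown in the commit view. As a rough sketch only, assuming a plain gr.Interface (component names and slider ranges here are guesses, not the Space's actual layout), generate() could be exposed like this:

# Hypothetical wiring for generate(); the Space's real interface
# lies below the diffed hunk and is not part of this commit.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt", lines=4),
        gr.Slider(0.0, 1.5, value=0.8, step=0.05, label="Temperature"),
        gr.Slider(16, 2048, value=512, step=16, label="Max tokens"),
    ],
    outputs=gr.Textbox(label="Completion", lines=8),
    title="amkyaw-dev-v1 (GGUF via llama-cpp-python)",
)

if __name__ == "__main__":
    demo.launch()

Lazy-loading the Llama instance in get_llm() keeps Space startup fast: the GGUF download and model initialization are deferred until the first request instead of being paid at import time.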
 
requirements.txt CHANGED
@@ -1,2 +1,3 @@
 gradio>=4.0.0
-huggingface_hub>=0.20.0
+huggingface_hub>=0.20.0
+llama-cpp-python>=0.2.0
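With both pins in place, the new code path can be exercised outside the Space. Below is a standalone smoke test mirroring app.py's download-then-load flow, assuming HF_TOKEN is exported; the prompt string is illustrative:

# Standalone smoke test mirroring the diff's new code path.
# repo_id and filename are copied from app.py above; HF_TOKEN
# must be set so hf_hub_download can fetch the GGUF weights.
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="amkyawdev/amkyaw-dev-v1",
    filename="amkyaw-coder-1.5b-instruct.gguf",
    token=os.environ.get("HF_TOKEN", ""),
)
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, n_gpu_layers=0)
out = llm("Write a Python function that reverses a string.",
          max_tokens=128, temperature=0.2, stop=["</s>", "assistant:"])
print(out["choices"][0]["text"])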