amkyawdev committed on
Commit ebb8517 · verified · 1 Parent(s): 4aa3683

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +31 -9
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,20 +1,42 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import os
 
-# Use environment variable for token (set as secret in Space)
-token = os.environ.get("HF_TOKEN", "")
-client = InferenceClient("amkyawdev/amkyaw-dev-v1", token=token)
+# Model repo and filename
+model_repo = "amkyawdev/amkyaw-dev-v1"
+model_filename = "amkyaw-coder-1.5b-instruct.gguf"
+
+# Initialize model (lazy load)
+llm = None
+
+def get_llm():
+    global llm
+    if llm is None:
+        # Download model from Hugging Face
+        model_path = hf_hub_download(
+            repo_id=model_repo,
+            filename=model_filename,
+            token=os.environ.get("HF_TOKEN", "")
+        )
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=2048,
+            n_threads=4,
+            n_gpu_layers=0
+        )
+    return llm
 
 def generate(prompt, temperature=0.8, max_tokens=512):
     try:
-        response = client.text_generation(
-            prompt=prompt,
+        model = get_llm()
+        response = model(
+            prompt,
+            max_tokens=max_tokens,
             temperature=temperature,
-            max_new_tokens=max_tokens,
-            do_sample=True if temperature > 0 else False
+            stop=["</s>", "assistant:"]
         )
-        return response
+        return response['choices'][0]['text']
     except Exception as e:
         return f"Error: {str(e)}"
 
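The hunk ends at the generate helper; the Gradio UI wiring sits below this point in app.py and is not shown in the commit view. As a rough sketch only, assuming a plain gr.Interface (component names and slider ranges here are guesses, not the Space's actual layout), generate() could be exposed like this:

# Hypothetical wiring for generate(); the Space's real interface
# lies below the diffed hunk and is not part of this commit.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt", lines=4),
        gr.Slider(0.0, 1.5, value=0.8, step=0.05, label="Temperature"),
        gr.Slider(16, 2048, value=512, step=16, label="Max tokens"),
    ],
    outputs=gr.Textbox(label="Completion", lines=8),
    title="amkyaw-dev-v1 (GGUF via llama-cpp-python)",
)

if __name__ == "__main__":
    demo.launch()

Lazy-loading the Llama instance in get_llm() keeps Space startup fast: the GGUF download and model initialization are deferred until the first request instead of being paid at import time.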
 
requirements.txt CHANGED
@@ -1,2 +1,3 @@
 gradio>=4.0.0
-huggingface_hub>=0.20.0
+huggingface_hub>=0.20.0
+llama-cpp-python>=0.2.0
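With both pins in place, the new code path can be exercised outside the Space. Below is a standalone smoke test mirroring app.py's download-then-load flow, assuming HF_TOKEN is exported; the prompt string is illustrative:

# Standalone smoke test mirroring the diff's new code path.
# repo_id and filename are copied from app.py above; HF_TOKEN
# must be set so hf_hub_download can fetch the GGUF weights.
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="amkyawdev/amkyaw-dev-v1",
    filename="amkyaw-coder-1.5b-instruct.gguf",
    token=os.environ.get("HF_TOKEN", ""),
)
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, n_gpu_layers=0)
out = llm("Write a Python function that reverses a string.",
          max_tokens=128, temperature=0.2, stop=["</s>", "assistant:"])
print(out["choices"][0]["text"])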