Spaces:

Mehgoss
/

API

Build error

App Files Files Community

Mehgoss committed on Oct 17, 2025

Commit

9a382b7

verified ·

1 Parent(s): 5e4f5d4

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -61

app.py CHANGED Viewed

@@ -1,70 +1,118 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+import os
+print("Starting model download...")
+# Fetch the GGUF weights with hf_hub_download when this module runs; model_path is the value it returns.
+try:
+    # Requests one specific file from the repo by name (no search is performed).
+    # To use a different repo or file, change repo_id / filename here.
+    model_path = hf_hub_download(
+        repo_id="TheBloke/CodeLlama-7B-Python-GGUF",  # Hub repository to download from
+        filename="codellama-7b-python.Q4_K_M.gguf",  # 4.08GB file
+        cache_dir="./models"  # cache location, relative to the working directory
+    )
+    print(f"✓ Model downloaded to: {model_path}")
+except Exception as e:  # report the failure, then re-raise so startup does not continue without model_path
+    print(f"Error downloading model: {e}")
+    raise
+# Load the GGUF model once at module level; generate_code calls this shared llm instance.
+print("Loading model into memory...")
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,  # Context window
+    n_threads=int(os.getenv("N_THREADS", "2")),  # CPU threads; read from the N_THREADS env var, falling back to 2
+    n_batch=512,  # Batch size for prompt processing
+    verbose=True
 )
+print("✓ Model loaded successfully!")
+def generate_code(prompt, max_tokens=500, temperature=0.7):
+    """Generate a completion for ``prompt`` using the module-level ``llm``.
+
+    Args:
+        prompt: Text handed to the model unchanged.
+        max_tokens: Upper bound on the number of generated tokens. Coerced
+            to ``int`` because UI sliders may deliver the value as a float.
+        temperature: Sampling temperature forwarded to the model, coerced
+            to ``float``.
+
+    Returns:
+        The text of the first completion choice. A blank prompt, or any
+        failure while generating, yields a human-readable message instead
+        of raising, so the caller always receives a string to display.
+    """
+    # Skip inference entirely when there is nothing to complete.
+    if not prompt or not prompt.strip():
+        return "Please enter a prompt."
+    try:
+        response = llm(
+            prompt,
+            max_tokens=int(max_tokens),
+            temperature=float(temperature),
+            stop=["</s>", "###", "\n\n\n"],  # Stop sequences
+            echo=False,
+        )
+        return response["choices"][0]["text"]
+    except Exception as e:
+        # Deliberate catch-all boundary: report the failure as the result text.
+        return f"Error generating code: {e}"
+# Build the Gradio UI: a row of two columns (inputs and buttons, then the output box), followed by examples and tips.
+with gr.Blocks(title="CodeLlama Assistant", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🦙 CodeLlama-7B Python Assistant")
+    gr.Markdown("AI-powered code generation using CodeLlama-7B (4GB GGUF model)")
+    with gr.Row():
+        with gr.Column():  # first column: prompt box, sampling sliders, buttons
+            prompt_input = gr.Textbox(
+                label="Enter your coding question or task",
+                placeholder="Write a Python function to...",
+                lines=5
+            )
+            with gr.Row():  # both sliders share one row
+                max_tokens = gr.Slider(
+                    minimum=100,
+                    maximum=1000,
+                    value=500,  # same as generate_code's max_tokens default
+                    step=50,
+                    label="Max Tokens"
+                )
+                temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.7,  # same as generate_code's temperature default
+                    step=0.1,
+                    label="Temperature"
+                )
+            submit_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
+            clear_btn = gr.Button("🗑️ Clear", size="sm")
+        with gr.Column():  # second column: generated text
+            output = gr.Textbox(
+                label="Generated Code",
+                lines=15,
+                show_copy_button=True
+            )
+    # Wire the buttons: Generate calls generate_code; Clear resets the prompt and output boxes.
+    submit_btn.click(
+        fn=generate_code,
+        inputs=[prompt_input, max_tokens, temperature],  # passed positionally, in generate_code's parameter order
+        outputs=output
+    )
+    clear_btn.click(
+        fn=lambda: ("", ""),  # one empty string per output component
+        inputs=None,
+        outputs=[prompt_input, output]
+    )
+    # Example prompts; each example supplies a value for prompt_input.
+    gr.Examples(
+        examples=[
+            ["Write a Python function to calculate fibonacci numbers"],
+            ["Create a binary search tree class with insert and search methods"],
+            ["Write a function to reverse a linked list"],
+            ["Implement quicksort algorithm in Python"],
+            ["Create a decorator to measure function execution time"]
+        ],
+        inputs=prompt_input
+    )
+    gr.Markdown("""
+    ### 💡 Tips:
+    - Be specific in your prompts for better results
+    - Lower temperature (0.3-0.5) for more focused code
+    - Higher temperature (0.7-0.9) for more creative solutions
+    - Model works best for Python code generation
+    """)
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)  # serve on all network interfaces, port 7860