Waheeb2001 committed on
Commit
431dec4
·
verified ·
1 Parent(s): df2f11e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +53 -39
main.py CHANGED
@@ -1,46 +1,60 @@
1
  from ctransformers import AutoModelForCausalLM
2
- from fastapi import FastAPI, Form
3
- from pydantic import BaseModel
4
- import logging
5
 
6
- # Set up logging
7
- logging.basicConfig(level=logging.INFO)
 
8
 
9
- # Initialize FastAPI app
10
- app = FastAPI()
 
 
 
11
 
12
- # Load the GGUF model once
13
- try:
14
- llm = AutoModelForCausalLM.from_pretrained(
15
- "zephyr-7b-beta.Q4_K_S.gguf",
16
- model_type="mistral",
17
- max_new_tokens=1096,
18
- threads=3
19
- )
20
- logging.info("Model loaded successfully")
21
- except Exception as e:
22
- logging.error(f"Model failed to load: {e}")
23
- raise e
24
 
25
- # Define Pydantic model for input validation
26
- class ValidationModel(BaseModel):
27
- prompt: str
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Root endpoint for health checks and UI
30
- @app.get("/")
31
- def read_root():
32
- return {
33
- "status": "running",
34
- "message": "Zephyr LLM API is active",
35
- "endpoints": ["/llm_on_cpu (POST)"]
36
- }
 
 
 
 
 
37
 
38
- # LLM inference endpoint
39
- @app.post("/llm_on_cpu")
40
- async def stream(item: ValidationModel):
41
- system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
42
- E_INST = "</s>"
43
- user, assistant = "<|user|>", "<|assistant|>"
44
- prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt.strip()}{E_INST}\n{assistant}\n"
45
- response = llm(prompt)
46
- return {"response": response}
 
import logging

import gradio as gr
from ctransformers import AutoModelForCausalLM
 
 
3
 
4
# Footer markdown rendered under the chat widget (see gr.Markdown below).
# Fixed typos from the original ("Genrative AI, LLM,etc", casing of the
# LinkedIn/GitHub link labels).
greety = """
Follow us on [Gathnex](https://medium.com/@gathnex), [LinkedIn](https://www.linkedin.com/company/gathnex/) and [GitHub](https://github.com/gathnexadmin) for more updates on Generative AI, LLMs, etc. A special thanks to the Gathnex team members who made a significant contribution to this project.
"""
7
 
8
# Load the quantized Zephyr GGUF model once at import time so every chat
# request reuses the same in-memory weights.  Failures are logged and
# re-raised: the app is useless without the model, so startup must abort.
try:
    llm = AutoModelForCausalLM.from_pretrained(
        "zephyr-7b-beta.Q4_K_S.gguf",
        model_type="mistral",  # Zephyr is a Mistral-architecture fine-tune
        max_new_tokens=1096,   # generation budget per call
        threads=3,             # CPU threads used for inference
    )
    logging.info("Model loaded successfully")
except Exception as e:
    logging.error("Model failed to load: %s", e)
    raise
13
 
14
def stream(prompt, UL):
    """Wrap *prompt* in the Zephyr chat template and run the model.

    ``UL`` is the chat history that ``gr.ChatInterface`` passes as the
    second positional argument; it is deliberately ignored, so every turn
    is answered without prior context.  NOTE(review): despite the name,
    this returns the full completion in one shot rather than streaming.
    """
    eos = "</s>"
    system = "You are a helpful AI assistant"
    # Assemble: system turn, user turn, then the assistant tag the model
    # continues from.  The resulting text is byte-identical to the
    # original single f-string template.
    templated = (
        f"{system}{eos}\n"
        f"<|user|>\n{prompt.strip()}{eos}\n"
        f"<|assistant|>\n"
    )
    return llm(templated)
 
 
 
 
 
 
20
 
21
# Page-level CSS injected into the Gradio Blocks layout: centers the page
# heading, styles the "Duplicate Space" button, and constrains the chat
# column width to 900px.
css = """
h1 {
text-align: center;
}
#duplicate-button {
margin: auto;
color: white;
background: #1565c0;
border-radius: 100vh;
}
.contain {
max-width: 900px;
margin: auto;
padding-top: 1.5rem;
}
"""
37
 
38
# Chat widget wired to the model wrapper above.  The commented-out
# credential inputs left over from the original template (OpenAI key /
# LinkedIn token textboxes) were dead code and have been removed.
chat_interface = gr.ChatInterface(
    fn=stream,       # called as fn(message, history)
    stop_btn=None,   # generation is blocking, so a stop button is moot
    examples=[
        ["explain Large language model"],
        ["what is quantum computing"],
    ],
)
51
 
52
# Assemble the page: header, credit line, duplicate button, chat widget,
# footer.  The original gr.HTML markup opened <h1>/<h3>/<center> twice and
# never closed them; fixed to well-formed HTML with the same rendering.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1><center>Gathnex Free LLM Deployment Space</center></h1>")
    gr.HTML("<h3><center><a href='https://medium.com/@gathnex'>Gathnex AI</a>💬</center></h3>")
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(greety)

if __name__ == "__main__":
    # Queue up to 10 pending requests so concurrent users are serialized
    # instead of erroring out while the CPU model is busy.
    demo.queue(max_size=10).launch()