Waheeb2001 committed on
Commit
431dec4
·
verified ·
1 Parent(s): df2f11e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +53 -39
main.py CHANGED
@@ -1,46 +1,60 @@
1
  from ctransformers import AutoModelForCausalLM
2
- from fastapi import FastAPI, Form
3
- from pydantic import BaseModel
4
- import logging
5
 
6
- # Set up logging
7
- logging.basicConfig(level=logging.INFO)
 
8
 
9
- # Initialize FastAPI app
10
- app = FastAPI()
 
 
 
11
 
12
- # Load the GGUF model once
13
- try:
14
- llm = AutoModelForCausalLM.from_pretrained(
15
- "zephyr-7b-beta.Q4_K_S.gguf",
16
- model_type="mistral",
17
- max_new_tokens=1096,
18
- threads=3
19
- )
20
- logging.info("Model loaded successfully")
21
- except Exception as e:
22
- logging.error(f"Model failed to load: {e}")
23
- raise e
24
 
25
- # Define Pydantic model for input validation
26
- class ValidationModel(BaseModel):
27
- prompt: str
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Root endpoint for health checks and UI
30
- @app.get("/")
31
- def read_root():
32
- return {
33
- "status": "running",
34
- "message": "Zephyr LLM API is active",
35
- "endpoints": ["/llm_on_cpu (POST)"]
36
- }
 
 
 
 
 
37
 
38
- # LLM inference endpoint
39
- @app.post("/llm_on_cpu")
40
- async def stream(item: ValidationModel):
41
- system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
42
- E_INST = "</s>"
43
- user, assistant = "<|user|>", "<|assistant|>"
44
- prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt.strip()}{E_INST}\n{assistant}\n"
45
- response = llm(prompt)
46
- return {"response": response}
 
import logging

import gradio as gr
from ctransformers import AutoModelForCausalLM
 
 
3
 
4
# Footer markdown rendered under the chat widget (see gr.Markdown below).
# Fixed typos from the original ("Genrative AI, LLM,etc", casing of the
# LinkedIn/GitHub link labels).
greety = """
Follow us on [Gathnex](https://medium.com/@gathnex), [LinkedIn](https://www.linkedin.com/company/gathnex/) and [GitHub](https://github.com/gathnexadmin) for more updates on Generative AI, LLMs, etc. A special thanks to the Gathnex team members who made a significant contribution to this project.
"""
7
 
8
# Load the quantized Zephyr GGUF model once at import time so every chat
# request reuses the same in-memory weights.  Failures are logged and
# re-raised: the app is useless without the model, so startup must abort.
try:
    llm = AutoModelForCausalLM.from_pretrained(
        "zephyr-7b-beta.Q4_K_S.gguf",
        model_type="mistral",  # Zephyr is a Mistral-architecture fine-tune
        max_new_tokens=1096,   # generation budget per call
        threads=3,             # CPU threads used for inference
    )
    logging.info("Model loaded successfully")
except Exception as e:
    logging.error("Model failed to load: %s", e)
    raise
13
 
14
def stream(prompt, UL):
    """Wrap *prompt* in the Zephyr chat template and run the model.

    ``UL`` is the chat history that ``gr.ChatInterface`` passes as the
    second positional argument; it is deliberately ignored, so every turn
    is answered without prior context.  NOTE(review): despite the name,
    this returns the full completion in one shot rather than streaming.
    """
    eos = "</s>"
    system = "You are a helpful AI assistant"
    # Assemble: system turn, user turn, then the assistant tag the model
    # continues from.  The resulting text is byte-identical to the
    # original single f-string template.
    templated = (
        f"{system}{eos}\n"
        f"<|user|>\n{prompt.strip()}{eos}\n"
        f"<|assistant|>\n"
    )
    return llm(templated)
 
 
 
 
 
 
20
 
21
# Page-level CSS injected into the Gradio Blocks layout: centers the page
# heading, styles the "Duplicate Space" button, and constrains the chat
# column width to 900px.
css = """
h1 {
text-align: center;
}
#duplicate-button {
margin: auto;
color: white;
background: #1565c0;
border-radius: 100vh;
}
.contain {
max-width: 900px;
margin: auto;
padding-top: 1.5rem;
}
"""
37
 
38
# Chat widget wired to the model wrapper above.  The commented-out
# credential inputs left over from the original template (OpenAI key /
# LinkedIn token textboxes) were dead code and have been removed.
chat_interface = gr.ChatInterface(
    fn=stream,       # called as fn(message, history)
    stop_btn=None,   # generation is blocking, so a stop button is moot
    examples=[
        ["explain Large language model"],
        ["what is quantum computing"],
    ],
)
51
 
52
# Assemble the page: header, credit line, duplicate button, chat widget,
# footer.  The original gr.HTML markup opened <h1>/<h3>/<center> twice and
# never closed them; fixed to well-formed HTML with the same rendering.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1><center>Gathnex Free LLM Deployment Space</center></h1>")
    gr.HTML("<h3><center><a href='https://medium.com/@gathnex'>Gathnex AI</a>💬</center></h3>")
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(greety)

if __name__ == "__main__":
    # Queue up to 10 pending requests so concurrent users are serialized
    # instead of erroring out while the CPU model is busy.
    demo.queue(max_size=10).launch()