EREN121232 committed on
Commit
1923dae
·
verified ·
1 Parent(s): 64be745

Add Space app

Browse files
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+
4
+ import gradio as gr
5
+ from huggingface_hub import hf_hub_download
6
+ from llama_cpp import Llama
7
+
8
+
9
# Where the GGUF weights live on the Hugging Face Hub, and the label shown in
# the UI. Each value can be overridden through an environment variable of the
# same name.
MODEL_REPO_ID = os.environ.get("MODEL_REPO_ID", "EREN121232/MAJESTIC-FIN-R1-gguf")
MODEL_FILENAME = os.environ.get("MODEL_FILENAME", "MAJESTIC-FIN-R1-Q8_0.gguf")
MODEL_LABEL = os.environ.get("MODEL_LABEL", "MAJESTIC-FIN-R1 Q8_0")

# Context window size passed to the Llama constructor.
N_CTX = int(os.environ.get("N_CTX", "4096"))

# Thread count: CPU_CORES takes precedence over N_THREADS; when neither is
# set, fall back to os.cpu_count(), or 2 if that is unavailable.
N_THREADS = int(
    os.environ.get(
        "CPU_CORES",
        os.environ.get("N_THREADS", str(os.cpu_count() or 2)),
    )
)

# Lazily created model instance (populated by get_model()).
_MODEL = None
# Held while checking/creating _MODEL so only one caller builds it.
_MODEL_LOCK = threading.Lock()
# Held around each chat completion call so inference runs one at a time.
_INFER_LOCK = threading.Lock()
18
+
19
+
20
def get_model() -> Llama:
    """Return the shared ``Llama`` instance, creating it on first use.

    The first caller downloads ``MODEL_FILENAME`` from ``MODEL_REPO_ID`` via
    ``hf_hub_download`` and constructs the model from the returned path; later
    callers get the cached instance. The whole check-and-create sequence runs
    under ``_MODEL_LOCK`` so concurrent first calls build the model only once.
    """
    global _MODEL
    with _MODEL_LOCK:
        # Already initialised by an earlier call: hand back the cached model.
        if _MODEL is not None:
            return _MODEL

        gguf_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
        )
        _MODEL = Llama(
            model_path=gguf_path,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            n_gpu_layers=0,
            verbose=False,
        )
        return _MODEL
36
+
37
+
38
def generate(
    prompt: str,
    system_prompt: str,
    temperature: float,
    max_tokens: int,
    top_p: float,
    repeat_penalty: float,
) -> str:
    """Run one single-turn chat completion and return the reply text.

    Args:
        prompt: The user message. ``None`` or whitespace-only input is treated
            as empty.
        system_prompt: Optional system message; left out of the request when
            it is ``None`` or blank.
        temperature: Sampling temperature, coerced to ``float``.
        max_tokens: Maximum number of tokens to generate, coerced to ``int``.
        top_p: Nucleus-sampling threshold, coerced to ``float``.
        repeat_penalty: Repetition penalty, coerced to ``float``.

    Returns:
        The assistant message content with surrounding whitespace removed, or
        the literal ``"Please enter a prompt."`` when ``prompt`` is empty.
        An absent (``None``) message content yields an empty string.
    """
    # API clients may send null for a textbox value, so tolerate None here
    # instead of failing with AttributeError on .strip().
    prompt = (prompt or "").strip()
    system_prompt = (system_prompt or "").strip()

    if not prompt:
        return "Please enter a prompt."

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    llm = get_model()
    # Only one completion runs against the shared model at a time.
    with _INFER_LOCK:
        response = llm.create_chat_completion(
            messages=messages,
            temperature=float(temperature),
            max_tokens=int(max_tokens),
            top_p=float(top_p),
            repeat_penalty=float(repeat_penalty),
        )

    # Guard against a missing content field rather than crashing on None.
    content = response["choices"][0]["message"]["content"]
    return (content or "").strip()
61
+
62
+
63
with gr.Blocks(title="MAJESTIC FIN R1 Free API") as demo:
    # Page header; the displayed model name comes from MODEL_LABEL.
    gr.Markdown(
        f"""
        # MAJESTIC FIN R1 Free API

        Public CPU deployment for `{MODEL_LABEL}` backed by `llama-cpp-python`.
        The API endpoint name is `/chat`.
        """
    )

    # Main input and output areas.
    prompt = gr.Textbox(
        placeholder="Ask about finance, markets, accounting, or your fine-tuned task.",
        lines=8,
        label="Prompt",
    )
    output = gr.Textbox(lines=14, label="Response")

    # Sampling controls, collapsed by default.
    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            value="You are MAJESTIC-FIN-R1, a helpful finance-focused assistant.",
            lines=4,
            label="System Prompt",
        )
        temperature = gr.Slider(
            minimum=0.0, maximum=1.5, value=0.7, step=0.05, label="Temperature"
        )
        max_tokens = gr.Slider(
            minimum=64, maximum=1024, value=256, step=32, label="Max Tokens"
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top P"
        )
        repeat_penalty = gr.Slider(
            minimum=1.0, maximum=1.5, value=1.1, step=0.05, label="Repeat Penalty"
        )

    run_button = gr.Button("Generate", variant="primary")

    # Clickable sample prompts that fill the Prompt box.
    _example_prompts = [
        ["Summarize the key risks in a company's balance sheet."],
        ["Explain EBITDA vs free cash flow in simple terms."],
        ["Give a short market outlook for a cautious investor."],
    ]
    gr.Examples(examples=_example_prompts, inputs=prompt)

    # Both triggers feed generate() the same components, in its parameter order.
    _generation_inputs = [
        prompt,
        system_prompt,
        temperature,
        max_tokens,
        top_p,
        repeat_penalty,
    ]

    # Button click: registered under the API name "chat".
    run_button.click(
        fn=generate,
        inputs=_generation_inputs,
        outputs=output,
        api_name="chat",
        show_progress="minimal",
        concurrency_limit=1,
    )

    # Submitting from the Prompt textbox runs the same handler.
    prompt.submit(
        fn=generate,
        inputs=_generation_inputs,
        outputs=output,
        show_progress="minimal",
        concurrency_limit=1,
    )
118
+
119
+
120
if __name__ == "__main__":
    # Configure the request queue (max_size=16) and then start the app.
    demo.queue(max_size=16)
    demo.launch()