Spaces:

EREN121232
/

MAJESTIC-FIN-R1-Free-API

Build error

App Files Files Community

EREN121232 committed on Apr 24

Commit

1923dae

verified ·

1 Parent(s): 64be745

Add Space app

Browse files

Files changed (1) hide show

app.py +121 -0

app.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import os
+import threading
+import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "EREN121232/MAJESTIC-FIN-R1-gguf")
+MODEL_FILENAME = os.getenv("MODEL_FILENAME", "MAJESTIC-FIN-R1-Q8_0.gguf")
+MODEL_LABEL = os.getenv("MODEL_LABEL", "MAJESTIC-FIN-R1 Q8_0")
+N_CTX = int(os.getenv("N_CTX", "4096"))
+N_THREADS = int(os.getenv("CPU_CORES", os.getenv("N_THREADS", str(os.cpu_count() or 2))))
+_MODEL = None
+_MODEL_LOCK = threading.Lock()
+_INFER_LOCK = threading.Lock()
+def get_model() -> Llama:
+    global _MODEL
+    with _MODEL_LOCK:
+        if _MODEL is None:
+            model_path = hf_hub_download(
+                repo_id=MODEL_REPO_ID,
+                filename=MODEL_FILENAME,
+            )
+            _MODEL = Llama(
+                model_path=model_path,
+                n_ctx=N_CTX,
+                n_threads=N_THREADS,
+                n_gpu_layers=0,
+                verbose=False,
+            )
+    return _MODEL
+def generate(prompt: str, system_prompt: str, temperature: float, max_tokens: int, top_p: float, repeat_penalty: float) -> str:
+    prompt = prompt.strip()
+    system_prompt = system_prompt.strip()
+    if not prompt:
+        return "Please enter a prompt."
+    messages = []
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    messages.append({"role": "user", "content": prompt})
+    llm = get_model()
+    with _INFER_LOCK:
+        response = llm.create_chat_completion(
+            messages=messages,
+            temperature=float(temperature),
+            max_tokens=int(max_tokens),
+            top_p=float(top_p),
+            repeat_penalty=float(repeat_penalty),
+        )
+    return response["choices"][0]["message"]["content"].strip()
+with gr.Blocks(title="MAJESTIC FIN R1 Free API") as demo:
+    gr.Markdown(
+        f"""
+        # MAJESTIC FIN R1 Free API
+        Public CPU deployment for `{MODEL_LABEL}` backed by `llama-cpp-python`.
+        The API endpoint name is `/chat`.
+        """
+    )
+    prompt = gr.Textbox(
+        label="Prompt",
+        lines=8,
+        placeholder="Ask about finance, markets, accounting, or your fine-tuned task.",
+    )
+    output = gr.Textbox(label="Response", lines=14)
+    with gr.Accordion("Generation Settings", open=False):
+        system_prompt = gr.Textbox(
+            label="System Prompt",
+            lines=4,
+            value="You are MAJESTIC-FIN-R1, a helpful finance-focused assistant.",
+        )
+        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
+        max_tokens = gr.Slider(64, 1024, value=256, step=32, label="Max Tokens")
+        top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top P")
+        repeat_penalty = gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repeat Penalty")
+    run_button = gr.Button("Generate", variant="primary")
+    gr.Examples(
+        examples=[
+            ["Summarize the key risks in a company's balance sheet."],
+            ["Explain EBITDA vs free cash flow in simple terms."],
+            ["Give a short market outlook for a cautious investor."],
+        ],
+        inputs=prompt,
+    )
+    run_button.click(
+        fn=generate,
+        inputs=[prompt, system_prompt, temperature, max_tokens, top_p, repeat_penalty],
+        outputs=output,
+        api_name="chat",
+        show_progress="minimal",
+        concurrency_limit=1,
+    )
+    prompt.submit(
+        fn=generate,
+        inputs=[prompt, system_prompt, temperature, max_tokens, top_p, repeat_penalty],
+        outputs=output,
+        show_progress="minimal",
+        concurrency_limit=1,
+    )
+if __name__ == "__main__":
+    demo.queue(max_size=16).launch()