Spaces:
Sleeping
Sleeping
| import subprocess | |
| import os | |
| import urllib.request | |
| import gradio as gr | |
def clone_power_infer():
    """Clone the PowerInfer repository into ``./PowerInfer``.

    The clone is skipped when a non-empty ``PowerInfer`` directory already
    exists in the current working directory, because ``git clone`` refuses
    to write into a non-empty destination and would fail on every restart.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a
            non-zero status.
    """
    repo_url = "https://github.com/SJTU-IPADS/PowerInfer.git"
    # Same directory name git would derive from the URL; spelled out so the
    # existence check and the clone target cannot drift apart.
    target_dir = "PowerInfer"

    if os.path.isdir(target_dir) and os.listdir(target_dir):
        return

    # check=True surfaces a failed clone here instead of at a later step.
    subprocess.run(["git", "clone", repo_url, target_dir], check=True)
def install_requirements():
    """Install the packages listed in ``requirements.txt`` with ``pip``.

    The requirements file is resolved relative to the current working
    directory. pip's exit status is not inspected.
    """
    pip_command = ["pip", "install"]
    pip_command += ["-r", "requirements.txt"]
    subprocess.run(pip_command)
def cmake_builds():
    """Configure and compile the project in the current directory with CMake.

    Runs ``cmake -S . -B build`` followed by
    ``cmake --build build --config Release``.

    Raises:
        subprocess.CalledProcessError: if either CMake step exits with a
            non-zero status. The build step is not attempted after a failed
            configure step.
    """
    # Generate the build tree in ./build from the sources in the current dir.
    subprocess.run(["cmake", "-S", ".", "-B", "build"], check=True)
    # Compile the generated build tree, requesting the Release configuration.
    subprocess.run(["cmake", "--build", "build", "--config", "Release"], check=True)
# One-time environment setup, executed at start-up: fetch the sources, move
# into the checkout, install the Python requirements and build the binaries.
clone_power_infer()
os.chdir("PowerInfer")
install_requirements()
cmake_builds()

# exist_ok=True keeps a restart from failing on the already-created directory.
os.makedirs("ReluLLaMA-7B-PowerInfer-GGUF", exist_ok=True)

# URL to download the file from
url = "https://huggingface.co/PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF/resolve/main/llama-7b-relu.powerinfer.gguf"
# Destination path to save the downloaded file
destination_path = "ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf"

# Download the file only when it is not already present. The data is written
# to a temporary ".part" name and renamed afterwards, so an interrupted
# download never leaves a truncated file at the final path.
if not os.path.exists(destination_path):
    _partial_path = destination_path + ".part"
    urllib.request.urlretrieve(url, _partial_path)
    os.replace(_partial_path, destination_path)
def chat(question):
    """Run the PowerInfer ``main`` binary on *question* and return its output.

    The binary and the model file are addressed relative to the current
    working directory. The prompt is passed as a single argv element (no
    shell is involved), so its content is not interpreted by a shell.

    Args:
        question: Text passed to the binary after the ``-p`` flag.

    Returns:
        The captured standard output when the process exits with status 0.
        Otherwise a string starting with ``"Error: "`` followed by the
        captured standard error, or by the OS error message when the binary
        could not be started at all.
    """
    # NOTE(review): "-n 128" / "-t 8" are presumably the number of tokens to
    # generate and the thread count (llama.cpp-style CLI) - confirm against
    # the PowerInfer documentation.
    command = [
        "./build/bin/main",
        "-m", "./ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf",
        "-n", "128",
        "-t", "8",
        "-p", question,
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as err:
        # Binary missing or not executable: report it instead of raising.
        return f"Error: {err}"

    if result.returncode != 0:
        return f"Error: {result.stderr}"
    return result.stdout
# Multi-line text widgets for the user's prompt and the model's reply.
question_box = gr.Textbox(lines=10, placeholder="Enter your question here π")
answer_box = gr.Textbox(lines=10, placeholder="Your answer will be here soon π")

# Web UI that feeds the input text box into chat() and shows its return value.
screen = gr.Interface(
    fn=chat,
    inputs=question_box,
    outputs=answer_box,
    title="Inference with Powerinfer π©π»βπ»πβπ»π‘",
    description="This app aims to facilitate the inference of LLMs using Powerinferπ‘",
    theme="soft",
    # examples=["Hello", "what is the speed of human nerve impulses?"],
)

# Start the Gradio server.
screen.launch()