Spaces:
Runtime error
Runtime error
| import os # to check if file exists | |
| import sys # to flush stdout | |
| import markdown # to render answer | |
| import gradio as gr | |
| #import transformers | |
| #from transformers import pipeline | |
| from llama_cpp import Llama | |
| from huggingface_hub import hf_hub_download | |
# Hugging Face Hub repository that hosts the GGML model, and the specific
# weights file inside that repository which this app loads.
model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
# Earlier ways of referring to the model, kept for reference:
#model="TheBloke/Nous-Hermes-13B-GGML"
#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"
def download_model():
    """Return a local path to the model weights, downloading them if needed.

    The Hugging Face cache is probed first using ``model_repo`` and
    ``model_filename``; only when the file is not already cached is it
    fetched with ``hf_hub_download``.

    Returns:
        Filesystem path of the model file.
    """
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    # Imported locally: this is the only place the cache probe is needed.
    from huggingface_hub import try_to_load_from_cache

    # Ask the hub library where the cached copy lives instead of hard-coding
    # a home directory and snapshot hash, which silently goes stale when the
    # repo/filename constants, the cache location, or the revision change.
    cached = try_to_load_from_cache(repo_id=model_repo, filename=model_filename)
    # A cache hit is reported as a path string; anything else (None or a
    # "known missing" sentinel) means the file has to be downloaded.
    if isinstance(cached, str) and os.path.exists(cached):
        return cached

    print("Downloading model...")
    # Flush so the message shows up in the logs before the long download.
    sys.stdout.flush()
    downloaded = hf_hub_download(repo_id=model_repo, filename=model_filename)
    print("Downloaded " + downloaded)
    return downloaded
# Loaded models keyed by file path, so the weights are read from disk once
# per process instead of once per request.
_LLM_CACHE = {}


def _get_llm(model_path):
    """Return the ``Llama`` instance for *model_path*, loading it on first use."""
    llm = _LLM_CACHE.get(model_path)
    if llm is None:
        llm = Llama(model_path=model_path)
        _LLM_CACHE[model_path] = llm
    return llm


def question_answer(context, question, max_tokens):
    """Answer *question* about *context* using the local model.

    Args:
        context: Background text placed ahead of the question in the prompt.
        question: The user's question; it is wrapped in triple quotes inside
            the prompt.
        max_tokens: Upper bound on the number of generated tokens. Coerced
            to ``int`` before being handed to the model.

    Returns:
        A ``(question, html_answer)`` tuple: the question unchanged, and the
        model's completion rendered from Markdown to HTML.
    """
    model_file = download_model()

    # structure the prompt to make it easier for the ai
    quoted_question = '"""\n' + question + '\n"""\n'
    prompt = (
        context
        + "\n\nQuestion: "
        + quoted_question
        + "\nPlease use markdown formatting for answer. \nAnswer:\n"
    )

    llm = _get_llm(model_file)
    # echo=False returns only the completion. Echoing the prompt and then
    # stripping it with str.replace leaks the whole prompt into the answer
    # whenever the echoed text is not byte-identical to what was sent.
    output = llm(
        prompt,
        max_tokens=int(max_tokens),
        stop=["### Response"],
        echo=False,
    )
    print(output)

    # The result is a completion dict of the form:
    #   {
    #     "id": "cmpl-...", "object": "text_completion",
    #     "created": 1679561337, "model": "./models/7B/ggml-model.bin",
    #     "choices": [
    #       {"text": "...", "index": 0, "logprobs": None,
    #        "finish_reason": "stop"}
    #     ],
    #     "usage": {"prompt_tokens": 14, "completion_tokens": 28,
    #               "total_tokens": 42}
    #   }
    answer = output["choices"][0]["text"]

    # render the markdown and return the html and question
    html_answer = markdown.markdown(answer)
    return question, html_answer
# Build the web UI around question_answer: two free-text inputs and a slider
# ranging from 33 to 2333 feed the function's three parameters; its two
# return values go to a textbox and an HTML panel.
app = gr.Interface(
    fn=question_answer,
    inputs=["text", "text", gr.Slider(33, 2333)],
    outputs=["textbox", "html"],
)
app.launch()