"""Gradio demo that analyzes simple logical arguments with a fine-tuned Qwen2 GGUF model.

Downloads a 4-bit quantized GGUF checkpoint from the Hugging Face Hub, loads it
with llama-cpp-python for CPU inference, and exposes a small Gradio text UI.
"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

## Download the GGUF model
model_name = "cris177/Qwen2-Simple-Arguments"
# This is the specific model file we'll use in this example. It's a 4-bit quant,
# but other levels of quantization are available in the model repo if preferred.
model_file = "Qwen2_arguments.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)

## Instantiate model from downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=2000,       # Context length to use
    n_threads=2,      # Number of CPU threads to use
    n_gpu_layers=0,   # Number of model layers to offload to GPU (0 = CPU only)
)


def analyze_argument(argument: str) -> str:
    """Analyze a simple (at most two-proposition) argument with the LLM.

    Formats the user's argument into the Alpaca-style prompt the model was
    fine-tuned on, runs inference, and returns the model's raw analysis text
    (premises, conclusion, propositions, argument type, negations, validity).
    """
    instruction = 'Based on the following argument, identify the following elements: premises, conclusion, propositions, type of argument, negation of propositions and validity.'
    # Alpaca instruction-tuning prompt template; the model completes after "### Response:".
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:"""
    prompt = alpaca_prompt.format(instruction, argument)
    output = llm(prompt, max_tokens=1000)['choices'][0]['text'].strip()
    return output


description = """This tool analyzes simple arguments, that is, arguments composed of at most two propositions.
It applies the fine-tuned LLM from https://huggingface.co/cris177/Qwen2-Simple-Arguments
For faster inference we use the 4-bit quantization model https://huggingface.co/cris177/Qwen2-Simple-Arguments/resolve/main/Qwen2_arguments.Q4_K_M.gguf.
It requires only 3 GB of RAM, and runs on just 2 vCPUs (which causes it to run somewhat slowly in this demo).
"""

# Bind the interface to `demo` (the conventional name Gradio hosting looks for)
# and only launch when executed as a script, so the module is safely importable.
demo = gr.Interface(
    analyze_argument,
    inputs="text",
    outputs="text",
    title="Simple Arguments Analyzer",
    description=description,
    examples=[["If it's wednesday it's cold, and it's cold, therefore it's wednesday."]],
)

if __name__ == "__main__":
    demo.launch()