"""Gradio demo that analyzes simple logical arguments with a fine-tuned Qwen2 GGUF model.

Downloads a 4-bit quantized GGUF checkpoint from the Hugging Face Hub, loads it
with llama-cpp-python for CPU inference, and exposes a small Gradio text UI.
"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

## Download the GGUF model
model_name = "cris177/Qwen2-Simple-Arguments"
# This is the specific model file we'll use in this example. It's a 4-bit quant,
# but other levels of quantization are available in the model repo if preferred.
model_file = "Qwen2_arguments.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)

## Instantiate model from downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=2000,       # Context length to use
    n_threads=2,      # Number of CPU threads to use
    n_gpu_layers=0,   # Number of model layers to offload to GPU (0 = CPU only)
)


def analyze_argument(argument: str) -> str:
    """Analyze a simple (at most two-proposition) argument with the LLM.

    Formats the user's argument into the Alpaca-style prompt the model was
    fine-tuned on, runs inference, and returns the model's raw analysis text
    (premises, conclusion, propositions, argument type, negations, validity).
    """
    instruction = 'Based on the following argument, identify the following elements: premises, conclusion, propositions, type of argument, negation of propositions and validity.'
    # Alpaca instruction-tuning prompt template; the model completes after "### Response:".
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:"""
    prompt = alpaca_prompt.format(instruction, argument)
    output = llm(prompt, max_tokens=1000)['choices'][0]['text'].strip()
    return output


description = """This tool analyzes simple arguments, that is, arguments composed of at most two propositions.
It applies the fine-tuned LLM from https://huggingface.co/cris177/Qwen2-Simple-Arguments
For faster inference we use the 4-bit quantization model https://huggingface.co/cris177/Qwen2-Simple-Arguments/resolve/main/Qwen2_arguments.Q4_K_M.gguf.
It requires only 3 GB of RAM, and runs on just 2 vCPUs (which causes it to run somewhat slowly in this demo).
"""

# Bind the interface to `demo` (the conventional name Gradio hosting looks for)
# and only launch when executed as a script, so the module is safely importable.
demo = gr.Interface(
    analyze_argument,
    inputs="text",
    outputs="text",
    title="Simple Arguments Analyzer",
    description=description,
    examples=[["If it's wednesday it's cold, and it's cold, therefore it's wednesday."]],
)

if __name__ == "__main__":
    demo.launch()