| import torch
|
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
|
| import gradio as gr
|
|
|
|
|
| hf_token = os.getenv("HF_TOKEN")
|
| login(token=hf_token)
|
|
|
|
|
| quantization_config = BitsAndBytesConfig(
|
| load_in_4bit=True,
|
| bnb_4bit_compute_dtype=torch.bfloat16,
|
| bnb_4bit_use_double_quant=True,
|
| bnb_4bit_quant_type="nf4"
|
| )
|
|
|
| model_name = "mistralai/Mistral-7B-Instruct-v0.3"
|
|
|
|
|
| tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| model = AutoModelForCausalLM.from_pretrained(
|
| model_name,
|
| quantization_config=quantization_config,
|
| torch_dtype=torch.bfloat16,
|
| device_map="auto"
|
| )
|
|
|
|
|
| def generate_qa(text):
|
| prompt = f"""### Instruction:
|
| Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
|
| Each question must refer to the SAP note number from text if additional context is needed.
|
| Only output the pairs in the format:
|
| Q1: ...
|
| A1: ...
|
| ...
|
| Q20: ...
|
| A20: ...
|
|
|
| ### Input:
|
| {text}
|
|
|
| ### Response:
|
| """
|
| inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| outputs = model.generate(
|
| input_ids=inputs.input_ids,
|
| attention_mask=inputs.attention_mask,
|
| max_new_tokens=2500,
|
| do_sample=True,
|
| temperature=0.9,
|
| top_p=0.95,
|
| repetition_penalty=1.1,
|
| pad_token_id=tokenizer.eos_token_id
|
| )
|
|
|
| output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| qa_pairs = output_text.split("### Response:")[-1].strip()
|
| return qa_pairs
|
|
|
|
|
| demo = gr.Interface(
|
| fn=generate_qa,
|
| inputs=gr.Textbox(lines=20, label="SAP Note Text"),
|
| outputs=gr.Textbox(lines=25, label="Generated Q&A Pairs"),
|
| title="Mistral Q&A Generator for SAP Notes",
|
| description="Upload or paste SAP Note content to generate 20 question-answer pairs."
|
| )
|
|
|
| demo.launch()
|
|
|