File size: 3,252 Bytes
79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 257e5a2 79cd217 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
---
library_name: transformers
tags:
- unsloth
---
# Model Card for Model ID
This is a quantized Mistral 7B model trained on an Academic Short QA dataset. It was fine-tuned using the QLoRA technique for around 500 steps, reaching a training loss of about 0.450.
## Requirements
```python
!pip install gradio
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
import os
os.environ["WANDB_DISABLED"] = "true"
```
### Gradio App
```python
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
# Hugging Face Hub repo id of the fine-tuned short-QA generator model.
model_id = "DisgustingOzil/Academic-ShortQA-Generator"
# Load tokenizer and model weights from the Hub (downloads on first run).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Alpaca-style prompt template: instruction, input context, and an empty
# response slot that the model is expected to complete.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
def partition_text(text, partition_size):
    """Split *text* into roughly ``partition_size`` chunks of whole words.

    Chunks of 100 characters or fewer are dropped so that every returned
    partition carries enough context for question generation.

    Args:
        text: Input text to split.
        partition_size: Desired number of partitions (>= 1).

    Returns:
        List of partition strings; may be empty if no chunk is long enough.
    """
    words = text.split()
    total_words = len(words)
    # Guard against a zero step: when the text has fewer words than the
    # requested partition count, fall back to one word per partition.
    # (Previously range(..., 0) raised ValueError for short inputs.)
    words_per_partition = max(1, total_words // partition_size)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # Ensuring meaningful length for MCQ generation
            partitions.append(partition)
    return partitions
def generate_mcqs_for_partition(Instruction, partition, temperature, top_k):
    """Generate model output for a single text partition.

    Args:
        Instruction: Task instruction inserted into the Alpaca prompt.
        partition: Chunk of source text used as the prompt input.
        temperature: Sampling temperature (0.5-1.0 from the UI slider).
        top_k: Top-k sampling cutoff (1-50 from the UI slider).

    Returns:
        The decoded model output string (includes the echoed prompt).
    """
    inputs = tokenizer(alpaca_prompt.format(Instruction, partition, ""), return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_length=512,
        num_return_sequences=1,
        # do_sample must be enabled: without it generation is greedy and the
        # temperature/top_k values from the UI sliders are silently ignored.
        do_sample=True,
        temperature=temperature,
        top_k=top_k
    )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return output_text
def generate_mcqs(Instruction, text, partition_count, temperature, top_k):
    """Run generation over every partition of *text* and collect Q/A pairs.

    Each partition's raw model output is scanned for <question>/<answer> tag
    pairs, and every pair is rendered as a "Question:/Correct Answer:" entry.
    Returns a fallback message when no pair could be extracted.
    """
    qa_pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>'
    formatted = []
    for chunk in partition_text(text, partition_count):
        raw = generate_mcqs_for_partition(Instruction, chunk, temperature, top_k)
        for question, answer in re.findall(qa_pattern, raw, re.DOTALL):
            formatted.append(
                f"Question: {question.strip()}\nCorrect Answer: {answer.strip()}\n"
            )
    if not formatted:
        return "No MCQs could be generated from the input."
    return "\n".join(formatted)
# Gradio UI: wires the generation pipeline to two text inputs and three
# sliders controlling partitioning and sampling parameters.
iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(lines=10, label="Input Biology Text"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Partition Count"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05 , label="Temperature"),
        gr.Slider(minimum=1, maximum=50, step=1, label="Top K")
    ],
    outputs="text",
    title="ShortQA Generator",
    description="Enter a text about Biology to generate MCQs. Adjust the sliders to change the model's generation parameters."
)
if __name__ == "__main__":
    # share=True exposes a temporary public Gradio link; debug=True surfaces
    # generation errors in the UI instead of failing silently.
    iface.launch(debug=True, share=True)
```
|