File size: 3,252 Bytes
79cd217
 
 
 
 
 
 
 
257e5a2
79cd217
 
 
257e5a2
79cd217
257e5a2
79cd217
257e5a2
 
 
79cd217
257e5a2
 
79cd217
 
 
 
 
 
257e5a2
79cd217
257e5a2
 
 
 
 
79cd217
257e5a2
 
 
79cd217
257e5a2
79cd217
257e5a2
 
79cd217
257e5a2
 
79cd217
257e5a2
 
79cd217
257e5a2
 
 
 
 
 
 
 
 
 
79cd217
257e5a2
 
 
 
 
 
 
 
 
 
 
79cd217
257e5a2
 
 
79cd217
257e5a2
 
 
 
79cd217
257e5a2
 
 
 
79cd217
257e5a2
79cd217
257e5a2
 
 
 
 
 
 
 
 
 
 
 
 
79cd217
257e5a2
 
79cd217
 
 
 
 
257e5a2
79cd217
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
---
library_name: transformers
tags:
- unsloth
---

# Model Card for Academic-ShortQA-Generator

This is a quantized Mistral 7B model trained on an Academic Short QA dataset. It was fine-tuned using the QLoRA technique for around 500 steps, reaching a training loss of around 0.450.



## Requirements

```python

!pip install gradio
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"

import os
os.environ["WANDB_DISABLED"] = "true"






```

### Gradio App
```python
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

# Hugging Face Hub repository of the fine-tuned ShortQA generator model.
model_id = "DisgustingOzil/Academic-ShortQA-Generator"
# Downloads (or loads from cache) the tokenizer and model weights at import time.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Alpaca-style prompt template. The three placeholders are filled with:
# the task instruction, the input context, and an empty response slot
# that the model is expected to complete.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def partition_text(text, partition_size):
    """Split *text* into roughly ``partition_size`` equal word-count chunks.

    Args:
        text: Source text to split on whitespace.
        partition_size: Desired number of partitions.

    Returns:
        A list of partition strings. Partitions of 100 characters or
        fewer are dropped, since they are too short to generate
        meaningful questions from. May return fewer (or zero)
        partitions than requested.
    """
    words = text.split()
    total_words = len(words)
    # max(1, ...) guards against a zero step: when the text has fewer
    # words than partition_size (including empty text), the original
    # floor division yielded 0 and range() raised ValueError.
    words_per_partition = max(1, total_words // partition_size)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # Ensuring meaningful length for MCQ generation
            partitions.append(partition)
    return partitions

def generate_mcqs_for_partition(Instruction, partition, temperature, top_k):
    """Run the model over one text partition and return the decoded output.

    Args:
        Instruction: Task instruction inserted into the Alpaca prompt.
        partition: Chunk of source text used as the prompt input.
        temperature: Sampling temperature passed to ``model.generate``.
        top_k: Top-k sampling cutoff passed to ``model.generate``.

    Returns:
        The full generated text, decoded with special tokens stripped.
    """
    prompt = alpaca_prompt.format(Instruction, partition, "")
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(
        **encoded,
        max_length=512,
        num_return_sequences=1,
        temperature=temperature,
        top_k=top_k,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def generate_mcqs(Instruction, text, partition_count, temperature, top_k):
    """Generate question/answer pairs for every partition of *text*.

    Each partition is sent through the model; ``<question>``/``<answer>``
    tagged spans in the output are extracted and formatted.

    Args:
        Instruction: Task instruction for the model.
        text: Source text to partition and question.
        partition_count: Number of partitions to split *text* into.
        temperature: Sampling temperature for generation.
        top_k: Top-k sampling cutoff.

    Returns:
        Formatted QA pairs joined by blank lines, or a fallback message
        when nothing could be extracted.
    """
    qa_pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>'
    results = []

    for chunk in partition_text(text, partition_count):
        raw_output = generate_mcqs_for_partition(Instruction, chunk, temperature, top_k)
        for question, answer in re.findall(qa_pattern, raw_output, re.DOTALL):
            results.append(
                f"Question: {question.strip()}\nCorrect Answer: {answer.strip()}\n"
            )

    if not results:
        return "No MCQs could be generated from the input."
    return "\n".join(results)

# Gradio UI: wires the instruction, source text, and generation
# parameters to generate_mcqs and displays the formatted QA pairs.
iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(lines=10, label="Input Biology Text"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Partition Count"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05 , label="Temperature"),
        gr.Slider(minimum=1, maximum=50, step=1, label="Top K")
    ],
    outputs="text",
    title="ShortQA Generator",
    description="Enter a text about Biology to generate MCQs. Adjust the sliders to change the model's generation parameters."
)

if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link;
    # debug=True surfaces server-side errors in the console.
    iface.launch(debug=True, share=True)





```