---
library_name: transformers
tags:
- unsloth
---
|
|
|
|
|
# Model Card for Model ID

This is a quantized Mistral 7B model fine-tuned on an academic short-QA dataset. It was fine-tuned with the QLoRA technique for roughly 500 steps, reaching a training loss of about 0.450.
|
|
|
|
|
|
|
|
|
|
|
## Requirements
|
|
|
|
|
```python |
|
|
|
|
|
!pip install gradio |
|
|
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121 |
|
|
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git" |
|
|
|
|
|
import os |
|
|
os.environ["WANDB_DISABLED"] = "true" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
``` |
|
|
|
|
|
### Gradio App
|
|
```python |
|
|
import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import re |
|
|
|
|
|
model_id = "DisgustingOzil/Academic-ShortQA-Generator" |
|
|
tokenizer = AutoTokenizer.from_pretrained(model_id) |
|
|
model = AutoModelForCausalLM.from_pretrained(model_id) |
|
|
|
|
|
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. |
|
|
|
|
|
### Instruction: |
|
|
{} |
|
|
|
|
|
### Input: |
|
|
{} |
|
|
|
|
|
### Response: |
|
|
{}""" |
|
|
|
|
|
def partition_text(text, partition_size): |
|
|
words = text.split() |
|
|
total_words = len(words) |
|
|
words_per_partition = total_words // partition_size |
|
|
partitions = [] |
|
|
for i in range(0, total_words, words_per_partition): |
|
|
partition = " ".join(words[i:i+words_per_partition]) |
|
|
if len(partition) > 100: # Ensuring meaningful length for MCQ generation |
|
|
partitions.append(partition) |
|
|
return partitions |
|
|
|
|
|
def generate_mcqs_for_partition(Instruction, partition, temperature, top_k): |
|
|
inputs = tokenizer(alpaca_prompt.format(Instruction, partition, ""), return_tensors="pt") |
|
|
outputs = model.generate( |
|
|
**inputs, |
|
|
max_length=512, |
|
|
num_return_sequences=1, |
|
|
temperature=temperature, |
|
|
top_k=top_k |
|
|
) |
|
|
output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) |
|
|
return output_text |
|
|
|
|
|
def generate_mcqs(Instruction, text, partition_count, temperature, top_k): |
|
|
partitions = partition_text(text, partition_count) |
|
|
mcqs_output = [] |
|
|
|
|
|
for part in partitions: |
|
|
output_text = generate_mcqs_for_partition(Instruction, part, temperature, top_k) |
|
|
pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>' |
|
|
matches = re.findall(pattern, output_text, re.DOTALL) |
|
|
|
|
|
for match in matches: |
|
|
question = match[0].strip() |
|
|
correct_answer = match[1].strip() |
|
|
mcqs_output.append(f"Question: {question}\nCorrect Answer: {correct_answer}\n") |
|
|
|
|
|
return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input." |
|
|
|
|
|
iface = gr.Interface( |
|
|
fn=generate_mcqs, |
|
|
inputs=[ |
|
|
gr.Textbox(label="Instruction"), |
|
|
gr.Textbox(lines=10, label="Input Biology Text"), |
|
|
gr.Slider(minimum=1, maximum=10, step=1, label="Partition Count"), |
|
|
gr.Slider(minimum=0.5, maximum=1.0, step=0.05 , label="Temperature"), |
|
|
gr.Slider(minimum=1, maximum=50, step=1, label="Top K") |
|
|
], |
|
|
outputs="text", |
|
|
title="ShortQA Generator", |
|
|
description="Enter a text about Biology to generate MCQs. Adjust the sliders to change the model's generation parameters." |
|
|
) |
|
|
|
|
|
if __name__ == "__main__": |
|
|
iface.launch(debug=True, share=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|