ChavanN committed on
Commit
eea9b81
Β·
verified Β·
1 Parent(s): 39c040c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
import gradio as gr
from huggingface_hub import login

# Authenticate against the Hugging Face Hub using the token from the environment.
# BUG FIX: the original script called os.getenv() and login() without importing
# `os` or `huggingface_hub.login`, raising NameError at startup.
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

# 4-bit NF4 quantization with double quantization so the 7B model fits on a
# low-memory GPU; bfloat16 is used for the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Load tokenizer and the quantized model; device_map="auto" lets accelerate
# place layers on whatever devices are available.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
27
+
28
# Generation function wired into the Gradio interface below.
def generate_qa(text):
    """Generate 20 question-answer pairs from the given SAP Note text.

    Args:
        text: Raw SAP Note content pasted by the user.

    Returns:
        The model's sampled output, trimmed to the portion after the
        "### Response:" marker.
    """
    prompt = f"""### Instruction:
Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
Each question must refer to the SAP note number from text if additional context is needed.
Only output the pairs in the format:
Q1: ...
A1: ...
...
Q20: ...
A20: ...

### Input:
{text}

### Response:
"""
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    generated = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_new_tokens=2500,
        do_sample=True,
        temperature=0.9,
        top_p=0.95,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the text following the response marker in the echoed prompt.
    return decoded.split("### Response:")[-1].strip()
60
+
61
# Gradio front-end: paste SAP Note text, receive generated Q&A pairs.
note_input = gr.Textbox(lines=20, label="SAP Note Text")
qa_output = gr.Textbox(lines=25, label="Generated Q&A Pairs")

demo = gr.Interface(
    fn=generate_qa,
    inputs=note_input,
    outputs=qa_output,
    title="Mistral Q&A Generator for SAP Notes",
    description="Upload or paste SAP Note content to generate 20 question-answer pairs.",
)

demo.launch()