ChavanN committed on
Commit
8ea8c0d
·
verified ·
1 Parent(s): eea9b81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -70
app.py CHANGED
@@ -1,70 +1,71 @@
1
- import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
3
- import gradio as gr
4
-
5
- # Authenticate using token from environment
6
- hf_token = os.getenv("HF_TOKEN")
7
- login(token=hf_token)
8
-
9
- # Use quantization for low-memory GPU inference
10
- quantization_config = BitsAndBytesConfig(
11
- load_in_4bit=True,
12
- bnb_4bit_compute_dtype=torch.bfloat16,
13
- bnb_4bit_use_double_quant=True,
14
- bnb_4bit_quant_type="nf4"
15
- )
16
-
17
- model_name = "mistralai/Mistral-7B-Instruct-v0.3"
18
-
19
- # Load model and tokenizer
20
- tokenizer = AutoTokenizer.from_pretrained(model_name)
21
- model = AutoModelForCausalLM.from_pretrained(
22
- model_name,
23
- quantization_config=quantization_config,
24
- torch_dtype=torch.bfloat16,
25
- device_map="auto"
26
- )
27
-
28
- # Define generation function
29
- def generate_qa(text):
30
- prompt = f"""### Instruction:
31
- Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
32
- Each question must refer to the SAP note number from text if additional context is needed.
33
- Only output the pairs in the format:
34
- Q1: ...
35
- A1: ...
36
- ...
37
- Q20: ...
38
- A20: ...
39
-
40
- ### Input:
41
- {text}
42
-
43
- ### Response:
44
- """
45
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
46
- outputs = model.generate(
47
- input_ids=inputs.input_ids,
48
- attention_mask=inputs.attention_mask,
49
- max_new_tokens=2500,
50
- do_sample=True,
51
- temperature=0.9,
52
- top_p=0.95,
53
- repetition_penalty=1.1,
54
- pad_token_id=tokenizer.eos_token_id
55
- )
56
-
57
- output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
58
- qa_pairs = output_text.split("### Response:")[-1].strip()
59
- return qa_pairs
60
-
61
- # Define Gradio UI
62
- demo = gr.Interface(
63
- fn=generate_qa,
64
- inputs=gr.Textbox(lines=20, label="SAP Note Text"),
65
- outputs=gr.Textbox(lines=25, label="Generated Q&A Pairs"),
66
- title="Mistral Q&A Generator for SAP Notes",
67
- description="Upload or paste SAP Note content to generate 20 question-answer pairs."
68
- )
69
-
70
- demo.launch()
 
 
import os

import torch
import gradio as gr
from huggingface_hub import login  # was missing: login() raised NameError at startup
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig

# Authenticate with the Hugging Face Hub using a token from the environment.
# Needed because the Mistral-7B-Instruct repository is gated.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    # Only log in when a token is actually configured; login(token=None)
    # would fall back to an interactive prompt, which fails in a Space.
    login(token=hf_token)

# 4-bit NF4 quantization so the 7B model fits on a low-memory GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Load tokenizer and quantized model.
# device_map="auto" lets accelerate place layers on GPU/CPU as memory allows.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
28
+
29
def generate_qa(text):
    """Generate 20 question-answer pairs from SAP Note text.

    Builds an instruction-style prompt around *text*, samples a completion
    from the globally loaded model, and returns only the portion of the
    decoded output that follows the "### Response:" marker.
    """
    prompt = f"""### Instruction:
Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
Each question must refer to the SAP note number from text if additional context is needed.
Only output the pairs in the format:
Q1: ...
A1: ...
...
Q20: ...
A20: ...

### Input:
{text}

### Response:
"""
    # Tokenize and move tensors onto the same device as the model.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Sampling settings favor varied but on-topic output; pad_token_id is
    # set explicitly because the tokenizer has no dedicated pad token.
    generated = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_new_tokens=2500,
        do_sample=True,
        temperature=0.9,
        top_p=0.95,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The decoded sequence includes the prompt; keep only the model's answer.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.split("### Response:")[-1].strip()
61
+
62
# Gradio UI: a large input box for the raw note text and a read-only
# output box for the generated pairs.
note_input = gr.Textbox(lines=20, label="SAP Note Text")
qa_output = gr.Textbox(lines=25, label="Generated Q&A Pairs")

demo = gr.Interface(
    fn=generate_qa,
    inputs=note_input,
    outputs=qa_output,
    title="Mistral Q&A Generator for SAP Notes",
    description="Upload or paste SAP Note content to generate 20 question-answer pairs.",
)

demo.launch()