ChavanN commited on
Commit
11c1251
Β·
verified Β·
1 Parent(s): e5b9dd4

Upload 2 files

Browse files
Files changed (2) hide show
  1. agent.py +132 -0
  2. requirements.txt +5 -0
agent.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from fastapi import FastAPI, Request
2
+ # from pydantic import BaseModel
3
+ # # from unsloth import FastLanguageModel
4
+ # import torch
5
+ # import re
6
+
7
+
8
+
9
+
10
+ # app = FastAPI()
11
+
12
+ # # Load model once on startup
13
+ # model, tokenizer = FastLanguageModel.from_pretrained(
14
+ # model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
15
+ # max_seq_length = 2048,
16
+ # dtype = None,
17
+ # load_in_4bit = True,
18
+ # )
19
+ # FastLanguageModel.for_inference(model)
20
+
21
+ # class SAPNoteRequest(BaseModel):
22
+ # text: str
23
+
24
+ # @app.post("/generate_qa")
25
+ # def generate_qa(req: SAPNoteRequest):
26
+ # text = req.text
27
+ # match = re.search(r"SAP Note\s*(\d+)", text)
28
+ # sap_note_number = match.group(1) if match else "UNKNOWN"
29
+
30
+ # prompt = f"""
31
+ # Generate 20 question-answer pairs based on the following SAP Note.
32
+ # Each question should include the SAP note number {sap_note_number} to clarify context.
33
+
34
+ # \"\"\"{text}\"\"\"
35
+
36
+ # Q1: question
37
+ # A1: answer
38
+
39
+ # ### Response:
40
+ # """
41
+ # inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
42
+ # outputs = model.generate(
43
+ # inputs.input_ids,
44
+ # max_new_tokens=2048,
45
+ # do_sample=True,
46
+ # temperature=0.7,
47
+ # top_p=0.95,
48
+ # repetition_penalty=1.2
49
+ # )
50
+ # output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
51
+ # qa_pairs = output_text.split("### Response:")[-1].strip()
52
+ # return {"qa_pairs": qa_pairs}
53
+
54
+
55
+ ### Hugging face code
56
+
57
+ # import torch
58
+ # from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
59
+
60
+ # # Quantization settings
61
+ # quantization_config = BitsAndBytesConfig(
62
+ # load_in_4bit=True,
63
+ # bnb_4bit_quant_type="nf4",
64
+ # bnb_4bit_compute_dtype=torch.float16,
65
+ # )
66
+
67
+ import torch
68
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
69
+ import gradio as gr
70
+
71
+ # Use quantization for low-memory GPU inference
72
+ quantization_config = BitsAndBytesConfig(
73
+ load_in_4bit=True,
74
+ bnb_4bit_compute_dtype=torch.bfloat16,
75
+ bnb_4bit_use_double_quant=True,
76
+ bnb_4bit_quant_type="nf4"
77
+ )
78
+
79
+ model_name = "mistralai/Mistral-7B-Instruct-v0.3"
80
+
81
+ # Load model and tokenizer
82
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
83
+ model = AutoModelForCausalLM.from_pretrained(
84
+ model_name,
85
+ quantization_config=quantization_config,
86
+ torch_dtype=torch.bfloat16,
87
+ device_map="auto"
88
+ )
89
+
90
+ # Define generation function
91
+ def generate_qa(text):
92
+ prompt = f"""### Instruction:
93
+ Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
94
+ Each question must refer to the SAP note number from text if additional context is needed.
95
+ Only output the pairs in the format:
96
+ Q1: ...
97
+ A1: ...
98
+ ...
99
+ Q20: ...
100
+ A20: ...
101
+
102
+ ### Input:
103
+ {text}
104
+
105
+ ### Response:
106
+ """
107
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
108
+ outputs = model.generate(
109
+ input_ids=inputs.input_ids,
110
+ attention_mask=inputs.attention_mask,
111
+ max_new_tokens=2500,
112
+ do_sample=True,
113
+ temperature=0.9,
114
+ top_p=0.95,
115
+ repetition_penalty=1.1,
116
+ pad_token_id=tokenizer.eos_token_id
117
+ )
118
+
119
+ output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
120
+ qa_pairs = output_text.split("### Response:")[-1].strip()
121
+ return qa_pairs
122
+
123
+ # Define Gradio UI
124
+ demo = gr.Interface(
125
+ fn=generate_qa,
126
+ inputs=gr.Textbox(lines=20, label="SAP Note Text"),
127
+ outputs=gr.Textbox(lines=25, label="Generated Q&A Pairs"),
128
+ title="Mistral Q&A Generator for SAP Notes",
129
+ description="Upload or paste SAP Note content to generate 20 question-answer pairs."
130
+ )
131
+
132
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers>=4.38.2
2
+ torch>=2.1.0
3
+ accelerate
4
+ bitsandbytes
5
+ gradio