ChavanN committed
Commit 39c040c · verified · 1 Parent(s): 71817fe

Delete app.py

Files changed (1)
  1. app.py +0 -132
app.py DELETED
@@ -1,132 +0,0 @@
- # from fastapi import FastAPI, Request
- # from pydantic import BaseModel
- # # from unsloth import FastLanguageModel
- # import torch
- # import re
-
-
-
-
- # app = FastAPI()
-
- # # Load model once on startup
- # model, tokenizer = FastLanguageModel.from_pretrained(
- #     model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
- #     max_seq_length = 2048,
- #     dtype = None,
- #     load_in_4bit = True,
- # )
- # FastLanguageModel.for_inference(model)
-
- # class SAPNoteRequest(BaseModel):
- #     text: str
-
- # @app.post("/generate_qa")
- # def generate_qa(req: SAPNoteRequest):
- #     text = req.text
- #     match = re.search(r"SAP Note\s*(\d+)", text)
- #     sap_note_number = match.group(1) if match else "UNKNOWN"
-
- #     prompt = f"""
- # Generate 20 question-answer pairs based on the following SAP Note.
- # Each question should include the SAP note number {sap_note_number} to clarify context.
-
- # \"\"\"{text}\"\"\"
-
- # Q1: question
- # A1: answer
-
- # ### Response:
- # """
- #     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
- #     outputs = model.generate(
- #         inputs.input_ids,
- #         max_new_tokens=2048,
- #         do_sample=True,
- #         temperature=0.7,
- #         top_p=0.95,
- #         repetition_penalty=1.2
- #     )
- #     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
- #     qa_pairs = output_text.split("### Response:")[-1].strip()
- #     return {"qa_pairs": qa_pairs}
-
-
- ### Hugging face code
-
- # import torch
- # from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
- # # Quantization settings
- # quantization_config = BitsAndBytesConfig(
- #     load_in_4bit=True,
- #     bnb_4bit_quant_type="nf4",
- #     bnb_4bit_compute_dtype=torch.float16,
- # )
-
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
- import gradio as gr
-
- # Use quantization for low-memory GPU inference
- quantization_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_compute_dtype=torch.bfloat16,
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_quant_type="nf4"
- )
-
- model_name = "mistralai/Mistral-7B-Instruct-v0.3"
-
- # Load model and tokenizer
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     quantization_config=quantization_config,
-     torch_dtype=torch.bfloat16,
-     device_map="auto"
- )
-
- # Define generation function
- def generate_qa(text):
-     prompt = f"""### Instruction:
- Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
- Each question must refer to the SAP note number from text if additional context is needed.
- Only output the pairs in the format:
- Q1: ...
- A1: ...
- ...
- Q20: ...
- A20: ...
-
- ### Input:
- {text}
-
- ### Response:
- """
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     outputs = model.generate(
-         input_ids=inputs.input_ids,
-         attention_mask=inputs.attention_mask,
-         max_new_tokens=2500,
-         do_sample=True,
-         temperature=0.9,
-         top_p=0.95,
-         repetition_penalty=1.1,
-         pad_token_id=tokenizer.eos_token_id
-     )
-
-     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     qa_pairs = output_text.split("### Response:")[-1].strip()
-     return qa_pairs
-
- # Define Gradio UI
- demo = gr.Interface(
-     fn=generate_qa,
-     inputs=gr.Textbox(lines=20, label="SAP Note Text"),
-     outputs=gr.Textbox(lines=25, label="Generated Q&A Pairs"),
-     title="Mistral Q&A Generator for SAP Notes",
-     description="Upload or paste SAP Note content to generate 20 question-answer pairs."
- )
-
- demo.launch()