ChavanN commited on
Commit
233e142
Β·
verified Β·
1 Parent(s): f00984f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -51
app.py CHANGED
@@ -1,73 +1,46 @@
1
- import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
3
  import gradio as gr
 
 
4
  import os
5
- from huggingface_hub import login
6
- # Authenticate using token from environment
7
- hf_token = os.getenv("HF_TOKEN")
8
- login(token=hf_token)
9
 
10
-
11
- # Use quantization for low-memory GPU inference
12
- quantization_config = BitsAndBytesConfig(
13
- load_in_4bit=True,
14
- bnb_4bit_compute_dtype=torch.bfloat16,
15
- bnb_4bit_use_double_quant=True,
16
- bnb_4bit_quant_type="nf4"
17
  )
18
 
19
- model_name = "mistralai/Mistral-7B-Instruct-v0.3"
20
-
21
- # Load model and tokenizer
22
- tokenizer = AutoTokenizer.from_pretrained(model_name, token = hf_token)
23
  model = AutoModelForCausalLM.from_pretrained(
24
- model_name,
25
- quantization_config=quantization_config,
26
- torch_dtype=torch.bfloat16,
27
- token = hf_token,
28
- device_map="auto"
 
29
  )
30
 
31
- # Define generation function
32
  def generate_qa(text):
33
  prompt = f"""### Instruction:
34
  Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
35
  Each question must refer to the SAP note number from text if additional context is needed.
36
- Only output the pairs in the format:
37
- Q1: ...
38
- A1: ...
39
- ...
40
- Q20: ...
41
- A20: ...
42
 
43
  ### Input:
44
  {text}
45
 
46
- ### Response:
47
- """
48
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
49
- outputs = model.generate(
50
- input_ids=inputs.input_ids,
51
- attention_mask=inputs.attention_mask,
52
- max_new_tokens=2500,
53
- do_sample=True,
54
- temperature=0.9,
55
- top_p=0.95,
56
- repetition_penalty=1.1,
57
- pad_token_id=tokenizer.eos_token_id
58
- )
59
-
60
- output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
61
- qa_pairs = output_text.split("### Response:")[-1].strip()
62
- return qa_pairs
63
 
64
- # Define Gradio UI
65
  demo = gr.Interface(
66
  fn=generate_qa,
67
  inputs=gr.Textbox(lines=20, label="SAP Note Text"),
68
- outputs=gr.Textbox(lines=25, label="Generated Q&A Pairs"),
69
- title="Mistral Q&A Generator for SAP Notes",
70
- description="Upload or paste SAP Note content to generate 20 question-answer pairs."
71
  )
72
 
73
- demo.launch()
 
 
 
 
1
  import gradio as gr
2
+ from ctransformers import AutoModelForCausalLM
3
+ from huggingface_hub import hf_hub_download
4
  import os
 
 
 
 
5
 
6
+ # Download the GGUF model from Hugging Face (TheBloke's quantized Mistral)
7
+ model_path = hf_hub_download(
8
+ repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
9
+ filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
10
+ cache_dir="./"
 
 
11
  )
12
 
13
+ # Load model directly from downloaded file
 
 
 
14
  model = AutoModelForCausalLM.from_pretrained(
15
+ model_path,
16
+ model_type="mistral",
17
+ max_new_tokens=2048,
18
+ temperature=0.9,
19
+ repetition_penalty=1.1,
20
+ top_p=0.95
21
  )
22
 
23
+ # Function to generate Q&A pairs
24
  def generate_qa(text):
25
  prompt = f"""### Instruction:
26
  Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
27
  Each question must refer to the SAP note number from text if additional context is needed.
 
 
 
 
 
 
28
 
29
  ### Input:
30
  {text}
31
 
32
+ ### Response:"""
33
+ response = model(prompt)
34
+ return response.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # Gradio Interface
37
  demo = gr.Interface(
38
  fn=generate_qa,
39
  inputs=gr.Textbox(lines=20, label="SAP Note Text"),
40
+ outputs=gr.Textbox(lines=30, label="Generated Q&A Pairs"),
41
+ title="SAP Note Q&A Generator (Mistral GGUF on CPU)",
42
+ description="Paste SAP Note content to generate 20 Q&A pairs using Mistral 7B Instruct (Quantized for CPU)"
43
  )
44
 
45
+ if __name__ == "__main__":
46
+ demo.launch()