Spaces:

ChavanN
/

qa_agent

Runtime error

App Files Community

ChavanN committed on May 15, 2025

Commit

233e142

verified ·

1 Parent(s): f00984f

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -51

app.py CHANGED Viewed

@@ -1,73 +1,46 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, GenerationConfig, BitsAndBytesConfig
 import gradio as gr
 import os
-from huggingface_hub import login
-# Authenticate using token from environment
-hf_token = os.getenv("HF_TOKEN")
-login(token=hf_token)
-# Use quantization for low-memory GPU inference
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4"
 )
-model_name = "mistralai/Mistral-7B-Instruct-v0.3"
-# Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name, token = hf_token)
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    quantization_config=quantization_config,
-    torch_dtype=torch.bfloat16,
-    token = hf_token,
-    device_map="auto"
 )
-# Define generation function
 def generate_qa(text):
     prompt = f"""### Instruction:
 Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
 Each question must refer to the SAP note number from text if additional context is needed.
-Only output the pairs in the format:
-Q1: ...
-A1: ...
-...
-Q20: ...
-A20: ...
 ### Input:
 {text}
-### Response:
-"""
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(
-        input_ids=inputs.input_ids,
-        attention_mask=inputs.attention_mask,
-        max_new_tokens=2500,
-        do_sample=True,
-        temperature=0.9,
-        top_p=0.95,
-        repetition_penalty=1.1,
-        pad_token_id=tokenizer.eos_token_id
-    )
-    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    qa_pairs = output_text.split("### Response:")[-1].strip()
-    return qa_pairs
-# Define Gradio UI
 demo = gr.Interface(
     fn=generate_qa,
     inputs=gr.Textbox(lines=20, label="SAP Note Text"),
-    outputs=gr.Textbox(lines=25, label="Generated Q&A Pairs"),
-    title="Mistral Q&A Generator for SAP Notes",
-    description="Upload or paste SAP Note content to generate 20 question-answer pairs."
 )
-demo.launch()

 import gradio as gr
+from ctransformers import AutoModelForCausalLM
+from huggingface_hub import hf_hub_download
 import os
+# Download the GGUF model from Hugging Face (TheBloke's quantized Mistral)
+model_path = hf_hub_download(
+    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+    filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+    cache_dir="./"
 )
+# Load model directly from downloaded file
 model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    model_type="mistral",
+    max_new_tokens=2048,
+    temperature=0.9,
+    repetition_penalty=1.1,
+    top_p=0.95
 )
+# Function to generate Q&A pairs
 def generate_qa(text):
     prompt = f"""### Instruction:
 Based on the following SAP Note, generate exactly 20 unique and informative question-answer pairs.
 Each question must refer to the SAP note number from text if additional context is needed.
 ### Input:
 {text}
+### Response:"""
+    response = model(prompt)
+    return response.strip()
+# Gradio Interface
 demo = gr.Interface(
     fn=generate_qa,
     inputs=gr.Textbox(lines=20, label="SAP Note Text"),
+    outputs=gr.Textbox(lines=30, label="Generated Q&A Pairs"),
+    title="SAP Note Q&A Generator (Mistral GGUF on CPU)",
+    description="Paste SAP Note content to generate 20 Q&A pairs using Mistral 7B Instruct (Quantized for CPU)"
 )
+if __name__ == "__main__":
+    demo.launch()