Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,18 +7,19 @@ from PIL import Image
|
|
| 7 |
import pytesseract
|
| 8 |
import torch
|
| 9 |
import os
|
|
|
|
| 10 |
|
| 11 |
-
# π
|
| 12 |
-
login(token=os.environ.get("
|
| 13 |
|
| 14 |
-
# Check
|
| 15 |
if not torch.cuda.is_available():
|
| 16 |
raise RuntimeError("β GPU not detected! Please enable GPU in Space settings.")
|
| 17 |
|
| 18 |
-
# Model
|
| 19 |
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
|
| 20 |
|
| 21 |
-
#
|
| 22 |
def extract_text_from_pdf(file):
|
| 23 |
text = ""
|
| 24 |
with pdfplumber.open(file) as pdf:
|
|
@@ -49,7 +50,7 @@ def chunk_text(text, max_chars=6000):
|
|
| 49 |
chunks.append(current_chunk)
|
| 50 |
return chunks
|
| 51 |
|
| 52 |
-
#
|
| 53 |
def create_prompt(text_chunk):
|
| 54 |
return f"""You are an expert in analyzing U.S. government tenders. For the following document chunk, extract answers to the listed questions. Format your response in Q&A.
|
| 55 |
|
|
@@ -89,7 +90,8 @@ DOCUMENT CHUNK:
|
|
| 89 |
{text_chunk}
|
| 90 |
"""
|
| 91 |
|
| 92 |
-
#
|
|
|
|
| 93 |
def analyze_document(file, status_box, cancel_flag):
|
| 94 |
filename = file.name
|
| 95 |
ext = os.path.splitext(filename)[-1].lower()
|
|
@@ -107,12 +109,12 @@ def analyze_document(file, status_box, cancel_flag):
|
|
| 107 |
chunks = chunk_text(raw_text)
|
| 108 |
full_summary = ""
|
| 109 |
|
| 110 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("
|
| 111 |
model = AutoModelForCausalLM.from_pretrained(
|
| 112 |
model_id,
|
| 113 |
device_map="auto",
|
| 114 |
torch_dtype=torch.float16,
|
| 115 |
-
token=os.environ.get("
|
| 116 |
trust_remote_code=True
|
| 117 |
)
|
| 118 |
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
|
@@ -131,9 +133,10 @@ def analyze_document(file, status_box, cancel_flag):
|
|
| 131 |
status_box.value = "✅ Completed"
|
| 132 |
return full_summary
|
| 133 |
|
| 134 |
-
#
|
| 135 |
with gr.Blocks(title="Smart Tender Analyzer - US Edition") as demo:
|
| 136 |
gr.Markdown("## π US Tender Analyzer β Document Intelligence (OCR + GPU-Accelerated)")
|
|
|
|
| 137 |
with gr.Row():
|
| 138 |
with gr.Column(scale=1):
|
| 139 |
file_input = gr.File(label="π Upload Tender Document (PDF/DOCX)")
|
|
|
|
| 7 |
import pytesseract
|
| 8 |
import torch
|
| 9 |
import os
|
| 10 |
+
import spaces
|
| 11 |
|
| 12 |
+
# π Hugging Face Token Authentication
|
| 13 |
+
login(token=os.environ.get("token"))
|
| 14 |
|
| 15 |
+
# π Check GPU availability
|
| 16 |
if not torch.cuda.is_available():
|
| 17 |
raise RuntimeError("β GPU not detected! Please enable GPU in Space settings.")
|
| 18 |
|
| 19 |
+
# 🧠 Model details
|
| 20 |
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
|
| 21 |
|
| 22 |
+
# π OCR + Text Extractor
|
| 23 |
def extract_text_from_pdf(file):
|
| 24 |
text = ""
|
| 25 |
with pdfplumber.open(file) as pdf:
|
|
|
|
| 50 |
chunks.append(current_chunk)
|
| 51 |
return chunks
|
| 52 |
|
| 53 |
+
# 🧾 Prompt template for US tender evaluation
|
| 54 |
def create_prompt(text_chunk):
|
| 55 |
return f"""You are an expert in analyzing U.S. government tenders. For the following document chunk, extract answers to the listed questions. Format your response in Q&A.
|
| 56 |
|
|
|
|
| 90 |
{text_chunk}
|
| 91 |
"""
|
| 92 |
|
| 93 |
+
# ✅ GPU Decorated Main Function
|
| 94 |
+
@spaces.GPU(duration=600) # Allocate GPU for up to 10 minutes
|
| 95 |
def analyze_document(file, status_box, cancel_flag):
|
| 96 |
filename = file.name
|
| 97 |
ext = os.path.splitext(filename)[-1].lower()
|
|
|
|
| 109 |
chunks = chunk_text(raw_text)
|
| 110 |
full_summary = ""
|
| 111 |
|
| 112 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("token"))
|
| 113 |
model = AutoModelForCausalLM.from_pretrained(
|
| 114 |
model_id,
|
| 115 |
device_map="auto",
|
| 116 |
torch_dtype=torch.float16,
|
| 117 |
+
token=os.environ.get("token"),
|
| 118 |
trust_remote_code=True
|
| 119 |
)
|
| 120 |
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
|
|
|
| 133 |
status_box.value = "✅ Completed"
|
| 134 |
return full_summary
|
| 135 |
|
| 136 |
+
# π Gradio UI
|
| 137 |
with gr.Blocks(title="Smart Tender Analyzer - US Edition") as demo:
|
| 138 |
gr.Markdown("## π US Tender Analyzer β Document Intelligence (OCR + GPU-Accelerated)")
|
| 139 |
+
|
| 140 |
with gr.Row():
|
| 141 |
with gr.Column(scale=1):
|
| 142 |
file_input = gr.File(label="π Upload Tender Document (PDF/DOCX)")
|