sejalkishan committed on
Commit
8ac0d0d
·
verified ·
1 Parent(s): 5aa66ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -7,18 +7,19 @@ from PIL import Image
7
  import pytesseract
8
  import torch
9
  import os
 
10
 
11
- # πŸ” Authenticate with Hugging Face token
12
- login(token=os.environ.get("HF_TOKEN"))
13
 
14
- # Check for GPU
15
  if not torch.cuda.is_available():
16
  raise RuntimeError("❌ GPU not detected! Please enable GPU in Space settings.")
17
 
18
- # Model
19
  model_id = "mistralai/Mistral-7B-Instruct-v0.2"
20
 
21
- # ⛏️ OCR Fallback for image-based PDFs
22
  def extract_text_from_pdf(file):
23
  text = ""
24
  with pdfplumber.open(file) as pdf:
@@ -49,7 +50,7 @@ def chunk_text(text, max_chars=6000):
49
  chunks.append(current_chunk)
50
  return chunks
51
 
52
- # πŸ“‹ US Tender Q&A prompt
53
  def create_prompt(text_chunk):
54
  return f"""You are an expert in analyzing U.S. government tenders. For the following document chunk, extract answers to the listed questions. Format your response in Q&A.
55
 
@@ -89,7 +90,8 @@ DOCUMENT CHUNK:
89
  {text_chunk}
90
  """
91
 
92
- # 🧠 Main analyzer
 
93
  def analyze_document(file, status_box, cancel_flag):
94
  filename = file.name
95
  ext = os.path.splitext(filename)[-1].lower()
@@ -107,12 +109,12 @@ def analyze_document(file, status_box, cancel_flag):
107
  chunks = chunk_text(raw_text)
108
  full_summary = ""
109
 
110
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("HF_TOKEN"))
111
  model = AutoModelForCausalLM.from_pretrained(
112
  model_id,
113
  device_map="auto",
114
  torch_dtype=torch.float16,
115
- token=os.environ.get("HF_TOKEN"),
116
  trust_remote_code=True
117
  )
118
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -131,9 +133,10 @@ def analyze_document(file, status_box, cancel_flag):
131
  status_box.value = "βœ… Completed"
132
  return full_summary
133
 
134
- # 🎨 Gradio UI
135
  with gr.Blocks(title="Smart Tender Analyzer - US Edition") as demo:
136
  gr.Markdown("## πŸ“„ US Tender Analyzer – Document Intelligence (OCR + GPU-Accelerated)")
 
137
  with gr.Row():
138
  with gr.Column(scale=1):
139
  file_input = gr.File(label="πŸ“Ž Upload Tender Document (PDF/DOCX)")
 
7
  import pytesseract
8
  import torch
9
  import os
10
+ import spaces
11
 
12
+ # πŸ” Hugging Face Token Authentication
13
+ login(token=os.environ.get("token"))
14
 
15
+ # πŸ” Check GPU availability
16
  if not torch.cuda.is_available():
17
  raise RuntimeError("❌ GPU not detected! Please enable GPU in Space settings.")
18
 
19
+ # 🧠 Model details
20
  model_id = "mistralai/Mistral-7B-Instruct-v0.2"
21
 
22
+ # πŸ“„ OCR + Text Extractor
23
  def extract_text_from_pdf(file):
24
  text = ""
25
  with pdfplumber.open(file) as pdf:
 
50
  chunks.append(current_chunk)
51
  return chunks
52
 
53
+ # 🧾 Prompt template for US tender evaluation
54
  def create_prompt(text_chunk):
55
  return f"""You are an expert in analyzing U.S. government tenders. For the following document chunk, extract answers to the listed questions. Format your response in Q&A.
56
 
 
90
  {text_chunk}
91
  """
92
 
93
+ # βœ… GPU Decorated Main Function
94
+ @spaces.GPU(duration=600) # Allocate GPU for up to 10 minutes
95
  def analyze_document(file, status_box, cancel_flag):
96
  filename = file.name
97
  ext = os.path.splitext(filename)[-1].lower()
 
109
  chunks = chunk_text(raw_text)
110
  full_summary = ""
111
 
112
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("token"))
113
  model = AutoModelForCausalLM.from_pretrained(
114
  model_id,
115
  device_map="auto",
116
  torch_dtype=torch.float16,
117
+ token=os.environ.get("token"),
118
  trust_remote_code=True
119
  )
120
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
133
  status_box.value = "βœ… Completed"
134
  return full_summary
135
 
136
+ # 🌐 Gradio UI
137
  with gr.Blocks(title="Smart Tender Analyzer - US Edition") as demo:
138
  gr.Markdown("## πŸ“„ US Tender Analyzer – Document Intelligence (OCR + GPU-Accelerated)")
139
+
140
  with gr.Row():
141
  with gr.Column(scale=1):
142
  file_input = gr.File(label="πŸ“Ž Upload Tender Document (PDF/DOCX)")