File size: 7,250 Bytes
538dec9
b1e29d6
8f7bc43
 
 
b1e29d6
8f7bc43
 
538dec9
8f7bc43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1e29d6
8f7bc43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1e29d6
6da4021
8f7bc43
6da4021
8f7bc43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6da4021
8f7bc43
 
 
 
 
 
6da4021
8f7bc43
 
 
 
 
 
 
 
 
b1e29d6
6da4021
8f7bc43
6da4021
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import requests
from bs4 import BeautifulSoup
import re
from io import BytesIO
import logging

# Configure root logging once at import time (INFO level, timestamped lines)
# and create this module's logger. NOTE(review): basicConfig at import time
# affects the whole process — confirm no host application configures logging.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def chunk_text(text, max_tokens=3000, chars_per_token=4):
    """Split text into smaller chunks that won't exceed token limits.

    Paragraphs (separated by blank lines) are greedily packed into chunks of
    at most ``max_tokens * chars_per_token`` characters.

    Args:
        text: The text to split.
        max_tokens: Approximate token budget per chunk.
        chars_per_token: Estimated characters per token (4 is a common
            heuristic for English text).

    Returns:
        list[str]: Non-empty chunks, each within the character budget.
    """
    max_chars = max_tokens * chars_per_token
    chunks = []
    current_chunk = ""

    for para in text.split('\n\n'):
        piece = para + "\n\n"
        if len(current_chunk) + len(para) < max_chars:
            current_chunk += piece
        else:
            # Fix: only flush a non-empty chunk — previously an empty string
            # was appended when the very first paragraph exceeded the budget.
            if current_chunk:
                chunks.append(current_chunk)
            # Fix: hard-split a single paragraph that alone exceeds the
            # budget — previously it passed through as one oversized chunk.
            while len(piece) > max_chars:
                chunks.append(piece[:max_chars])
                piece = piece[max_chars:]
            current_chunk = piece

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

def fetch_active_tenders():
    """Fetch active tenders from PPRA website with error handling.

    Scrapes the tender listing table and returns a list of dicts with
    ``title``, ``department``, ``closing_date`` and ``link`` keys.
    Returns an empty list on any network or parse failure.
    """
    try:
        logger.info("Fetching active tenders from PPRA website")
        listing_url = "https://www.ppra.org.pk/dad_tenders.asp"
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(listing_url, headers=request_headers, timeout=15)
        if response.status_code != 200:
            logger.error(f"Failed to fetch tenders. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, "html.parser")
        all_tables = soup.find_all("table")

        # Primary strategy: locate the table whose header row mentions the
        # tender description column.
        tender_table = None
        for candidate in all_tables:
            header_row = candidate.find("tr")
            if header_row and header_row.find("th") and "Tender Description" in header_row.text:
                tender_table = candidate
                break

        # Fallbacks: a table with class "data", then the table with the
        # most rows on the page.
        if not tender_table:
            tender_table = soup.find("table", {"class": "data"})
            if not tender_table and all_tables:
                tender_table = max(all_tables, key=lambda t: len(t.find_all("tr")))

        if not tender_table:
            logger.error("Could not find tender table on the page")
            return []

        tenders = []
        for data_row in tender_table.find_all("tr")[1:]:  # skip header row
            cells = data_row.find_all("td")
            if len(cells) < 5:
                continue

            anchor = cells[4].find("a")
            detail_link = ""
            if anchor and anchor.has_attr("href"):
                detail_link = "https://www.ppra.org.pk/" + anchor["href"].strip()

            tenders.append({
                "title": cells[0].text.strip(),
                "department": cells[1].text.strip(),
                "closing_date": cells[3].text.strip(),
                "link": detail_link,
            })

        logger.info(f"Successfully fetched {len(tenders)} tenders")
        return tenders
    except Exception as e:
        # Boundary handler: a scraper failure degrades to "no tenders".
        logger.error(f"Error fetching tenders: {str(e)}")
        return []

def fetch_tender_details(url):
    """Fetch and parse tender details from the provided URL.

    Handles both PDF documents (first 3 pages extracted via pdfplumber, if
    installed) and HTML pages (scripts/styles stripped, text normalized).

    Args:
        url: Absolute URL of the tender detail page or PDF.

    Returns:
        str: Up to 5000 characters of extracted text, or a human-readable
        error message on failure (this function never raises).
    """
    try:
        logger.info(f"Fetching tender details from {url}")
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=15)
        if response.status_code != 200:
            logger.error(f"Failed to fetch tender details. Status code: {response.status_code}")
            return "Could not fetch details from this link."

        # Check if it's a PDF (by extension or response Content-Type)
        if url.lower().endswith('.pdf') or 'application/pdf' in response.headers.get('Content-Type', ''):
            logger.info("PDF detected, extracting text preview")
            try:
                import pdfplumber
                # Fix: use a context manager so the PDF handle is always
                # closed (previously leaked on every call).
                with pdfplumber.open(BytesIO(response.content)) as pdf:
                    text = ""
                    # Extract only the first few pages as a preview
                    for i in range(min(3, len(pdf.pages))):
                        text += pdf.pages[i].extract_text() or ""
                return text[:5000]  # Return first 5000 chars
            except ImportError:
                return "PDF document detected. Install pdfplumber to extract content."

        # HTML content
        soup = BeautifulSoup(response.text, "html.parser")

        # Remove script and style elements so only visible text remains
        for script in soup(["script", "style"]):
            script.decompose()

        # Get text and clean it
        text = soup.get_text(separator="\n", strip=True)

        # Collapse runs of blank lines into a single paragraph break
        text = re.sub(r'\n\s*\n', '\n\n', text)

        logger.info(f"Successfully fetched tender details ({len(text)} chars)")
        return text[:5000]  # Limit to 5000 chars to prevent token issues
    except Exception as e:
        logger.error(f"Error fetching tender details: {str(e)}")
        return f"Could not fetch details from this link. Error: {str(e)}"

def get_ppra_resources():
    """Return a mapping of PPRA resource names to their absolute URLs."""
    base = "https://www.ppra.org.pk/"

    # Core site pages
    resources = {
        "Home": base,
        "Active Tenders": base + "dad_tenders.asp",
        "Procurement Guidelines (PDF)": base + "pguidelines.pdf",
        "PPRA Ordinance": base + "ordinance.asp",
        "Rules": base + "Rules.asp",
        "Regulations Page": base + "regulation.asp",
    }

    # Regulations, SROs and related documents
    resources.update({
        "Regulations, 2024 - Disposal of Public Assets": base + "SRO615I2025.pdf",
        "Specimens for Advertisement (Amended 2024)": base + "SRO461I2024.pdf",
        "Blacklisting & Debarment Regulations 2024": base + "SRO460I2024.pdf",
        "Review Petition Rule 19(3), 2021": base + "SRO19I2021.pdf",
        "Regulation 2009": base + "reg2009.pdf",
        "Regulation 2010 - Consultancy Services": base + "reg2010.pdf",
        "Regulation 2011": base + "reg2011.pdf",
        "Eligible Bidders Tax Compliance 2015": base + "reg2015.pdf",
        "Transaction of Business Board Meeting Regulations (2021)": base + "SRO15I2021.pdf",
        "Review Petition and Grievances (SRO90I2022)": base + "SRO90I2022.pdf",
        "National Standard Procurement Docs (SRO370I2022)": base + "SRO370I2022.pdf",
        "Manner of Advertisement (SRO591I2022)": base + "SRO591I2022.pdf",
        "Declaration of Beneficial Owners (SRO592I2022)": base + "SRO592I2022.pdf",
        "E-Pak Procurement Regulation (SRO296I2023)": base + "SRO296I2023.pdf",
    })

    # Organisational info
    resources["Board Info"] = base + "board.asp"

    return resources