File size: 18,851 Bytes
b16c761
 
f870260
b16c761
257ceea
 
 
5ccfe8a
eb4723c
 
 
940e8f0
235f3ff
b20df45
 
 
 
 
257ceea
7eb6e14
033b089
a40ca22
177b3ba
334c3ac
177b3ba
fc56780
14a7033
fc56780
 
 
b16c761
eb4723c
 
 
fc56780
fee88dd
 
 
8e2e8af
a97bf03
8e2e8af
4731096
a97bf03
8e2e8af
fee88dd
 
 
 
 
 
 
83b9e37
8e2e8af
 
 
83b9e37
 
fee88dd
 
 
 
 
 
 
a3e9aea
 
 
fee88dd
 
 
a3e9aea
 
14a7033
fc56780
7eb6e14
293da92
 
b20df45
 
 
 
 
 
 
 
 
fc56780
b20df45
fee88dd
293da92
fc56780
c8e4e26
fee88dd
293da92
 
83930c9
16f7031
83930c9
 
 
 
 
 
 
 
 
c8e4e26
293da92
 
 
 
 
235f3ff
924b6e3
235f3ff
83930c9
293da92
 
 
 
 
 
 
 
83930c9
 
 
 
 
 
38205f7
 
 
 
83930c9
38205f7
 
 
 
83930c9
16f7031
fee88dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b28a8a0
fee88dd
b28a8a0
 
257ceea
abef4b9
b28a8a0
257ceea
b28a8a0
 
 
 
257ceea
b28a8a0
257ceea
 
abef4b9
257ceea
 
b28a8a0
fe0afe2
 
1a663b3
fe0afe2
 
5ccfe8a
 
8775db6
caee6b1
 
 
 
5ccfe8a
caee6b1
 
 
 
 
 
 
 
5ccfe8a
caee6b1
eb4723c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b28a8a0
d6646e0
40e92f8
b28a8a0
984716c
 
 
3b4f8db
257ceea
 
 
b28a8a0
 
 
 
984716c
 
 
 
40e92f8
984716c
 
 
40e92f8
984716c
 
 
a97bf03
257ceea
 
b28a8a0
a97bf03
257ceea
 
b28a8a0
257ceea
 
b28a8a0
257ceea
b28a8a0
257ceea
 
b28a8a0
 
 
 
8e2e8af
b28a8a0
 
 
 
 
 
 
 
0b121c4
3b4f8db
 
16e795d
5ccfe8a
0b121c4
16e795d
fdfb395
 
8775db6
0b121c4
5ccfe8a
 
0b121c4
b28a8a0
 
 
 
 
3b4f8db
 
 
 
24c5190
 
 
b28a8a0
257ceea
 
b28a8a0
257ceea
b28a8a0
5398576
63c6731
a40ca22
5398576
4e2ce20
 
e109d92
4e2ce20
41d8fdc
 
 
 
 
eff7f9d
334c3ac
9449415
 
 
 
 
 
 
eff7f9d
 
 
 
 
 
7dce7a7
 
334c3ac
9449415
b499179
569a866
24c5190
 
eb4723c
 
528dcf6
1fb691b
528dcf6
 
 
 
1fb691b
528dcf6
 
 
 
 
0e71a5e
 
 
 
 
 
 
 
 
528dcf6
 
 
1fb691b
528dcf6
 
257ceea
528dcf6
0e71a5e
a458269
0e71a5e
bcef9fb
0e71a5e
 
 
 
 
16e795d
 
75b2c75
83930c9
 
293da92
 
 
 
 
 
 
 
 
 
 
1bd8cbd
 
 
22603de
1bd8cbd
75b2c75
 
83930c9
a40ca22
 
 
 
 
 
 
 
 
75b2c75
569a866
41d8fdc
 
 
 
 
 
235f3ff
 
 
 
 
 
569a866
41d8fdc
 
75b2c75
b499179
83930c9
 
 
118ba9e
83930c9
257ceea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
import gradio as gr
import os
os.system("apt update && apt install -y tesseract-ocr poppler-utils")
from openai import OpenAI
import pandas as pd
from docx import Document
import time
import re
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, login
from datetime import datetime
from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback


from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from manabUtils import retrieve_chunks, retrieve_chunks_GPC
from technicalDocCompliance import compliance_tech, compliance_tech_pdf
from MyRules import manualRules
from manabCQgenetaion import compliance_import_OEM
from cq_domestic import domesticCQ



# NOTE(review): these two lines copy each variable onto itself. When a variable
# is unset, os.getenv() returns None and the os.environ assignment raises
# TypeError, so the module fails at import time. Presumably intended as a
# "secret must be configured" check - confirm.
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

# Shared module-level clients and settings:
#   client            - OpenAI client used for the direct chat-completion calls
#   api               - Hugging Face Hub client used to upload the log file
#   repo_id           - Hub repo used for the log file and passed to retrieve_chunks()
#   file_path_in_repo - name of the log file inside that repo
#   llm               - LangChain chat model used by the RAG chain
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
api = HfApi(token=os.getenv("HF_TOKEN"))
repo_id = "manabb/nrl"
file_path_in_repo="LLMLogs.txt"
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
#=====
# Payment-term rules. loop_function() passes this text as the MANUAL section of
# the compliance prompt when it checks the "Payment Terms" field.
# NOTE(review): the text is sent to the model verbatim and contains spelling
# slips ("thorugh", "gurantee", "Competant", "complied"); correcting them
# changes the prompt, so confirm before editing.
manual_payment_type="""
1. Management discouraged the payment thorugh bank.
2. Advance payment without bank gurantee is not allowed. Require Competant Authority approval if given.
3. If payment term is milestone payment, then requirement of bank guarantee against each milestone payment release is to be written.
4. Standard payment term: Payment shall be made within 30 days after receipt and acceptance of material.
5. As per NRL GPC or GPC or GPC(general purchase condition) is a complied payment term.
"""

#=================
# Basis-of-estimate rules. loop_function() passes this text as the MANUAL
# section of the compliance prompt when it checks the "Basis of estimate" field.
# NOTE(review): sent to the model verbatim; "budgetory" in rule 3 is a spelling
# slip - confirm before editing because it changes the prompt.

manual_basis_of_estimate="""
1. Estimated cost should be worked out realistically using market survey, budgetary quotations, or published catalogues/MRP when no historical data is available.
2. For custom-built equipment, obtain budgetary quotes from potential parties. Ideally three quotes, but if less than three, use available quotes with average if multiple.
3. Minimum three budgetory offer or offer is required for estimate calculation. If less than three offers, then reason is to be written.
4. Estimates should consider inflation, technology changes, profit margins etc.
5. If estimates cannot be made meaningfully, full reason should be recorded.
6. For procurements up to Rs.1,00,000, detailed estimates are not required.
7. If the Tender Type of the proposal is OEM, the basis of estimate can be firm offer collected from OEM single vendor.
"""

#=======================
# Pre-Qualifying Criteria (PQC) rules. loop_function() passes this text as the
# MANUAL section of the compliance prompt when it checks the
# "PQC for Open tenders" field. The text is sent to the model verbatim.
PQC_rules="""
1. If the proposal value is more than fifty lakh, the PQC shall include financial criteria
2. PQC should be unrestrictive enough to not exclude any capable vendor/contractor.
3. PQC should be restrictive enough to exclude incapable vendors/contractors.
4. Framing of PQC requires due consideration to adequacy of competition.
5. Functional head approval is mandatory if there is PQC is written in a proposal.
6. PQC should be carefully decided for each procurement with approval of Competent Authority (CA).
7. Bidders must submit authenticated documents in support of eligibility criteria.
8. Sudden multiple times increase in requirement should not blindly adopt past PQCs.
9. PQC misjudgement in either direction (too restrictive or unrestrictive) is detrimental.
10. PQC should be clarified in tender documents that authenticated documents are required.
11. Adequacy of competition must be evaluated while framing PQC.
12. PQC should balance inclusion of capable vendors and exclusion of incapable ones."""

#===========================
#retriever = retrieve_chunks(repo_id)
#retriever=retrieve_chunks_GPC()
def create_qa_chain(retriever):
    """Build the retrieval-augmented question-answering chain.

    The returned runnable receives the user's question, sends it to
    ``retriever``, joins the ``page_content`` of the returned documents with
    blank lines, fills the prompt template with that context and the question,
    calls the module-level ``llm`` and passes the reply through
    ``StrOutputParser``.
    """
    def _join_documents(documents):
        # One context string: retrieved chunks separated by blank lines.
        return "\n\n".join(document.page_content for document in documents)

    qa_prompt = ChatPromptTemplate.from_template(
        "Use context to answer: {context}\n\nQ: {input}"
    )
    chain_inputs = {
        "context": retriever | _join_documents,
        "input": RunnablePassthrough(),
    }
    return chain_inputs | qa_prompt | llm | StrOutputParser()


#=======================

#============chat handler for the "NRL ChatBot" tab (extract_docx_text follows further below)
def respond(message, history, doc_choice):
    """Answer a chat question from the selected NRL document.

    Args:
        message: Text typed by the user. ``None`` is treated as an empty string.
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history. The list passed in is
            never mutated.
        doc_choice: ``"gpc_goods"`` queries the GPC retriever; any other value
            queries the retriever built from ``repo_id``.

    Returns:
        ``("", new_history)``: an empty string to clear the input box and the
        history extended with the user message and the assistant reply.
    """
    # Normalise missing inputs so the string/list operations below cannot
    # raise on None.
    message = message or ""
    history = history or []

    # Fewer than 3 words: do not call the LLM, just ask the user to clarify.
    if len(message.split()) < 3:
        correction_msg = "Please **clarify** or expand your question (at least 3 words)."
        return "", history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": correction_msg},
        ]

    use_gpc = doc_choice == "gpc_goods"
    retriever = retrieve_chunks_GPC() if use_gpc else retrieve_chunks(repo_id)
    qa_chain = create_qa_chain(retriever)
    # The callback collects token usage and cost for the LLM call.
    with get_openai_callback() as cb:
        answer = qa_chain.invoke(message)

    # Separate retrieval used only to list the sources under the answer.
    docs = retriever.invoke(message)
    if use_gpc:
        refs = [f"NRL GPC point No: {d.metadata.get('condition_number', 'N/A')} / Heading: {d.metadata.get('condition_heading', 'N/A')}" for d in docs]
    else:
        refs = [f"Page {d.metadata.get('page', 'N/A')}" for d in docs]

    # NOTE: the label text (including its spacing and the "Ouput" spelling) is
    # kept exactly as before so displayed and logged output is unchanged.
    full_answer = (
        f"Input tokens: {cb.prompt_tokens}, \n"
        f"        Ouput tokens: {cb.completion_tokens}, Total tokens: {cb.total_tokens}, \n"
        f"        Cost: ${cb.total_cost}\n{answer}\n\n**References:**\n"
    ) + "\n".join(refs)

    # Append only plain role/content dicts - no metadata, tuples or extras.
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": full_answer},
    ]
    history_string = "\n".join(
        f"{item['role']}: {item['content']}" for item in new_history
    )
    # Logging is best effort; a failure must not lose the answer.
    try:
        update_log("\nFrom Chat: "+datetime.now().isoformat()+"\n"+history_string+"\n")
    except Exception as ee:
        print(f"Error: {ee} - not saved the log")
    return "", new_history  # cleared input box, updated history
#====================
def extract_docx_text(file_path):
    """Flatten every table of a .docx file into Field/Value records.

    A row with exactly two cells becomes a ``Field``/``Value`` pair: colons are
    removed from the field name and the row is dropped when either side is
    empty. Any other row becomes one ``'Multi-Column Data'`` record whose value
    joins the non-empty cells with ``' | '``.

    Args:
        file_path: Path of the .docx document to read.

    Returns:
        A DataFrame with the columns ``Field``, ``Value`` and ``Source``
        (``Table_<n>``, counted from 1). The columns are present even when the
        document yields no records.
    """
    columns = ['Field', 'Value', 'Source']
    records = []
    for table_no, table in enumerate(Document(file_path).tables, start=1):
        source = f'Table_{table_no}'
        for row in table.rows:
            cells = [cell.text.strip() for cell in row.cells]
            if len(cells) == 2:
                field = cells[0].replace(':', '').strip()
                value = cells[1]
                if field and value:
                    records.append({'Field': field, 'Value': value, 'Source': source})
                continue
            combined = ' | '.join(c for c in cells if c)
            if combined:
                records.append({'Field': 'Multi-Column Data', 'Value': combined, 'Source': source})
    # Fix the schema explicitly so an empty result still has the columns
    # that consumers index by name.
    return pd.DataFrame(records, columns=columns)

def generate_response(manual, proposal):
    """Ask the chat model whether ``proposal`` complies with ``manual``.

    Both texts are embedded in a fixed prompt that requests a Status /
    Severity / Deviations / Fix / COMPLIANCE ANALYSIS answer. The prompt is
    sent as a single user message through the module-level ``client`` and the
    content of the first choice is returned.
    """
    prompt = f"""
    You are a strict compliance checker for Govt. procurement policies.

    Check whether the proposal complies with MANUAL requirements. Respond in EXACT format:

    Status: COMPLIANT or NON-COMPLIANT
    Severity: HIGH or MEDIUM or LOW
    Deviations: <short bullet-style description or 'None'>
    Fix: <clear corrective action>

    COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>

    MANUAL: {manual}

    proposal: {proposal}
    """

    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def generate_html(llm_response):
    """Render an LLM reply as a single HTML table row.

    Each line of ``llm_response`` is stripped, blank lines are discarded and
    the remaining lines are joined with ``<br>`` inside one ``<tr><td>`` cell.
    """
    stripped_lines = (raw.strip() for raw in llm_response.strip().split('\n'))
    cell_body = '<br>'.join(piece for piece in stripped_lines if piece)
    return "<tr><td>" + cell_body + "</td></tr>"
    
#================================================Gradio==================

def update_log(newRecords):
    """Append ``newRecords`` to the log file stored in the Hub dataset repo.

    The current log is downloaded, the new entry (wrapped in newlines) is
    appended in memory and the result is uploaded as a new commit. If the log
    file does not exist in the repo yet, it is created with just this entry.

    Logging is best effort: any failure is printed and swallowed, so callers
    are never interrupted by a logging problem.

    Args:
        newRecords: Text to append to the log.
    """
    try:
        # Local import keeps the file's import block untouched.
        from huggingface_hub.utils import EntryNotFoundError

        try:
            downloaded_path = hf_hub_download(
                repo_id=repo_id,
                filename=file_path_in_repo,
                repo_type="dataset"
            )
        except EntryNotFoundError:
            # First entry ever: start from an empty log.
            existing = b""
            print("File not found - created new")
        else:
            # Read only; the downloaded file lives in the hub cache and must
            # not be modified in place.
            with open(downloaded_path, "rb") as f:
                existing = f.read()

        payload = existing + ("\n"+newRecords+"\n").encode("utf-8")

        # Re-upload (overwrites the file in the repo)
        api.upload_file(
            path_or_fileobj=payload,
            path_in_repo=file_path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message="Append new log entry"
        )
    except Exception as exc:
        # Report the real cause instead of a misleading "file not found".
        print(f"Error: {exc} - not saved the log")
#==============================

def loop_function(df):
    """Stream an HTML compliance report for an extracted admin-file table.

    Rows are processed in order. Header fields ("File No.", "PR No.",
    "Name of proposal", "Value (Rs)") are added to the report as headings;
    "Tender Type" and "Value (Rs)" are also remembered as extra context for
    later checks. "PQC for Open tenders", "Basis of estimate" and
    "Payment Terms" are each checked against their rule text through
    ``generate_response``; after every check the accumulated HTML is yielded.
    All other fields are ignored.

    Args:
        df: DataFrame with ``Field`` and ``Value`` columns.

    Yields:
        The cumulative HTML report after each completed check.
    """
    # Opening wrapper emitted before every verdict table (text kept verbatim).
    verdict_open = (
        "\n"
        '                    <div style="color: white !important; background: #006400 !important; padding: 10px; font-size: 14px;"> \n'
        "                    \n"
        "                    "
    )
    text = "<hr>"
    E_file_No = ""
    Value_of_proposal = ""
    # Initialised up front: a "Basis of estimate" row may come before (or
    # without) the "Tender Type" row, which previously raised
    # UnboundLocalError and aborted the whole report.
    Tender_Type_of_proposal = ""
    txt_forRecord = ""

    for _, row in df.iterrows():
        key = str(row['Field'])
        value = str(row['Value'])

        # Fields that are neither displayed nor checked.
        if key in ("Justification/Reason for Procurement", "Category"):
            continue
        if key == "File No.":
            E_file_No = f"E-File No: {value}. \n"
            text += "<h2>" + E_file_No + "</h2>"
            continue
        if key == "PR No.":
            text += "<h2>" + f"PR No: {value}. \n" + "</h2>"
            continue
        if key == "Name of proposal":
            text += "<h2>" + f"Name of the proposal : {value}. \n" + "</h2><hr>"
            continue
        if key == "Value (Rs)":
            Value_of_proposal = f"The proposal Value is {value}. \n"
            text += "<h2>" + Value_of_proposal + "</h2>"
            continue
        if key == "Tender Type":
            Tender_Type_of_proposal = f"The proposal Tender Type is {value}. \n"
            continue

        if key == "PQC for Open tenders":
            manual_rules = PQC_rules
            proposal_details = f"The Pre Qualifying Criteria (PQC) of the proposal is under: {value}. {Value_of_proposal}"
        elif key == "Basis of estimate":
            manual_rules = manual_basis_of_estimate
            proposal_details = f"The basis of estimate of the proposal is under: {value}.{Tender_Type_of_proposal}"
        elif key == "Payment Terms":
            manual_rules = manual_payment_type
            proposal_details = f"The Payment Terms of the proposal is {value}."
        else:
            # No rule set exists for this field.
            continue

        try:
            rr = generate_response(manual_rules, proposal_details)
            txt_forRecord += "\n"+datetime.now().isoformat()+"\n"+E_file_No+"\n"+rr
            # Build the whole fragment first so a failure cannot leave
            # half-open tags in the report.
            fragment = (
                verdict_open
                + "<table><tr><td>As per proposal, "+key + " : "+value+"</td></tr>"
                + generate_html(rr)
                + "</table></div><hr>"
            )
            text += fragment
            yield text
            # Pause between consecutive model calls.
            time.sleep(3)
        except Exception as e:
            print(f"Error: {e} - skipping row")
            continue

    # Persist all verdicts of this run in one log write (best effort).
    try:
        update_log("\n"+txt_forRecord+"\n")
    except Exception as ee:
        print(f"Error: {ee} - not saved the log")
def loop_function_tech(df):
    """Placeholder generator for the technical-document loop.

    ``df`` is not used yet; a single "coming soon" notice is emitted.
    """
    yield from ("coming soon",)
def check_compliance(file):
    """Stream the compliance report for an uploaded admin file.

    Args:
        file: Upload object whose ``name`` attribute is the file path, or
            ``None`` when nothing was uploaded.

    Yields:
        The cumulative HTML report from ``loop_function`` for a .docx file,
        otherwise a single plain-text notice.
    """
    # The button can be clicked before any file is selected.
    if file is None:
        yield "No file uploaded"
        return
    # Accept the extension in any letter case (".DOCX" as well as ".docx").
    if file.name.lower().endswith(".docx"):
        yield from loop_function(extract_docx_text(file.name))
    else:
        yield "Unsupported file format"
def check_compliance_tech(file_name):
    """Check an uploaded technical PDF against the manual rules.

    Args:
        file_name: Upload object whose ``name`` attribute is the file path, or
            ``None`` when nothing was uploaded.

    Yields:
        One HTML table with the result of ``compliance_tech`` for a .pdf file,
        otherwise a single plain-text notice.
    """
    # The button can be clicked before any file is selected.
    if file_name is None:
        yield "No file uploaded"
        return
    # Accept the extension in any letter case.
    if not file_name.name.lower().endswith(".pdf"):
        yield "Unsupported file format"
        return

    MANUAL_RULES = manualRules()
    dd = compliance_tech(file_name, client, MANUAL_RULES)
    kkk = "<table>" + generate_html(dd) + "</table>"
    yield kkk
    # Log after yielding so the result is delivered before the upload starts.
    update_log("\n Technical Complaince response: "+datetime.now().isoformat()+"\n"+kkk+"\n")
def generateCqOemImport(file_name):
    """Generate a draft CQ from a vendor's OEM offer in PDF form.

    Args:
        file_name: Upload object whose ``name`` attribute is the file path, or
            ``None`` when nothing was uploaded.

    Yields:
        One HTML/text report combining token usage, the quoted prices, the
        extracted values, the compliance result and the draft CQ for a .pdf
        file, otherwise a single plain-text notice.
    """
    # The button can be clicked before any file is selected.
    if file_name is None:
        yield "No file uploaded"
        return
    # Accept the extension in any letter case.
    if file_name.name.lower().endswith(".pdf"):
        extraction_item_value_html, extraction_html, compliance_html, input_tokens ,output_tokens, total_tokens, result_dic = compliance_import_OEM(file_name, client)
        kkk=domesticCQ(extraction_item_value_html,result_dic)
        dd=f"""
        \n
        <table>
        <tr><td>Input tokens:</td><td>{input_tokens}</td></tr>
        <tr><td>Output tokens:</td><td>{output_tokens}</td></tr>
        <tr><td>Total tokens:</td><td>{total_tokens}</td></tr>
        </table>
        \n Vendor quoted the price as under in the offer. \n
        {extraction_item_value_html}
        \n The extracted value against each point is as under. \n
        {extraction_html}
        \n The compliance of the offer against our rules are as under. \n
        {compliance_html}
        \n\n
        The Draft CQ is as under: \n\n\n
        {kkk}
        """
        yield dd
        # Log after yielding so the result is delivered before the upload starts.
        update_log("\n CQ-OEM: "+datetime.now().isoformat()+"\n"+dd+"\n")
    else:
        yield "Unsupported file format"

#================================
# Custom stylesheet handed to gr.Blocks(css=...). Selectors are matched against
# the elem_id values given to the components in the layout below.
# NOTE(review): inside the string, the CSS comments on the green
# "#compliance-btn-tech" rules still say "red"; they are part of the runtime
# string and are left untouched here.
# NOTE(review): "#compliance-out" is only referenced by commented-out Textbox
# lines in this file - it looks unused; confirm before removing.
css = """
#admin-file .label, #admin-file label { color: #FFFFFF !important; font-size: 16px !important; }
#admin-file { background-color: #000000 !important; }
#compliance-btn { 
    color: #FFFFFF !important; 
    background-color: red !important;  /* Red background */
    font-size: 16px !important; 
}
#compliance-btn:hover { 
    background-color: #CC0000 !important;  /* Darker red on hover */
    color: #FFFFFF !important;
}
#compliance-btn-tech { 
    color: #FFFFFF !important; 
    background-color: green !important;  /* Red background */
    font-size: 16px !important; 
}
#compliance-btn-tech:hover { 
    background-color: #006400 !important;  /* Darker red on hover */
    color: #FFFFFF !important;
}
#compliance-out textarea, #compliance-out .label, #compliance-out label { 
    color: #FFFFFF !important; 
    background-color: #000000 !important; 
    font-size: 16px !important; 
}
"""

# Gradio UI: one tab per tool. Every button is wired to one of the handler
# functions defined above, and the app is launched at the end of the module.
with gr.Blocks(css=css) as demo:
    with gr.Tabs(elem_id="main-tabs"):  
        # Tab 1: admin-file (.docx) compliance check, streamed by check_compliance.
        with gr.TabItem("Compliance Check of Arohan Admin File"):
            with gr.Row():
                inp = gr.File( 
                    label="Upload Admin File in word i.e. docx format",
                    file_types=[".docx"],
                    elem_id="admin-file"
                )
            run_btn = gr.Button("Check compliance", elem_id="compliance-btn")
            out = gr.HTML(label="Compliance Result")
            #out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
            run_btn.click(check_compliance, inputs=inp, outputs=out, api_name="manabAdminNotCheck")
        
        # Tab 2: RAG chatbot over the document selected with the radio button.
        with gr.TabItem("NRL ChatBot"):
                gr.Markdown("""# RAG Chatbot - NRL Documents""")
                # RADIO BUTTON for document selection; the second tuple item is
                # the value passed to respond() as doc_choice.
                doc_selector = gr.Radio(
                    choices=[
                        ("GPC Goods", "gpc_goods"),
                        ("Procurement Manual", "manual")
                    ],
                    value="gpc_goods",  # Default
                    label="Select Document:",
                    info="Choose which document to query"
                )
                # NOTE(review): respond() returns role/content dicts, so this
                # relies on the Chatbot using the "messages" format - confirm
                # for the installed Gradio version.
                chatbot = gr.Chatbot(height=500)  # Defaults to messages
                msg = gr.Textbox(placeholder="Ask a question...", label="Query")
                submit_btn = gr.Button("Submit")
                
                # Events: the button click and pressing Enter both call respond().
                # NOTE(review): both events use api_name="manabChat"; Gradio may
                # rename the second endpoint - confirm which name API clients see.
                submit_btn.click(respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot], api_name="manabChat")
                msg.submit(respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot], api_name="manabChat")
        # Tab 3: technical document (.pdf) compliance check.
        with gr.TabItem("Compliance Check of user technical doc"):
            with gr.Row():
                inp_tech = gr.File( 
                    label="Upload technical document in pdf format",
                    file_types=[".pdf"],
                    elem_id="tech-file"
                )
            run_btn_tech = gr.Button("Check compliance of technical document", elem_id="compliance-btn-tech")
            out_tech = gr.HTML(label="Compliance Result of technical document")
            #out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
            run_btn_tech.click(check_compliance_tech, inputs=inp_tech, outputs=out_tech, api_name="manabComplianceCheck")
        # Tab 4: draft CQ generation from a vendor's OEM offer (.pdf).
        with gr.TabItem("CQ generation - OEM "):
            with gr.Row():
                inp_OEM_import = gr.File( 
                    label="Upload vendor's offer in pdf format",
                    file_types=[".pdf"],
                    elem_id="oem-import-file"
                )
                manabLebel=gr.HTML("""
                        <div style="color: red; background: black; padding: 20px; text-align: justify; font-size: 20px;">
                            Disclaimer: AI generated outcome is only for reference. Consider the 
                            following only as a draft and recheck is mandatory.
                        </div>"""                        
                )
            run_btn_oemImport = gr.Button("Generate CQ of OEM-offer", elem_id="generateOEMImport-btn")
            # Rebinds the name out_tech to a new component; the click handler of
            # the previous tab was already registered with the earlier component.
            out_tech = gr.HTML(label="Generated CQ")
            #out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
            run_btn_oemImport.click(generateCqOemImport, inputs=inp_OEM_import, outputs=out_tech, api_name="manabCQGeneration")
        # Tab 5: placeholder only, no handler attached.
        with gr.TabItem("Generate TAC"):
            gr.HTML("""
                        <div style="color: white; background: black; padding: 20px; text-align: center; font-size: 24px;">
                            🚧 Coming Soon 🚧
                        </div> """
                   )
# Enable the request queue and start the app at import time.
demo.queue().launch()