# NRLCommercialAI / app.py
# manabb's picture
# Update app.py
# 3dcf5c8 verified
import gradio as gr
import os
# Runs apt at import time to install the system packages tesseract-ocr and
# poppler-utils. NOTE(review): the exit status returned by os.system is
# ignored, so a failed install is not detected here.
os.system("apt update && apt install -y tesseract-ocr poppler-utils")
from openai import OpenAI
import pandas as pd
from docx import Document
import time
import re
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, login
from datetime import datetime
from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from manabUtils import retrieve_chunks, retrieve_chunks_GPC
from technicalDocCompliance import compliance_tech, compliance_tech_pdf
from MyRules import manualRules
from manabCQgenetaion import compliance_import_OEM
from cq_domestic import domesticCQ
# Fail fast with a readable message when a required credential is missing.
# (Re-assigning os.environ[X] = os.getenv(X) is a no-op when X is set and an
# opaque "str expected, not NoneType" TypeError when it is not.)
_missing_env = [
    name for name in ("HF_TOKEN", "OPENAI_API_KEY") if os.getenv(name) is None
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

# OpenAI client used for the direct chat-completion calls in this module.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Hugging Face Hub client used to upload the log file.
api = HfApi(token=os.environ["HF_TOKEN"])
# Hub repository and file that hold the application log.
repo_id = "manabb/nrl"
file_path_in_repo = "LLMLogs.txt"
# LangChain chat model used by the retrieval QA chain.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=500)
#=====
# Payment-term rules. loop_function() passes this text as the MANUAL section
# of the compliance prompt when it checks the "Payment Terms" field.
manual_payment_type="""
1. Management discouraged the payment thorugh bank.
2. Advance payment without bank gurantee is not allowed. Require Competant Authority approval if given.
3. If payment term is milestone payment, then requirement of bank guarantee against each milestone payment release is to be written.
4. Standard payment term: Payment shall be made within 30 days after receipt and acceptance of material.
5. As per NRL GPC or GPC or GPC(general purchase condition) is a complied payment term.
"""
#=================
# Basis-of-estimate rules. loop_function() passes this text as the MANUAL
# section of the compliance prompt when it checks the "Basis of estimate" field.
manual_basis_of_estimate="""
1. Estimated cost should be worked out realistically using market survey, budgetary quotations, or published catalogues/MRP when no historical data is available.
2. For custom-built equipment, obtain budgetary quotes from potential parties. Ideally three quotes, but if less than three, use available quotes with average if multiple.
3. Minimum three budgetory offer or offer is required for estimate calculation. If less than three offers, then reason is to be written.
4. Estimates should consider inflation, technology changes, profit margins etc.
5. If estimates cannot be made meaningfully, full reason should be recorded.
6. For procurements up to Rs.1,00,000, detailed estimates are not required.
7. If the Tender Type of the proposal is OEM, the basis of estimate can be firm offer collected from OEM single vendor.
"""
#=======================
# Pre-Qualifying Criteria (PQC) rules. loop_function() passes this text as the
# MANUAL section of the compliance prompt when it checks the
# "PQC for Open tenders" field.
PQC_rules="""
1. If the proposal value is more than fifty lakh, the PQC shall include financial criteria
2. PQC should be unrestrictive enough to not exclude any capable vendor/contractor.
3. PQC should be restrictive enough to exclude incapable vendors/contractors.
4. Framing of PQC requires due consideration to adequacy of competition.
5. Functional head approval is mandatory if there is PQC is written in a proposal.
6. PQC should be carefully decided for each procurement with approval of Competent Authority (CA).
7. Bidders must submit authenticated documents in support of eligibility criteria.
8. Sudden multiple times increase in requirement should not blindly adopt past PQCs.
9. PQC misjudgement in either direction (too restrictive or unrestrictive) is detrimental.
10. PQC should be clarified in tender documents that authenticated documents are required.
11. Adequacy of competition must be evaluated while framing PQC.
12. PQC should balance inclusion of capable vendors and exclusion of incapable ones."""
#===========================
#retriever = retrieve_chunks(repo_id)
#retriever=retrieve_chunks_GPC()
def limit_context(docs, max_chars=4000):
    """Join the page contents of *docs* and cap the result at *max_chars*.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.
        max_chars: Maximum number of characters kept from the joined text.

    Returns:
        The contents separated by blank lines, truncated to *max_chars*.
    """
    pieces = [doc.page_content for doc in docs]
    joined = "\n\n".join(pieces)
    return joined[:max_chars]
def create_qa_chain(retriever):
    """Build the retrieval question-answering chain around *retriever*.

    The incoming question is sent to *retriever*; the returned documents are
    joined and truncated by ``limit_context``. The question itself is passed
    through unchanged. Both values fill the prompt, which is sent to the
    module-level ``llm`` and parsed into a plain string.

    Args:
        retriever: Runnable that maps a question to a list of documents.

    Returns:
        A runnable chain; invoke it with the question text.
    """
    qa_prompt = ChatPromptTemplate.from_template(
        "Use context to answer: {context}\n\nQ: {input}"
    )
    # Retrieved documents are reduced to one size-limited context string.
    context_branch = retriever | (lambda docs: limit_context(docs))
    chain_inputs = {
        "context": context_branch,
        "input": RunnablePassthrough(),
    }
    return chain_inputs | qa_prompt | llm | StrOutputParser()
#=======================
#============ chat handler (respond)
def respond(message, history, doc_choice):
    """Answer one chat turn against the selected document set.

    Args:
        message: Question text typed by the user. ``None`` is treated as an
            empty question.
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history.
        doc_choice: ``"gpc_goods"`` selects ``retrieve_chunks_GPC()``; any
            other value selects ``retrieve_chunks(repo_id)``.

    Returns:
        A ``("", new_history)`` tuple: the empty string for the input box and
        the history extended with the user message and the assistant reply.
    """
    # Guard against missing values so list concatenation / str methods below
    # cannot fail on None.
    message = message or ""
    history = list(history or [])

    # Fewer than 3 words: do not call the LLM, just ask the user to clarify.
    if len(message.strip().split()) < 3:
        correction_msg = "Please **clarify** or expand your question (at least 3 words)."
        return "", history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": correction_msg},
        ]

    use_gpc = doc_choice == "gpc_goods"
    retriever = retrieve_chunks_GPC() if use_gpc else retrieve_chunks(repo_id)
    qa_chain = create_qa_chain(retriever)

    # The callback collects token usage and cost of the LLM call.
    with get_openai_callback() as cb:
        answer = qa_chain.invoke(message)

    # Second retrieval, used only to build the reference list shown to the user.
    docs = retriever.invoke(message)
    if use_gpc:
        refs = [
            f"NRL GPC point No: {d.metadata.get('condition_number', 'N/A')} / Heading: {d.metadata.get('condition_heading', 'N/A')}"
            for d in docs
        ]
    else:
        refs = [f"Page {d.metadata.get('page', 'N/A')}" for d in docs]

    # NOTE(review): "Ouput" is misspelled in the displayed text; left unchanged.
    usage_header = (
        f"Input tokens: {cb.prompt_tokens},\n"
        f"Ouput tokens: {cb.completion_tokens}, Total tokens: {cb.total_tokens},\n"
        f"Cost: ${cb.total_cost}\n"
    )
    full_answer = usage_header + f"{answer}\n\n**References:**\n" + "\n".join(refs)

    # Append only plain role/content dicts to the history.
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": full_answer},
    ]

    history_string = "\n".join(
        f"{item['role']}: {item['content']}" for item in new_history
    )
    # Logging is best-effort and must not break the chat.
    try:
        update_log("\nFrom Chat: "+datetime.now().isoformat()+"\n"+history_string+"\n")
    except Exception as ee:
        print(f"Error: {ee} - not saved the log")
    return "", new_history
#====================
def extract_docx_text(file_path):
    """Extract the table contents of a .docx file into a DataFrame.

    Only tables are read. A row with exactly two cells becomes a Field/Value
    record (colons are removed from the field name; the row is dropped when
    either side is empty). Any other row is stored as one
    ``'Multi-Column Data'`` record whose value joins the non-empty cells
    with ``' | '``.

    Args:
        file_path: Path of the .docx document.

    Returns:
        DataFrame with columns ``Field``, ``Value`` and ``Source``
        (``Table_<n>``, 1-based). The columns are present even when no record
        was found, so callers can index them safely.
    """
    doc = Document(file_path)
    records = []
    for table_no, table in enumerate(doc.tables, start=1):
        source = f'Table_{table_no}'
        for row in table.rows:
            cells = [cell.text.strip() for cell in row.cells]
            if len(cells) == 2:
                key = cells[0].replace(':', '').strip()
                value = cells[1].strip()
                if key and value:
                    records.append({'Field': key, 'Value': value, 'Source': source})
            else:
                combined = ' | '.join(c for c in cells if c)
                if combined:
                    records.append({'Field': 'Multi-Column Data', 'Value': combined, 'Source': source})
    # Explicit columns keep the schema stable for documents without usable rows.
    return pd.DataFrame(records, columns=['Field', 'Value', 'Source'])
def generate_response(manual, proposal):
    """Ask the chat model whether *proposal* complies with *manual*.

    Args:
        manual: Rule text inserted after ``MANUAL:`` in the prompt.
        proposal: Proposal text inserted after ``proposal:`` in the prompt.

    Returns:
        The message content of the first completion choice.
    """
    compliance_prompt = f"""
You are a strict compliance checker for Govt. procurement policies.
Check whether the proposal complies with MANUAL requirements. Respond in EXACT format:
Status: COMPLIANT or NON-COMPLIANT
Severity: HIGH or MEDIUM or LOW
Deviations: <short bullet-style description or 'None'>
Fix: <clear corrective action>
COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>
MANUAL: {manual}
proposal: {proposal}
"""
    messages = [{"role": "user", "content": compliance_prompt}]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def generate_html(llm_response):
    """Render a multi-line response as a single-cell HTML table row.

    Each line is stripped, blank lines are dropped, and the remaining lines
    are joined with ``<br>`` inside ``<tr><td>...</td></tr>``.
    """
    stripped_lines = (raw.strip() for raw in llm_response.strip().split('\n'))
    content = '<br>'.join(piece for piece in stripped_lines if piece)
    return f"<tr><td>{content}</td></tr>"
#================================================Gradio==================
def update_log(newRecords):
    """Append *newRecords* to the log file kept in the Hugging Face dataset repo.

    The current ``file_path_in_repo`` is downloaded from ``repo_id``, the new
    text (wrapped in newlines) is appended, and the result is uploaded as a
    new commit. If the file does not exist in the repo yet, it is created
    containing only the new entry.

    Logging is best-effort: any failure is reported on stdout and is never
    raised to the caller.

    Args:
        newRecords: Text to append to the log.
    """
    try:
        # Local import: huggingface_hub is already a dependency of this module.
        from huggingface_hub.utils import EntryNotFoundError

        try:
            downloaded_path = hf_hub_download(
                repo_id=repo_id,
                filename=file_path_in_repo,
                repo_type="dataset"
            )
        except EntryNotFoundError:
            # First entry: the log file is not in the repo yet.
            existing = b""
        else:
            # Read the cached copy instead of editing it in place, so the
            # local hub cache is left untouched.
            with open(downloaded_path, "rb") as f:
                existing = f.read()

        entry = ("\n" + newRecords + "\n").encode("utf-8")
        # upload_file accepts raw bytes; this overwrites the file in the repo.
        api.upload_file(
            path_or_fileobj=existing + entry,
            path_in_repo=file_path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message="Append new log entry"
        )
    except Exception as exc:
        # Report the real cause instead of a fixed "file not found" message.
        print(f"Log update failed: {exc}")
#==============================
def loop_function(df):
    """Stream an HTML compliance report for the rows of an extracted admin file.

    Header fields ("File No.", "PR No.", "Name of proposal", "Value (Rs)")
    are added to the report as headings. The fields "PQC for Open tenders",
    "Basis of estimate" and "Payment Terms" are each checked against their
    rule text with ``generate_response``; after every check the accumulated
    HTML is yielded. All responses are written to the log once at the end.

    Args:
        df: DataFrame with ``Field`` and ``Value`` columns.

    Yields:
        The HTML report accumulated so far, once per checked field.
    """
    text = "<hr>"
    value_sentence = ""
    # Initialised up front: "Basis of estimate" may appear before (or without)
    # a "Tender Type" row, which previously raised UnboundLocalError.
    tender_type_sentence = ""
    e_file_no = ""
    txt_for_record = ""

    for _, row in df.iterrows():
        key = str(row['Field'])
        value = str(row['Value'])

        # Fields that are neither displayed nor checked.
        if key in ("Justification/Reason for Procurement", "Category"):
            continue

        # Header fields: shown as headings, some remembered for later prompts.
        if key == "File No.":
            e_file_no = f"E-File No: {value}. \n"
            text += "<h2>" + e_file_no + "</h2>"
            continue
        if key == "PR No.":
            text += "<h2>" + f"PR No: {value}. \n" + "</h2>"
            continue
        if key == "Name of proposal":
            text += "<h2>" + f"Name of the proposal : {value}. \n" + "</h2><hr>"
            continue
        if key == "Value (Rs)":
            value_sentence = f"The proposal Value is {value}. \n"
            text += "<h2>" + value_sentence + "</h2>"
            continue
        if key == "Tender Type":
            tender_type_sentence = f"The proposal Tender Type is {value}. \n"
            continue

        # Fields that are checked against a rule set.
        if key == "PQC for Open tenders":
            manual_rules = PQC_rules
            proposal_details = f"The Pre Qualifying Criteria (PQC) of the proposal is under: {value}. {value_sentence}"
        elif key == "Basis of estimate":
            manual_rules = manual_basis_of_estimate
            proposal_details = f"The basis of estimate of the proposal is under: {value}.{tender_type_sentence}"
        elif key == "Payment Terms":
            manual_rules = manual_payment_type
            proposal_details = f"The Payment Terms of the proposal is {value}."
        else:
            continue

        try:
            rr = generate_response(manual_rules, proposal_details)
            txt_for_record += "\n" + datetime.now().isoformat() + "\n" + e_file_no + "\n" + rr
            # Build the whole fragment before touching `text`, so a failure
            # cannot leave unclosed tags in the report.
            fragment = (
                "\n<span style=\"color: #006400 !important; font-weight: bold; font-size: 14px;\">\n"
                + "<table><tr><td>As per proposal, " + key + " : " + value + "</td></tr>"
                + generate_html(rr)
                + "</table></span><hr>"
            )
        except Exception as e:
            print(f"Error: {e} - skipping row")
            continue

        text += fragment
        yield text
        # Pause between consecutive checks.
        time.sleep(3)

    # Logging is best-effort and must not break the report.
    try:
        update_log("\n" + txt_for_record + "\n")
    except Exception as ee:
        print(f"Error: {ee} - not saved the log")
def loop_function_tech(df):
    """Placeholder generator for the technical check; *df* is not used yet."""
    yield from ("coming soon",)
def check_compliance(file):
    """Stream the compliance report for an uploaded admin file.

    Args:
        file: Uploaded file, either an object with a ``name`` attribute
            holding its path or a plain path string. ``None`` means nothing
            was uploaded.

    Yields:
        HTML report snapshots from ``loop_function`` for a .docx file,
        otherwise a single message explaining why nothing was checked.
    """
    if file is None:
        # Button clicked without an upload: report it instead of failing on
        # ``None.name``.
        yield "No file uploaded"
        return

    path = str(getattr(file, "name", file))
    # Extension check is case-insensitive so ".DOCX" is accepted too.
    if not path.lower().endswith(".docx"):
        yield "Unsupported file format"
        return

    yield from loop_function(extract_docx_text(path))
def check_compliance_tech(file_name):
    """Check an uploaded technical PDF against the manual rules.

    Args:
        file_name: Uploaded file, either an object with a ``name`` attribute
            holding its path or a plain path string. ``None`` means nothing
            was uploaded.

    Yields:
        An HTML table with the compliance response for a .pdf file,
        otherwise a single message explaining why nothing was checked.
    """
    if file_name is None:
        # Button clicked without an upload: report it instead of failing on
        # ``None.name``.
        yield "No file uploaded"
        return

    path = str(getattr(file_name, "name", file_name))
    # Extension check is case-insensitive so ".PDF" is accepted too.
    if not path.lower().endswith(".pdf"):
        yield "Unsupported file format"
        return

    MANUAL_RULES = manualRules()
    dd = compliance_tech(file_name, client, MANUAL_RULES)
    kkk = "<table>" + generate_html(dd) + "</table>"
    yield kkk
    # Log only after the result has been handed to the UI.
    update_log("\n Technical Complaince response: "+datetime.now().isoformat()+"\n"+kkk+"\n")
def generateCqOemImport(file_name):
    """Generate a draft CQ from a vendor offer supplied as PDF.

    Args:
        file_name: Uploaded file, either an object with a ``name`` attribute
            holding its path or a plain path string. ``None`` means nothing
            was uploaded.

    Yields:
        One HTML/text report containing token usage, the extracted item
        values, the extracted points, the compliance result and the draft CQ
        for a .pdf file; otherwise a single message explaining why nothing
        was generated.
    """
    if file_name is None:
        # Button clicked without an upload: report it instead of failing on
        # ``None.name``.
        yield "No file uploaded"
        return

    path = str(getattr(file_name, "name", file_name))
    # Extension check is case-insensitive so ".PDF" is accepted too.
    if not path.lower().endswith(".pdf"):
        yield "Unsupported file format"
        return

    extraction_item_value_html, extraction_html, compliance_html, input_tokens ,output_tokens, total_tokens, result_dic = compliance_import_OEM(file_name, client)
    kkk=domesticCQ(extraction_item_value_html,result_dic)
    dd=f"""
\n
<table>
<tr><td>Input tokens:</td><td>{input_tokens}</td></tr>
<tr><td>Output tokens:</td><td>{output_tokens}</td></tr>
<tr><td>Total tokens:</td><td>{total_tokens}</td></tr>
</table>
\n Vendor quoted the price as under in the offer. \n
{extraction_item_value_html}
\n The extracted value against each point is as under. \n
{extraction_html}
\n The compliance of the offer against our rules are as under. \n
{compliance_html}
\n\n
The Draft CQ is as under: \n\n\n
{kkk}
"""
    yield dd
    # Log only after the result has been handed to the UI.
    update_log("\n CQ-OEM: "+datetime.now().isoformat()+"\n"+dd+"\n")
#================================
# Alternative all-black stylesheet. NOTE(review): within the code shown here
# it is not referenced; gr.Blocks below is created with `css`, not `css1`.
css1 = """
* {
background-color: #000000 !important;
color: #FFFFFF !important;
border-color: #444444 !important;
}
button, .btn {
background: #333333 !important;
color: #FFFFFF !important;
border: 1px solid #555555 !important;
}
button:hover {
background: #555555 !important;
}
input, textarea, select {
background: #111111 !important;
color: #FFFFFF !important;
border: 1px solid #666666 !important;
}
label {
color: #FFFFFF !important;
font-weight: bold !important;
}
#nrlchatbot, .chatbot {
background: #000000 !important;
color: #FFFFFF !important;
}
.chatbot .message {
background: #111111 !important;
color: #FFFFFF !important;
border: 1px solid #333333 !important;
}
.tabitem, h1, h2, h3, .markdown {
background: transparent !important;
color: #FFFFFF !important;
}
span[style] {
color: inherit !important; /* Respect inline styles */
}
span[style*="color"] {
color: inherit !important !important; /* Double !important */
}
span {
all: unset !important;
}
#compliance-btn { background: red !important; }
#compliance-btn-tech { background: #006600 !important; }
#admin-file { background: #00008B !important; }
#chatRadio {
background: #111111 !important;
}
#chatRadio label {
color: #FFFFFF !important;
background: transparent !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 4px !important;
}
#chatRadio input:checked + label::before {
content: "✓" !important;
color: #00FF00 !important;
font-weight: bold !important;
margin-right: 8px !important;
}
"""
#=====================
# Stylesheet passed to gr.Blocks(css=css) below. The #id selectors correspond
# to the elem_id values given to the components in the UI definition.
css = """
.gradio-container {
background: linear-gradient(135deg, #000000, #1a1a1a) !important;
color: #ffffff !important;
}
.gradio-container .default {
background-color: #111111 !important;
color: #ffffff !important;
}
.gradio-container button {
background: #333333 !important;
color: white !important;
border: 1px solid #555 !important; /* ← Add border consistency */
}
.gradio-container input, .gradio-container textarea {
background-color: #222222 !important;
color: white !important;
border: 1px solid #444444 !important;
}
#admin-file .label, #admin-file label {
color: #FFFFFF !important;
font-size: 16px !important;
background-color: #00008B !important;
}
#admin-file { background-color: #00008B !important; }
#compliance-btn {
color: #FFFFFF !important;
background-color: red !important; /* Red background */
font-size: 16px !important;
}
#compliance-btn:hover {
background-color: #CC0000 !important; /* Darker red on hover */
color: #FFFFFF !important;
}
#compliance-btn-tech {
color: #FFFFFF !important;
background-color: #006600 !important; /* Darker green (was green=too bright) */
font-size: 16px !important;
}
#compliance-btn-tech:hover {
background-color: #006400 !important; /* Darker red on hover */
color: #FFFFFF !important;
}
#compliance-out textarea, #compliance-out .label, #compliance-out label {
color: #FFFFFF !important;
background-color: #000000 !important;
font-size: 16px !important;
}
#nrlchatbot {
background: linear-gradient(135deg, #000000, #1a1a1a) !important;
color: #ffffff !important;
}
#chatRadio {
background: linear-gradient(135deg, #000000, #1a1a1a) !important;
color: #ffffff !important;
}
#nrlchatbotLabe {
background-color: #00008B !important;
color: #ffffff !important;
font-size: 16px !important;
}
#compliance-result {
background-color: #FFFFFF !important;
color: #000000 !important;
font-size: 16px !important;
}
#out_cq_elemid {
background-color: #FFFFFF !important;
color: #000000 !important;
font-size: 16px !important;
}
#out_tech_elemid {
background-color: #FFFFFF !important;
color: #000000 !important;
font-size: 16px !important;
}
"""
# Gradio UI: five tabs (admin-file compliance, chatbot, technical-document
# compliance, CQ generation for OEM offers, and a "Generate TAC" placeholder),
# followed by launching the app with request queuing enabled.
# NOTE(review): the indentation of this block is not visible in this copy, so
# the nesting of the `with` contexts (which components sit inside each Row /
# TabItem) cannot be confirmed here and must be restored before running.
with gr.Blocks(css=css) as demo:
with gr.Tabs(elem_id="main-tabs"):
# Tab 1: upload a .docx admin file and stream the compliance report.
with gr.TabItem("Compliance Check of Arohan Admin File"):
with gr.Row():
inp = gr.File(
label="Upload Admin File in word i.e. docx format",
file_types=[".docx"],
elem_id="admin-file"
)
run_btn = gr.Button("Check compliance", elem_id="compliance-btn")
out = gr.HTML(label="Compliance Result", elem_id="compliance-result")
#out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
run_btn.click(check_compliance, inputs=inp, outputs=out, api_name="manabAdminNoteCheck")
# Tab 2: retrieval chatbot over the selected document set.
with gr.TabItem("NRL ChatBot"):
#gr.Markdown("""# RAG Chatbot - NRL Documents""", elem_id="nrlchatbotLabe")
# Heading rendered as raw HTML instead of the Markdown line above.
gr.HTML("""
<h1 style="color: #FFFFFF !important; text-align: center; font-size: 28px; margin: 20px;">
RAG Chatbot - NRL Documents
</h1>
""")
# RADIO BUTTON for document selection
doc_selector = gr.Radio(
choices=[
("GPC Goods", "gpc_goods"),
("Procurement Manual", "manual")
],
value="gpc_goods", # Default
label="Select Document:",
info="Choose which document to query",
elem_id="chatRadio"
)
chatbot = gr.Chatbot(height=500, elem_id="nrlchatbot") # Defaults to messages
msg = gr.Textbox(placeholder="Ask a question...", label="Query")
submit_btn = gr.Button("Submit")
# Events: the button click and pressing Enter in the textbox both call respond().
submit_btn.click(fn=respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot], api_name="manabChat")
msg.submit(respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot])
# Tab 3: upload a technical PDF and show its compliance result.
with gr.TabItem("Compliance Check of user technical doc"):
with gr.Row():
inp_tech = gr.File(
label="Upload technical document in pdf format",
file_types=[".pdf"],
elem_id="tech-file"
)
run_btn_tech = gr.Button("Check compliance of technical document", elem_id="compliance-btn-tech")
out_tech = gr.HTML(label="Compliance Result of technical document", elem_id="out_tech_elemid")
#out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
run_btn_tech.click(check_compliance_tech, inputs=inp_tech, outputs=out_tech, api_name="manabTechDocCheck")
# Tab 4: upload a vendor offer PDF and generate a draft CQ.
with gr.TabItem("CQ generation - OEM "):
with gr.Row():
inp_OEM_import = gr.File(
label="Upload vendor's offer in pdf format",
file_types=[".pdf"],
elem_id="oem-import-file"
)
manabLebel=gr.HTML("""
<div style="color: red; background: black; padding: 20px; text-align: justify; font-size: 20px;">
Disclaimer: AI generated outcome is only for reference. Consider the
following only as a draft and recheck is mandatory.
</div>"""
)
run_btn_oemImport = gr.Button("Generate CQ of OEM-offer", elem_id="generateOEMImport-btn")
out_cq = gr.HTML(label="Generated CQ", elem_id="out_cq_elemid")
#out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
run_btn_oemImport.click(generateCqOemImport, inputs=inp_OEM_import, outputs=out_cq, api_name="manabCQGeneration")
# Tab 5: placeholder only, no handlers attached.
with gr.TabItem("Generate TAC"):
gr.HTML("""
<div style="color: white; background: black; padding: 20px; text-align: center; font-size: 24px;">
🚧 Coming Soon 🚧
</div> """
)
# Enable the request queue, then start the app.
demo.queue().launch()
#demo.launch()