Spaces:

manabb
/

NRLCommercialAI

Running

App Files Files Community

NRLCommercialAI / app.py

manabb

Update app.py

3dcf5c8 verified 6 days ago

raw

history blame contribute delete

22.6 kB

	import gradio as gr
	import os
	os.system("apt update && apt install -y tesseract-ocr poppler-utils")
	from openai import OpenAI
	import pandas as pd
	from docx import Document
	import time
	import re
	from huggingface_hub import hf_hub_download
	from huggingface_hub import HfApi, login
	from datetime import datetime
	from langchain_openai import ChatOpenAI
	from langchain_community.callbacks import get_openai_callback


	from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
	from langchain_core.runnables import RunnablePassthrough
	from langchain_core.output_parsers import StrOutputParser

	from manabUtils import retrieve_chunks, retrieve_chunks_GPC
	from technicalDocCompliance import compliance_tech, compliance_tech_pdf
	from MyRules import manualRules
	from manabCQgenetaion import compliance_import_OEM
	from cq_domestic import domesticCQ



	# Both secrets must be present in the environment before any client is built.
	# Assigning os.getenv(...) straight back into os.environ raises an opaque
	# TypeError when the variable is missing (and does nothing when it is set),
	# so the check is made explicit and names the missing variable.
	for _required_var in ("HF_TOKEN", "OPENAI_API_KEY"):
	    if os.getenv(_required_var) is None:
	        raise RuntimeError(
	            f"Environment variable {_required_var} is not set; "
	            "configure it before starting the app."
	        )

	# Shared clients and settings used by the functions below.
	client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
	api = HfApi(token=os.getenv("HF_TOKEN"))
	# Hub repository and file that update_log() reads and re-uploads.
	repo_id = "manabb/nrl"
	file_path_in_repo="LLMLogs.txt"
	# Chat model used by the retrieval QA chain.
	llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=500)
	#=====
	#Payment type
	# Payment-term rules. loop_function() passes this text to generate_response()
	# as the MANUAL when it checks a proposal's "Payment Terms" row.
	manual_payment_type="""
	1. Management discouraged the payment thorugh bank.
	2. Advance payment without bank gurantee is not allowed. Require Competant Authority approval if given.
	3. If payment term is milestone payment, then requirement of bank guarantee against each milestone payment release is to be written.
	4. Standard payment term: Payment shall be made within 30 days after receipt and acceptance of material.
	5. As per NRL GPC or GPC or GPC(general purchase condition) is a complied payment term.
	"""

	#=================
	#BasisOfEstimate

	# Estimate rules. loop_function() passes this text to generate_response()
	# as the MANUAL when it checks a proposal's "Basis of estimate" row.
	manual_basis_of_estimate="""
	1. Estimated cost should be worked out realistically using market survey, budgetary quotations, or published catalogues/MRP when no historical data is available.
	2. For custom-built equipment, obtain budgetary quotes from potential parties. Ideally three quotes, but if less than three, use available quotes with average if multiple.
	3. Minimum three budgetory offer or offer is required for estimate calculation. If less than three offers, then reason is to be written.
	4. Estimates should consider inflation, technology changes, profit margins etc.
	5. If estimates cannot be made meaningfully, full reason should be recorded.
	6. For procurements up to Rs.1,00,000, detailed estimates are not required.
	7. If the Tender Type of the proposal is OEM, the basis of estimate can be firm offer collected from OEM single vendor.
	"""

	#=======================
	# Pre-Qualifying Criteria rules. loop_function() passes this text to
	# generate_response() as the MANUAL when it checks the "PQC for Open tenders" row.
	PQC_rules="""
	1. If the proposal value is more than fifty lakh, the PQC shall include financial criteria
	2. PQC should be unrestrictive enough to not exclude any capable vendor/contractor.
	3. PQC should be restrictive enough to exclude incapable vendors/contractors.
	4. Framing of PQC requires due consideration to adequacy of competition.
	5. Functional head approval is mandatory if there is PQC is written in a proposal.
	6. PQC should be carefully decided for each procurement with approval of Competent Authority (CA).
	7. Bidders must submit authenticated documents in support of eligibility criteria.
	8. Sudden multiple times increase in requirement should not blindly adopt past PQCs.
	9. PQC misjudgement in either direction (too restrictive or unrestrictive) is detrimental.
	10. PQC should be clarified in tender documents that authenticated documents are required.
	11. Adequacy of competition must be evaluated while framing PQC.
	12. PQC should balance inclusion of capable vendors and exclusion of incapable ones."""

	#===========================
	#retriever = retrieve_chunks(repo_id)
	#retriever=retrieve_chunks_GPC()
	def limit_context(docs, max_chars=4000):
	    """Merge the page contents of *docs* and cap the length of the result.

	    The ``page_content`` of every document is joined with a blank line in
	    between; only the first ``max_chars`` characters are returned.
	    """
	    pieces = [document.page_content for document in docs]
	    merged = "\n\n".join(pieces)
	    return merged[:max_chars]

	def create_qa_chain(retriever):
	    """Compose a question-answering chain on top of *retriever*.

	    The chain input is used twice: it is sent through ``retriever`` (whose
	    documents are flattened by ``limit_context``) to fill ``{context}``, and
	    it is passed through unchanged to fill ``{input}``. The filled prompt goes
	    to the module-level ``llm`` and the reply is parsed to a plain string.
	    """
	    qa_prompt = ChatPromptTemplate.from_template(
	        "Use context to answer: {context}\n\nQ: {input}"
	    )
	    # Retrieved documents are reduced to one size-capped string before prompting.
	    context_step = retriever | (lambda docs: limit_context(docs))
	    chain_inputs = {
	        "context": context_step,
	        "input": RunnablePassthrough(),
	    }
	    return chain_inputs | qa_prompt | llm | StrOutputParser()


	#=======================

	#============chat handler (extract_docx_text is defined further below)
	def respond(message, history, doc_choice):
	    """Answer one chat message and return the cleared input plus the new history.

	    Parameters:
	        message: the user's question text.
	        history: list of ``{"role", "content"}`` dicts from the chatbot; ``None``
	            is treated as an empty conversation.
	        doc_choice: ``"gpc_goods"`` selects ``retrieve_chunks_GPC()``; any other
	            value selects ``retrieve_chunks(repo_id)``.

	    Returns:
	        A ``("", new_history)`` tuple. ``history`` itself is not modified.

	    Messages shorter than three words are answered with a fixed request for
	    clarification and never reach the model. Otherwise the answer is prefixed
	    with the token/cost figures reported by the OpenAI callback and followed by
	    a reference line per retrieved document; the whole conversation is then
	    handed to ``update_log`` (a failure there is printed, not raised).
	    """
	    # A missing history (e.g. from an API caller) must not break list concatenation.
	    history = history or []

	    word_count = len(message.strip().split())

	    # If less than 3 words, do not call LLM, just ask user to clarify
	    if word_count < 3:
	        correction_msg = "Please clarify or expand your question (at least 3 words)."
	        new_history = history + [
	            {"role": "user", "content": message},
	            {"role": "assistant", "content": correction_msg},
	        ]
	        return "", new_history

	    if doc_choice == "gpc_goods":
	        retriever = retrieve_chunks_GPC()
	    else:
	        retriever = retrieve_chunks(repo_id)
	    qa_chain = create_qa_chain(retriever)
	    with get_openai_callback() as cb:
	        answer = qa_chain.invoke(message)

	    # Second retrieval, used only to list where the context came from.
	    docs = retriever.invoke(message)
	    if doc_choice == "gpc_goods":
	        refs = [f"NRL GPC point No: {d.metadata.get('condition_number', 'N/A')} / Heading: {d.metadata.get('condition_heading', 'N/A')}" for d in docs]
	    else:
	        refs = [f"Page {d.metadata.get('page', 'N/A')}" for d in docs]
	    full_answer = f"""Input tokens: {cb.prompt_tokens},
	Ouput tokens: {cb.completion_tokens}, Total tokens: {cb.total_tokens},
	Cost: ${cb.total_cost}\n{answer}\n\nReferences:\n""" + "\n".join(refs)

	    # Append only plain role/content dicts - no metadata, tuples, or extras.
	    new_history = history + [
	        {"role": "user", "content": message},
	        {"role": "assistant", "content": full_answer},
	    ]
	    history_string = "\n".join(
	        f"{item['role']}: {item['content']}" for item in new_history
	    )
	    try:
	        update_log("\nFrom Chat: "+datetime.now().isoformat()+"\n"+history_string+"\n")
	    except Exception as ee:
	        print(f"Error: {ee} - not saved the log")
	    return "", new_history  # cleared input box, updated history
	#====================
	def extract_docx_text(file_path):
	    """Collect the table contents of a .docx file into a DataFrame.

	    Every table row is reduced to the stripped text of its cells. A row with
	    exactly two cells is stored as a Field/Value pair (colons removed from the
	    field) when both parts are non-empty. Any other row is stored under the
	    field ``'Multi-Column Data'`` with its non-empty cells joined by ``' | '``.
	    Each record also carries a ``'Source'`` tag naming the 1-based table.
	    """
	    records = []
	    for table_no, table in enumerate(Document(file_path).tables, start=1):
	        source_tag = f'Table_{table_no}'
	        for row in table.rows:
	            texts = [cell.text.strip() for cell in row.cells]
	            if len(texts) != 2:
	                joined = ' | '.join(t for t in texts if t)
	                if joined:
	                    records.append({'Field': 'Multi-Column Data', 'Value': joined, 'Source': source_tag})
	                continue
	            field = texts[0].replace(':', '').strip()
	            content = texts[1].strip()
	            if field and content:
	                records.append({'Field': field, 'Value': content, 'Source': source_tag})
	    return pd.DataFrame(records)

	def generate_response(manual, proposal):
	    """Ask the chat model whether *proposal* complies with the rules in *manual*.

	    Both texts are embedded in a fixed prompt that requests a Status /
	    Severity / Deviations / Fix / analysis layout. The request is sent through
	    the module-level ``client`` and the content of the first returned choice
	    is handed back unchanged.
	    """
	    request_text = f"""
	You are a strict compliance checker for Govt. procurement policies.

	Check whether the proposal complies with MANUAL requirements. Respond in EXACT format:

	Status: COMPLIANT or NON-COMPLIANT
	Severity: HIGH or MEDIUM or LOW
	Deviations: <short bullet-style description or 'None'>
	Fix: <clear corrective action>

	COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>

	MANUAL: {manual}

	proposal: {proposal}
	"""

	    chat_messages = [{"role": "user", "content": request_text}]
	    completion = client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=chat_messages,
	        temperature=0.1,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	def generate_html(llm_response):
	    """Render a multi-line text as one single-cell HTML table row.

	    The text is split on ``\\n``; each line is stripped, blank lines are
	    dropped, and the remaining lines are joined with ``<br>`` inside
	    ``<tr><td>...</td></tr>``. The text is inserted as-is (no HTML escaping).
	    """
	    stripped_lines = (raw.strip() for raw in llm_response.strip().split('\n'))
	    kept_lines = [entry for entry in stripped_lines if entry]
	    cell_body = '<br>'.join(kept_lines)
	    return "<tr><td>" + cell_body + "</td></tr>"

	#================================================Logging helper (the Gradio UI is built at the end of the file)==================

	def update_log(newRecords):
	    """Append *newRecords* to the log file stored in the Hub dataset repo.

	    The current file (``file_path_in_repo`` in ``repo_id``) is downloaded and
	    read, the new text is added after it surrounded by newlines, and the
	    combined content is uploaded as a new commit. If the file does not exist
	    in the repo yet, the upload creates it with just the new text.

	    Logging is best-effort: any failure is printed and swallowed so that a
	    logging problem never interrupts the caller.
	    """
	    try:
	        # Local import: the file's top-level imports do not provide this name.
	        from huggingface_hub.utils import EntryNotFoundError

	        try:
	            downloaded_path = hf_hub_download(
	                repo_id=repo_id,
	                filename=file_path_in_repo,
	                repo_type="dataset",
	            )
	        except EntryNotFoundError:
	            # First entry ever: start from an empty log.
	            print("File not found - created new")
	            existing_text = ""
	        else:
	            # Read only; the downloaded path lives in the local hub cache and
	            # must not be modified in place.
	            with open(downloaded_path, 'r', encoding='utf-8') as f:
	                existing_text = f.read()

	        updated_text = existing_text + "\n" + newRecords + "\n"

	        # Upload the combined content directly (overwrites the repo file).
	        api.upload_file(
	            path_or_fileobj=updated_text.encode("utf-8"),
	            path_in_repo=file_path_in_repo,
	            repo_id=repo_id,
	            repo_type="dataset",
	            commit_message="Append new log entry",
	        )
	    except Exception as exc:
	        # Report the real cause instead of a fixed "file not found" message.
	        print(f"Error: {exc} - not saved the log")
	#==============================

	def loop_function(df):
	    """Check the rule-governed rows of a proposal and stream the report as HTML.

	    ``df`` is iterated row by row and must provide ``Field`` and ``Value``
	    columns. The file number, PR number, proposal name and value rows become
	    headings. The "PQC for Open tenders", "Basis of estimate" and
	    "Payment Terms" rows are each sent to ``generate_response`` together with
	    the matching rule text. After every checked row the cumulative HTML built
	    so far is yielded, followed by a 3 second pause. A row whose check raises
	    is reported on stdout and skipped. Once all rows are processed, the
	    collected model replies are passed to ``update_log`` in one call.
	    """
	    # Local import: field text comes from an uploaded document, so it is
	    # escaped before being placed into HTML.
	    from html import escape

	    text = "<hr>"
	    Value_of_proposal = ""
	    E_file_No = ""
	    # Defined up front: a "Basis of estimate" row may appear before (or
	    # without) a "Tender Type" row, and the name must already exist then.
	    Tender_Type_of_proposal = ""
	    txt_forRecord = ""
	    for _, row in df.iterrows():
	        key = str(row['Field'])
	        value = str(row['Value'])

	        # Rows that are neither displayed nor checked.
	        if key in ("Justification/Reason for Procurement", "Category"):
	            continue
	        if key == "File No.":
	            E_file_No = f"E-File No: {value}. \n"
	            text += "<h2>" + escape(E_file_No) + "</h2>"
	            continue
	        if key == "PR No.":
	            text += "<h2>" + escape(f"PR No: {value}. \n") + "</h2>"
	            continue
	        if key == "Name of proposal":
	            text += "<h2>" + escape(f"Name of the proposal : {value}. \n") + "</h2><hr>"
	            continue
	        if key == "Value (Rs)":
	            Value_of_proposal = f"The proposal Value is {value}. \n"
	            text += "<h2>" + escape(Value_of_proposal) + "</h2>"
	            continue
	        if key == "Tender Type":
	            Tender_Type_of_proposal = f"The proposal Tender Type is {value}. \n"
	            continue

	        # Pick the rule text and the proposal statement for rows that are checked.
	        if key == "PQC for Open tenders":
	            manual_rules = PQC_rules
	            proposal_details = f"The Pre Qualifying Criteria (PQC) of the proposal is under: {value}. {Value_of_proposal}"
	        elif key == "Basis of estimate":
	            manual_rules = manual_basis_of_estimate
	            proposal_details = f"The basis of estimate of the proposal is under: {value}.{Tender_Type_of_proposal}"
	        elif key == "Payment Terms":
	            manual_rules = manual_payment_type
	            proposal_details = f"The Payment Terms of the proposal is {value}."
	        else:
	            continue

	        try:
	            rr = generate_response(manual_rules, proposal_details)
	            txt_forRecord += "\n" + datetime.now().isoformat() + "\n" + E_file_No + "\n" + rr
	            text += '\n<span style="color: #006400 !important; font-weight: bold; font-size: 14px;">\n\n'
	            text += "<table><tr><td>As per proposal, " + escape(key) + " : " + escape(value) + "</td></tr>"
	            # The model reply is inserted through generate_html() unchanged.
	            text += generate_html(rr)
	            text += "</table></span><hr>"
	            yield text
	            time.sleep(3)
	        except Exception as e:
	            print(f"Error: {e} - skipping row")
	            continue
	    try:
	        update_log("\n" + txt_forRecord + "\n")
	    except Exception as ee:
	        print(f"Error: {ee} - not saved the log")
	def loop_function_tech(df):
	    """Placeholder generator for technical documents; *df* is not used yet."""
	    # Not implemented so far: emit a single status message.
	    placeholder_messages = ("coming soon",)
	    yield from placeholder_messages
	def check_compliance(file):
	    """Stream the compliance report for an uploaded admin file.

	    ``file`` is the uploaded file: an object whose ``name`` attribute holds
	    the path, or a plain path string. A ``.docx`` file (any letter case) is
	    parsed with ``extract_docx_text`` and every HTML update produced by
	    ``loop_function`` is yielded. With no upload, or any other extension, a
	    single explanatory message is yielded instead.
	    """
	    # The button can be pressed before anything was uploaded.
	    if file is None:
	        yield "Please upload a .docx file first."
	        return
	    path = getattr(file, "name", file)
	    if str(path).lower().endswith(".docx"):
	        df1 = extract_docx_text(path)
	        yield from loop_function(df1)
	    else:
	        yield "Unsupported file format"
	def check_compliance_tech(file_name):
	    """Yield the compliance report for an uploaded technical PDF.

	    ``file_name`` is the uploaded file: an object whose ``name`` attribute
	    holds the path, or a plain path string. For a ``.pdf`` file (any letter
	    case) the rules from ``manualRules()`` and the file are passed to
	    ``compliance_tech``; its result is wrapped in a table via
	    ``generate_html``, yielded, and then logged with ``update_log``. With no
	    upload, or any other extension, a single explanatory message is yielded.
	    """
	    # The button can be pressed before anything was uploaded.
	    if file_name is None:
	        yield "Please upload a .pdf file first."
	        return
	    path = getattr(file_name, "name", file_name)
	    if str(path).lower().endswith(".pdf"):
	        MANUAL_RULES = manualRules()
	        dd = compliance_tech(file_name, client, MANUAL_RULES)
	        kkk = "<table>" + generate_html(dd) + "</table>"
	        yield kkk
	        # Logging must never turn an already delivered result into an error.
	        try:
	            update_log("\n Technical Complaince response: "+datetime.now().isoformat()+"\n"+kkk+"\n")
	        except Exception as ee:
	            print(f"Error: {ee} - not saved the log")
	    else:
	        yield "Unsupported file format"
	def generateCqOemImport(file_name):
	    """Yield the draft CQ report for an uploaded vendor offer PDF.

	    ``file_name`` is the uploaded file: an object whose ``name`` attribute
	    holds the path, or a plain path string. For a ``.pdf`` file (any letter
	    case) ``compliance_import_OEM`` is called with the file and the shared
	    ``client``; the price extraction and result dictionary it returns are then
	    given to ``domesticCQ``. The token counts, the returned HTML fragments and
	    the draft CQ are combined into one text, which is yielded and then logged
	    with ``update_log``. With no upload, or any other extension, a single
	    explanatory message is yielded.
	    """
	    # The button can be pressed before anything was uploaded.
	    if file_name is None:
	        yield "Please upload a .pdf file first."
	        return
	    path = getattr(file_name, "name", file_name)
	    if str(path).lower().endswith(".pdf"):
	        (
	            extraction_item_value_html,
	            extraction_html,
	            compliance_html,
	            input_tokens,
	            output_tokens,
	            total_tokens,
	            result_dic,
	        ) = compliance_import_OEM(file_name, client)
	        kkk = domesticCQ(extraction_item_value_html, result_dic)
	        dd=f"""
	\n
	<table>
	<tr><td>Input tokens:</td><td>{input_tokens}</td></tr>
	<tr><td>Output tokens:</td><td>{output_tokens}</td></tr>
	<tr><td>Total tokens:</td><td>{total_tokens}</td></tr>
	</table>
	\n Vendor quoted the price as under in the offer. \n
	{extraction_item_value_html}
	\n The extracted value against each point is as under. \n
	{extraction_html}
	\n The compliance of the offer against our rules are as under. \n
	{compliance_html}
	\n\n
	The Draft CQ is as under: \n\n\n
	{kkk}
	"""
	        yield dd
	        # Logging must never turn an already delivered result into an error.
	        try:
	            update_log("\n CQ-OEM: "+datetime.now().isoformat()+"\n"+dd+"\n")
	        except Exception as ee:
	            print(f"Error: {ee} - not saved the log")
	    else:
	        yield "Unsupported file format"

	#================================
	# Alternative all-black stylesheet. Nothing in the visible part of this file
	# references css1; the UI below is built with gr.Blocks(css=css).
	css1 = """
	* {
	background-color: #000000 !important;
	color: #FFFFFF !important;
	border-color: #444444 !important;
	}

	button, .btn {
	background: #333333 !important;
	color: #FFFFFF !important;
	border: 1px solid #555555 !important;
	}

	button:hover {
	background: #555555 !important;
	}

	input, textarea, select {
	background: #111111 !important;
	color: #FFFFFF !important;
	border: 1px solid #666666 !important;
	}

	label {
	color: #FFFFFF !important;
	font-weight: bold !important;
	}

	#nrlchatbot, .chatbot {
	background: #000000 !important;
	color: #FFFFFF !important;
	}

	.chatbot .message {
	background: #111111 !important;
	color: #FFFFFF !important;
	border: 1px solid #333333 !important;
	}

	.tabitem, h1, h2, h3, .markdown {
	background: transparent !important;
	color: #FFFFFF !important;
	}
	span[style] {
	color: inherit !important; /* Respect inline styles */
	}

	span[style*="color"] {
	color: inherit !important !important; /* Double !important */
	}

	span {
	all: unset !important;
	}
	#compliance-btn { background: red !important; }
	#compliance-btn-tech { background: #006600 !important; }
	#admin-file { background: #00008B !important; }
	#chatRadio {
	background: #111111 !important;
	}

	#chatRadio label {
	color: #FFFFFF !important;
	background: transparent !important;
	border-radius: 5px !important;
	padding: 8px !important;
	margin: 4px !important;
	}

	#chatRadio input:checked + label::before {
	content: "✓" !important;
	color: #00FF00 !important;
	font-weight: bold !important;
	margin-right: 8px !important;
	}
	"""

	#=====================
	# Stylesheet passed to gr.Blocks(css=css) below. The #id selectors correspond
	# to the elem_id values given to the widgets in the UI definition.
	css = """
	.gradio-container {
	background: linear-gradient(135deg, #000000, #1a1a1a) !important;
	color: #ffffff !important;
	}

	.gradio-container .default {
	background-color: #111111 !important;
	color: #ffffff !important;
	}

	.gradio-container button {
	background: #333333 !important;
	color: white !important;
	border: 1px solid #555 !important; /* ← Add border consistency */
	}

	.gradio-container input, .gradio-container textarea {
	background-color: #222222 !important;
	color: white !important;
	border: 1px solid #444444 !important;
	}

	#admin-file .label, #admin-file label {
	color: #FFFFFF !important;
	font-size: 16px !important;
	background-color: #00008B !important;
	}
	#admin-file { background-color: #00008B !important; }
	#compliance-btn {
	color: #FFFFFF !important;
	background-color: red !important; /* Red background */
	font-size: 16px !important;
	}
	#compliance-btn:hover {
	background-color: #CC0000 !important; /* Darker red on hover */
	color: #FFFFFF !important;
	}
	#compliance-btn-tech {
	color: #FFFFFF !important;
	background-color: #006600 !important; /* Darker green (was green=too bright) */
	font-size: 16px !important;
	}
	#compliance-btn-tech:hover {
	background-color: #006400 !important; /* Darker red on hover */
	color: #FFFFFF !important;
	}
	#compliance-out textarea, #compliance-out .label, #compliance-out label {
	color: #FFFFFF !important;
	background-color: #000000 !important;
	font-size: 16px !important;
	}
	#nrlchatbot {
	background: linear-gradient(135deg, #000000, #1a1a1a) !important;
	color: #ffffff !important;
	}
	#chatRadio {
	background: linear-gradient(135deg, #000000, #1a1a1a) !important;
	color: #ffffff !important;
	}
	#nrlchatbotLabe {
	background-color: #00008B !important;
	color: #ffffff !important;
	font-size: 16px !important;
	}
	#compliance-result {
	background-color: #FFFFFF !important;
	color: #000000 !important;
	font-size: 16px !important;
	}
	#out_cq_elemid {
	background-color: #FFFFFF !important;
	color: #000000 !important;
	font-size: 16px !important;
	}
	#out_tech_elemid {
	background-color: #FFFFFF !important;
	color: #000000 !important;
	font-size: 16px !important;
	}
	"""

	# Gradio UI: one tab per tool, styled with the `css` string defined above.
	# NOTE(review): this copy of the file has lost its indentation, so which
	# widgets sit inside each gr.Row (and where each tab body ends) cannot be
	# read off from here - confirm the nesting against the original file.
	with gr.Blocks(css=css) as demo:
	with gr.Tabs(elem_id="main-tabs"):
	# Tab 1: upload a .docx admin file; check_compliance streams its HTML into `out`.
	with gr.TabItem("Compliance Check of Arohan Admin File"):
	with gr.Row():
	inp = gr.File(
	label="Upload Admin File in word i.e. docx format",
	file_types=[".docx"],
	elem_id="admin-file"
	)
	run_btn = gr.Button("Check compliance", elem_id="compliance-btn")
	out = gr.HTML(label="Compliance Result", elem_id="compliance-result")
	#out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
	run_btn.click(check_compliance, inputs=inp, outputs=out, api_name="manabAdminNoteCheck")

	# Tab 2: chatbot; the radio value is passed to respond() as doc_choice.
	with gr.TabItem("NRL ChatBot"):
	#gr.Markdown("""# RAG Chatbot - NRL Documents""", elem_id="nrlchatbotLabe")
	# Replace your Markdown line:
	gr.HTML("""
	<h1 style="color: #FFFFFF !important; text-align: center; font-size: 28px; margin: 20px;">
	RAG Chatbot - NRL Documents
	</h1>
	""")

	# RADIO BUTTON for document selection
	doc_selector = gr.Radio(
	choices=[
	("GPC Goods", "gpc_goods"),
	("Procurement Manual", "manual")
	],
	value="gpc_goods", # Default
	label="Select Document:",
	info="Choose which document to query",
	elem_id="chatRadio"
	)
	chatbot = gr.Chatbot(height=500, elem_id="nrlchatbot") # Defaults to messages
	msg = gr.Textbox(placeholder="Ask a question...", label="Query")
	submit_btn = gr.Button("Submit")

	# Events
	# Both the button and pressing Enter in the textbox call respond(); its two
	# return values clear the textbox and replace the chatbot history.
	submit_btn.click(fn=respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot], api_name="manabChat")
	msg.submit(respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot])
	# Tab 3: upload a PDF; check_compliance_tech yields the HTML shown in `out_tech`.
	with gr.TabItem("Compliance Check of user technical doc"):
	with gr.Row():
	inp_tech = gr.File(
	label="Upload technical document in pdf format",
	file_types=[".pdf"],
	elem_id="tech-file"
	)
	run_btn_tech = gr.Button("Check compliance of technical document", elem_id="compliance-btn-tech")
	out_tech = gr.HTML(label="Compliance Result of technical document", elem_id="out_tech_elemid")
	#out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
	run_btn_tech.click(check_compliance_tech, inputs=inp_tech, outputs=out_tech, api_name="manabTechDocCheck")
	# Tab 4: upload a vendor offer PDF; generateCqOemImport yields the HTML shown in `out_cq`.
	with gr.TabItem("CQ generation - OEM "):
	with gr.Row():
	inp_OEM_import = gr.File(
	label="Upload vendor's offer in pdf format",
	file_types=[".pdf"],
	elem_id="oem-import-file"
	)
	manabLebel=gr.HTML("""
	<div style="color: red; background: black; padding: 20px; text-align: justify; font-size: 20px;">
	Disclaimer: AI generated outcome is only for reference. Consider the
	following only as a draft and recheck is mandatory.
	</div>"""
	)
	run_btn_oemImport = gr.Button("Generate CQ of OEM-offer", elem_id="generateOEMImport-btn")
	out_cq = gr.HTML(label="Generated CQ", elem_id="out_cq_elemid")
	#out = gr.Textbox(lines=15, label="Compliance Result",elem_id="compliance-out")
	run_btn_oemImport.click(generateCqOemImport, inputs=inp_OEM_import, outputs=out_cq, api_name="manabCQGeneration")
	# Tab 5: static placeholder only; no handler is attached.
	with gr.TabItem("Generate TAC"):
	gr.HTML("""
	<div style="color: white; background: black; padding: 20px; text-align: center; font-size: 24px;">
	🚧 Coming Soon 🚧
	</div> """
	)
	# Enable the request queue, then start the app when this module is executed.
	demo.queue().launch()
	#demo.launch()