# Hugging Face Space (status at capture time: Running)
import html
import os
import re
import time
from datetime import datetime

# Installs the OCR / PDF system packages at start-up (presumably required by
# the PDF helpers imported below -- confirm before removing).
os.system("apt update && apt install -y tesseract-ocr poppler-utils")

import gradio as gr
import pandas as pd
from docx import Document
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, login
from huggingface_hub.utils import EntryNotFoundError
from langchain_community.callbacks import get_openai_callback
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from openai import OpenAI

from cq_domestic import domesticCQ
from manabCQgenetaion import compliance_import_OEM
from manabUtils import retrieve_chunks, retrieve_chunks_GPC
from MyRules import manualRules
from technicalDocCompliance import compliance_tech, compliance_tech_pdf
# Both secrets must be present in the environment (e.g. as Space secrets).
# Assigning os.getenv(...) back into os.environ is a no-op when the variable
# exists and raises an opaque TypeError when it does not, so check explicitly
# and fail with a message that names what is missing.
_missing_secrets = [
    name for name in ("HF_TOKEN", "OPENAI_API_KEY") if not os.getenv(name)
]
if _missing_secrets:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_secrets)
    )

# OpenAI client used for the direct chat-completion calls in this file.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Hugging Face Hub client used to upload the log file.
api = HfApi(token=os.environ["HF_TOKEN"])
# Hub repository id and the log file path inside it.
repo_id = "manabb/nrl"
file_path_in_repo = "LLMLogs.txt"
# LangChain chat model used by the RAG chain built in create_qa_chain().
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=500)
| #===== | |
# Payment-term rules. Sent verbatim to the LLM as the MANUAL text when a
# "Payment Terms" row is checked in loop_function(); the wording is part of
# the prompt, so edit it deliberately.
manual_payment_type="""
1. Management discouraged the payment thorugh bank.
2. Advance payment without bank gurantee is not allowed. Require Competant Authority approval if given.
3. If payment term is milestone payment, then requirement of bank guarantee against each milestone payment release is to be written.
4. Standard payment term: Payment shall be made within 30 days after receipt and acceptance of material.
5. As per NRL GPC or GPC or GPC(general purchase condition) is a complied payment term.
"""
| #================= | |
# Basis-of-estimate rules. Sent verbatim to the LLM as the MANUAL text when a
# "Basis of estimate" row is checked in loop_function().
manual_basis_of_estimate="""
1. Estimated cost should be worked out realistically using market survey, budgetary quotations, or published catalogues/MRP when no historical data is available.
2. For custom-built equipment, obtain budgetary quotes from potential parties. Ideally three quotes, but if less than three, use available quotes with average if multiple.
3. Minimum three budgetory offer or offer is required for estimate calculation. If less than three offers, then reason is to be written.
4. Estimates should consider inflation, technology changes, profit margins etc.
5. If estimates cannot be made meaningfully, full reason should be recorded.
6. For procurements up to Rs.1,00,000, detailed estimates are not required.
7. If the Tender Type of the proposal is OEM, the basis of estimate can be firm offer collected from OEM single vendor.
"""
| #======================= | |
# Pre-Qualifying Criteria (PQC) rules. Sent verbatim to the LLM as the MANUAL
# text when a "PQC for Open tenders" row is checked in loop_function().
PQC_rules="""
1. If the proposal value is more than fifty lakh, the PQC shall include financial criteria
2. PQC should be unrestrictive enough to not exclude any capable vendor/contractor.
3. PQC should be restrictive enough to exclude incapable vendors/contractors.
4. Framing of PQC requires due consideration to adequacy of competition.
5. Functional head approval is mandatory if there is PQC is written in a proposal.
6. PQC should be carefully decided for each procurement with approval of Competent Authority (CA).
7. Bidders must submit authenticated documents in support of eligibility criteria.
8. Sudden multiple times increase in requirement should not blindly adopt past PQCs.
9. PQC misjudgement in either direction (too restrictive or unrestrictive) is detrimental.
10. PQC should be clarified in tender documents that authenticated documents are required.
11. Adequacy of competition must be evaluated while framing PQC.
12. PQC should balance inclusion of capable vendors and exclusion of incapable ones."""
| #=========================== | |
| #retriever = retrieve_chunks(repo_id) | |
| #retriever=retrieve_chunks_GPC() | |
def limit_context(docs, max_chars=4000):
    """Join the ``page_content`` of *docs* and cap the result's length.

    Documents are separated by a blank line; only the first ``max_chars``
    characters of the joined text are returned.
    """
    pieces = [doc.page_content for doc in docs]
    joined = "\n\n".join(pieces)
    return joined[:max_chars]
def create_qa_chain(retriever):
    """Build the retrieval-augmented QA chain for *retriever*.

    The chain receives the user's question, fills ``{context}`` with the
    length-limited text of the retrieved documents and ``{input}`` with the
    question itself, sends the prompt to the module-level ``llm`` and returns
    the model output as a plain string.
    """
    qa_prompt = ChatPromptTemplate.from_template(
        "Use context to answer: {context}\n\nQ: {input}"
    )
    chain_inputs = {
        # Retrieved documents are flattened and truncated by limit_context().
        "context": retriever | (lambda docs: limit_context(docs)),
        "input": RunnablePassthrough(),
    }
    return chain_inputs | qa_prompt | llm | StrOutputParser()
| #======================= | |
| #============starting extract_docx_text | |
def respond(message, history, doc_choice):
    """Handle one chat turn of the RAG chatbot.

    Args:
        message: The user's question. ``None`` is treated as an empty string.
        history: Previous chat messages as ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history. The input list is never
            mutated.
        doc_choice: ``"gpc_goods"`` queries the GPC retriever; any other value
            queries the retriever built from ``repo_id``.

    Returns:
        A ``(cleared_textbox_value, new_history)`` tuple. Questions shorter
        than three words are not sent to the LLM; a clarification request is
        appended instead.
    """
    message = message or ""
    history = list(history or [])

    # Too short to be a meaningful query: ask the user to expand it.
    if len(message.split()) < 3:
        correction_msg = "Please **clarify** or expand your question (at least 3 words)."
        return "", history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": correction_msg},
        ]

    if doc_choice == "gpc_goods":
        retriever = retrieve_chunks_GPC()
    else:
        retriever = retrieve_chunks(repo_id)
    qa_chain = create_qa_chain(retriever)

    # The callback collects token usage and cost of the LLM call.
    with get_openai_callback() as cb:
        answer = qa_chain.invoke(message)

    # NOTE(review): the chain already ran the retriever once; this second call
    # exists only to obtain document metadata for the reference list.
    docs = retriever.invoke(message)
    if doc_choice == "gpc_goods":
        refs = [
            f"NRL GPC point No: {d.metadata.get('condition_number', 'N/A')} / "
            f"Heading: {d.metadata.get('condition_heading', 'N/A')}"
            for d in docs
        ]
    else:
        refs = [f"Page {d.metadata.get('page', 'N/A')}" for d in docs]

    full_answer = (
        f"Input tokens: {cb.prompt_tokens},\n"
        f"Output tokens: {cb.completion_tokens}, Total tokens: {cb.total_tokens},\n"
        f"Cost: ${cb.total_cost}\n{answer}\n\n**References:**\n"
        + "\n".join(refs)
    )

    # Only plain role/content dicts are appended to the chat history.
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": full_answer},
    ]

    history_string = "\n".join(
        f"{item['role']}: {item['content']}" for item in new_history
    )
    # Logging is best-effort: a failure must not lose the answer.
    try:
        update_log("\nFrom Chat: " + datetime.now().isoformat() + "\n" + history_string + "\n")
    except Exception as ee:
        print(f"Error: {ee} - not saved the log")

    return "", new_history
| #==================== | |
def extract_docx_text(file_path):
    """Extract the table contents of a .docx file as field/value records.

    Every row of every table is visited. A row with exactly two cells becomes
    a ``Field``/``Value`` record (colons are removed from the field name; rows
    with an empty field or value are skipped). Any other row is stored as one
    ``'Multi-Column Data'`` record whose value is the non-empty cell texts
    joined by ``' | '``.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        A DataFrame with the columns ``Field``, ``Value`` and ``Source``
        (``Table_<n>``, 1-based). The columns are present even when no
        record was found.
    """
    records = []
    for table_no, table in enumerate(Document(file_path).tables, start=1):
        source = f'Table_{table_no}'
        for row in table.rows:
            cells = [cell.text.strip() for cell in row.cells]
            if len(cells) == 2:
                key = cells[0].replace(':', '').strip()
                value = cells[1]
                if key and value:
                    records.append({'Field': key, 'Value': value, 'Source': source})
            else:
                combined = ' | '.join(c for c in cells if c)
                if combined:
                    records.append(
                        {'Field': 'Multi-Column Data', 'Value': combined, 'Source': source}
                    )
    # Explicit columns keep the schema stable for documents without tables.
    return pd.DataFrame(records, columns=['Field', 'Value', 'Source'])
def generate_response(manual, proposal):
    """Ask the LLM whether *proposal* complies with the rules in *manual*.

    Both texts are embedded in a fixed prompt that requests a Status /
    Severity / Deviations / Fix / analysis answer. The prompt is sent as a
    single user message through the module-level ``client`` and the content
    of the first returned choice is handed back unchanged.
    """
    prompt = f"""
You are a strict compliance checker for Govt. procurement policies.
Check whether the proposal complies with MANUAL requirements. Respond in EXACT format:
Status: COMPLIANT or NON-COMPLIANT
Severity: HIGH or MEDIUM or LOW
Deviations: <short bullet-style description or 'None'>
Fix: <clear corrective action>
COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>
MANUAL: {manual}
proposal: {proposal}
"""
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[user_message],
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def generate_html(llm_response):
    """Render a multi-line response as a single-cell HTML table row.

    Each line is stripped, blank lines are dropped and the remaining lines
    are joined with ``<br>`` inside ``<tr><td>…</td></tr>``. The text is
    inserted as-is (no HTML escaping).
    """
    stripped_lines = (raw.strip() for raw in llm_response.strip().split('\n'))
    cell_body = '<br>'.join(piece for piece in stripped_lines if piece)
    return "<tr><td>" + cell_body + "</td></tr>"
| #================================================Gradio================== | |
def update_log(newRecords):
    """Append *newRecords* to the log file stored in the Hub dataset repo.

    The current log is downloaded, the new text is appended in memory
    (surrounded by newlines) and the result is uploaded again, replacing the
    previous version. When the log file does not exist yet it is created
    with just the new text.

    Logging is best-effort: any failure is printed and swallowed so that a
    logging problem never breaks the calling request.

    Args:
        newRecords: Text to append.
    """
    try:
        try:
            downloaded_path = hf_hub_download(
                repo_id=repo_id,
                filename=file_path_in_repo,
                repo_type="dataset",
            )
            # Read only: the returned path lives in the local hub cache and
            # must not be modified in place.
            with open(downloaded_path, "rb") as f:
                existing = f.read()
        except EntryNotFoundError:
            # First entry ever: start a new log.
            existing = b""

        updated = existing + ("\n" + newRecords + "\n").encode("utf-8")
        api.upload_file(
            path_or_fileobj=updated,
            path_in_repo=file_path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message="Append new log entry",
        )
    except Exception as err:
        print(f"Log update failed: {err}")
| #============================== | |
def loop_function(df):
    """Check the rows of an admin-file DataFrame and stream an HTML report.

    Header rows ("File No.", "PR No.", "Name of proposal", "Value (Rs)") are
    added to the report as headings. The rows "PQC for Open tenders",
    "Basis of estimate" and "Payment Terms" are each sent to
    ``generate_response`` together with the matching rule text; after every
    such check the accumulated HTML is yielded so the UI can update
    progressively. A failing check is printed and skipped. When the loop is
    done, all LLM answers are written to the log in one best-effort call.

    Args:
        df: DataFrame with at least the columns ``Field`` and ``Value``.

    Yields:
        The HTML report accumulated so far, once per completed check.
    """
    text = "<hr>"
    Value_of_proposal = ""
    E_file_No = ""
    # Initialised up front: the "Basis of estimate" row may come before the
    # "Tender Type" row, or there may be no "Tender Type" row at all.
    Tender_Type_of_proposal = ""
    txt_forRecord = ""

    for _, row in df.iterrows():
        key = str(row['Field'])
        value = str(row['Value'])

        # Rows that are neither displayed nor checked.
        if key in ("Justification/Reason for Procurement", "Category"):
            continue

        # Header rows: shown as headings. Document text is escaped before it
        # is placed into HTML.
        if key == "File No.":
            E_file_No = f"E-File No: {value}. \n"
            text += "<h2>" + html.escape(E_file_No) + "</h2>"
            continue
        if key == "PR No.":
            text += "<h2>" + html.escape(f"PR No: {value}. \n") + "</h2>"
            continue
        if key == "Name of proposal":
            text += "<h2>" + html.escape(f"Name of the proposal : {value}. \n") + "</h2><hr>"
            continue
        if key == "Value (Rs)":
            Value_of_proposal = f"The proposal Value is {value}. \n"
            text += "<h2>" + html.escape(Value_of_proposal) + "</h2>"
            continue
        if key == "Tender Type":
            Tender_Type_of_proposal = f"The proposal Tender Type is {value}. \n"
            continue

        # Rows that are checked against a rule set.
        if key == "PQC for Open tenders":
            manual_rules = PQC_rules
            proposal_details = f"The Pre Qualifying Criteria (PQC) of the proposal is under: {value}. {Value_of_proposal}"
        elif key == "Basis of estimate":
            manual_rules = manual_basis_of_estimate
            proposal_details = f"The basis of estimate of the proposal is under: {value}.{Tender_Type_of_proposal}"
        elif key == "Payment Terms":
            manual_rules = manual_payment_type
            proposal_details = f"The Payment Terms of the proposal is {value}."
        else:
            continue

        try:
            rr = generate_response(manual_rules, proposal_details)
            txt_forRecord += "\n" + datetime.now().isoformat() + "\n" + E_file_No + "\n" + rr
            text += '\n<span style="color: #006400 !important; font-weight: bold; font-size: 14px;">\n'
            text += (
                "<table><tr><td>As per proposal, "
                + html.escape(key) + " : " + html.escape(value)
                + "</td></tr>"
            )
            text += generate_html(rr)
            text += "</table></span><hr>"
            yield text
            # Pause between consecutive LLM calls.
            time.sleep(3)
        except Exception as e:
            print(f"Error: {e} - skipping row")

    # Best-effort: a logging failure must not break the report.
    try:
        update_log("\n" + txt_forRecord + "\n")
    except Exception as ee:
        print(f"Error: {ee} - not saved the log")
def loop_function_tech(df):
    """Placeholder for the technical-document loop; *df* is not used yet."""
    # Not implemented: emit a single fixed message.
    yield from ("coming soon",)
def check_compliance(file):
    """Stream the compliance report for an uploaded admin file.

    Args:
        file: The upload, either an object with a ``name`` attribute holding
            the file path or a plain path string. ``None`` means nothing was
            uploaded.

    Yields:
        Progressive HTML from ``loop_function`` for ``.docx`` files (the
        extension is matched case-insensitively), otherwise one plain
        message.
    """
    if file is None:
        yield "Please upload a .docx file first."
        return

    path = getattr(file, "name", file)
    if str(path).lower().endswith(".docx"):
        df1 = extract_docx_text(path)
        yield from loop_function(df1)
    else:
        yield "Unsupported file format"
def check_compliance_tech(file_name):
    """Check an uploaded technical PDF and yield the result as an HTML table.

    Args:
        file_name: The upload, either an object with a ``name`` attribute
            holding the file path or a plain path string. ``None`` means
            nothing was uploaded.

    Yields:
        For ``.pdf`` files (extension matched case-insensitively) one HTML
        table built from the ``compliance_tech`` result; the same HTML is
        written to the log afterwards. Otherwise one plain message.
    """
    if file_name is None:
        yield "Please upload a .pdf file first."
        return

    path = getattr(file_name, "name", file_name)
    if str(path).lower().endswith(".pdf"):
        MANUAL_RULES = manualRules()
        # The upload object itself is handed to the helper, as before.
        dd = compliance_tech(file_name, client, MANUAL_RULES)
        result_html = "<table>" + generate_html(dd) + "</table>"
        yield result_html
        update_log("\n Technical Complaince response: " + datetime.now().isoformat() + "\n" + result_html + "\n")
    else:
        yield "Unsupported file format"
def generateCqOemImport(file_name):
    """Generate a draft CQ from an uploaded vendor offer and yield it as HTML.

    Args:
        file_name: The upload, either an object with a ``name`` attribute
            holding the file path or a plain path string. ``None`` means
            nothing was uploaded.

    Yields:
        For ``.pdf`` files (extension matched case-insensitively) one HTML
        document containing the token usage, the HTML fragments returned by
        ``compliance_import_OEM`` and the draft returned by ``domesticCQ``;
        the same HTML is written to the log afterwards. Otherwise one plain
        message.
    """
    if file_name is None:
        yield "Please upload a .pdf file first."
        return

    path = getattr(file_name, "name", file_name)
    if not str(path).lower().endswith(".pdf"):
        yield "Unsupported file format"
        return

    # The upload object itself is handed to the helper, as before.
    (
        extraction_item_value_html,
        extraction_html,
        compliance_html,
        input_tokens,
        output_tokens,
        total_tokens,
        result_dic,
    ) = compliance_import_OEM(file_name, client)
    kkk = domesticCQ(extraction_item_value_html, result_dic)
    dd = f"""
\n
<table>
<tr><td>Input tokens:</td><td>{input_tokens}</td></tr>
<tr><td>Output tokens:</td><td>{output_tokens}</td></tr>
<tr><td>Total tokens:</td><td>{total_tokens}</td></tr>
</table>
\n Vendor quoted the price as under in the offer. \n
{extraction_item_value_html}
\n The extracted value against each point is as under. \n
{extraction_html}
\n The compliance of the offer against our rules are as under. \n
{compliance_html}
\n\n
The Draft CQ is as under: \n\n\n
{kkk}
"""
    yield dd
    update_log("\n CQ-OEM: " + datetime.now().isoformat() + "\n" + dd + "\n")
| #================================ | |
# Alternative all-black stylesheet.
# NOTE(review): nothing in the visible part of this file references `css1`
# (the Blocks UI is created with `css`); confirm before relying on it.
css1 = """
* {
background-color: #000000 !important;
color: #FFFFFF !important;
border-color: #444444 !important;
}
button, .btn {
background: #333333 !important;
color: #FFFFFF !important;
border: 1px solid #555555 !important;
}
button:hover {
background: #555555 !important;
}
input, textarea, select {
background: #111111 !important;
color: #FFFFFF !important;
border: 1px solid #666666 !important;
}
label {
color: #FFFFFF !important;
font-weight: bold !important;
}
#nrlchatbot, .chatbot {
background: #000000 !important;
color: #FFFFFF !important;
}
.chatbot .message {
background: #111111 !important;
color: #FFFFFF !important;
border: 1px solid #333333 !important;
}
.tabitem, h1, h2, h3, .markdown {
background: transparent !important;
color: #FFFFFF !important;
}
span[style] {
color: inherit !important; /* Respect inline styles */
}
span[style*="color"] {
color: inherit !important !important; /* Double !important */
}
span {
all: unset !important;
}
#compliance-btn { background: red !important; }
#compliance-btn-tech { background: #006600 !important; }
#admin-file { background: #00008B !important; }
#chatRadio {
background: #111111 !important;
}
#chatRadio label {
color: #FFFFFF !important;
background: transparent !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 4px !important;
}
#chatRadio input:checked + label::before {
content: "✓" !important;
color: #00FF00 !important;
font-weight: bold !important;
margin-right: 8px !important;
}
"""
| #===================== | |
# Stylesheet passed to gr.Blocks(css=css) below. The #id selectors correspond
# to the elem_id values given to the components in the UI definition.
css = """
.gradio-container {
background: linear-gradient(135deg, #000000, #1a1a1a) !important;
color: #ffffff !important;
}
.gradio-container .default {
background-color: #111111 !important;
color: #ffffff !important;
}
.gradio-container button {
background: #333333 !important;
color: white !important;
border: 1px solid #555 !important; /* ← Add border consistency */
}
.gradio-container input, .gradio-container textarea {
background-color: #222222 !important;
color: white !important;
border: 1px solid #444444 !important;
}
#admin-file .label, #admin-file label {
color: #FFFFFF !important;
font-size: 16px !important;
background-color: #00008B !important;
}
#admin-file { background-color: #00008B !important; }
#compliance-btn {
color: #FFFFFF !important;
background-color: red !important; /* Red background */
font-size: 16px !important;
}
#compliance-btn:hover {
background-color: #CC0000 !important; /* Darker red on hover */
color: #FFFFFF !important;
}
#compliance-btn-tech {
color: #FFFFFF !important;
background-color: #006600 !important; /* Darker green (was green=too bright) */
font-size: 16px !important;
}
#compliance-btn-tech:hover {
background-color: #006400 !important; /* Darker red on hover */
color: #FFFFFF !important;
}
#compliance-out textarea, #compliance-out .label, #compliance-out label {
color: #FFFFFF !important;
background-color: #000000 !important;
font-size: 16px !important;
}
#nrlchatbot {
background: linear-gradient(135deg, #000000, #1a1a1a) !important;
color: #ffffff !important;
}
#chatRadio {
background: linear-gradient(135deg, #000000, #1a1a1a) !important;
color: #ffffff !important;
}
#nrlchatbotLabe {
background-color: #00008B !important;
color: #ffffff !important;
font-size: 16px !important;
}
#compliance-result {
background-color: #FFFFFF !important;
color: #000000 !important;
font-size: 16px !important;
}
#out_cq_elemid {
background-color: #FFFFFF !important;
color: #000000 !important;
font-size: 16px !important;
}
#out_tech_elemid {
background-color: #FFFFFF !important;
color: #000000 !important;
font-size: 16px !important;
}
"""
# Gradio UI: one tab per tool, each wiring an input component and a button to
# one of the handler functions defined above.
# NOTE(review): the indentation of this section was lost in the captured
# source; the nesting below (in particular what sits inside each gr.Row) is a
# reconstruction -- confirm against the deployed layout.
with gr.Blocks(css=css) as demo:
    with gr.Tabs(elem_id="main-tabs"):
        # Tab 1: compliance check of an admin note uploaded as .docx.
        with gr.TabItem("Compliance Check of Arohan Admin File"):
            with gr.Row():
                inp = gr.File(
                    label="Upload Admin File in word i.e. docx format",
                    file_types=[".docx"],
                    elem_id="admin-file"
                )
                run_btn = gr.Button("Check compliance", elem_id="compliance-btn")
            out = gr.HTML(label="Compliance Result", elem_id="compliance-result")
            # check_compliance is a generator, so the output updates after every yield.
            run_btn.click(check_compliance, inputs=inp, outputs=out, api_name="manabAdminNoteCheck")
        # Tab 2: RAG chatbot over the selected document.
        with gr.TabItem("NRL ChatBot"):
            # Plain HTML heading used in place of a Markdown title.
            gr.HTML("""
<h1 style="color: #FFFFFF !important; text-align: center; font-size: 28px; margin: 20px;">
RAG Chatbot - NRL Documents
</h1>
""")
            # Radio button choosing which document respond() queries.
            doc_selector = gr.Radio(
                choices=[
                    ("GPC Goods", "gpc_goods"),
                    ("Procurement Manual", "manual")
                ],
                value="gpc_goods",  # Default
                label="Select Document:",
                info="Choose which document to query",
                elem_id="chatRadio"
            )
            # NOTE(review): respond() returns role/content dicts; confirm the
            # installed Gradio version uses the "messages" format by default.
            chatbot = gr.Chatbot(height=500, elem_id="nrlchatbot")
            msg = gr.Textbox(placeholder="Ask a question...", label="Query")
            submit_btn = gr.Button("Submit")
            # Both the button and Enter in the textbox trigger respond().
            submit_btn.click(fn=respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot], api_name="manabChat")
            msg.submit(respond, inputs=[msg, chatbot, doc_selector], outputs=[msg, chatbot])
        # Tab 3: compliance check of a technical document uploaded as PDF.
        with gr.TabItem("Compliance Check of user technical doc"):
            with gr.Row():
                inp_tech = gr.File(
                    label="Upload technical document in pdf format",
                    file_types=[".pdf"],
                    elem_id="tech-file"
                )
                run_btn_tech = gr.Button("Check compliance of technical document", elem_id="compliance-btn-tech")
            out_tech = gr.HTML(label="Compliance Result of technical document", elem_id="out_tech_elemid")
            run_btn_tech.click(check_compliance_tech, inputs=inp_tech, outputs=out_tech, api_name="manabTechDocCheck")
        # Tab 4: draft CQ generation from a vendor offer uploaded as PDF.
        with gr.TabItem("CQ generation - OEM "):
            with gr.Row():
                inp_OEM_import = gr.File(
                    label="Upload vendor's offer in pdf format",
                    file_types=[".pdf"],
                    elem_id="oem-import-file"
                )
                manabLebel=gr.HTML("""
<div style="color: red; background: black; padding: 20px; text-align: justify; font-size: 20px;">
Disclaimer: AI generated outcome is only for reference. Consider the
following only as a draft and recheck is mandatory.
</div>"""
                )
            run_btn_oemImport = gr.Button("Generate CQ of OEM-offer", elem_id="generateOEMImport-btn")
            out_cq = gr.HTML(label="Generated CQ", elem_id="out_cq_elemid")
            run_btn_oemImport.click(generateCqOemImport, inputs=inp_OEM_import, outputs=out_cq, api_name="manabCQGeneration")
        # Tab 5: placeholder only, no handler is wired.
        with gr.TabItem("Generate TAC"):
            gr.HTML("""
<div style="color: white; background: black; padding: 20px; text-align: center; font-size: 24px;">
🚧 Coming Soon 🚧
</div> """
            )
# Enable the request queue, then start the server.
demo.queue().launch()
| #demo.launch() | |