NRLCommercialAI-dev / CQgenerationOEMImport.py
manabb's picture
Update CQgenerationOEMImport.py
c57e003 verified
#CQgenerationOEMImport.py
from openai import OpenAI # Core import for client[web:30][web:32]
from openai import OpenAI
from langchain_community.document_loaders import PyMuPDFLoader # pip install pymupdf[web:42]
import os
import re
def normalize_text(s: str) -> str:
    """Return *s* with line endings unified and whitespace runs squeezed.

    CRLF and lone CR become LF, every tab becomes one space, runs of three
    or more newlines shrink to a single blank line, runs of two or more
    spaces / non-breaking spaces shrink to one plain space, and the result
    is stripped at both ends.
    """
    cleaned = s

    # Literal substitutions; CRLF has to be handled before a bare CR.
    for old, new in (("\r\n", "\n"), ("\r", "\n"), ("\t", " ")):
        cleaned = cleaned.replace(old, new)

    # Regex substitutions: first cap blank lines, then squeeze space runs.
    for pattern, replacement in ((r"\n{3,}", "\n\n"), (r"[ \u00A0]{2,}", " ")):
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()
def compliance_import_OEM(
    file: str,
    client,
    MANUAL_RULES,
    *,
    model: str = "gpt-4o-mini",
    max_doc_chars: int = 16000,
    max_tokens: int = 1200,
):
    """Audit a vendor-offer PDF against MANUAL_RULES using a chat model.

    The PDF is loaded page by page with PyMuPDFLoader, each page is cleaned
    with normalize_text, and the pages are joined with a blank line between
    them. That text (cut to ``max_doc_chars`` characters) and the rules are
    embedded in one user prompt sent via ``client.chat.completions.create``
    with ``temperature=0``.

    Args:
        file: Path of the PDF handed to PyMuPDFLoader.
        client: Object exposing ``chat.completions.create`` (presumably an
            ``openai.OpenAI`` client -- confirm against the caller).
        MANUAL_RULES: Rules text interpolated verbatim into the prompt.
        model: Chat model name. Defaults to the previously hard-coded value.
        max_doc_chars: Maximum number of document characters placed in the
            prompt; ``None`` disables the cut. Defaults to the previously
            hard-coded 16000.
        max_tokens: Completion token limit passed to the API. Defaults to
            the previously hard-coded 1200.

    Returns:
        The message content of the first choice in the API response.

    Warns:
        UserWarning: When the extracted text is longer than
            ``max_doc_chars`` and therefore only partly sent to the model.
    """
    import warnings  # local import so the block does not rely on file-level edits

    # Extract the PDF text page by page and clean each page before joining.
    pages = PyMuPDFLoader(file).load()
    doc_text = "\n\n".join(normalize_text(page.page_content) for page in pages)

    # Keep the prompt within the model's input budget. The prompt tells the
    # model that missing information means NON-COMPLIANT, so dropped text can
    # flip a verdict -- make the cut visible instead of silent.
    excerpt = doc_text[:max_doc_chars]
    if len(excerpt) < len(doc_text):
        warnings.warn(
            f"compliance_import_OEM: document text truncated from "
            f"{len(doc_text)} to {len(excerpt)} characters; rules answered "
            f"only in the dropped part may be reported as NON-COMPLIANT",
            stacklevel=2,
        )

    # NOTE: everything between the triple quotes is sent to the model, so no
    # code comments may appear inside it.
    prompt = f"""
Document content (complete extracted text):
{excerpt}
You are a strict procurement compliance auditor.
Consider the document content as vendor's offer.
Your task is to check whether vendor's offer FULLY complies against each point of the MANUAL RULES.
MANDATORY INSTRUCTIONS:
1. Do NOT assume anything.
2. Do NOT interpret beyond what is written.
3. If information is missing → mark as NON-COMPLIANT.
4. If partially satisfied → mark as NON-COMPLIANT.
5. Only explicit written evidence is valid.
6. Quote exact sentence from document as evidence.
7. Do not provide explanation beyond required format.
Pointwise compliance response of all the points of the rules is to be given stricly in the folowwing format.
OUTPUT FORMAT (STRICT):
Rule heading: Heading of the rule
Status: COMPLIANT / NON-COMPLIANT
As per vendor: "<Exact quoted sentence from document>" OR "Not found in document"
COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>
MANUAL RULES:
{MANUAL_RULES}
"""

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # deterministic output for an audit-style task
        max_tokens=max_tokens,
    )
    # The reply text lives on the first choice's message.
    return response.choices[0].message.content