#CQgenerationOEMImport.py
"""Check a vendor's PDF offer against a set of manual compliance rules.

The PDF text is extracted with PyMuPDFLoader, whitespace-normalized, embedded
in a fixed auditing prompt together with the rules, and sent to a chat
completion endpoint through the client supplied by the caller.
"""
import logging
import os
import re

from langchain_community.document_loaders import PyMuPDFLoader  # pip install pymupdf
from openai import OpenAI

logger = logging.getLogger(__name__)

# Prompt sent to the model. The wording is kept verbatim (including its
# spelling) so model behaviour does not drift; only the two placeholders are
# substituted at call time. str.format() parses the template only, so braces
# appearing in the document text or in the rules are inserted literally.
_PROMPT_TEMPLATE = """
Document content (complete extracted text):
{doc_text}

You are a strict procurement compliance auditor.
Consider the document content as vendor's offer.
Your task is to check whether vendor's offer FULLY complies against each point of the MANUAL RULES.

MANDATORY INSTRUCTIONS:
1. Do NOT assume anything.
2. Do NOT interpret beyond what is written.
3. If information is missing → mark as NON-COMPLIANT.
4. If partially satisfied → mark as NON-COMPLIANT.
5. Only explicit written evidence is valid.
6. Quote exact sentence from document as evidence.
7. Do not provide explanation beyond required format.

Pointwise compliance response of all the points of the rules is to be given stricly in the folowwing format.

OUTPUT FORMAT (STRICT):
Rule heading: Heading of the rule
Status: COMPLIANT / NON-COMPLIANT
As per vendor: "" OR "Not found in document"
COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>

MANUAL RULES:
{manual_rules}
"""


def normalize_text(s: str) -> str:
    """Normalize whitespace / newlines in page_content.

    Converts CRLF and bare CR to LF, replaces tabs with spaces, collapses
    runs of three or more newlines to exactly two, collapses runs of two or
    more spaces / non-breaking spaces to one space, and strips leading and
    trailing whitespace.
    """
    s = s.replace("\r\n", "\n").replace("\r", "\n")
    s = s.replace("\t", " ")
    # Collapse 3+ newlines to 2 so paragraph breaks survive but gaps shrink.
    s = re.sub(r"\n{3,}", "\n\n", s)
    # Runs of spaces / NBSPs -> a single regular space.
    s = re.sub(r"[ \u00A0]{2,}", " ", s)
    return s.strip()


def _build_prompt(doc_text: str, manual_rules, max_doc_chars: int) -> str:
    """Return the audit prompt for *doc_text*, truncated to *max_doc_chars*."""
    if len(doc_text) > max_doc_chars:
        # Truncation means part of the offer is never seen by the model, which
        # can turn satisfied rules into NON-COMPLIANT; make that visible.
        logger.warning(
            "Document text truncated from %d to %d characters before prompting",
            len(doc_text),
            max_doc_chars,
        )
    return _PROMPT_TEMPLATE.format(
        doc_text=doc_text[:max_doc_chars],
        manual_rules=manual_rules,
    )


def compliance_import_OEM(
    file: str,
    client,
    MANUAL_RULES,
    *,
    model: str = "gpt-4o-mini",
    max_doc_chars: int = 16000,
    max_tokens: int = 1200,
):
    """Audit the PDF at *file* against *MANUAL_RULES* and return the model's reply.

    Args:
        file: Path of the PDF, passed straight to ``PyMuPDFLoader``.
        client: Object exposing ``chat.completions.create(...)``; presumably an
            ``openai.OpenAI`` instance -- confirm against the caller.
        MANUAL_RULES: Rules to check; interpolated into the prompt via
            string formatting.
        model: Chat model name sent with the request.
        max_doc_chars: Maximum number of characters of extracted text placed
            in the prompt; longer text is cut and a warning is logged.
        max_tokens: ``max_tokens`` value sent with the request.

    Returns:
        ``response.choices[0].message.content`` of the completion.
    """
    docs = PyMuPDFLoader(file).load()
    # Normalize each loaded document, then join them with a blank line between.
    doc_text = "\n\n".join(normalize_text(doc.page_content) for doc in docs)

    prompt = _build_prompt(doc_text, MANUAL_RULES, max_doc_chars)

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # keep the audit output as repeatable as the API allows
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content