manabb committed on
Commit
c7c6dab
·
verified ·
1 Parent(s): a8ce881

Create CQgenerationOEMImport.py

Browse files
Files changed (1) hide show
  1. CQgenerationOEMImport.py +75 -0
CQgenerationOEMImport.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #CQgenerationOEMImport.py
2
+
3
+ from openai import OpenAI # Core import for client[web:30][web:32]
4
+ from openai import OpenAI
5
+ from langchain_community.document_loaders import PyMuPDFLoader # pip install pymupdf[web:42]
6
+ import os
7
+ import re
8
+
9
def normalize_text(s: str) -> str:
    """Return *s* with line endings unified and whitespace runs collapsed.

    Steps, in order:
      1. ``\\r\\n`` and bare ``\\r`` become ``\\n``; each tab becomes one space.
      2. Three or more consecutive newlines shrink to exactly two.
      3. Two or more consecutive spaces / no-break spaces (U+00A0) shrink to
         a single regular space. A lone no-break space is left untouched.
      4. Leading and trailing whitespace is stripped.
    """
    text = s

    # Literal replacements. CRLF has to be handled before a bare CR,
    # otherwise every CRLF would turn into two newlines.
    literal_swaps = (
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("\t", " "),
    )
    for old, new in literal_swaps:
        text = text.replace(old, new)

    # Regex replacements: cap blank-line runs first, then squeeze spaces.
    regex_swaps = (
        (r"\n{3,}", "\n\n"),
        (r"[ \u00A0]{2,}", " "),
    )
    for pattern, replacement in regex_swaps:
        text = re.sub(pattern, replacement, text)

    return text.strip()
22
+
23
def compliance_tech(
    file: str,
    client,
    MANUAL_RULES,
    *,
    model: str = "gpt-4o-mini",
    max_doc_chars: int = 16000,
    max_tokens: int = 1200,
):
    """Ask a chat model whether a vendor PDF complies with each manual rule.

    The PDF is loaded with ``PyMuPDFLoader``, every page is passed through
    ``normalize_text``, the pages are joined with blank lines, and the
    result is embedded in a strict-auditor prompt together with
    ``MANUAL_RULES``. The prompt is sent as a single user message.

    Args:
        file: Path of the PDF handed to ``PyMuPDFLoader``.
        client: Object exposing ``chat.completions.create(...)``; presumably
            an ``openai.OpenAI`` instance (the caller is not visible here).
        MANUAL_RULES: Rules to audit against; interpolated into the prompt
            with normal f-string formatting.
        model: Chat model name passed to the API.
        max_doc_chars: Only the first ``max_doc_chars`` characters of the
            extracted text are sent. NOTE: the cut is silent, so content past
            this point can never be quoted as evidence by the model.
        max_tokens: Upper bound on the length of the model's reply.

    Returns:
        ``response.choices[0].message.content`` from the completion.
    """
    pages = PyMuPDFLoader(file).load()
    doc_text = "\n\n".join(normalize_text(page.page_content) for page in pages)

    # Truncate outside the template: the original kept a "# Truncate ..."
    # remark inside the f-string, which leaked into the prompt text itself.
    doc_excerpt = doc_text[:max_doc_chars]

    prompt = f"""
Document content (complete extracted text):

{doc_excerpt}

You are a strict procurement compliance auditor.

Consider the document content as vendor's offer.

Your task is to check whether vendor's offer FULLY complies against each point of the MANUAL RULES.

MANDATORY INSTRUCTIONS:

1. Do NOT assume anything.
2. Do NOT interpret beyond what is written.
3. If information is missing → mark as NON-COMPLIANT.
4. If partially satisfied → mark as NON-COMPLIANT.
5. Only explicit written evidence is valid.
6. Quote exact sentence from document as evidence.
7. Do not provide explanation beyond required format.

Pointwise compliance response of all the points of the rules is to be given stricly in the folowwing format.

OUTPUT FORMAT (STRICT):

Rule heading: Heading of the rule
Status: COMPLIANT / NON-COMPLIANT
As per vendor: "<Exact quoted sentence from document>" OR "Not found in document"
COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>

MANUAL RULES:
{MANUAL_RULES}
"""

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # keep the audit output as repeatable as possible
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content
75
+