manabb committed on
Commit
747816b
·
verified ·
1 Parent(s): f3a7fb6

Create manabCQgenetaion.py

Browse files
Files changed (1) hide show
  1. manabCQgenetaion.py +74 -0
manabCQgenetaion.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #manabCQgenetaion.py
2
+
3
+ from openai import OpenAI # Core import for client[web:30][web:32]
4
+ from openai import OpenAI
5
+ from langchain_community.document_loaders import PyMuPDFLoader # pip install pymupdf[web:42]
6
+ import os
7
+ import re
8
+
9
def normalize_text(s: str) -> str:
    """Normalize whitespace and newlines in extracted page text.

    Steps, in order:
      1. Convert CRLF / CR line endings to LF.
      2. Turn tabs and non-breaking spaces (U+00A0) into plain spaces.
      3. Collapse runs of spaces to a single space.
      4. Drop trailing spaces at the end of every line, so that lines
         holding only whitespace become truly empty.
      5. Collapse three or more consecutive newlines to exactly two.
      6. Strip leading and trailing whitespace from the whole string.

    Args:
        s: Raw text to clean up.

    Returns:
        The normalized text.
    """
    # Unify line endings first so the later newline patterns only see "\n".
    s = s.replace("\r\n", "\n").replace("\r", "\n")

    # Treat tabs and NBSPs as ordinary spaces; a lone NBSP is converted too,
    # not just runs of two or more.
    s = s.replace("\t", " ").replace("\u00A0", " ")

    # multiple spaces -> 1
    s = re.sub(r" {2,}", " ", s)

    # Remove trailing spaces before each newline. This must run before the
    # newline collapse: otherwise "\n \n \n" is never seen as a newline run.
    s = re.sub(r" +\n", "\n", s)

    # collapse 3+ newlines to 2
    s = re.sub(r"\n{3,}", "\n\n", s)

    return s.strip()
22
+
23
def _build_cq_prompt(doc_text: str, manual_rules, max_doc_chars: int) -> str:
    """Assemble the compliance-audit prompt sent to the chat model.

    At most ``max_doc_chars`` characters of ``doc_text`` are embedded. When
    the text is longer than that, a visible marker is appended to the excerpt
    so the cut is not silent.
    """
    excerpt = doc_text[:max_doc_chars]
    if len(doc_text) > max_doc_chars:
        excerpt += (
            f"\n\n[Document truncated to the first {max_doc_chars} characters]"
        )

    # The template lines are flush-left on purpose: any indentation inside
    # the triple-quoted string would become part of the prompt text.
    return f"""
Document content (complete extracted text):

{excerpt}

You are a strict procurement compliance auditor.

Your task is to check whether the uploaded file FULLY complies against each point of each heading of the MANUAL RULES.

MANDATORY INSTRUCTIONS:

1. Do NOT assume anything.
2. Do NOT interpret beyond what is written.
3. If information is missing → mark as NON-COMPLIANT.
4. If partially satisfied → mark as NON-COMPLIANT.
5. Only explicit written evidence is valid.
6. Quote exact sentence from document as evidence.
7. Do not provide explanation beyond required format.
8. Include E File No in the response.

Summarise the response only on rule headingwise and not pointwise mentioned under each heading.

OUTPUT FORMAT (STRICT):

Rule heading: Heading of the rule
Status: COMPLIANT / NON-COMPLIANT
Evidence: "<Exact quoted sentence from document>" OR "Not found in document"
Deviations: <short bullet-style description or 'None'>
COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>

MANUAL RULES:
{manual_rules}
"""


def CQ_generation(
    file: str,
    client,
    MANUAL_RULES,
    *,
    model: str = "gpt-4o-mini",
    max_doc_chars: int = 16000,
    max_tokens: int = 1200,
):
    """Check a PDF against a set of manual rules using a chat model.

    The PDF is loaded with ``PyMuPDFLoader``, each page's text is passed
    through ``normalize_text``, and the pages are joined with blank lines.
    The resulting text (limited to ``max_doc_chars`` characters) is placed
    in a fixed audit prompt together with ``MANUAL_RULES`` and sent as a
    single user message.

    Args:
        file: Path of the PDF to audit.
        client: Object exposing ``chat.completions.create(...)``; presumably
            an ``openai.OpenAI`` instance -- confirm against the caller.
        MANUAL_RULES: Rules to audit against; interpolated into the prompt
            with normal f-string formatting.
        model: Chat model name passed to the API.
        max_doc_chars: Maximum number of document characters embedded in
            the prompt.
        max_tokens: Completion token limit passed to the API.

    Returns:
        ``response.choices[0].message.content`` from the completion call.
    """
    # Extract full PDF text, normalizing every page before flattening.
    loader = PyMuPDFLoader(file)
    doc_text = "\n\n".join(
        normalize_text(page.page_content) for page in loader.load()
    )

    prompt = _build_cq_prompt(doc_text, MANUAL_RULES, max_doc_chars)

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # keep the audit output as repeatable as possible
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content