NRLCommercialAI-dev

Running

App Files Files Community

manabb committed on Feb 19

Commit

747816b

verified ·

1 Parent(s): f3a7fb6

Create manabCQgenetaion.py

Browse files

Files changed (1) hide show

manabCQgenetaion.py +74 -0

manabCQgenetaion.py ADDED Viewed

	@@ -0,0 +1,74 @@

+#manabCQgenetaion.py
+from openai import OpenAI  # Core import for client[web:30][web:32]
+from openai import OpenAI
+from langchain_community.document_loaders import PyMuPDFLoader  # pip install pymupdf[web:42]
+import os
+import re
+def normalize_text(s: str) -> str:
+    """Tidy line endings, tabs and repeated blanks in extracted page text.
+
+    Steps, in order: CRLF and lone CR become LF; each tab becomes one space;
+    runs of three or more newlines shrink to exactly two; runs of two or
+    more spaces / no-break spaces shrink to a single space; finally leading
+    and trailing whitespace is stripped.
+    """
+    text = s.replace("\r\n", "\n")
+    text = text.replace("\r", "\n")
+    text = text.replace("\t", " ")
+    # (pattern, replacement) pairs applied one after another; order matters
+    # because tabs have already been turned into spaces above.
+    collapse_rules = (
+        (r"\n{3,}", "\n\n"),
+        (r"[ \u00A0]{2,}", " "),
+    )
+    for pattern, replacement in collapse_rules:
+        text = re.sub(pattern, replacement, text)
+    return text.strip()
+def CQ_generation(file: str, client, MANUAL_RULES, *, model: str = "gpt-4o-mini",
+                  max_chars: int = 16000, max_tokens: int = 1200):
+    """Ask a chat model to audit a document against ``MANUAL_RULES``.
+
+    The file is loaded with ``PyMuPDFLoader``, the ``page_content`` of every
+    loaded document is cleaned with ``normalize_text``, and the pieces are
+    joined with blank lines. Only the first ``max_chars`` characters of that
+    text are placed in the prompt, so anything past the cut-off is never
+    shown to the model.
+
+    Args:
+        file: Path handed to ``PyMuPDFLoader``.
+        client: Object exposing ``chat.completions.create`` (presumably an
+            ``openai.OpenAI`` instance -- confirm against the caller).
+        MANUAL_RULES: Rules text interpolated verbatim at the end of the prompt.
+        model: Model name passed to the chat completions call.
+        max_chars: Number of leading characters of the document text to send.
+        max_tokens: Completion token limit passed to the chat completions call.
+
+    Returns:
+        The ``message.content`` of the first choice in the response.
+    """
+    docs = PyMuPDFLoader(file).load()
+    doc_text = "\n\n".join(normalize_text(doc.page_content) for doc in docs)
+    # Truncate in code rather than annotating inside the template: everything
+    # written in the f-string, including a '#' note, becomes prompt text.
+    excerpt = doc_text[:max_chars]
+    prompt = f"""
+    Document content (complete extracted text):
+    {excerpt}
+    You are a strict procurement compliance auditor.
+    Your task is to check whether the uploaded file FULLY complies against each point of each heading of the MANUAL RULES.
+    MANDATORY INSTRUCTIONS:
+    1. Do NOT assume anything.
+    2. Do NOT interpret beyond what is written.
+    3. If information is missing → mark as NON-COMPLIANT.
+    4. If partially satisfied → mark as NON-COMPLIANT.
+    5. Only explicit written evidence is valid.
+    6. Quote exact sentence from document as evidence.
+    7. Do not provide explanation beyond required format.
+    8. Include E File No in the response.
+    Summarise the response only on rule headingwise and not pointwise mentioned under each heading.
+    OUTPUT FORMAT (STRICT):
+    Rule heading: Heading of the rule
+    Status: COMPLIANT / NON-COMPLIANT
+    Evidence: "<Exact quoted sentence from document>" OR "Not found in document"
+    Deviations: <short bullet-style description or 'None'>
+    COMPLIANCE ANALYSIS: <2–4 sentences explaining reasoning>
+    MANUAL RULES:
+    {MANUAL_RULES}
+    """
+    response = client.chat.completions.create(
+        model=model,
+        messages=[{"role": "user", "content": prompt}],
+        temperature=0,
+        max_tokens=max_tokens,
+    )
+    return response.choices[0].message.content