Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import torch | |
class LLMAnalyser_BIG:
    """
    A wrapper for a Hugging Face text-generation model that analyses
    multi-stakeholder product documents against business/regulatory criteria.
    """

    def __init__(self, model_name: str):
        """
        Initialises the analyser by loading the specified model.

        On any load failure the error is printed and ``self.llm_pipeline`` is
        set to ``None``; :meth:`analyse_document` then degrades gracefully
        instead of raising.
        """
        print(f"Initialising LLM Analyser ({model_name})...")
        try:
            self.llm_pipeline = pipeline(
                "text-generation",
                model=model_name,
                # bfloat16 halves memory vs fp32 on supported hardware.
                # NOTE(review): the `dtype=` kwarg requires a recent
                # transformers release — older versions name it `torch_dtype`.
                # Confirm against the pinned transformers version.
                dtype=torch.bfloat16,
            )
            print(f"Model '{model_name}' loaded successfully.")
        except Exception as e:
            # Deliberately broad: model loading can fail in many ways
            # (network, missing weights, OOM). Degrade to a disabled state.
            print(f"Failed to load model. Error: {e}")
            self.llm_pipeline = None

    def _construct_prompt(self, stakeholder_inputs_text: str, criteria_text: str) -> str:
        """
        Creates a detailed, structured chat prompt asking the model to
        summarise each stakeholder document and evaluate the collective
        input against the supplied analysis criteria.

        Requires ``self.llm_pipeline`` to be loaded (caller must guard).
        """
        messages = [
            {
                "role": "system",
                "content": "You are an expert business analyst for a major bank. Your task is to analyse documents from different stakeholders regarding a new product. You must first summarize the key points from each stakeholder and then evaluate how the collective information aligns with a given set of business and regulatory criteria."
            },
            {
                "role": "user",
                # BUGFIX: the task list previously said "three parts" but
                # enumerated four, with a duplicated item number
                # ("3 ... / 3. ..."). Renumbered to a consistent 1-4.
                "content": f"""
**STAKEHOLDER DOCUMENTS:**
---
{stakeholder_inputs_text}
---
**ANALYSIS CRITERIA:**
---
{criteria_text}
---
**YOUR TASK:**
Carefully review all the stakeholder documents and the analysis criteria. Structure your response in four parts:
1. **Executive Summary:** Provide a high-level overview of the new product initiative, combining the key information from all stakeholders.
2. **Stakeholder Input Summary:** For each stakeholder document provided (e.g., Legal, Finance, Marketing), create a bulleted list summarizing their main points, concerns, and recommendations. Use the document's filename as the title for each section. Please make sure to include if a unit has approved or disapproved the product.
3. **Stakeholder final verdict:** For each stakeholder document provided (e.g., Legal, Finance, Marketing), write either APPROVED or REJECTED based on their assessment.
4. **Criteria Fulfillment Analysis:** For each point in the **ANALYSIS CRITERIA**, state whether the provided documents fulfill it, partially fulfill it, or do not fulfill it. Justify your assessment with specific evidence from the stakeholder documents and identify any gaps or conflicts between stakeholders.
"""
            }
        ]
        # Render the chat messages into the model's native prompt format,
        # appending the assistant-turn header so generation starts cleanly.
        prompt = self.llm_pipeline.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        return prompt

    def analyse_document(self, document_text: str, regulation_text: str) -> str:
        """
        Analyses the document text against the regulation text using the LLM.

        Returns:
            A string containing the model's analysis, or a human-readable
            error message when the pipeline is unavailable, an input is
            empty, or inference fails.
        """
        if not self.llm_pipeline:
            return "LLM pipeline is not available. Cannot perform analysis."
        if not document_text or not regulation_text:
            return "Error: Document text or regulation text is empty."
        prompt = self._construct_prompt(document_text, regulation_text)
        print("Sending request to the language model... (This may take a moment)")
        try:
            results = self.llm_pipeline(
                prompt,
                max_new_tokens=1024,  # allow room for the four-part analysis
                do_sample=True,
                temperature=0.6,
                top_p=0.9,
            )
            # The pipeline returns prompt + generation; strip the prompt
            # prefix so only the newly generated analysis is returned.
            generated_text = results[0]['generated_text']
            return generated_text[len(prompt):]
        except Exception as e:
            # Inference can fail for many runtime reasons (OOM, CUDA errors);
            # surface the message rather than crashing the caller.
            return f"An error occurred during model inference: {e}"