Spaces:

khaledsayed1
/

agentic_rag

Running

File size: 18,117 Bytes

import os
import gradio as gr
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.tools.retriever import create_retriever_tool
from langgraph.graph import MessagesState, StateGraph, START, END
from langchain.chat_models import init_chat_model
from langgraph.prebuilt import ToolNode, tools_condition
from pydantic import BaseModel, Field
from typing import Literal
from langchain_core.messages import HumanMessage, AIMessage
import logging



# Configure process-wide logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The Google API key is expected as an environment variable (on Hugging Face
# Spaces it comes from the Space's secrets).  When present it is written back
# to the environment unchanged; when absent only a warning is logged.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    logger.warning("GOOGLE_API_KEY not found in environment variables")
else:
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

class LegalConsultingBot:
    """Agentic RAG chatbot for legal-consulting questions from SMEs.

    Construction attempts, in order, to create the Gemini chat models, a
    retriever tool over a fixed list of web pages, and a LangGraph workflow.
    Each stage degrades instead of raising:

    * no API key / model failure -> ``response_model`` stays ``None`` and
      :meth:`chat` returns a "not initialized" message;
    * no documents / retriever failure -> ``retriever_tool`` stays ``None``
      and the graph only contains the direct-response node;
    * workflow failure -> ``graph`` stays ``None`` and :meth:`chat` calls
      the model directly.
    """

    # Maximum number of retrieval rounds (model turns that request the
    # retriever tool) per question.  Once reached, grading is skipped and the
    # answer is generated from whatever was retrieved last, so the
    # rewrite -> retrieve -> grade cycle cannot spin until LangGraph's
    # recursion limit aborts the request.
    MAX_RETRIEVAL_ATTEMPTS = 3

    def __init__(self):
        # Compiled LangGraph workflow, or None when setup failed.
        self.graph = None
        # Retriever tool exposed to the model, or None in fallback mode.
        self.retriever_tool = None
        # Chat model used for answering, tool calling and question rewriting.
        self.response_model = None
        # Chat model used for the yes/no relevance grading.
        self.grader_model = None
        self.initialize_bot()

    def initialize_bot(self):
        """Initialize models, retriever and workflow; never raises.

        Does nothing (beyond logging) when the module-level
        ``GOOGLE_API_KEY`` is empty.  Any exception from the setup steps is
        logged and swallowed so that constructing the bot cannot fail.
        """
        if not GOOGLE_API_KEY:
            logger.error("Google API key not available")
            return

        try:
            self.setup_models()
            self.setup_retriever()
            self.setup_workflow()
            logger.info("Bot initialized successfully")
        except Exception as e:
            logger.exception("Failed to initialize bot: %s", e)

    def setup_models(self):
        """Create the response and grader chat models.

        Both use ``gemini-2.0-flash`` via the ``google_genai`` provider with
        temperature 0.

        Raises:
            Exception: whatever ``init_chat_model`` raises, after logging it.
        """
        try:
            self.response_model = init_chat_model(
                "gemini-2.0-flash",
                model_provider="google_genai",
                temperature=0,
            )
            self.grader_model = init_chat_model(
                "gemini-2.0-flash",
                model_provider="google_genai",
                temperature=0,
            )
            logger.info("Models initialized successfully")
        except Exception as e:
            logger.exception("Failed to initialize models: %s", e)
            raise

    def setup_retriever(self):
        """Build the retriever tool from a fixed list of legal-consulting URLs.

        Pages are loaded one by one; a page that fails to load is skipped.
        If nothing could be loaded, or any later step fails, the method logs
        the problem and leaves ``self.retriever_tool`` as ``None`` (fallback
        mode) rather than raising.
        """
        urls = [
            "https://firststepslegal.co.uk/",
            "https://sprintlaw.co.uk/",
            "https://www.smbs.solutions/legal-and-compliance-resources-legal-assistance-for-businesses",
            "https://ignition.law/lawyers-for-smes/",
            "https://www.cocredo.co.uk/news/free-legal-advice-small-business-owners",
            "https://www.gannons.co.uk/sectors/smes/",
            "https://kkbservices.com/who-we-work-with/small-businesses/",
            "https://farringfordlegal.co.uk/",
            "https://stanislawlegal.com/en/legal-solutions/for-sme-companies/",
            "https://www.lawhive.co.uk/small-business/",
            "https://www.catalystlaw.co.uk/business-legal-advice.html",
            "https://medium.com/@kmitsme123/legal-consulting-tips-for-your-small-business-9075005eb574",
            "https://smecomply.co.uk/",
            "https://dojobusiness.com/blogs/news/legal-consultant-complete-guide",
            "https://englishlegaladvice.com/",
        ]

        try:
            docs = []
            successful_loads = 0

            for url in urls:
                # Best effort per URL: one unreachable site must not prevent
                # the remaining pages from being indexed.
                try:
                    docs.extend(WebBaseLoader(url).load())
                except Exception as e:
                    logger.warning(f"Failed to load {url}: {e}")
                    continue
                successful_loads += 1
                logger.info(f"Successfully loaded: {url}")

            logger.info(f"Successfully loaded {successful_loads}/{len(urls)} URLs")

            if not docs:
                logger.warning("No documents could be loaded, using fallback mode")
                return

            # Split documents into token-sized chunks.
            text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
                chunk_size=300, chunk_overlap=50
            )
            doc_splits = text_splitter.split_documents(docs)
            logger.info(f"Created {len(doc_splits)} document chunks")

            # Embed the chunks into an in-memory vector store.
            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
            vectorstore = InMemoryVectorStore.from_documents(
                documents=doc_splits,
                embedding=embeddings,
            )

            # Expose the store to the model as a tool.
            self.retriever_tool = create_retriever_tool(
                vectorstore.as_retriever(),
                name="legal_consulting_retriever",
                description="Search and return relevant legal information and consulting resources for small and medium-sized businesses.",
            )
            logger.info("Retriever tool created successfully")

        except Exception as e:
            logger.exception("Error setting up retriever: %s", e)
            self.retriever_tool = None

    def setup_workflow(self):
        """Assemble and compile the LangGraph workflow into ``self.graph``.

        With a retriever tool the graph is::

            START -> generate_query_or_respond --(tool call)--> retrieve
            retrieve --(grade_documents)--> generate_answer -> END
                                        \\-> rewrite_question
            rewrite_question -> generate_query_or_respond

        Without one it is just ``START -> generate_query_or_respond -> END``.

        Relevance grading is attached as the routing function of the
        conditional edge leaving ``retrieve``.  It must not be a node that
        returns ``{"grade_result": ...}``: ``MessagesState`` only declares
        ``messages``, so such an update never reaches the router.

        On any error ``self.graph`` is set to ``None``; nothing is raised.
        """
        try:
            if not self.response_model:
                logger.error("Response model not available")
                return

            workflow = StateGraph(MessagesState)
            workflow.add_node("generate_query_or_respond", self.generate_query_or_respond)
            workflow.add_edge(START, "generate_query_or_respond")

            if self.retriever_tool:
                # The retrieval-dependent nodes are only registered when they
                # can actually be reached.
                workflow.add_node("retrieve", ToolNode([self.retriever_tool]))
                workflow.add_node("rewrite_question", self.rewrite_question)
                workflow.add_node("generate_answer", self.generate_answer)

                workflow.add_conditional_edges(
                    "generate_query_or_respond",
                    tools_condition,
                    {
                        "tools": "retrieve",
                        END: END,
                    },
                )
                workflow.add_conditional_edges(
                    "retrieve",
                    self.grade_documents,
                    {
                        "generate_answer": "generate_answer",
                        "rewrite_question": "rewrite_question",
                    },
                )
                workflow.add_edge("generate_answer", END)
                workflow.add_edge("rewrite_question", "generate_query_or_respond")
            else:
                workflow.add_edge("generate_query_or_respond", END)

            self.graph = workflow.compile()
            logger.info("Workflow compiled successfully")

        except Exception as e:
            logger.exception("Error setting up workflow: %s", e)
            self.graph = None

    def generate_query_or_respond(self, state: MessagesState):
        """Let the model either call the retriever tool or answer directly.

        Args:
            state: graph state; ``state["messages"]`` is the conversation.

        Returns:
            A state update ``{"messages": [message]}``.  Without a retriever
            tool the message is a fixed fallback text; on error it is a
            generic apology.
        """
        try:
            if not self.retriever_tool:
                # Fallback response when retriever is not available.  Written
                # as explicit lines so the trailing spaces in the text are
                # preserved regardless of editor settings.
                fallback_response = (
                    "I'm a legal consulting assistant for small and medium enterprises. While my document retriever is currently unavailable, I can still help answer general questions about:\n"
                    "\n"
                    "- Business formation and structure\n"
                    "- Contract basics and employment law\n"
                    "- Intellectual property fundamentals  \n"
                    "- Compliance and regulatory matters\n"
                    "- General legal considerations for SMEs\n"
                    "\n"
                    "Please note: This is general information only, not legal advice. Always consult qualified legal professionals for specific matters."
                )
                return {"messages": [AIMessage(content=fallback_response)]}

            response = (
                self.response_model
                .bind_tools([self.retriever_tool])
                .invoke(state["messages"])
            )
            return {"messages": [response]}
        except Exception as e:
            logger.exception("Error in generate_query_or_respond: %s", e)
            return {"messages": [AIMessage(content="I'm sorry, I encountered an error. Please try again.")]}

    class GradeDocuments(BaseModel):
        """Grade documents using a binary score for relevance check."""
        binary_score: str = Field(
            description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
        )

    def grade_documents(
        self, state: MessagesState
    ) -> Literal["generate_answer", "rewrite_question"]:
        """Decide whether the retrieved context is good enough to answer.

        Used as the routing function after the ``retrieve`` node.  The first
        message is treated as the user question and the last message as the
        retrieved context.

        Args:
            state: graph state; ``state["messages"]`` is the conversation.

        Returns:
            ``"generate_answer"`` when the grader says the context is
            relevant, when the retrieval budget
            (``MAX_RETRIEVAL_ATTEMPTS``) is exhausted, or when grading
            fails; ``"rewrite_question"`` otherwise.
        """
        try:
            messages = state["messages"]

            # Each model turn carrying tool calls corresponds to one
            # retrieval round.
            attempts = sum(1 for m in messages if getattr(m, "tool_calls", None))
            if attempts >= self.MAX_RETRIEVAL_ATTEMPTS:
                logger.warning(
                    "Retrieval attempt limit (%d) reached; answering with current context",
                    self.MAX_RETRIEVAL_ATTEMPTS,
                )
                return "generate_answer"

            question = messages[0].content
            context = messages[-1].content

            GRADE_PROMPT = (
                "You are a grader assessing relevance of a retrieved document to a user question. \n "
                "Here is the retrieved document: \n\n {context} \n\n"
                "Here is the user question: {question} \n"
                "If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n"
                "Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."
            )

            prompt = GRADE_PROMPT.format(question=question, context=context)
            response = (
                self.grader_model
                .with_structured_output(self.GradeDocuments)
                .invoke([{"role": "user", "content": prompt}])
            )

            # Normalise so that "Yes" / " yes" are not treated as irrelevant.
            is_relevant = str(response.binary_score).strip().lower() == "yes"
            return "generate_answer" if is_relevant else "rewrite_question"
        except Exception as e:
            logger.exception("Error in grade_documents_node: %s", e)
            return "generate_answer"  # Default to generating answer

    def grade_documents_node(self, state: MessagesState):
        """Return the grading decision wrapped in a dict.

        Kept for backward compatibility with callers that expect
        ``{"grade_result": <route>}``.  The workflow itself routes through
        :meth:`grade_documents`, because ``grade_result`` is not a key of
        ``MessagesState`` and therefore cannot be stored in the graph state.
        """
        return {"grade_result": self.grade_documents(state)}

    def rewrite_question(self, state: MessagesState):
        """Ask the model for a better-searchable version of the question.

        Args:
            state: graph state; the first message is taken as the question.

        Returns:
            ``{"messages": [HumanMessage]}`` with the rewritten question, or,
            if rewriting fails, the original first message (empty list when
            there are no messages).
        """
        try:
            messages = state["messages"]
            question = messages[0].content if messages else ""

            REWRITE_PROMPT = (
                "Look at the input and try to reason about the underlying semantic intent / meaning.\n"
                "Here is the initial question:"
                "\n ------- \n"
                "{question}"
                "\n ------- \n"
                "Formulate an improved question that would be better for searching legal consulting information:"
            )

            prompt = REWRITE_PROMPT.format(question=question)
            response = self.response_model.invoke([{"role": "user", "content": prompt}])
            return {"messages": [HumanMessage(content=response.content)]}
        except Exception as e:
            logger.exception("Error in rewrite_question: %s", e)
            # Return original message if rewriting fails
            return {"messages": state["messages"][:1] if state["messages"] else []}

    def generate_answer(self, state: MessagesState):
        """Answer the question from the most recent message's content.

        Args:
            state: graph state; the first message is the question and, when
                more than one message exists, the last one is the context.

        Returns:
            ``{"messages": [response]}`` with the model's answer, or a
            generic apology message on error.
        """
        try:
            messages = state["messages"]
            question = messages[0].content if messages else ""
            context = messages[-1].content if len(messages) > 1 else ""

            GENERATE_PROMPT = (
                "You are an assistant for question-answering tasks about legal consulting for small and medium businesses. "
                "Use the following pieces of retrieved context to answer the question. "
                "If you don't know the answer, just say that you don't know. "
                "Keep the answer concise but informative. Always remind users that this is general information and not legal advice.\n"
                "Question: {question} \n"
                "Context: {context}"
            )

            prompt = GENERATE_PROMPT.format(question=question, context=context)
            response = self.response_model.invoke([{"role": "user", "content": prompt}])
            return {"messages": [response]}
        except Exception as e:
            logger.exception("Error in generate_answer: %s", e)
            return {"messages": [AIMessage(content="I'm sorry, I encountered an error while generating the answer. Please try again.")]}

    def chat(self, message: str, history: list) -> str:
        """Answer one user message; entry point for the Gradio interface.

        Args:
            message: the user's question.
            history: prior chat turns as supplied by the UI.  Currently
                unused: every call starts from a fresh graph state holding
                only ``message``.

        Returns:
            The answer text, or a user-facing notice when the input is
            blank, the bot is not initialized, or processing fails.
        """
        try:
            if not message or not message.strip():
                return "Please enter a question."

            if not self.response_model:
                return "Sorry, the system is not properly initialized. Please check if the API key is configured correctly."

            # Without a compiled graph, ask the model directly.
            if not self.graph:
                prompt = (
                    "You are a helpful assistant specializing in legal consulting for small and medium enterprises. \n"
                    f"                Answer this question: {message}\n"
                    "                \n"
                    "                Always remind users that this is general information only and not professional legal advice."
                )

                response = self.response_model.invoke([{"role": "user", "content": prompt}])
                return response.content if hasattr(response, 'content') else str(response)

            # Run the graph on a state containing only the new message.
            initial_state = {"messages": [HumanMessage(content=message)]}
            result = self.graph.invoke(initial_state)

            # The last message in the final state is the reply.
            if result and "messages" in result and result["messages"]:
                final_message = result["messages"][-1]
                if hasattr(final_message, 'content'):
                    return final_message.content
                return str(final_message)
            return "I'm sorry, I couldn't generate a response. Please try again."

        except Exception as e:
            logger.exception("Error in chat function: %s", e)
            return "An error occurred while processing your request. Please try again."

# Initialize the bot
# The single shared instance is created at import time because the Gradio
# callback defined in create_interface() refers to this module-level `bot`.
# Constructing it runs LegalConsultingBot.initialize_bot().
logger.info("Initializing Legal Consulting Bot...")
bot = LegalConsultingBot()

# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for the legal consulting assistant.

    The layout is an introduction, a chatbot pane with a question box, Send
    and Clear buttons, then example questions and a disclaimer.  Questions
    are answered by the module-level ``bot``.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks(
        title="Legal Consulting Assistant for SMEs",
        theme=gr.themes.Soft(),
        css="""
        .container {
            max-width: 800px;
            margin: auto;
        }
        """
    ) as demo:
        gr.Markdown("""
        # 🏢 Legal Consulting Assistant for Small & Medium Enterprises
        
        Get informed answers about legal consulting, compliance, and business law for SMEs. 
        This assistant uses information from various legal consulting websites to provide relevant guidance.
        
        **⚠️ Important**: This provides general information only and does not constitute professional legal advice.
        """)
        
        with gr.Row():
            with gr.Column(scale=4):
                chatbot = gr.Chatbot(
                    height=500,
                    placeholder="💬 Ask me about legal consulting for your business...",
                    avatar_images=("👤", "🤖"),
                    bubble_full_width=False
                )
                
                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="e.g., What legal structure should I choose for my startup?",
                        label="Your Question",
                        scale=4
                    )
                    submit_btn = gr.Button("Send", variant="primary", scale=1)
                
                clear = gr.Button("🗑️ Clear Chat", variant="secondary")
        
        def respond(message, chat_history):
            """Stream (textbox value, chat history) updates for one question.

            This is a generator: each ``yield`` pushes an update to the UI.
            """
            if chat_history is None:
                chat_history = []

            if not message or not message.strip():
                # A generator's `return <value>` is discarded, so the
                # "nothing to do" update has to be yielded explicitly.
                yield "", chat_history
                return
            
            # Show typing indicator
            chat_history.append((message, "🤔 Thinking..."))
            yield "", chat_history
            
            # Replace the placeholder with the real answer.
            bot_message = bot.chat(message, chat_history)
            chat_history[-1] = (message, bot_message)
            yield "", chat_history
        
        def clear_chat():
            """Return an empty history to reset the chatbot pane."""
            return []
        
        # Event handlers: Enter in the textbox and the Send button share one callback.
        msg.submit(respond, [msg, chatbot], [msg, chatbot])
        submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
        clear.click(clear_chat, outputs=[chatbot])
        
        gr.Markdown("""
        ### 📋 Example Questions
        - *What legal structure should I choose for my small business?*
        - *What compliance requirements do SMEs need to consider?*
        - *How can I protect my business's intellectual property?*
        - *What should be included in employment contracts?*
        
        ### ⚖️ Disclaimer
        This chatbot provides general information about legal consulting for SMEs based on publicly available resources. 
        **This information should not be considered as professional legal advice.** 
        Always consult with qualified legal professionals for specific legal matters and before making important business decisions.
        """)
    
    return demo

# Script entry point: build the UI and serve it on all interfaces, port 7860,
# without a public share link.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)