dntwaritag committed on
Commit
ec2e940
·
verified ·
1 Parent(s): fe62881

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -48
app.py CHANGED
@@ -1,64 +1,249 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
9
 
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from the Inference API.

    Builds an OpenAI-style message list from the system prompt, the prior
    (user, assistant) history pairs, and the new user message, then yields
    the progressively accumulated assistant reply.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    # NOTE: renamed the loop variable (was `message`, shadowing the parameter).
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        # delta.content can be None on role/metadata chunks; guard the +=.
        response += token or ""
        yield response
 
41
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
 
46
  demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
  )
61
 
62
-
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
import os

# Read the Groq API key from the environment (Space secret named 'legai').
groq_api_key = os.environ.get('legai')

## LLM used for RAG
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

import PyPDF2

# Prompt used to turn one chunk of PDF text into a single Q/A pair,
# emitted as JSON per the parser's format instructions.
TEMPLATE = """
You are a helpful agent. Your task is to generate a meaningful question and an answer using the following provided "{context}"
You MUST obey the following criteria:
- No preamble.
- Restrict the question to the context information provided and provide answer with its details in summary.
- Do NOT create a question that cannot be answered from the context.
- Phrase the question so that it does NOT refer to specific context.
- For instance, do NOT use phrases like 'given the provided context' or 'in this work' in the question or 'according to the text' in the answer because if the question is asked elsewhere it would not be provided specific context. Replace these terms with specific details.
- Please do NOT repeat the provided context.
- Please Only generate a question and an answer without any sentence in advance such as "Here is the generated question and answer:".
- Please follow the JSON recommended format below.
- Please ensure that the output is a valid JSON object.
{format_instructions}
"""
 
 
 
30
 
31
prompt = ChatPromptTemplate.from_template(template=TEMPLATE)

# Use the imported ResponseSchema class explicitly; the previous plain dicts
# only worked through implicit pydantic coercion.
response_schemas = [
    ResponseSchema(name="Question", description="The generated question from the provided context"),
    ResponseSchema(name="Answer", description="The corresponding answer from the provided context"),
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
# only_json=True keeps the instructions to a bare JSON snippet (no markdown preamble).
format_instructions = output_parser.get_format_instructions(only_json=True)
38
 
39
# Directory scanned for source PDF files.
folder_path = "./"

# Accumulates the (question, answer) tuples generated from the PDFs.
data = []
 
 
 
44
 
45
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated."""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() may return None (e.g. image-only pages);
            # guard so the += never raises TypeError.
            text += page.extract_text() or ""
    return text
53
 
54
# Process each PDF in the folder: chunk its text and ask the LLM to
# generate one (question, answer) pair per chunk.
for filename in os.listdir(folder_path):
    if filename.endswith(".pdf"):
        pdf_path = os.path.join(folder_path, filename)
        try:
            # Extract text from the PDF
            context = extract_text_from_pdf(pdf_path)

            # Split context into manageable chunks of 200 characters
            chunks = [context[i:i+200] for i in range(0, len(context), 200)]

            for chunk in chunks:
                # Fill the Q/A-generation prompt with this chunk
                messages = prompt.format_messages(context=chunk, format_instructions=format_instructions)

                # Invoke the LLM
                response = llm.invoke(messages)

                # Parse the JSON response into a dict with Question/Answer keys
                output_dict = output_parser.parse(response.content)

                question = output_dict["Question"]
                answer = output_dict["Answer"]

                # Append question and answer as a tuple to the list
                data.append((question, answer))

        except Exception as e:
            # Report the offending file (previously hard-coded "(unknown)"
            # even though `filename` is in scope).
            print(f"Error processing file {filename}: {e}")
84
+
85
import PyPDF2  # NOTE(review): duplicate of the earlier import; harmless but removable

# NOTE(review): this re-definition is identical to the earlier
# extract_text_from_pdf and simply rebinds the name — consider deleting one.
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated."""
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() may return None for pages without a text layer
            text += page.extract_text() or ""
    return text
95
+
96
# Split text into consecutive slices no longer than max_length characters.
def chunk_text(text, max_length=500):
    """Return the pieces of *text* cut every *max_length* characters, in order."""
    pieces = []
    for start in range(0, len(text), max_length):
        pieces.append(text[start:start + max_length])
    return pieces
99
+
100
# Specify the path to the PDF file that forms the retrieval corpus
pdf_path = "./LAW Nº 59 ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES.pdf"
# List to hold context data (500-char string chunks of the statute text)
context_data = []

try:
    # Extract text from the PDF
    pdf_text = extract_text_from_pdf(pdf_path)

    if pdf_text:
        # Create chunks of 500 characters; chunk_text already returns plain
        # strings, so assign directly (previously context_data was redundantly
        # re-initialised and refilled with an append loop).
        context_data = chunk_text(pdf_text, max_length=500)

        # Print the context_data list for inspection
        for entry in context_data:
            print(entry)
            print("-" * 40)  # Separator for readability
    else:
        print("No text found in the PDF.")
except Exception as e:
    print(f"Error reading the PDF: {e}")
126
+
127
# Merge the generated Q/A tuples into the statute chunks.
context_data.extend(data)


def _as_text(entry):
    """Render one corpus entry as a plain string for embedding."""
    if isinstance(entry, tuple):
        question, answer = entry
        return f"Question: {question} Answer: {answer}"
    if isinstance(entry, str):
        return entry
    return str(entry)


processed_texts = [_as_text(entry) for entry in context_data]
141
+
142
## Embedding model used to vectorise the statute chunks and Q/A pairs.
from langchain_huggingface import HuggingFaceEmbeddings
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")


# create vector store!
from langchain_chroma import Chroma

vectorstore = Chroma(
    collection_name="laws_dataset",  # name must satisfy Chroma's collection rules
    embedding_function=embed_model,
    persist_directory="./",
)

# (removed: a stray `vectorstore.get().keys()` expression whose result was discarded)

# add data to vector store
vectorstore.add_texts(processed_texts)
161
+
162
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# System prompt framing the model as a legal advisor grounded in the
# retrieved statute context.
template = """Hello there! I'm your legal expert.
Let's dive right in. Feel free to ask your question.
You need to provide clear and accurate legal advice based on the context provided.
If the context isn't relevant or doesn't provide enough information, suggest how to proceed.
Stick to the answer directly.
If the message or query is greetings do the same.
Keep things professional but easy to understand, explaining everything in detail.
Legal Context: {context}
Question: {question}
Legal Advice:"""

rag_prompt = PromptTemplate.from_template(template)

# Retriever over the Chroma vector store built above.
retriever = vectorstore.as_retriever()

# Pipeline: question -> {retrieved context, question} -> prompt -> LLM -> str
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
189
+
190
import gradio as gr


def rag_memory_stream(message, history):
    """Stream the RAG chain's answer, yielding the growing reply each step."""
    pieces = []
    for piece in rag_chain.stream(message):
        pieces.append(piece)
        # Yield the full text accumulated so far, as ChatInterface expects.
        yield "".join(pieces)
197
+
198
# Example questions shown as clickable suggestions in the chat UI.
examples = [
    "What is the main purpose of Law Nº 59/2018 of 22/8/2018?",
    "What happens to a person who deliberately conceals or destroys evidence related to genocide?",
    "What are the penalties for violating a specific article?"
]


# Fixed user-facing typo: "Regal" -> "Legal".
description = (
    "This Legal AI Assistance specializes in LAW Nº 59/2018 OF 22/8/2018 "
    "ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES."
)

title = "⚖️ Chat with me and learn Laws! ⚖️"

# Custom CSS for styling the interface (currently unused — see css= below)
custom_css = """
body {
    font-family: "Times New Roman", serif;
}
.gradio-container {
    font-family: "Times New Roman", serif;
}
.gr-button {
    background-color: #007bff; /* Blue button */
    color: white;
    border: none;
    border-radius: 5px;
    font-size: 16px;
    padding: 10px 20px;
    cursor: pointer;
}
.gr-textbox:focus, .gr-button:focus {
    outline: none; /* Remove outline focus for a cleaner look */
}
"""


# Create the Chat Interface
demo = gr.ChatInterface(
    fn=rag_memory_stream,
    type="messages",
    title=title,
    description=description,
    fill_height=True,
    examples=examples,  # Pass the corrected examples list
    theme="soft",
    #css=custom_css,  # Apply the custom CSS
)

if __name__ == "__main__":
    demo.launch(share=True, inbrowser=True, height=800, width="100%")