# Spaces:
# Runtime error  (paste artifact from a Hugging Face Spaces error log — not code)
# Runtime error
| # -*- coding: utf-8 -*- | |
| """app.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/14T27f82OgH2BZgVkanyyUKMrM1KBBJjM | |
| """ | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain.prompts import PromptTemplate | |
| from langchain_core.output_parsers import StrOutputParser | |
| import os | |
| from langchain_community.llms import Together | |
# SECURITY(review): a live API key is hard-coded in source — rotate this key and
# supply it via the environment / a secrets manager instead of committing it.
# Using setdefault() so an externally provided TOGETHER_API_KEY wins over the
# embedded fallback (backward-compatible: the variable is still always set).
os.environ.setdefault("TOGETHER_API_KEY", "d94547f9415b99f85bfaa0de7bc43476f3450985ac6bf1ccab9942448293c00e")
| import fitz # PyMuPDF | |
def extract_and_split_pdf(pdf_path, split_key="ENDOFTUT"):
    """Extract all text from a PDF and split it into chunks.

    Parameters
    ----------
    pdf_path : str
        Path to the PDF file to read.
    split_key : str
        Marker string that separates logical chunks inside the document text.

    Returns
    -------
    list[str]
        The concatenated page text split on ``split_key``.
    """
    document = fitz.open(pdf_path)
    try:
        # Concatenate the text of every page before splitting.
        all_text = "".join(
            document.load_page(page_num).get_text()
            for page_num in range(document.page_count)
        )
    finally:
        # BUG FIX: the original never closed the document, leaking the file handle.
        document.close()
    return all_text.split(split_key)
# Build the knowledge base: PDF chunks first, then curated Q&A pairs.
pdf_path = "Mech-chunks.pdf"
combined_list = extract_and_split_pdf(pdf_path)

# Append each JSON Q&A pair as a single retrievable chunk.
import json

# BUG FIX: open with an explicit encoding so JSON parsing does not depend on
# the platform's default locale encoding.
with open('output_data (1).json', 'r', encoding='utf-8') as file:
    data = json.load(file)

for item in data:
    question = item.get("question", "")
    answer = item.get("answer", "")
    combined_list.append(f"Q: {question}?\nA: {answer}\n")
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.schema import Document | |
# Embed every chunk with the BGE model and index the vectors in FAISS.
embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5')
db = FAISS.from_texts(combined_list, embeddings)

# Similarity retriever over the index (top-4 chunks per query).
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})
# Smoke-test retrieval: fetch the top chunks for a sample question and print
# them so the index contents can be eyeballed.
query = "what are the tools needed to change Brake Discs on a Car?"
retrieved_docs = db.similarity_search(query, k=5)
for idx, doc in enumerate(retrieved_docs, start=1):
    print(f"Document {idx}:", doc.page_content)
# Main answering model, served by Together AI.
llm = Together(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    max_tokens=512,          # cap on answer length
    temperature=0.2,         # keep answers mostly deterministic
    repetition_penalty=1.1,  # discourage repetitive text
)
# Moderation model: Llama Guard only needs a few tokens to emit its verdict.
Llama_guard = Together(
    model="meta-llama/Meta-Llama-Guard-3-8B",
    max_tokens=10,           # verdict is one short line ('safe' / 'unsafe')
    temperature=0.2,
    repetition_penalty=1.1,
)
# Llama Guard moderation prompt.  {answer} is filled with the text to check;
# per the instructions below the model must reply 'safe' or 'unsafe' on its
# first line.  NOTE(review): the literal contains minor grammar slips
# ("according our safety policy") that are left untouched on purpose — this
# string is sent to the model at runtime, so its bytes are behavior.
guard_temp_ans = """
<|system|>
Task: Check if there is unsafe content in conversations according our safety policy with the below categories.
<BEGIN UNSAFE CONTENT CATEGORIES>
1: Violence and Hate.
Should not
- Help people plan or engage in violence.
- Encourage people to engage in violence.
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.
- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
- Provide information that can be used to promote or engage in violence, hate, or discrimination.
- Provide guidance or instructions on how to engage in violent or hateful behavior, except if related to 3D and Blender creation for artistic purposes.
2: Sexual Content.
Should not
- Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
- Create or provide explicit depictions of sexual acts or nudity.
- Provide information that could be used to create sexually explicit content
3: Criminal Planning.
Should not
- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in criminal activities or condone criminal behavior.
- Provide detailed instructions or information that could be used to plan or execute criminal activities, except if related to 3D and Blender creation for artistic purposes.
4: Guns and Illegal Weapons.
Should not
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
- Provide information that could be used to create or use illegal weapons, except if related to 3D and Blender creation for artistic purposes.
5: Regulated or Controlled Substances.
Should not
- Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
- Assist or encourage people to create such substances when it is illegal to do so.
- Provide information that could be used to create or use regulated or controlled substances, except if related to 3D and Blender creation for artistic purposes.
6: Self-Harm.
Should not
- Encourage people to harm themselves.
- Romanticize or condone self-harm or suicide.
- Provide information regarding the methods of suicide or self-harm.
- Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
- Provide information that could be used to promote or engage in self-harm, except if related to 3D and Blender creation for artistic purposes.
<END UNSAFE CONTENT CATEGORIES>
<BEGIN CONVERSATION>
{answer}
<END CONVERSATION>
Provide your safety assessment for {answer} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, do not include a second line.
"""
# Chain that renders the safety prompt and sends it to Llama Guard.
prompt_ans_guard = ChatPromptTemplate.from_template(guard_temp_ans)
guard_chain = prompt_ans_guard | Llama_guard
def moderate(chat):
    """Run *chat* through Llama Guard and return its verdict text.

    Parameters
    ----------
    chat : str
        The text to moderate.

    Returns
    -------
    str
        The guard model's output ('safe', or 'unsafe' with a category line).

    BUG FIX: the original body called ``apply_chat_template``/``generate``/
    ``decode`` on names (``device``, ``model``) that are never defined in this
    file, and a ``Together`` LLM has no such methods — every call raised
    ``NameError``.  Route the check through the already-configured
    ``guard_chain`` instead, matching how answer_question() moderates input.
    """
    return guard_chain.invoke({"answer": chat})
# Prompt fed to the answering LLM.  FIX(review): the original instructions were
# garbled ("these human will ask", "use Use following piece", "If the question
# is start with how to") — the wording is corrected; the placeholders and the
# overall contract (history + context + question -> answer) are unchanged.
prompt_template = PromptTemplate(
    input_variables=["context", "question", "history"],
    template=(
        """
You are a mechanic assistant and your name is MechBot. The user will ask you questions about cars.
Use the following piece of context and the chat history to answer the question.
If you don't know the answer, just say you don't know.
If the question starts with "how to", answer with steps and mention the tools if you know them.
Chat History: ({history})
Context: ({context})
Question: {question}
Answer:
"""
    ),
)

# Full answering pipeline: prompt -> Together LLM -> plain string.
llm_chain = prompt_template | llm | StrOutputParser()
def answer_question(question, gh):
    """Answer one user question with retrieval-augmented generation.

    Parameters
    ----------
    question : str
        The user's message.
    gh :
        Chat history passed in by ``gr.ChatInterface``; unused — this app keeps
        its own rolling ``history`` string at module level instead.

    Returns
    -------
    str
        The model's answer, or a refusal when Llama Guard flags the question.
    """
    global counter
    global history
    global reter

    # Moderate the incoming question before doing any work.
    verdict = guard_chain.invoke({"answer": question})
    if "unsafe" in verdict:
        return "I'm sorry, but I can't respond to that question as it may contain inappropriate content."

    # Retrieve supporting chunks (k=2 keeps the prompt compact).
    docs = db.similarity_search(question, k=2)
    reter = "".join(d.page_content + "\n" for d in docs)

    # Keep the rolling history bounded so the prompt stays within budget.
    if len(history) > 3000:
        history = history[-2000:]

    # Debug aid: show exactly what the LLM will receive.
    formatted_prompt = prompt_template.format(context=reter, history=history, question=question)
    print("Formatted Prompt:")
    print(formatted_prompt)

    answer = llm_chain.invoke({"context": reter, "history": history, "question": question})
    history += "\n" + "user question: " + question + "\n" + "AI answer: " + answer
    counter += 1
    return answer
import gradio as gr

# Module-level conversation state consumed by answer_question().
history = ""
counter = 1

# Build the chat UI around answer_question and launch it.
iface = gr.ChatInterface(
    answer_question,
    title="Mech-bot: Your Car Mechanic Assistant",
    description="Ask any car mechanic-related questions, and Mech-bot will try its best to assist you.",
    submit_btn="Ask",
    clear_btn="Clear Chat",
)
iface.launch(debug=True)