import streamlit as st
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
import os
import nltk
import io
import fitz
nltk.download("punkt")
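# Note (version-dependent): newer NLTK releases keep the sentence tokenizer
# tables under "punkt_tab"; if NLTKTextSplitter raises a LookupError further
# down, uncomment the following line:
# nltk.download("punkt_tab")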
st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")
# Set up environment variables: read the Google API key (e.g. from a hosting
# secret); the empty-string default avoids a TypeError when the key is unset
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "")
# Creating a template
chat_template = ChatPromptTemplate.from_messages([
    # System message establishes the bot's role and behavior guidelines
    SystemMessage(content="""You are a Helpful AI Bot.
You take the context and question from user. Your answer should be based on the specific context."""),
    # Human message template carries the retrieved context and the question
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
Context:
{context}
Question:
{question}
Answer: """)
])
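# For reference, a sketch of what the template yields at run time (the
# context and question strings below are made-up placeholders):
#   msgs = chat_template.format_messages(
#       context="Infini-attention combines local and long-range attention...",
#       question="What problem does the paper address?",
#   )
#   # -> [SystemMessage(...), HumanMessage(...)] ready for the chat model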
# Initialize chat model
from langchain_google_genai import ChatGoogleGenerativeAI
chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
# Initialize output parser
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()
# Initialize the chain
chain = chat_template | chat_model | output_parser
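# This plain chain needs both template variables supplied by the caller,
# e.g. (placeholder values, illustration only):
#   chain.invoke({"context": "some pasted text", "question": "What is X?"})
# The rag_chain defined below fills {context} from the retriever instead.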
# Initialize document loaders and splitters
from langchain_community.document_loaders import PDFMinerLoader
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough

# Extract the full text of a PDF with PyMuPDF (fitz), page by page
def extract_text_from_pdf(pdf_file):
    document = fitz.open(stream=pdf_file, filetype="pdf")
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text
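# Usage sketch (illustrative only): fitz.open(stream=...) accepts raw bytes
# or a BytesIO object, so a local file can be read the same way:
#   with open("paper.pdf", "rb") as f:
#       full_text = extract_text_from_pdf(io.BytesIO(f.read()))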
# Streamlit file uploader
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    # Extract text from the uploaded PDF
    pdf_file = io.BytesIO(uploaded_file.read())
    text = extract_text_from_pdf(pdf_file)
    # Split the raw string into overlapping, sentence-aligned chunks;
    # create_documents wraps plain strings into Document objects, which
    # Chroma.from_documents expects (split_documents would fail on a str)
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.create_documents([text])
    # Initialize embeddings and vectorstore
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db")
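    # Note (version-dependent): with a persist_directory set, recent
    # langchain/chromadb versions persist automatically and Chroma.persist()
    # is deprecated; the explicit call below is kept for older versions.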
    print(f"Current working directory: {os.getcwd()}")
    # Check if the 'static' directory exists
    if not os.path.exists('static'):
        print("'static' directory does not exist. Creating it...")
        os.makedirs('static')
    db.persist()
    db_connection = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
    retriever = db_connection.as_retriever(search_kwargs={"k": 5})
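    # Quick check (hypothetical query): the retriever runs a similarity
    # search and returns the top-5 chunks as Document objects:
    #   docs = retriever.invoke("What is Infini-attention?")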
    # Collapse the retrieved Documents into a single context string
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )
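    # Data flow, for reference: the chain is invoked with the question as a
    # plain string; the dict step sends it through the retriever (then
    # format_docs) to fill {context}, while RunnablePassthrough() forwards it
    # unchanged as {question}. Illustrative call (placeholder question):
    #   rag_chain.invoke("Summarize the paper's main contribution.")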
    user_input = st.text_area("Ask Questions to AI")
    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        # Pass the raw question string: the chain's first step routes it to
        # the retriever, which expects a string (a dict here would fail)
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:]")
        st.write(response)
else:
    st.write("Please upload a PDF file to get started.")