# Hugging Face Spaces page residue ("Spaces: Sleeping") — kept as a
# comment so the file remains valid Python.
from PyPDF2 import PdfReader,PdfWriter
import gradio as gr
from langchain.embeddings import CohereEmbeddings
from langchain.prompts import PromptTemplate
from langchain import OpenAI
from langchain_cohere import ChatCohere
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import spacy
# NOTE(review): downloading the spaCy model at import time is a network side
# effect on every cold start; acceptable for a hosted demo, but slow.
spacy.cli.download("en_core_web_md")
nlp = spacy.load('en_core_web_md')
from dotenv import load_dotenv
load_dotenv()  # pull API keys from a local .env file into the environment
# NOTE(review): OPENAI_API_KEY (and the OpenAI import above) are never used
# in this file — candidates for removal if no other module relies on them.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv('COHERE_API_KEY')
# 200-character chunks, no overlap — used for the second retrieval pass in chatbot().
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 0)
# Cohere embedding client shared by both retrieval passes in chatbot().
embedding = CohereEmbeddings(model='embed-multilingual-v3.0',cohere_api_key=COHERE_API_KEY)
def recieve_pdf(filename):
    """Handle a PDF upload: persist a copy and cache the full extracted text.

    Args:
        filename: Path to the uploaded PDF (supplied by the gr.File widget).

    Returns:
        A status message displayed in the "Summary" textbox.

    Side effects:
        Writes a copy to 'processed_file.pdf' and stores the concatenated
        page text in the module-level ``file`` global, which chatbot()
        reads on every query.
    """
    reader = PdfReader(filename)
    # Keep a copy of the upload on disk (preserves the original behavior).
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    with open('processed_file.pdf', 'wb') as f:
        writer.write(f)
    # Extract text directly from the already-open reader instead of
    # re-reading the copy from disk — the round trip added nothing.
    # extract_text() with no argument considers all text orientations;
    # the original extract_text(0) silently dropped rotated text.
    extracted_text = ''.join(page.extract_text() for page in reader.pages)
    global file
    file = extracted_text
    return 'Document successfully uploaded'
def chatbot(query, history):
    """Answer a question about the uploaded document and extend the chat.

    Two-pass retrieval:
      1. Embed the query and every spaCy sentence of the document; pick
         the sentence most cosine-similar to the query.
      2. Embed the 200-char recursive chunks and pick the chunk most
         similar to that sentence; feed it to the LLM as context.

    Args:
        query: The user's question (from the chat textbox).
        history: Gradio chat history, a list of (user, bot) tuples,
            mutated in place.

    Returns:
        ('', history) — clears the textbox and refreshes the Chatbot.

    Raises:
        NameError: if no document has been uploaded yet (``file`` unset).
    """
    embedded_query = embedding.embed_documents([query])
    # Pass 1: sentence-level retrieval.
    sentences = [str(sentence) for sentence in nlp(file).sents]
    embedded_sentences = embedding.embed_documents(sentences)
    best_sentence_index = np.argmax(cosine_similarity(embedded_query, embedded_sentences))
    # Reuse the embedding already computed above instead of making a second
    # API call for the same sentence (the original re-embedded it).
    best_sentence_embedding = [embedded_sentences[best_sentence_index]]
    # Pass 2: chunk-level retrieval anchored on the best sentence.
    chunks = text_splitter.split_text(file)
    embedded_chunks = embedding.embed_documents(chunks)
    best_chunk_index = np.argmax(cosine_similarity(best_sentence_embedding, embedded_chunks))
    final_document = chunks[best_chunk_index]
    prompt_formated = prompt.format(context=final_document, query=query)
    response = llm.invoke(prompt_formated).content
    history.append((query, response))
    return '', history
# Prompt for a one-line summary of the uploaded document.
# NOTE(review): currently unused — recieve_pdf() returns a fixed status
# string and its summary-formatting call is commented out.
summary_template = """ You are an article summarizer and have been provided with this file
{document}
provide a one line summary of the content of the provided file.
"""
summary_prompt = PromptTemplate(input_variables=['document'], template=summary_template)

# Q&A prompt: instructs the model to answer strictly from the retrieved
# context chunk and never invent information.
template = """ You are a knowledgeable chatbot that gently answers questions.
You know the following context information.
{context}
Answer to the following question from a user. Use only information from the previous context. Do not invent or assume stuff.
Question: {query}
Answer:"""
prompt = PromptTemplate(input_variables=['context', 'query'], template=template)

# Chat model. Reuse the COHERE_API_KEY constant read at the top of the file
# (the original re-read the environment here, inconsistently).
llm = ChatCohere(cohere_api_key=COHERE_API_KEY)
# --- Gradio UI -----------------------------------------------------------
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    signal = gr.Markdown('''# Welcome to Chat with Docs
    I am an AI that receives a **PDF** and can answer questions on the content of the document.''')
    # Upload widget: recieve_pdf() caches the document text and reports
    # a status message into the "Summary" textbox.
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    inp.upload(fn=recieve_pdf, inputs=inp, outputs=out, show_progress=True)
    signal_1 = gr.Markdown('Use the Textbox below to chat. **Ask** questions regarding the pdf you uploaded')
    chat = gr.Chatbot()
    msg = gr.Textbox(info='input your chat')
    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])
    # Both pressing Enter in the textbox and clicking Send route through
    # chatbot(); it returns ('', history) to clear the box and refresh.
    msg.submit(chatbot, [msg, chat], [msg, chat])
    submit.click(chatbot, [msg, chat], [msg, chat])
    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')
demo.launch()