Spaces:

Hamza011
/

chat_with_docs

Sleeping

App Files Files Community

Hamza011 committed on Feb 13, 2024

Commit

196c8fb

1 Parent(s): ee415ff

app.py

Browse files

Files changed (1) hide show

app.py +127 -0

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+from PyPDF2 import PdfReader,PdfWriter
+import gradio as gr
+from langchain.embeddings import CohereEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain import OpenAI
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+import spacy
+nlp = spacy.load('en_core_web_md')
+text_splitter = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 0)
+embedding = CohereEmbeddings(model='embed-multilingual-v3.0',cohere_api_key=COHERE_API_KEY)
+def recieve_pdf(filename):
+    reader = PdfReader(filename)
+    writer = PdfWriter()
+    for page in reader.pages:
+        writer.add_page(page)
+    with open('processed_file.pdf','wb') as f:
+        writer.write(f)
+    read = PdfReader('processed_file.pdf')
+    extracted_file =[page.extract_text(0) for page in read.pages]
+    extracted_text = ''.join(extracted_file)
+    global file
+    file = extracted_text
+    summary_prompt_formated = summary_prompt.format(document = extracted_text)
+    return llm(summary_prompt_formated)
+def chatbot(query,history):
+    similarity_array =[]
+    embeded_query = embedding.embed_documents([query])
+    doc = nlp(file)
+    sentences_1 = [str(sentence) for sentence in doc.sents]
+    embedded_text = embedding.embed_documents(sentences_1)
+    similarity_score = cosine_similarity(embeded_query,embedded_text)
+    similarity_array.append(similarity_score)
+    most_similar_index = np.argmax(similarity_array)
+    most_similar_documents = sentences_1[most_similar_index]
+    splitter_text = text_splitter.split_text(file)
+    recursive_embedded_text = embedding.embed_documents(splitter_text)
+    most_similar_embed = embedding.embed_documents([most_similar_documents])
+    final_similarity_score = cosine_similarity(most_similar_embed,recursive_embedded_text)
+    final_similarity_index = np.argmax(final_similarity_score)
+    final_document = splitter_text[final_similarity_index]
+    prompt_formated = prompt.format(context = final_document, query = query)
+    repsonse = llm(prompt_formated)
+    history.append((query, repsonse))
+    return '', history
+summary_template = """ You an article summarizer and have been provided with this file
+{document}
+provide a one line summary of the content of the provides file.
+"""
+summary_prompt = PromptTemplate(input_variables= ['document'], template=summary_template)
+template = """ You are a knowledgeable chatbot that gently answers questions.
+You know the following context information.
+{context}
+Answer to the following question from a user. Use only information from the previous context. Do not invent or assume stuff.
+Question: {query}
+Answer:"""
+prompt = PromptTemplate(input_variables= ['context', 'query'], template= template)
+llm = OpenAI(model= 'gpt-3.5-turbo-instruct' , temperature= 0)
+with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
+    signal = gr.Markdown('''# Welcome to Chat with Docs
+                          I am an AI that recieves a document and can answer questions on the content of the document.''')
+    inp = gr.File()
+    out = gr.Textbox(label= 'Summary')
+    inp.upload(fn= recieve_pdf,inputs= inp,outputs=out,show_progress=True)
+    signal_1 = gr.Markdown('Use the Textbox below to chat. **Ask** questions regarding the pdf you uploaded')
+    chat = gr.Chatbot()
+    msg = gr.Textbox(info='input your chat')
+    with gr.Row():
+        submit = gr.Button('Send')
+        clear = gr.ClearButton([msg,chat])
+    msg.submit(chatbot, [msg, chat], [msg ,chat])
+    submit.click(chatbot, [msg, chat], [msg ,chat])
+    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')
+demo.launch()