# Pdf_Tutor / app.py
# sharmaarush's picture
# added app
# fef5e65
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment; upload_file later reads MY_HUGGING_FACE_TOKEN via os.getenv.
load_dotenv()
def upload_file(files, input_text):
    """Answer a question using the content of the uploaded PDF files.

    The uploads are copied into a private temporary directory, every ``*.pdf``
    in it is loaded and split into chunks, the chunks are embedded into a
    Chroma vector store, and a ConversationalRetrievalChain backed by the
    ``mistralai/Mistral-7B-v0.1`` Hugging Face Hub model is asked the question.

    Args:
        files: Uploaded files as delivered by the Gradio "files" input. Each
            item may be an object exposing the on-disk path as ``.name`` or a
            plain path (``str`` / ``os.PathLike``). ``None`` or an empty list
            means nothing was uploaded.
        input_text: The question to ask about the documents.

    Returns:
        The answer text produced by the chain, or a short explanatory message
        when no file was uploaded, no question was entered, or no text could
        be extracted from the uploads.
    """
    # Function-scope imports keep this block self-contained; both are stdlib.
    import shutil
    import tempfile

    if not files:
        return "Please upload at least one PDF file."
    if not input_text or not input_text.strip():
        return "Please enter a question."

    # A per-call temporary directory isolates this request from leftovers and
    # from concurrent requests (the loader globs the whole directory), and it
    # is removed automatically even if copying or loading fails part-way.
    with tempfile.TemporaryDirectory() as staging_dir:
        for index, file in enumerate(files):
            if isinstance(file, (str, os.PathLike)):
                source_path = os.fspath(file)
            else:
                source_path = file.name
            filename = os.path.basename(source_path)
            # The index prefix keeps two uploads with the same basename from
            # overwriting each other.
            staged_path = os.path.join(staging_dir, f"{index}_{filename}")
            shutil.copyfile(source_path, staged_path)

        loader = DirectoryLoader(staging_dir,
                                 glob='*.pdf',
                                 loader_cls=PyPDFLoader)
        # load() returns the parsed documents, so the staged copies are not
        # needed once this call has returned.
        documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=30)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # Nothing to index (no *.pdf among the uploads, or no extractable
        # text); building a vector store from an empty list would fail.
        return "No readable text was found in the uploaded PDF files."

    embeddings = HuggingFaceEmbeddings()
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever(search_kwargs={'k': 2})

    repo_id = "mistralai/Mistral-7B-v0.1"
    llm = HuggingFaceHub(
        huggingfacehub_api_token=os.getenv("MY_HUGGING_FACE_TOKEN"),
        repo_id=repo_id,
        model_kwargs={"temperature": 0.5, "max_new_tokens": 50},
    )
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever, return_source_documents=False)

    # Each call is a fresh, single-turn conversation, so the history is empty.
    result = qa_chain({'question': input_text, 'chat_history': []})
    return result['answer']
# Build the Gradio UI: the "files" upload input and a text box are passed to
# upload_file, and its return value is shown as plain text.
interface = gr.Interface(
    upload_file,
    ["files", gr.Textbox(label="Enter Text")],
    "text",
    description="Upload a file and enter some text. Click 'Submit' to process them together.",
    title="File and Text Processing",
)
# Start serving the UI
interface.launch()