sharmaarush committed on
Commit
fef5e65
·
1 Parent(s): 3d4ec97

added app

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader
2
+ from langchain.text_splitter import CharacterTextSplitter
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain.llms import HuggingFaceHub
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.chains import ConversationalRetrievalChain
7
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ import gradio as gr
10
+ import os
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+
15
def upload_file(files, input_text):
    """Answer a question about the uploaded PDF files.

    The uploads are copied into a private temporary directory, loaded with
    ``PyPDFLoader``, split into chunks, embedded into a Chroma vector store
    and queried through a ``ConversationalRetrievalChain`` backed by the
    ``mistralai/Mistral-7B-v0.1`` model on the Hugging Face Hub.

    Args:
        files: Iterable of uploaded files. Each item is either an object
            with a ``name`` attribute holding its path on disk, or the path
            itself. May be ``None`` or empty when nothing was uploaded.
        input_text: The question to ask about the uploaded documents.

    Returns:
        The chain's answer as a string, or a short explanatory message when
        no files, no question, or no extractable PDF text was provided.
    """
    # Function-scope imports so this block does not depend on edits to the
    # file-level import list.
    import shutil
    import tempfile
    import uuid

    # Guard clauses: the original crashed with a TypeError on ``None`` files.
    if not files:
        return "Please upload at least one PDF file."
    if not input_text or not input_text.strip():
        return "Please enter a question."

    # A per-call temporary directory replaces the fixed shared path: the
    # loader globs the whole directory, so a shared location would also index
    # PDFs belonging to concurrent or previously failed requests. The context
    # manager removes the copies when the block exits, even on error.
    with tempfile.TemporaryDirectory(prefix="uploaded_files_") as target_directory:
        for index, file in enumerate(files):
            # Accept both file-like wrappers (``.name``) and plain path strings.
            source_path = getattr(file, "name", file)
            filename = os.path.basename(source_path)
            # Prefix with the position so two uploads sharing a basename do
            # not overwrite each other.
            destination = os.path.join(target_directory, f"{index}_{filename}")
            shutil.copyfile(source_path, destination)

        loader = DirectoryLoader(target_directory,
                                 glob='*.pdf',
                                 loader_cls=PyPDFLoader)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                       chunk_overlap=30)
        texts = text_splitter.split_documents(documents)
        if not texts:
            # Nothing to index (no PDFs among the uploads, or no text in them).
            return "No readable PDF content was found in the uploaded files."

        embeddings = HuggingFaceEmbeddings()
        # NOTE(review): without an explicit collection name every call appears
        # to share Chroma's default in-process collection, so earlier uploads
        # could leak into later answers - confirm against the installed
        # langchain/chromadb versions. A unique name isolates each request.
        db = Chroma.from_documents(texts, embeddings,
                                   collection_name=f"upload-{uuid.uuid4().hex}")
        try:
            retriever = db.as_retriever(search_kwargs={'k': 2})
            repo_id = "mistralai/Mistral-7B-v0.1"
            llm = HuggingFaceHub(
                huggingfacehub_api_token=os.getenv("MY_HUGGING_FACE_TOKEN"),
                repo_id=repo_id,
                model_kwargs={"temperature": 0.5, "max_new_tokens": 50},
            )
            qa_chain = ConversationalRetrievalChain.from_llm(
                llm, retriever, return_source_documents=False)
            # Each call is a single-turn conversation, so the history is empty.
            result = qa_chain({'question': input_text, 'chat_history': []})
            return result['answer']
        finally:
            # Drop the per-request collection so it does not accumulate.
            db.delete_collection()
58
+
59
# Wire upload_file into a Gradio UI: the "files" input and a labelled text box
# are passed to the handler, and its return value is shown as text.
interface = gr.Interface(
    title="File and Text Processing",
    description="Upload a file and enter some text. Click 'Submit' to process them together.",
    fn=upload_file,
    inputs=["files", gr.Textbox(label="Enter Text")],
    outputs="text",
)

# Launch the interface.
interface.launch()
70
+