Meghna05 committed on
Commit
690f23e
·
verified ·
1 Parent(s): adf613a

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +55 -0
  2. csp.pdf +0 -0
  3. requirements.txt.txt +6 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
from PyPDF2 import PdfReader
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# NOTE(review): Concatenate is not used anywhere in the visible code; kept
# only because removing a file-level import may affect code not shown here.
from typing_extensions import Concatenate

# SECURITY: an OpenAI API key was previously hard-coded at this point and
# committed to the repository. That key must be treated as compromised and
# revoked. The key is now expected to be supplied through the environment
# (e.g. a deployment secret) and is never written into source control.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Provide it as an environment variable "
        "or deployment secret instead of hard-coding it in app.py."
    )

# Path of the PDF file that is indexed for question answering.
pdfreader = PdfReader('csp.pdf')

# Read the text of every page; pages that yield no text are skipped.
page_texts = [page.extract_text() for page in pdfreader.pages]
raw_text = "".join(text for text in page_texts if text)

# Split the document on newlines into overlapping chunks, with sizes
# measured by len().
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# Embed the chunks and build the FAISS index used for similarity search.
embeddings = OpenAIEmbeddings()
document_search = FAISS.from_texts(texts, embeddings)

# Question-answering chain of type "stuff" backed by the OpenAI LLM wrapper.
chain = load_qa_chain(OpenAI(), chain_type="stuff")
32
+
33
+
34
def process_query(query):
    """Answer a user query from the indexed document.

    Runs a similarity search on the module-level ``document_search`` index
    and passes the retrieved documents, together with the query, to the
    module-level ``chain``.

    Args:
        query: Question text entered by the user.

    Returns:
        The value returned by ``chain.run``, or a short prompt message when
        the query is missing, empty, or whitespace-only.
    """
    # Nothing to search for: return early instead of invoking the index
    # and the chain with an empty question.
    if not query or not query.strip():
        return "Please enter a query."

    # Retrieve the documents most similar to the query.
    docs = document_search.similarity_search(query)

    # Run the chain over the retrieved documents and the user query.
    return chain.run(input_documents=docs, question=query)
44
+
45
# Gradio UI: one text input, one text output, wired to process_query.
iface = gr.Interface(
    process_query,  # called with the text the user submits
    "text",  # input component
    "text",  # output component
    title="Document Search and Chain Process",
    description="Enter your query to search for relevant documents and run the chain process.",
)

# Start serving the interface.
iface.launch()
csp.pdf ADDED
Binary file (391 kB). View file
 
requirements.txt.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ PyPDF2
4
+ faiss-cpu
5
+ tiktoken
6
+ gradio