ahmedumeraziz committed on
Commit
7acb521
·
verified ·
1 Parent(s): 1aaebe8

Update utils/rag_utils.py

Browse files
Files changed (1) hide show
  1. utils/rag_utils.py +4 -6
utils/rag_utils.py CHANGED
@@ -2,11 +2,9 @@ from PyPDF2 import PdfReader
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain_groq import ChatGroq
4
  from langchain.chains import RetrievalQA
5
- import io
6
 
7
- def process_pdf(uploaded_file):
8
- pdf_bytes = uploaded_file.read()
9
- reader = PdfReader(io.BytesIO(pdf_bytes))
10
  text = "\n".join([page.extract_text() or "" for page in reader.pages])
11
 
12
  splitter = RecursiveCharacterTextSplitter(
@@ -20,12 +18,12 @@ def get_groq_response(query, vector_db, model_name="mixtral-8x7b-32768"):
20
  llm = ChatGroq(
21
  temperature=0.1,
22
  model_name=model_name,
23
- max_tokens=1024
24
  )
25
 
26
  qa = RetrievalQA.from_chain_type(
27
  llm=llm,
28
  chain_type="stuff",
29
- retriever=vector_db.as_retriever(search_kwargs={"k": 3})
30
  )
31
  return qa.run(query)
 
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain_groq import ChatGroq
4
  from langchain.chains import RetrievalQA
 
5
 
6
+ def process_pdf(file_path: str):
7
+ reader = PdfReader(file_path)
 
8
  text = "\n".join([page.extract_text() or "" for page in reader.pages])
9
 
10
  splitter = RecursiveCharacterTextSplitter(
 
18
  llm = ChatGroq(
19
  temperature=0.1,
20
  model_name=model_name,
21
+ max_tokens=2048
22
  )
23
 
24
  qa = RetrievalQA.from_chain_type(
25
  llm=llm,
26
  chain_type="stuff",
27
+ retriever=vector_db.as_retriever(search_kwargs={"k": 4})
28
  )
29
  return qa.run(query)