Spaces:
Sleeping
Sleeping
Update utils/rag_utils.py
Browse files - utils/rag_utils.py +4 -6
utils/rag_utils.py
CHANGED
|
@@ -2,11 +2,9 @@ from PyPDF2 import PdfReader
|
|
| 2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 3 |
from langchain_groq import ChatGroq
|
| 4 |
from langchain.chains import RetrievalQA
|
| 5 |
-
import io
|
| 6 |
|
| 7 |
-
def process_pdf(pdf_bytes):
|
| 8 |
-
|
| 9 |
-
reader = PdfReader(io.BytesIO(pdf_bytes))
|
| 10 |
text = "\n".join([page.extract_text() or "" for page in reader.pages])
|
| 11 |
|
| 12 |
splitter = RecursiveCharacterTextSplitter(
|
|
@@ -20,12 +18,12 @@ def get_groq_response(query, vector_db, model_name="mixtral-8x7b-32768"):
|
|
| 20 |
llm = ChatGroq(
|
| 21 |
temperature=0.1,
|
| 22 |
model_name=model_name,
|
| 23 |
-
max_tokens=
|
| 24 |
)
|
| 25 |
|
| 26 |
qa = RetrievalQA.from_chain_type(
|
| 27 |
llm=llm,
|
| 28 |
chain_type="stuff",
|
| 29 |
-
retriever=vector_db.as_retriever(search_kwargs={"k":
|
| 30 |
)
|
| 31 |
return qa.run(query)
|
|
|
|
| 2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 3 |
from langchain_groq import ChatGroq
|
| 4 |
from langchain.chains import RetrievalQA
|
|
|
|
| 5 |
|
| 6 |
+
def process_pdf(file_path: str):
|
| 7 |
+
reader = PdfReader(file_path)
|
|
|
|
| 8 |
text = "\n".join([page.extract_text() or "" for page in reader.pages])
|
| 9 |
|
| 10 |
splitter = RecursiveCharacterTextSplitter(
|
|
|
|
| 18 |
llm = ChatGroq(
|
| 19 |
temperature=0.1,
|
| 20 |
model_name=model_name,
|
| 21 |
+
max_tokens=2048
|
| 22 |
)
|
| 23 |
|
| 24 |
qa = RetrievalQA.from_chain_type(
|
| 25 |
llm=llm,
|
| 26 |
chain_type="stuff",
|
| 27 |
+
retriever=vector_db.as_retriever(search_kwargs={"k": 4})
|
| 28 |
)
|
| 29 |
return qa.run(query)
|