Update app.py
app.py
@@ -27,6 +27,25 @@ def restrict_to_pdf_query(query, dataset):
 
     return "\n".join(relevant_content) if relevant_content else "No relevant content found."
 
+# Function to split text into manageable chunks
+def split_text_into_chunks(text, max_tokens=2000):
+    # Split text into chunks that fit within the model's token limit
+    chunks = []
+    current_chunk = ""
+
+    for paragraph in text.split("\n"):
+        # Check token length before adding paragraph
+        if len(current_chunk.split()) + len(paragraph.split()) > max_tokens:
+            chunks.append(current_chunk)
+            current_chunk = paragraph
+        else:
+            current_chunk += "\n" + paragraph
+
+    if current_chunk:  # Add the last chunk
+        chunks.append(current_chunk)
+
+    return chunks
+
 # Load the PDF, convert it to text, and create a JSON dataset
 pdf_path = "PAKISTAN PENAL CODE.pdf"  # Replace with the path to your PDF file
 pdf_text = pdf_to_text(pdf_path)
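Note on the chunker added above: despite the `max_tokens` name, `split_text_into_chunks` counts whitespace-separated words, which is only a rough proxy for model tokens. A minimal sketch of how it behaves; the sample text and the `max_tokens=5` limit are made up for illustration and are not part of the commit:

```python
# Illustrative only: a tiny sample and limit to show the splitting behavior.
sample = "one two three\nfour five six seven\neight"
for i, chunk in enumerate(split_text_into_chunks(sample, max_tokens=5)):
    print(i, repr(chunk))
# 0 '\none two three'            <- leading newline from the += branch on an empty chunk
# 1 'four five six seven\neight'
```

Since whitespace words generally under-count real tokens, the default of 2000 leaves headroom under the model's 8192-token context.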
@@ -53,16 +72,22 @@ if user_query:
     # Get the relevant content from the dataset based on the user's query
     pdf_based_answer = restrict_to_pdf_query(user_query, dataset)
 
-    #
-
-
-
-
-
-
-
-
-
+    # Split the PDF-based answer into smaller chunks to avoid token limits
+    chunks = split_text_into_chunks(pdf_based_answer)
+
+    # Use only the first chunk for this example (or you can query multiple chunks based on user input)
+    if chunks:
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": chunks[0],  # Use the first chunk
+                }
+            ],
+            model="llama3-groq-70b-8192-tool-use-preview",  # Updated model
+        )
 
-
-
+        # Display the result
+        st.write(chat_completion.choices[0].message.content)
+    else:
+        st.write("No relevant content found in the PDF dataset.")
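The new code sends only `chunks[0]` to the model, as the inline comment notes. If every chunk should contribute to the answer, one possible extension is sketched below; the loop, the prompt wording, and the joining of partial answers are assumptions rather than part of this commit (`client`, `user_query`, `st`, and the model name come from the existing app):

```python
# Hypothetical extension: query each chunk and concatenate the partial answers.
answers = []
for chunk in chunks:
    completion = client.chat.completions.create(
        messages=[
            # Pair the user's question with one chunk of PDF context per call.
            {"role": "user", "content": f"{user_query}\n\nContext:\n{chunk}"},
        ],
        model="llama3-groq-70b-8192-tool-use-preview",
    )
    answers.append(completion.choices[0].message.content)

st.write("\n\n".join(answers))
```

Each call stays within the context window because every chunk was already sized by `split_text_into_chunks`.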