Update app.py
app.py CHANGED
@@ -18,14 +18,18 @@ def text_to_json(text):
     json_data = {"dataset": [{"section": i + 1, "content": para} for i, para in enumerate(paragraphs)]}
     return json_data

-# Function to restrict query results to the PDF dataset
+# Function to restrict query results to the PDF dataset (returns relevant content)
 def restrict_to_pdf_query(query, dataset):
     relevant_content = []
+    query_keywords = query.lower().split()  # Split query into keywords
+
     for section in dataset["dataset"]:
-
+        section_content = section["content"].lower()
+        # Check if any of the keywords are present in the section content
+        if any(keyword in section_content for keyword in query_keywords):
             relevant_content.append(section["content"])
-
-    return
+
+    return relevant_content if relevant_content else ["No relevant content found."]

 # Function to split text into manageable chunks
 def split_text_into_chunks(text, max_tokens=2000):
@@ -72,22 +76,28 @@ if user_query:
     # Get the relevant content from the dataset based on the user's query
     pdf_based_answer = restrict_to_pdf_query(user_query, dataset)

-
-
+    if pdf_based_answer[0] != "No relevant content found.":
+        # Combine all relevant content into one string (you can limit this further if needed)
+        relevant_text = "\n".join(pdf_based_answer)

-
-
-        chat_completion = client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": chunks[0],  # Use the first chunk
-                }
-            ],
-            model="llama3-groq-70b-8192-tool-use-preview",  # Updated model
-        )
+        # Split the relevant content into manageable chunks
+        chunks = split_text_into_chunks(relevant_text)

-        #
-
+        # Use only the first chunk (you can modify this to iterate over chunks or dynamically choose a chunk)
+        if chunks:
+            chat_completion = client.chat.completions.create(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": chunks[0],  # Send the first chunk of relevant content
+                    }
+                ],
+                model="llama3-groq-70b-8192-tool-use-preview",  # Updated model
+            )
+
+            # Display the result
+            st.write(chat_completion.choices[0].message.content)
+        else:
+            st.write("Error: Unable to process content into chunks.")
     else:
         st.write("No relevant content found in the PDF dataset.")
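
For reference, a small, self-contained sketch of how the updated restrict_to_pdf_query behaves. The function body is taken from this diff; the toy dataset and example queries are invented for illustration and only mirror the {"section", "content"} structure that text_to_json produces.

# Sketch: exercising the updated restrict_to_pdf_query on a made-up dataset.
def restrict_to_pdf_query(query, dataset):
    relevant_content = []
    query_keywords = query.lower().split()  # Split query into keywords
    for section in dataset["dataset"]:
        section_content = section["content"].lower()
        # Keep the section if any query keyword appears in its text
        if any(keyword in section_content for keyword in query_keywords):
            relevant_content.append(section["content"])
    return relevant_content if relevant_content else ["No relevant content found."]

# Toy dataset (illustrative only), shaped like the output of text_to_json
dataset = {
    "dataset": [
        {"section": 1, "content": "Invoices are processed within 30 days."},
        {"section": 2, "content": "Refund requests go through the billing team."},
    ]
}

print(restrict_to_pdf_query("refund policy", dataset))
# ['Refund requests go through the billing team.']
print(restrict_to_pdf_query("shipping times", dataset))
# ['No relevant content found.']

Note that matching is a per-keyword substring check on lowercased text, so a single shared word (even a common one such as "the") is enough for a section to count as relevant.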
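
And a rough sketch of the full query path these changes wire up, written as a plain script rather than the Streamlit app. The Groq client construction and the body of split_text_into_chunks are not visible in this diff, so both are assumptions here (the standard groq SDK client and a simple word-count splitter), and the stand-in pdf_based_answer value is likewise invented.

# Assumed setup: the standard groq SDK client; replace the placeholder key with a real one.
from groq import Groq

client = Groq(api_key="YOUR_GROQ_API_KEY")  # placeholder

def split_text_into_chunks(text, max_tokens=2000):
    # Assumed implementation: approximate tokens with whitespace-separated words;
    # the real helper in app.py is not shown in this hunk.
    words = text.split()
    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]

# Stand-in for restrict_to_pdf_query(user_query, dataset)
pdf_based_answer = ["Refund requests go through the billing team."]

if pdf_based_answer[0] != "No relevant content found.":
    relevant_text = "\n".join(pdf_based_answer)     # combine the relevant sections
    chunks = split_text_into_chunks(relevant_text)  # split into manageable chunks
    if chunks:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": chunks[0]}],  # first chunk only, as in the commit
            model="llama3-groq-70b-8192-tool-use-preview",
        )
        print(chat_completion.choices[0].message.content)  # the app uses st.write here
else:
    print("No relevant content found in the PDF dataset.")

Because only chunks[0] is sent, any relevant content beyond the first chunk is dropped; iterating over the chunks (or summarizing them first), as the commit's own comment suggests, would be the natural next step.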