kinely committed on
Commit
f1ec8ab
·
verified ·
1 Parent(s): f383522

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -19
app.py CHANGED
@@ -18,14 +18,18 @@ def text_to_json(text):
18
  json_data = {"dataset": [{"section": i + 1, "content": para} for i, para in enumerate(paragraphs)]}
19
  return json_data
20
 
21
# Restrict a query to the PDF-derived dataset: collect every section whose
# text contains the query as a case-insensitive substring.
def restrict_to_pdf_query(query, dataset):
    """Return matching section texts joined by newlines, or a fallback message.

    query   -- free-text search string (matched whole, case-insensitive)
    dataset -- dict with a "dataset" list of {"section": int, "content": str}
              entries, as produced by text_to_json
    """
    needle = query.lower()
    matches = [entry["content"]
               for entry in dataset["dataset"]
               if needle in entry["content"].lower()]
    if not matches:
        return "No relevant content found."
    return "\n".join(matches)
29
 
30
  # Function to split text into manageable chunks
31
  def split_text_into_chunks(text, max_tokens=2000):
@@ -72,22 +76,28 @@ if user_query:
72
  # Get the relevant content from the dataset based on the user's query
73
  pdf_based_answer = restrict_to_pdf_query(user_query, dataset)
74
 
75
- # Split the PDF-based answer into smaller chunks to avoid token limits
76
- chunks = split_text_into_chunks(pdf_based_answer)
 
77
 
78
- # Use only the first chunk for this example (or you can query multiple chunks based on user input)
79
- if chunks:
80
- chat_completion = client.chat.completions.create(
81
- messages=[
82
- {
83
- "role": "user",
84
- "content": chunks[0], # Use the first chunk
85
- }
86
- ],
87
- model="llama3-groq-70b-8192-tool-use-preview", # Updated model
88
- )
89
 
90
- # Display the result
91
- st.write(chat_completion.choices[0].message.content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  else:
93
  st.write("No relevant content found in the PDF dataset.")
 
18
  json_data = {"dataset": [{"section": i + 1, "content": para} for i, para in enumerate(paragraphs)]}
19
  return json_data
20
 
21
# Restrict a query to the PDF-derived dataset via keyword matching: a section
# is considered relevant when ANY whitespace-separated word of the query
# appears in its text (case-insensitive substring test).
def restrict_to_pdf_query(query, dataset):
    """Return the list of matching section texts.

    query   -- free-text search string; split on whitespace into keywords
    dataset -- dict with a "dataset" list of {"section": int, "content": str}
              entries, as produced by text_to_json

    Returns the matching "content" strings in dataset order. When nothing
    matches (including an empty query), returns the single-element sentinel
    ["No relevant content found."] so callers always get a non-empty list.
    """
    keywords = query.lower().split()
    hits = []
    for entry in dataset["dataset"]:
        haystack = entry["content"].lower()
        for word in keywords:
            if word in haystack:
                hits.append(entry["content"])
                break  # one hit per section, matching any() semantics
    return hits or ["No relevant content found."]
33
 
34
  # Function to split text into manageable chunks
35
  def split_text_into_chunks(text, max_tokens=2000):
 
76
  # Get the relevant content from the dataset based on the user's query
77
  pdf_based_answer = restrict_to_pdf_query(user_query, dataset)
78
 
79
+ if pdf_based_answer[0] != "No relevant content found.":
80
+ # Combine all relevant content into one string (you can limit this further if needed)
81
+ relevant_text = "\n".join(pdf_based_answer)
82
 
83
+ # Split the relevant content into manageable chunks
84
+ chunks = split_text_into_chunks(relevant_text)
 
 
 
 
 
 
 
 
 
85
 
86
+ # Use only the first chunk (you can modify this to iterate over chunks or dynamically choose a chunk)
87
+ if chunks:
88
+ chat_completion = client.chat.completions.create(
89
+ messages=[
90
+ {
91
+ "role": "user",
92
+ "content": chunks[0], # Send the first chunk of relevant content
93
+ }
94
+ ],
95
+ model="llama3-groq-70b-8192-tool-use-preview", # Updated model
96
+ )
97
+
98
+ # Display the result
99
+ st.write(chat_completion.choices[0].message.content)
100
+ else:
101
+ st.write("Error: Unable to process content into chunks.")
102
  else:
103
  st.write("No relevant content found in the PDF dataset.")