kinely committed on
Commit
f383522
·
verified ·
1 Parent(s): 3f63360

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -12
app.py CHANGED
@@ -27,6 +27,25 @@ def restrict_to_pdf_query(query, dataset):
27
 
28
  return "\n".join(relevant_content) if relevant_content else "No relevant content found."
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Load the PDF, convert it to text, and create a JSON dataset
31
  pdf_path = "PAKISTAN PENAL CODE.pdf" # Replace with the path to your PDF file
32
  pdf_text = pdf_to_text(pdf_path)
@@ -53,16 +72,22 @@ if user_query:
53
  # Get the relevant content from the dataset based on the user's query
54
  pdf_based_answer = restrict_to_pdf_query(user_query, dataset)
55
 
56
- # Get completion from the Groq model using the updated model name
57
- chat_completion = client.chat.completions.create(
58
- messages=[
59
- {
60
- "role": "user",
61
- "content": pdf_based_answer,
62
- }
63
- ],
64
- model="llama3-groq-70b-8192-tool-use-preview", # Updated model
65
- )
 
 
 
 
66
 
67
- # Display the result
68
- st.write(chat_completion.choices[0].message.content)
 
 
 
27
 
28
  return "\n".join(relevant_content) if relevant_content else "No relevant content found."
29
 
30
# Function to split text into manageable chunks
def split_text_into_chunks(text, max_tokens=2000):
    """Split *text* into newline-delimited chunks of at most *max_tokens* words.

    "Tokens" are approximated by whitespace-separated words; this is only a
    rough proxy for a model tokenizer, so callers should leave some headroom
    below the model's real context limit.

    Lines (split on "\\n") are packed greedily into a chunk until adding the
    next line would exceed the budget. A single line that is larger than the
    budget on its own is hard-split on word boundaries so that no chunk ever
    exceeds ``max_tokens`` words.

    Args:
        text: The text to split.
        max_tokens: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of non-empty chunk strings, in original order. Empty or
        whitespace-only input yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")

    chunks = []
    current_lines = []
    # Running word count of current_lines, so the accumulated chunk does not
    # have to be re-split on every iteration.
    current_words = 0

    for paragraph in text.split("\n"):
        words = paragraph.split()

        # Never start a chunk with blank lines.
        if not words and not current_lines:
            continue

        if current_words + len(words) > max_tokens:
            # Flush what has been collected so far (if anything) before
            # starting a new chunk; this never emits an empty chunk.
            if current_words:
                chunks.append("\n".join(current_lines).rstrip())
            current_lines = []
            current_words = 0

            if len(words) > max_tokens:
                # The line alone is over budget: emit full-size pieces and
                # keep the final piece (1..max_tokens words) as the start of
                # the next chunk so following lines can still be packed in.
                last_start = (len(words) - 1) // max_tokens * max_tokens
                for start in range(0, last_start, max_tokens):
                    chunks.append(" ".join(words[start:start + max_tokens]))
                words = words[last_start:]
                paragraph = " ".join(words)

        current_lines.append(paragraph)
        current_words += len(words)

    if current_words:  # Add the last chunk
        chunks.append("\n".join(current_lines).rstrip())

    return chunks
48
+
49
  # Load the PDF, convert it to text, and create a JSON dataset
50
  pdf_path = "PAKISTAN PENAL CODE.pdf" # Replace with the path to your PDF file
51
  pdf_text = pdf_to_text(pdf_path)
 
72
  # Get the relevant content from the dataset based on the user's query
73
  pdf_based_answer = restrict_to_pdf_query(user_query, dataset)
74
 
75
+ # Split the PDF-based answer into smaller chunks to avoid token limits
76
+ chunks = split_text_into_chunks(pdf_based_answer)
77
+
78
+ # Use only the first chunk for this example (or you can query multiple chunks based on user input)
79
+ if chunks:
80
+ chat_completion = client.chat.completions.create(
81
+ messages=[
82
+ {
83
+ "role": "user",
84
+ "content": chunks[0], # Use the first chunk
85
+ }
86
+ ],
87
+ model="llama3-groq-70b-8192-tool-use-preview", # Updated model
88
+ )
89
 
90
+ # Display the result
91
+ st.write(chat_completion.choices[0].message.content)
92
+ else:
93
+ st.write("No relevant content found in the PDF dataset.")