kinely committed on
Commit
6c5a238
·
verified ·
1 Parent(s): 97edd73

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import fitz # PyMuPDF
4
+ from groq import Groq
5
+ import streamlit as st
6
+
7
+ # Function to convert PDF to text
8
def pdf_to_text(pdf_path):
    """Extract the plain text of every page in a PDF.

    Args:
        pdf_path: Path to the PDF file; passed straight to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order.
    """
    # Use the document as a context manager so the underlying file handle is
    # released even if text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        # join() avoids the quadratic cost of repeated string concatenation.
        return "".join(page.get_text() for page in doc)
14
+
15
+ # Function to convert extracted text into a structured JSON format
16
def text_to_json(text):
    """Convert extracted text into a JSON-serializable dataset of sections.

    The text is split on blank lines (``"\\n\\n"``). Each non-empty paragraph
    becomes one entry, numbered sequentially from 1.

    Args:
        text: The raw text to split.

    Returns:
        A dict of the form
        ``{"dataset": [{"section": 1, "content": "..."}, ...]}``.
        Empty or whitespace-only input yields ``{"dataset": []}``.
    """
    # Strip each chunk and drop blanks so that runs of several newlines (or
    # empty input) do not produce empty sections.
    paragraphs = [chunk.strip() for chunk in text.split("\n\n")]
    paragraphs = [chunk for chunk in paragraphs if chunk]
    return {
        "dataset": [
            {"section": number, "content": chunk}
            for number, chunk in enumerate(paragraphs, start=1)
        ]
    }
20
+
21
+ # Function to restrict query results to the PDF dataset
22
def restrict_to_pdf_query(query, dataset):
    """Return the dataset sections whose content contains the query.

    Matching is a case-insensitive substring test of the whole query against
    each section's ``"content"``.

    Args:
        query: The search string. Surrounding whitespace is ignored.
        dataset: A dict with a ``"dataset"`` list of section dicts, each
            having a ``"content"`` string.

    Returns:
        The matching section contents joined by newlines, or the string
        ``"No relevant content found."`` when nothing matches or the query
        is blank.
    """
    no_match = "No relevant content found."

    # Normalize once, outside the loop.
    needle = query.strip().casefold()
    # An empty string is a substring of everything; without this guard a
    # blank query would return the entire document.
    if not needle:
        return no_match

    matches = [
        section["content"]
        for section in dataset["dataset"]
        if needle in section["content"].casefold()
    ]
    return "\n".join(matches) if matches else no_match
29
+
30
# Load the PDF, convert it to text, and create a JSON dataset
pdf_path = "your_file.pdf"  # Replace with the path to your PDF file
pdf_text = pdf_to_text(pdf_path)
dataset_json = text_to_json(pdf_text)

# Save the JSON dataset to a file
with open("dataset.json", "w", encoding="utf-8") as f:
    json.dump(dataset_json, f, indent=4)

# Set up the Groq client (the API key is read from the environment)
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Streamlit UI
st.title("RAG App Using Groq API")
user_query = st.text_input("Ask a question:")

if user_query:
    # Load the dataset from the JSON file
    with open("dataset.json", "r", encoding="utf-8") as f:
        dataset = json.load(f)

    # Get the relevant content from the dataset based on the user's query
    pdf_based_answer = restrict_to_pdf_query(user_query, dataset)

    if pdf_based_answer == "No relevant content found.":
        # Nothing was retrieved: show the message directly instead of sending
        # the sentinel text to the model as if it were a prompt.
        st.write(pdf_based_answer)
    else:
        # Get completion from the Groq model. The model must be a chat model
        # (a speech-to-text model such as Whisper cannot serve chat
        # completions), and the user's question is sent together with the
        # retrieved PDF text so the model knows what to answer.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Answer the question using only the provided context. "
                        "If the context does not contain the answer, say so."
                    ),
                },
                {
                    "role": "user",
                    "content": f"Context:\n{pdf_based_answer}\n\nQuestion: {user_query}",
                },
            ],
            model="llama-3.3-70b-versatile",
        )

        # Display the result
        st.write(chat_completion.choices[0].message.content)