Spaces:

kinely
/

RAG-App

Sleeping

App Files Files Community

kinely committed on Nov 23, 2024

Commit

6c5a238

verified ·

1 Parent(s): 97edd73

Create app.py

Browse files

Files changed (1) hide show

app.py +68 -0

app.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os
+import json
+import fitz  # PyMuPDF
+from groq import Groq
+import streamlit as st
+# Function to convert PDF to text
+def pdf_to_text(pdf_path):
+    doc = fitz.open(pdf_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+# Function to convert extracted text into a structured JSON format
+def text_to_json(text):
+    paragraphs = text.split("\n\n")  # Split text into sections or paragraphs
+    json_data = {"dataset": [{"section": i + 1, "content": para} for i, para in enumerate(paragraphs)]}
+    return json_data
+# Function to restrict query results to the PDF dataset
+def restrict_to_pdf_query(query, dataset):
+    relevant_content = []
+    for section in dataset["dataset"]:
+        if query.lower() in section["content"].lower():
+            relevant_content.append(section["content"])
+    return "\n".join(relevant_content) if relevant_content else "No relevant content found."
+# Load the PDF, convert it to text, and create a JSON dataset
+pdf_path = "your_file.pdf"  # Replace with the path to your PDF file
+pdf_text = pdf_to_text(pdf_path)
+dataset_json = text_to_json(pdf_text)
+# Save the JSON dataset to a file
+with open("dataset.json", "w") as f:
+    json.dump(dataset_json, f, indent=4)
+# Set up the Groq client
+client = Groq(
+    api_key=os.environ.get("GROQ_API_KEY"),
+)
+# Streamlit UI
+st.title("RAG App Using Groq API")
+user_query = st.text_input("Ask a question:")
+if user_query:
+    # Load the dataset from the JSON file
+    with open("dataset.json", "r") as f:
+        dataset = json.load(f)
+    # Get the relevant content from the dataset based on the user's query
+    pdf_based_answer = restrict_to_pdf_query(user_query, dataset)
+    # Get completion from the Groq model
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": pdf_based_answer,
+            }
+        ],
+        model="whisper-large-v3",
+    )
+    # Display the result
+    st.write(chat_completion.choices[0].message.content)