Spaces:

RizwanSajad
/

Ideas_Creater

Sleeping

App Files Files Community

RizwanSajad committed on Jan 2, 2025

Commit

d2edfae

verified ·

1 Parent(s): 4cc8eb3

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -34

app.py CHANGED Viewed

@@ -6,25 +6,23 @@ import pytesseract
 from transformers import AutoTokenizer, AutoModel
 import faiss
 import numpy as np
-import torch
 from groq import Groq
-# Configure Streamlit app
 st.title("RAG-Based Application")
-st.write("Upload an image to extract and query content.")
-# Initialize Groq API
 def get_groq_client():
 	    return Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# Load embedding model
 st.write("Loading embedding model...")
 try:
     tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
     model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
 except Exception as e:
-    st.error(f"Error loading model: {e}")
 # Initialize FAISS index
 dimension = model.config.hidden_size
@@ -35,61 +33,55 @@ def extract_text_from_image(image_path):
     try:
         return pytesseract.image_to_string(Image.open(image_path))
     except pytesseract.TesseractNotFoundError:
-        st.error("Tesseract is not installed. It is required for text extraction.")
         return ""
 def get_embeddings(text_chunks):
-    """Get embeddings for text chunks."""
     inputs = tokenizer(text_chunks, return_tensors="pt", padding=True, truncation=True)
     with torch.no_grad():
-        outputs = model(**inputs)
-        embeddings = outputs.last_hidden_state.mean(dim=1).numpy()
     return embeddings
-def query_groq(question, model="llama-3.3-70b-versatile"):
-    """Query Groq model for a response."""
     try:
-        client = Groq(api_key=GROQ_API_KEY)
         response = client.chat.completions.create(
             messages=[{"role": "user", "content": question}],
-            model=model,
         )
         return response.choices[0].message.content
     except Exception as e:
-        st.error(f"Error querying Groq model: {e}")
         return ""
-# File uploader for image input
 uploaded_file = st.file_uploader("Upload an image (JPG, PNG):", type=["jpg", "jpeg", "png"])
 if uploaded_file:
     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
         temp_file.write(uploaded_file.read())
         temp_image_path = temp_file.name
-    # Extract text from the image
     st.write("Extracting text from the uploaded image...")
     extracted_text = extract_text_from_image(temp_image_path)
     st.text_area("Extracted Text:", extracted_text, height=200)
     if extracted_text.strip():
-        # Chunk and process the text
-        st.write("Processing text into chunks...")
-        text_chunks = [extracted_text[i : i + 512] for i in range(0, len(extracted_text), 512)]
-        try:
-            embeddings = get_embeddings(text_chunks)
-            st.write("Storing data in FAISS database...")
-            index.add(np.array(embeddings))
-            st.success("Data processed and stored successfully!")
-        except Exception as e:
-            st.error(f"Error during embedding creation: {e}")
-        # Query interface
-        user_question = st.text_input("Ask a question based on the uploaded content:")
         if user_question:
             answer = query_groq(user_question)
             st.write("Answer from Groq:")
             st.write(answer)
     else:
-        st.warning("No text was extracted from the image. Please try again with a different file.")

 from transformers import AutoTokenizer, AutoModel
 import faiss
 import numpy as np
 from groq import Groq
+# Configure the application
 st.title("RAG-Based Application")
+st.write("Upload an image, and extract and query its content.")
+# Groq API setup
 def get_groq_client():
 	    return Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# Model for embedding generation
 st.write("Loading embedding model...")
 try:
     tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
     model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
 except Exception as e:
+    st.error(f"Failed to load embedding model: {e}")
 # Initialize FAISS index
 dimension = model.config.hidden_size
     try:
         return pytesseract.image_to_string(Image.open(image_path))
     except pytesseract.TesseractNotFoundError:
+        st.error("Tesseract is not installed. Install it via the setup script.")
         return ""
 def get_embeddings(text_chunks):
+    """Generate embeddings for text chunks using the model.
+
+    Tokenizes ``text_chunks`` with the module-level ``tokenizer``, runs the
+    module-level ``model`` with gradient tracking disabled, and averages the
+    last hidden state over the token axis, ignoring padding positions.
+
+    Returns:
+        A NumPy array of pooled embeddings (presumably one row per chunk).
+    """
+    # The file-level ``import torch`` is removed by this commit, so import it
+    # locally; otherwise ``torch.no_grad()`` below raises NameError.
+    import torch
+
     inputs = tokenizer(text_chunks, return_tensors="pt", padding=True, truncation=True)
     with torch.no_grad():
+        hidden = model(**inputs).last_hidden_state
+        # padding=True pads shorter chunks; weight by the attention mask so
+        # pad tokens do not dilute the mean.
+        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
+        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
+        embeddings = pooled.numpy()
     return embeddings
+def query_groq(question):
+    """Query the Groq API to generate answers.
+
+    Sends ``question`` as a single user message to the
+    ``llama-3.3-70b-versatile`` model and returns the text of the first
+    choice. Any exception is reported with ``st.error`` and an empty string
+    is returned instead.
+    """
     try:
+        # Build the client here: the visible code defines no ``client`` name,
+        # so the call below would otherwise fail with NameError.
+        client = get_groq_client()
         response = client.chat.completions.create(
             messages=[{"role": "user", "content": question}],
+            model="llama-3.3-70b-versatile"
         )
         return response.choices[0].message.content
     except Exception as e:
+        st.error(f"Error querying Groq API: {e}")
         return ""
+# File uploader
 uploaded_file = st.file_uploader("Upload an image (JPG, PNG):", type=["jpg", "jpeg", "png"])
 if uploaded_file:
     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
         temp_file.write(uploaded_file.read())
         temp_image_path = temp_file.name
+    # Extract text from image
     st.write("Extracting text from the uploaded image...")
     extracted_text = extract_text_from_image(temp_image_path)
     st.text_area("Extracted Text:", extracted_text, height=200)
     if extracted_text.strip():
+        # Chunk text for embeddings
+        text_chunks = [extracted_text[i:i+512] for i in range(0, len(extracted_text), 512)]
+        # Generate embeddings
+        embeddings = get_embeddings(text_chunks)
+        st.write("Storing extracted data in FAISS database...")
+        index.add(np.array(embeddings))
+        st.success("Text processed and stored successfully!")
+        # Question input for Groq
+        user_question = st.text_input("Ask a question based on the uploaded image content:")
         if user_question:
             answer = query_groq(user_question)
             st.write("Answer from Groq:")
             st.write(answer)
     else:
+        st.warning("No text could be extracted from the image. Try another file.")