Spaces:
Sleeping
Sleeping
Deepak Sahu committed on
Commit ·
2a28d9d
1
Parent(s): f1fa604
parsing images
Browse files- app.py +3 -3
- z_generate.py +61 -2
app.py
CHANGED
|
@@ -30,8 +30,8 @@ llm = ServerlessInference(vector_store_text=vector_text, vector_store_images=vec
|
|
| 30 |
|
| 31 |
# Processing Functions
|
| 32 |
def update_response(query:str = "something"):
|
| 33 |
-
response_text = llm.perform_rag(query)
|
| 34 |
-
return response_text
|
| 35 |
|
| 36 |
def update_gallery(text:str = "hell"):
|
| 37 |
imgs = [
|
|
@@ -42,7 +42,7 @@ def update_gallery(text:str = "hell"):
|
|
| 42 |
|
| 43 |
|
| 44 |
def ask_bot(text):
|
| 45 |
-
return update_response(text)
|
| 46 |
|
| 47 |
# UI Layout
|
| 48 |
with demo:
|
|
|
|
| 30 |
|
| 31 |
# Processing Functions
|
| 32 |
def update_response(query: str = "something"):
    """Answer *query* via the RAG pipeline.

    Delegates to the module-level `llm` client and passes through its
    (answer_text, images) pair unchanged.
    """
    answer, images = llm.perform_rag(query)
    return answer, images
|
| 35 |
|
| 36 |
def update_gallery(text:str = "hell"):
|
| 37 |
imgs = [
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def ask_bot(text):
    """Chat entry point: forward the user's message to update_response."""
    response = update_response(text)
    return response
|
| 46 |
|
| 47 |
# UI Layout
|
| 48 |
with demo:
|
z_generate.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
from huggingface_hub import InferenceClient
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class ServerlessInference:
|
| 5 |
def __init__(self, vector_store_text = None, vector_store_images = None):
|
|
@@ -135,5 +138,61 @@ Question: {question}""".format(context=context, question=query),
|
|
| 135 |
max_tokens=500
|
| 136 |
)
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from huggingface_hub import InferenceClient
|
| 2 |
import os
|
| 3 |
+
from typing import List
|
| 4 |
+
import requests
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
|
| 7 |
class ServerlessInference:
|
| 8 |
def __init__(self, vector_store_text = None, vector_store_images = None):
|
|
|
|
| 138 |
max_tokens=500
|
| 139 |
)
|
| 140 |
|
| 141 |
+
images_list_str: str = completion.choices[0].message.content
|
| 142 |
+
images_list:list = parse(images_list_str)
|
| 143 |
+
# Create link and caption pair
|
| 144 |
+
response_images = []
|
| 145 |
+
for idx in images_list:
|
| 146 |
+
caption = retrieved_image[idx].page_content
|
| 147 |
+
url = get_wiki_file_to_image_url(retrieved_image[idx].metadata["url"])
|
| 148 |
+
response_images.append(
|
| 149 |
+
(url, caption)
|
| 150 |
+
)
|
| 151 |
+
return response_text, response_images
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def parse(value: str) -> List[int]:
    """Parse an LLM-produced string such as "[0, 2, 3]" into a list of numbers.

    Uses ast.literal_eval — never eval — because the string is model output
    and must not be executed. Returns [] when the string is not a valid
    Python list of numbers, printing a diagnostic instead of raising, so a
    bad model answer degrades to "no images selected" rather than a crash.
    """
    from ast import literal_eval  # safe, data-only evaluation of the literal

    try:
        parsed_value = literal_eval(value)
    # literal_eval's documented failure modes for malformed input.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
        print(f"Invalid input string: {value}. Error: {e}")
        return []

    # Accept only a genuine list of numbers. bool is an int subclass, but
    # True/False are not meaningful image indices, so reject them explicitly.
    if isinstance(parsed_value, list) and all(
        isinstance(i, (int, float)) and not isinstance(i, bool)
        for i in parsed_value
    ):
        return parsed_value

    print("The input string is not a valid list of numbers.")
    return []
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def get_wiki_file_to_image_url(file_page_url: str) -> str:
    """Resolve a Wikipedia "File:..." page URL to the direct image URL.

    Fetches the file page, finds the <a class="internal"> anchor Wikipedia
    uses to link the original file, and returns its absolute URL. On any
    failure (non-200 response, or no internal link found) the input URL is
    returned unchanged so callers always receive a usable string.

    BUG FIX: the original overwrote `file_page_url` with a hard-coded debug
    URL, so every call resolved the same MS Dhoni image regardless of the
    argument; it could also fall through returning None when the page loaded
    but held no internal link.
    """
    # Mimic a browser: Wikipedia may reject the default requests User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Step 1: fetch the file page HTML.
    response = requests.get(file_page_url, headers=headers)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")

        # Step 2: the direct file link is the anchor with class "internal".
        image_tag = soup.find("a", {"class": "internal"})
        if image_tag and "href" in image_tag.attrs:
            # The href is protocol-relative ("//upload.wikimedia.org/...").
            return "https:" + image_tag["href"]

    # Fallback: non-200 response or no direct link found on the page.
    return file_page_url
|