Spaces:
Sleeping
Sleeping
Deepak Sahu committed on
Commit ·
2a28d9d
1
Parent(s): f1fa604
parsing images
Browse files- app.py +3 -3
- z_generate.py +61 -2
app.py
CHANGED
|
@@ -30,8 +30,8 @@ llm = ServerlessInference(vector_store_text=vector_text, vector_store_images=vec
|
|
| 30 |
|
| 31 |
# Processing Functions
|
| 32 |
def update_response(query:str = "something"):
|
| 33 |
-
response_text = llm.perform_rag(query)
|
| 34 |
-
return response_text
|
| 35 |
|
| 36 |
def update_gallery(text:str = "hell"):
|
| 37 |
imgs = [
|
|
@@ -42,7 +42,7 @@ def update_gallery(text:str = "hell"):
|
|
| 42 |
|
| 43 |
|
| 44 |
def ask_bot(text):
|
| 45 |
-
return update_response(text)
|
| 46 |
|
| 47 |
# UI Layout
|
| 48 |
with demo:
|
|
|
|
| 30 |
|
| 31 |
# Processing Functions
|
| 32 |
def update_response(query: str = "something"):
    """Answer *query* via the RAG pipeline.

    Delegates to the module-level `llm` client and passes through its
    (answer_text, images) pair unchanged.
    """
    answer, images = llm.perform_rag(query)
    return answer, images
|
| 35 |
|
| 36 |
def update_gallery(text:str = "hell"):
|
| 37 |
imgs = [
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def ask_bot(text):
    """Chat entry point: forward the user's message to update_response."""
    response = update_response(text)
    return response
|
| 46 |
|
| 47 |
# UI Layout
|
| 48 |
with demo:
|
z_generate.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
from huggingface_hub import InferenceClient
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class ServerlessInference:
|
| 5 |
def __init__(self, vector_store_text = None, vector_store_images = None):
|
|
@@ -135,5 +138,61 @@ Question: {question}""".format(context=context, question=query),
|
|
| 135 |
max_tokens=500
|
| 136 |
)
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from huggingface_hub import InferenceClient
|
| 2 |
import os
|
| 3 |
+
from typing import List
|
| 4 |
+
import requests
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
|
| 7 |
class ServerlessInference:
|
| 8 |
def __init__(self, vector_store_text = None, vector_store_images = None):
|
|
|
|
| 138 |
max_tokens=500
|
| 139 |
)
|
| 140 |
|
| 141 |
+
images_list_str: str = completion.choices[0].message.content
|
| 142 |
+
images_list:list = parse(images_list_str)
|
| 143 |
+
# Create link and caption pair
|
| 144 |
+
response_images = []
|
| 145 |
+
for idx in images_list:
|
| 146 |
+
caption = retrieved_image[idx].page_content
|
| 147 |
+
url = get_wiki_file_to_image_url(retrieved_image[idx].metadata["url"])
|
| 148 |
+
response_images.append(
|
| 149 |
+
(url, caption)
|
| 150 |
+
)
|
| 151 |
+
return response_text, response_images
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def parse(value: str) -> List[int]:
    """Parse an LLM-produced string such as "[0, 2, 3]" into a list of numbers.

    Uses ast.literal_eval — never eval — because the string is model output
    and must not be executed. Returns [] when the string is not a valid
    Python list of numbers, printing a diagnostic instead of raising, so a
    bad model answer degrades to "no images selected" rather than a crash.
    """
    from ast import literal_eval  # safe, data-only evaluation of the literal

    try:
        parsed_value = literal_eval(value)
    # literal_eval's documented failure modes for malformed input.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
        print(f"Invalid input string: {value}. Error: {e}")
        return []

    # Accept only a genuine list of numbers. bool is an int subclass, but
    # True/False are not meaningful image indices, so reject them explicitly.
    if isinstance(parsed_value, list) and all(
        isinstance(i, (int, float)) and not isinstance(i, bool)
        for i in parsed_value
    ):
        return parsed_value

    print("The input string is not a valid list of numbers.")
    return []
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def get_wiki_file_to_image_url(file_page_url: str) -> str:
    """Resolve a Wikipedia "File:..." page URL to the direct image URL.

    Fetches the file page, finds the <a class="internal"> anchor Wikipedia
    uses to link the original file, and returns its absolute URL. On any
    failure (non-200 response, or no internal link found) the input URL is
    returned unchanged so callers always receive a usable string.

    BUG FIX: the original overwrote `file_page_url` with a hard-coded debug
    URL, so every call resolved the same MS Dhoni image regardless of the
    argument; it could also fall through returning None when the page loaded
    but held no internal link.
    """
    # Mimic a browser: Wikipedia may reject the default requests User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Step 1: fetch the file page HTML.
    response = requests.get(file_page_url, headers=headers)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")

        # Step 2: the direct file link is the anchor with class "internal".
        image_tag = soup.find("a", {"class": "internal"})
        if image_tag and "href" in image_tag.attrs:
            # The href is protocol-relative ("//upload.wikimedia.org/...").
            return "https:" + image_tag["href"]

    # Fallback: non-200 response or no direct link found on the page.
    return file_page_url
|