Spaces:
Sleeping
Sleeping
Tanuj
committed on
Commit
·
2cf474e
1
Parent(s):
f2c4e2e
Update requirements
Browse files- .gitignore +1 -0
- app.py +4 -3
- rag.py +15 -13
- requirements.txt +4 -1
.gitignore
CHANGED
|
@@ -2,3 +2,4 @@
|
|
| 2 |
.DS_Store
|
| 3 |
.venv/
|
| 4 |
__pycache__/
|
|
|
|
|
|
| 2 |
.DS_Store
|
| 3 |
.venv/
|
| 4 |
__pycache__/
|
| 5 |
+
.env
|
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from pathlib import Path
|
| 3 |
-
from rag import is_allowed_filetype,
|
| 4 |
import logging
|
| 5 |
|
| 6 |
# ---------------------------
|
|
@@ -56,7 +56,8 @@ def natural_language_handler(query: str) -> str:
|
|
| 56 |
Returns:
|
| 57 |
str: Simulated or generated action and result.
|
| 58 |
"""
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def placeholder(feature_name: str = "unknown") -> str:
|
|
@@ -98,7 +99,7 @@ def handle_file_input(file_path: str | None, uploaded_file: gr.File | None):
|
|
| 98 |
file_contents = file.read()
|
| 99 |
|
| 100 |
|
| 101 |
-
upload_result =
|
| 102 |
|
| 103 |
return f"Uploaded document: {upload_result['id']}"
|
| 104 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from pathlib import Path
|
| 3 |
+
from rag import is_allowed_filetype, upload_file_to_vectara, retrieve_chunks
|
| 4 |
import logging
|
| 5 |
|
| 6 |
# ---------------------------
|
|
|
|
| 56 |
Returns:
|
| 57 |
str: Simulated or generated action and result.
|
| 58 |
"""
|
| 59 |
+
chunks, response = retrieve_chunks(query, limit=5)
|
| 60 |
+
return f"💬 Got {len(chunks)} chunks for your request: “{query}”. Response: {response}"
|
| 61 |
|
| 62 |
|
| 63 |
def placeholder(feature_name: str = "unknown") -> str:
|
|
|
|
| 99 |
file_contents = file.read()
|
| 100 |
|
| 101 |
|
| 102 |
+
upload_result = upload_file_to_vectara(file_contents, filepath.name)
|
| 103 |
|
| 104 |
return f"Uploaded document: {upload_result['id']}"
|
| 105 |
|
rag.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
import os
|
|
|
|
| 4 |
import requests
|
| 5 |
from pprint import pprint
|
| 6 |
from schemas import UploadResult
|
|
@@ -57,13 +58,13 @@ def save_response_to_file(response_json: dict, filename: str):
|
|
| 57 |
with open(filename, "w") as f:
|
| 58 |
json.dump(response_json, f, indent=2)
|
| 59 |
|
| 60 |
-
def
|
| 61 |
"""
|
| 62 |
-
Uploads a
|
| 63 |
|
| 64 |
Args:
|
| 65 |
-
|
| 66 |
-
filename (str): The name of the
|
| 67 |
|
| 68 |
Returns:
|
| 69 |
None
|
|
@@ -74,12 +75,13 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
|
|
| 74 |
"""
|
| 75 |
CORPUS_KEY = "YouTwo" # Replace with your actual corpus key
|
| 76 |
|
| 77 |
-
# Check if
|
| 78 |
-
if not
|
| 79 |
-
raise IndexingError("No
|
| 80 |
|
|
|
|
| 81 |
# Ensure valid filename
|
| 82 |
-
if not
|
| 83 |
raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
|
| 84 |
|
| 85 |
# Replace with your actual corpus_key and API key
|
|
@@ -92,11 +94,11 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
|
|
| 92 |
"Accept": "application/json",
|
| 93 |
"x-api-key": api_key,
|
| 94 |
}
|
| 95 |
-
|
| 96 |
files = {
|
| 97 |
-
'file': (filename,
|
| 98 |
}
|
| 99 |
|
|
|
|
| 100 |
try:
|
| 101 |
response = requests.post(url, headers=headers, files=files)
|
| 102 |
response.raise_for_status() # Raise an exception for HTTP errors
|
|
@@ -133,7 +135,7 @@ def process_upload_response(response_json: dict) -> UploadResult:
|
|
| 133 |
storage_usage=response_json["storage_usage"]
|
| 134 |
)
|
| 135 |
# See https://docs.vectara.com/docs/rest-api/query-corpus
|
| 136 |
-
def retrieve_chunks(query: str) -> tuple[list[str], str]:
|
| 137 |
"""
|
| 138 |
Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
|
| 139 |
|
|
@@ -157,7 +159,7 @@ def retrieve_chunks(query: str) -> tuple[list[str], str]:
|
|
| 157 |
payload = {
|
| 158 |
"query": query,
|
| 159 |
"search": {
|
| 160 |
-
"limit":
|
| 161 |
# "reranker": {
|
| 162 |
# "type": "customer_reranker",
|
| 163 |
# "reranker_name": "Rerank_Multilingual_v1",
|
|
@@ -244,7 +246,7 @@ def test_file_upload():
|
|
| 244 |
pdf_path = Path(FILEPATH).expanduser()
|
| 245 |
with open(pdf_path, "rb") as f:
|
| 246 |
pdf_bytes = f.read()
|
| 247 |
-
|
| 248 |
except Exception as e:
|
| 249 |
raise IndexingError(f"Error occurred while uploading PDF: {e}")
|
| 250 |
|
|
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
import requests
|
| 6 |
from pprint import pprint
|
| 7 |
from schemas import UploadResult
|
|
|
|
| 58 |
with open(filename, "w") as f:
|
| 59 |
json.dump(response_json, f, indent=2)
|
| 60 |
|
| 61 |
+
def upload_file_to_vectara(file_bytes: bytes, filename: str) -> UploadResult:
|
| 62 |
"""
|
| 63 |
+
Uploads a supported file type to Vectara for processing.
|
| 64 |
|
| 65 |
Args:
|
| 66 |
+
file_bytes (bytes): The file content in bytes.
|
| 67 |
+
filename (str): The name of the file.
|
| 68 |
|
| 69 |
Returns:
|
| 70 |
None
|
|
|
|
| 75 |
"""
|
| 76 |
CORPUS_KEY = "YouTwo" # Replace with your actual corpus key
|
| 77 |
|
| 78 |
+
# Check if file_bytes is provided
|
| 79 |
+
if not file_bytes:
|
| 80 |
+
raise IndexingError("No file bytes provided.")
|
| 81 |
|
| 82 |
+
suffix = Path(filename).suffix
|
| 83 |
# Ensure valid filename
|
| 84 |
+
if not is_allowed_filetype(suffix):
|
| 85 |
raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
|
| 86 |
|
| 87 |
# Replace with your actual corpus_key and API key
|
|
|
|
| 94 |
"Accept": "application/json",
|
| 95 |
"x-api-key": api_key,
|
| 96 |
}
|
|
|
|
| 97 |
files = {
|
| 98 |
+
'file': (filename, file_bytes)
|
| 99 |
}
|
| 100 |
|
| 101 |
+
|
| 102 |
try:
|
| 103 |
response = requests.post(url, headers=headers, files=files)
|
| 104 |
response.raise_for_status() # Raise an exception for HTTP errors
|
|
|
|
| 135 |
storage_usage=response_json["storage_usage"]
|
| 136 |
)
|
| 137 |
# See https://docs.vectara.com/docs/rest-api/query-corpus
|
| 138 |
+
def retrieve_chunks(query: str, limit: int = 10) -> tuple[list[str], str]:
|
| 139 |
"""
|
| 140 |
Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
|
| 141 |
|
|
|
|
| 159 |
payload = {
|
| 160 |
"query": query,
|
| 161 |
"search": {
|
| 162 |
+
"limit": limit, # Number of search results to retrieve
|
| 163 |
# "reranker": {
|
| 164 |
# "type": "customer_reranker",
|
| 165 |
# "reranker_name": "Rerank_Multilingual_v1",
|
|
|
|
| 246 |
pdf_path = Path(FILEPATH).expanduser()
|
| 247 |
with open(pdf_path, "rb") as f:
|
| 248 |
pdf_bytes = f.read()
|
| 249 |
+
upload_file_to_vectara(pdf_bytes, pdf_path.name)
|
| 250 |
except Exception as e:
|
| 251 |
raise IndexingError(f"Error occurred while uploading PDF: {e}")
|
| 252 |
|
requirements.txt
CHANGED
|
@@ -1,2 +1,5 @@
|
|
| 1 |
gradio[mcp]==5.33.0
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
gradio[mcp]==5.33.0
|
| 2 |
+
requests
|
| 3 |
+
python-dotenv
|
| 4 |
+
smolagents
|
| 5 |
+
fastrtc
|