Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from huggingface_hub import login
|
|
| 12 |
import arxiv
|
| 13 |
import numpy as np
|
| 14 |
import torch # Add torch to explicitly set the device
|
|
|
|
| 15 |
|
| 16 |
# Access the Hugging Face token from the environment variable
|
| 17 |
HF_TOKEN = os.getenv("HF_Token")
|
|
@@ -63,7 +64,6 @@ def download_youtube_video(video_url, output_dir, title=None):
|
|
| 63 |
try:
|
| 64 |
with YoutubeDL(ydl_opts) as ydl:
|
| 65 |
info = ydl.extract_info(video_url, download=True)
|
| 66 |
-
# Extract the final downloaded file path
|
| 67 |
downloaded_file = ydl.prepare_filename(info)
|
| 68 |
return downloaded_file
|
| 69 |
except Exception as e:
|
|
@@ -81,22 +81,18 @@ def fetch_and_download_youtube_video(query, output_dir="./videos"):
|
|
| 81 |
|
| 82 |
try:
|
| 83 |
with YoutubeDL(ydl_opts) as ydl:
|
| 84 |
-
# Perform a search for the query on YouTube
|
| 85 |
search_results = ydl.extract_info(f"ytsearch:{query}", download=False)
|
| 86 |
-
|
| 87 |
if 'entries' not in search_results or len(search_results['entries']) == 0:
|
| 88 |
print(f"No YouTube results found for query: '{query}'")
|
| 89 |
return []
|
| 90 |
|
| 91 |
-
video_info = search_results['entries'][0]
|
| 92 |
video_title = video_info.get("title", "unknown_title")
|
| 93 |
video_url = video_info.get("webpage_url", None)
|
| 94 |
-
|
| 95 |
if not video_url:
|
| 96 |
print("No URL found for the video.")
|
| 97 |
return []
|
| 98 |
|
| 99 |
-
# Download the video
|
| 100 |
local_path = download_youtube_video(video_url, output_dir, title=video_title)
|
| 101 |
if not local_path:
|
| 102 |
return []
|
|
@@ -108,24 +104,21 @@ def fetch_and_download_youtube_video(query, output_dir="./videos"):
|
|
| 108 |
print(f"Error fetching YouTube video for query '{query}': {e}")
|
| 109 |
return []
|
| 110 |
|
| 111 |
-
from arxiv import Client, Search, SortCriterion
|
| 112 |
-
|
| 113 |
def fetch_from_arxiv(query="machine learning", max_results=2, output_dir="./papers"):
|
| 114 |
"""Fetch papers from arXiv and download their PDFs."""
|
| 115 |
print(f"Fetching papers for query: {query}")
|
| 116 |
-
client = Client()
|
| 117 |
-
search = Search(
|
| 118 |
query=query,
|
| 119 |
max_results=max_results,
|
| 120 |
-
sort_by=SortCriterion.Relevance
|
| 121 |
)
|
| 122 |
metadata = []
|
| 123 |
for i, result in enumerate(client.results(search)):
|
| 124 |
-
pdf_url = result.pdf_url
|
| 125 |
filename = f"{query.replace(' ', '_')}_arxiv_{i}.pdf"
|
| 126 |
local_path = os.path.join(output_dir, filename)
|
| 127 |
try:
|
| 128 |
-
# Download the PDF
|
| 129 |
response = requests.get(pdf_url)
|
| 130 |
if response.status_code == 200:
|
| 131 |
with open(local_path, 'wb') as f:
|
|
@@ -182,8 +175,21 @@ def hybrid_rag_system_with_llama(query):
|
|
| 182 |
final_response = generate_llama_response(query, context)
|
| 183 |
return final_results, final_response
|
| 184 |
|
| 185 |
-
#
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
import arxiv
|
| 13 |
import numpy as np
|
| 14 |
import torch # Add torch to explicitly set the device
|
| 15 |
+
import gradio as gr
|
| 16 |
|
| 17 |
# Access the Hugging Face token from the environment variable
|
| 18 |
HF_TOKEN = os.getenv("HF_Token")
|
|
|
|
| 64 |
try:
|
| 65 |
with YoutubeDL(ydl_opts) as ydl:
|
| 66 |
info = ydl.extract_info(video_url, download=True)
|
|
|
|
| 67 |
downloaded_file = ydl.prepare_filename(info)
|
| 68 |
return downloaded_file
|
| 69 |
except Exception as e:
|
|
|
|
| 81 |
|
| 82 |
try:
|
| 83 |
with YoutubeDL(ydl_opts) as ydl:
|
|
|
|
| 84 |
search_results = ydl.extract_info(f"ytsearch:{query}", download=False)
|
|
|
|
| 85 |
if 'entries' not in search_results or len(search_results['entries']) == 0:
|
| 86 |
print(f"No YouTube results found for query: '{query}'")
|
| 87 |
return []
|
| 88 |
|
| 89 |
+
video_info = search_results['entries'][0]
|
| 90 |
video_title = video_info.get("title", "unknown_title")
|
| 91 |
video_url = video_info.get("webpage_url", None)
|
|
|
|
| 92 |
if not video_url:
|
| 93 |
print("No URL found for the video.")
|
| 94 |
return []
|
| 95 |
|
|
|
|
| 96 |
local_path = download_youtube_video(video_url, output_dir, title=video_title)
|
| 97 |
if not local_path:
|
| 98 |
return []
|
|
|
|
| 104 |
print(f"Error fetching YouTube video for query '{query}': {e}")
|
| 105 |
return []
|
| 106 |
|
|
|
|
|
|
|
| 107 |
def fetch_from_arxiv(query="machine learning", max_results=2, output_dir="./papers"):
|
| 108 |
"""Fetch papers from arXiv and download their PDFs."""
|
| 109 |
print(f"Fetching papers for query: {query}")
|
| 110 |
+
client = arxiv.Client()
|
| 111 |
+
search = arxiv.Search(
|
| 112 |
query=query,
|
| 113 |
max_results=max_results,
|
| 114 |
+
sort_by=arxiv.SortCriterion.Relevance
|
| 115 |
)
|
| 116 |
metadata = []
|
| 117 |
for i, result in enumerate(client.results(search)):
|
| 118 |
+
pdf_url = result.pdf_url
|
| 119 |
filename = f"{query.replace(' ', '_')}_arxiv_{i}.pdf"
|
| 120 |
local_path = os.path.join(output_dir, filename)
|
| 121 |
try:
|
|
|
|
| 122 |
response = requests.get(pdf_url)
|
| 123 |
if response.status_code == 200:
|
| 124 |
with open(local_path, 'wb') as f:
|
|
|
|
| 175 |
final_response = generate_llama_response(query, context)
|
| 176 |
return final_results, final_response
|
| 177 |
|
| 178 |
+
# Define Gradio interface
|
| 179 |
+
def gradio_interface(query):
    """Gradio wrapper for the hybrid RAG system.

    Args:
        query: Text entered in the Gradio textbox.

    Returns:
        The second element returned by ``hybrid_rag_system_with_llama``
        (the generated response), or a short prompt asking for input when
        ``query`` is empty, whitespace-only, or ``None``.
    """
    # Guard clause: do not start the pipeline when there is nothing to
    # search for.
    if query is None or not str(query).strip():
        return "Please enter a query."

    # The pipeline returns (final_results, final_response); only the
    # response text is shown in the UI.
    _, final_response = hybrid_rag_system_with_llama(query)
    return final_response
|
| 183 |
+
|
| 184 |
+
# Build the Gradio UI: a single text input for the query and a single text
# output for the generated answer, both wired to gradio_interface.
interface = gr.Interface(
    title="Hybrid RAG System with LLaMA",
    description="Enter a query to retrieve relevant resources and generate a response using LLaMA.",
    fn=gradio_interface,
    inputs=gr.Textbox(
        label="Enter your query",
        placeholder="e.g., short easy machine learning",
    ),
    outputs=gr.Textbox(label="Generated Response"),
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|