Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 11 |
from huggingface_hub import login
|
| 12 |
import arxiv
|
| 13 |
import numpy as np
|
|
|
|
| 14 |
|
| 15 |
# Access the Hugging Face token from the environment variable
|
| 16 |
HF_TOKEN = os.getenv("HF_Token")
|
|
@@ -29,10 +30,11 @@ papers_path = "./papers"
|
|
| 29 |
os.makedirs(download_path, exist_ok=True)
|
| 30 |
os.makedirs(papers_path, exist_ok=True)
|
| 31 |
|
| 32 |
-
# Load LLaMA 2
|
| 33 |
model_name = "meta-llama/Llama-3.2-1B-Instruct"
|
| 34 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
|
| 35 |
-
model = AutoModelForCausalLM.from_pretrained(model_name,
|
|
|
|
| 36 |
|
| 37 |
# Define utility functions
|
| 38 |
def compute_similarity(query_embedding, content_embeddings):
|
|
@@ -50,10 +52,6 @@ def add_local_files(module):
|
|
| 50 |
elif module == "paper":
|
| 51 |
return [{"title": os.path.basename(file_path), "url": None, "file_path": file_path, "type": "paper"}]
|
| 52 |
|
| 53 |
-
import os
|
| 54 |
-
import re
|
| 55 |
-
from yt_dlp import YoutubeDL
|
| 56 |
-
|
| 57 |
def download_youtube_video(video_url, output_dir, title=None):
|
| 58 |
"""Download a YouTube video using yt_dlp."""
|
| 59 |
sanitized_title = re.sub(r'[\\/*?:"<>|]', '_', title) if title else "unknown_title"
|
|
@@ -140,14 +138,13 @@ def fetch_from_arxiv(query="machine learning", max_results=2, output_dir="./pape
|
|
| 140 |
print(f"Error downloading paper: {e}")
|
| 141 |
return metadata
|
| 142 |
|
| 143 |
-
|
| 144 |
def generate_llama_response(query, context=None):
|
| 145 |
"""Generate a response using LLaMA 2."""
|
| 146 |
input_text = f"Query: {query}\n"
|
| 147 |
if context:
|
| 148 |
input_text += f"Context: {context}\n"
|
| 149 |
input_text += "Answer:"
|
| 150 |
-
inputs = tokenizer(input_text, return_tensors="pt")
|
| 151 |
outputs = model.generate(inputs["input_ids"], max_length=500, temperature=0.7)
|
| 152 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 153 |
return response
|
|
|
|
| 11 |
from huggingface_hub import login
|
| 12 |
import arxiv
|
| 13 |
import numpy as np
|
| 14 |
+
import torch # Add torch to explicitly set the device
|
| 15 |
|
| 16 |
# Access the Hugging Face token from the environment variable
|
| 17 |
HF_TOKEN = os.getenv("HF_Token")
|
|
|
|
| 30 |
os.makedirs(download_path, exist_ok=True)
|
| 31 |
os.makedirs(papers_path, exist_ok=True)
|
| 32 |
|
| 33 |
+
# Load the instruction-tuned Llama 3.2 1B model for CPU inference.
# NOTE(review): the original comment said "LLaMA 2", but the checkpoint
# below is Llama 3.2 — comment corrected to match the code.
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# Fix: `use_auth_token=True` is deprecated (and relies on a prior
# `login()` call); pass the token read from the environment explicitly
# via the modern `token=` parameter so the gated checkpoint downloads.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=HF_TOKEN,
    torch_dtype=torch.float32,  # float32: CPUs lack native (b)float16 kernels
)
model.to("cpu")  # explicitly run on CPU (no GPU on this Space)
model.eval()     # inference mode: disables dropout
| 38 |
|
| 39 |
# Define utility functions
|
| 40 |
def compute_similarity(query_embedding, content_embeddings):
|
|
|
|
| 52 |
elif module == "paper":
|
| 53 |
return [{"title": os.path.basename(file_path), "url": None, "file_path": file_path, "type": "paper"}]
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def download_youtube_video(video_url, output_dir, title=None):
|
| 56 |
"""Download a YouTube video using yt_dlp."""
|
| 57 |
sanitized_title = re.sub(r'[\\/*?:"<>|]', '_', title) if title else "unknown_title"
|
|
|
|
| 138 |
print(f"Error downloading paper: {e}")
|
| 139 |
return metadata
|
| 140 |
|
|
|
|
| 141 |
def generate_llama_response(query, context=None):
    """Generate an answer with the loaded Llama model.

    Args:
        query: The user's question, interpolated into the prompt.
        context: Optional supporting text added to the prompt before
            the "Answer:" cue.

    Returns:
        The decoded generation as a string. Note it includes the prompt,
        since ``outputs[0]`` contains the input ids as a prefix.
    """
    # Build a plain-text prompt; the model continues after "Answer:".
    input_text = f"Query: {query}\n"
    if context:
        input_text += f"Context: {context}\n"
    input_text += "Answer:"

    inputs = tokenizer(input_text, return_tensors="pt")
    # Fix: `temperature` was silently ignored because generate() defaults
    # to greedy decoding — sampling must be enabled with do_sample=True.
    # Also forward the attention mask to avoid the pad-token warning and
    # ambiguous padding behavior.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=500,
        do_sample=True,
        temperature=0.7,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
|