Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,9 +3,7 @@ import gradio as gr
|
|
| 3 |
import torch
|
| 4 |
import pandas as pd
|
| 5 |
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
|
| 6 |
-
from datasets import Dataset
|
| 7 |
import yfinance as yf
|
| 8 |
-
import numpy as np
|
| 9 |
|
| 10 |
# Function to fetch and preprocess ICICI Bank data
|
| 11 |
def fetch_and_preprocess_data():
|
|
@@ -26,51 +24,20 @@ def fetch_and_preprocess_data():
|
|
| 26 |
print(f"Error fetching data: {e}")
|
| 27 |
return pd.DataFrame() # Return an empty DataFrame if fetching fails
|
| 28 |
|
| 29 |
-
# Function to create and save a custom index for the retriever
|
| 30 |
-
def create_custom_index():
|
| 31 |
-
# Fetch and preprocess data
|
| 32 |
-
data = fetch_and_preprocess_data()
|
| 33 |
-
|
| 34 |
-
if data.empty:
|
| 35 |
-
raise ValueError("No data available to create the index.")
|
| 36 |
-
|
| 37 |
-
# Create a dataset for the retriever
|
| 38 |
-
dataset = Dataset.from_dict({
|
| 39 |
-
"id": [str(i) for i in range(len(data))],
|
| 40 |
-
"text": data.apply(lambda row: f"Date: {row.name}, Close: {row['Close']:.2f}, MA_50: {row['MA_50']:.2f}, MA_200: {row['MA_200']:.2f}", axis=1).tolist(),
|
| 41 |
-
"title": [f"ICICI Bank Data {i}" for i in range(len(data))]
|
| 42 |
-
})
|
| 43 |
-
|
| 44 |
-
# Save the dataset and index
|
| 45 |
-
dataset_path = "icici_bank_dataset"
|
| 46 |
-
index_path = "icici_bank_index"
|
| 47 |
-
dataset.save_to_disk(dataset_path)
|
| 48 |
-
print(f"Dataset saved to {dataset_path}")
|
| 49 |
-
|
| 50 |
-
# Add FAISS index
|
| 51 |
-
dataset.add_faiss_index("text")
|
| 52 |
-
dataset.get_index("text").save(index_path)
|
| 53 |
-
print(f"FAISS index saved to {index_path}")
|
| 54 |
-
|
| 55 |
-
return dataset_path, index_path
|
| 56 |
-
|
| 57 |
# Load the RAG tokenizer, retriever, and model from the facebook/rag-sequence-base checkpoint
|
| 58 |
-
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-base")
|
| 59 |
-
print("Tokenizer loaded successfully.")
|
| 60 |
-
|
| 61 |
try:
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
#
|
| 66 |
retriever = RagRetriever.from_pretrained(
|
| 67 |
"facebook/rag-sequence-base",
|
| 68 |
-
index_name="
|
| 69 |
-
passages_path=
|
| 70 |
-
index_path=
|
| 71 |
)
|
| 72 |
print("Retriever loaded successfully.")
|
| 73 |
-
|
| 74 |
# Load the RAG model
|
| 75 |
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-base", retriever=retriever)
|
| 76 |
print("Model loaded successfully.")
|
|
|
|
| 3 |
import torch
|
| 4 |
import pandas as pd
|
| 5 |
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
|
|
|
|
| 6 |
import yfinance as yf
|
|
|
|
| 7 |
|
| 8 |
# Function to fetch and preprocess ICICI Bank data
|
| 9 |
def fetch_and_preprocess_data():
|
|
|
|
| 24 |
print(f"Error fetching data: {e}")
|
| 25 |
return pd.DataFrame() # Return an empty DataFrame if fetching fails
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Load the RAG tokenizer, retriever, and model from the facebook/rag-sequence-base checkpoint
|
|
|
|
|
|
|
|
|
|
| 28 |
try:
|
| 29 |
+
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-base")
|
| 30 |
+
print("Tokenizer loaded successfully.")
|
| 31 |
+
|
| 32 |
+
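# Use the pre-built wiki_dpr index instead of creating a custom index. NOTE(review): wiki_dpr looks like the RAG `dataset` name, while the documented `index_name` values are "exact", "compressed", "legacy" and "custom" - confirm this argument.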
|
| 33 |
retriever = RagRetriever.from_pretrained(
|
| 34 |
"facebook/rag-sequence-base",
|
| 35 |
+
index_name="wiki_dpr",
|
| 36 |
+
passages_path=None,
|
| 37 |
+
index_path=None
|
| 38 |
)
|
| 39 |
print("Retriever loaded successfully.")
|
| 40 |
+
|
| 41 |
# Load the RAG model
|
| 42 |
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-base", retriever=retriever)
|
| 43 |
print("Model loaded successfully.")
|