# Financial-document Q&A app: PDF ingestion into Chroma, OpenAI chat responses,
# and Alpha Vantage helpers for income-statement / balance-sheet / ticker lookups.
from langchain import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
import os
import requests
import pandas as pd
load_dotenv()
OPENAI_API_KEY=os.getenv('OPENAI_KEY')
AV_API_KEY = os.getenv('Alphavantage_key')
llm = OpenAI(openai_api_key=OPENAI_API_KEY,temperature=0, model_name="gpt-3.5-turbo-instruct", max_tokens=-1)
def process_pdf(file_path):
"""
This function processes the uploaded PDF, splits it into text chunks,
and stores them in a Chroma database using OpenAI embeddings.
Args:
file_path (str): The path to the uploaded PDF file.
openai_api_key (str): Your OpenAI API key for embeddings.
Returns:
db: The Chroma database containing the embedded documents.
"""
# Set up OpenAI API key
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
# Load the PDF file
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()
# Split text into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(pages)
# Create a Chroma database from the documents using OpenAI embeddings
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents, embeddings)
# Return the Chroma database
return db
from openai import OpenAI
def get_model_response(query, context):
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
prompt = f"""
You are a chatbot that is supposed to give response to user query's about a company's financials based on the following context.
You are given the following context:
{context}
You are asked to generate a short and accurate answer to the following question using the above context.
question: {query}
strictly do not hallucinate. Only use the above context to generate an answer. Please give your response in bullet points.
Remove any unwanted characters or symbols.
"""
client = OpenAI()
response = client.chat.completions.create(
model="gpt-4-1106-preview",
max_tokens=1024,
temperature=0,
messages=[
{"role": "system", "content": prompt}
]
)
model_response = response.choices[0].message.content
return model_response
# query = "How has the performance been in this year compared to last year?"
# docs = db.similarity_search(query)
# print(docs[0].page_content)
# context=docs[0].page_content
def get_income_statement(symbol='INFY'):
load_dotenv()
url = "https://www.alphavantage.co/query"
params = {
"function": "INCOME_STATEMENT",
"symbol": symbol,
"apikey": AV_API_KEY
}
response = requests.get(url, params=params)
if response.status_code == 200:
data = response.json()
if not data:
print(f"No data found for {symbol}")
return None
rev = {'dates': [], 'total_rev': [], 'ebitda': [], 'net_income': []}
for i in range(0, 9):
rev['dates'].append(data['annualReports'][i]['fiscalDateEnding'])
rev['total_rev'].append(int(data['annualReports'][i]['totalRevenue']) / 1_000_000)
rev['ebitda'].append(int(data['annualReports'][i]['ebitda']) / 1_000_000)
rev['net_income'].append(int(data['annualReports'][i]['netIncome']) / 1_000_000)
is_df = pd.DataFrame(rev)
is_df= is_df.sort_values(by=['dates'], ascending=True)
is_df[['total_rev', 'ebitda', 'net_income']] = is_df[['total_rev', 'ebitda', 'net_income']].round(0).astype(int)
return is_df
else:
print(f"Error fetching data: {response.status_code}")
return None
def get_balance_sheet(symbol='INFY'):
load_dotenv()
url = "https://www.alphavantage.co/query"
params = {
"function": "BALANCE_SHEET",
"symbol": symbol,
"apikey": AV_API_KEY
}
response = requests.get(url, params=params)
if response.status_code == 200:
bs_data = response.json()
if not bs_data:
print(f"No data found for {symbol}")
return None
bs = {'dates': [], 'debt': [], 'current_assets': [], 'cash_equivalents': []}
for i in range(0, 9):
bs['dates'].append(bs_data['annualReports'][i]['fiscalDateEnding'])
long_term_debt = bs_data['annualReports'][i].get('longTermDebt', '0')
bs['debt'].append(int(long_term_debt) / 1_000_000 if long_term_debt not in ['0', None, 'None', ''] else 0)
bs['current_assets'].append(int(bs_data['annualReports'][i]['totalCurrentAssets']) / 1_000_000)
bs['cash_equivalents'].append(int(bs_data['annualReports'][i]['cashAndCashEquivalentsAtCarryingValue']) / 1_000_000)
bs_df = pd.DataFrame(bs)
bs_df[['debt', 'current_assets', 'cash_equivalents']] = bs_df[['debt', 'current_assets', 'cash_equivalents']].round(0).astype(int)
bs_df= bs_df.sort_values(by=['dates'], ascending=True)
return bs_df
else:
print(f"Error fetching data: {response.status_code}")
return None
def get_ticker(company):
# Define the desired market
desired_market = 'India/Bombay'
# API URL to search for the company symbol
url = f'https://www.alphavantage.co/query?function=SYMBOL_SEARCH&keywords={company}&apikey={AV_API_KEY}'
# Make a GET request
r = requests.get(url)
# Parse the JSON response
data = r.json()
# Iterate over the bestMatches to find the symbol for the desired market
for match in data.get('bestMatches', []):
if match['4. region'] == desired_market :
symbol = match['1. symbol'].split('.')[0]
print(f"The symbol for {desired_market} is: {symbol}")
return symbol
elif match['4. region'] == 'United States' :
symbol = match['1. symbol']
print(f"The symbol for {desired_market} is: {symbol}")
return symbol
else:
print(f"No symbol found for the market: {desired_market}")
return None