# Financial-document Q&A app: PDF ingestion into Chroma, OpenAI chat responses,
# and Alpha Vantage helpers for income-statement / balance-sheet / ticker lookups.
from langchain import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
import os
import requests
import pandas as pd
load_dotenv()
OPENAI_API_KEY=os.getenv('OPENAI_KEY')
AV_API_KEY = os.getenv('Alphavantage_key')
llm = OpenAI(openai_api_key=OPENAI_API_KEY,temperature=0, model_name="gpt-3.5-turbo-instruct", max_tokens=-1)
def process_pdf(file_path):
"""
This function processes the uploaded PDF, splits it into text chunks,
and stores them in a Chroma database using OpenAI embeddings.
Args:
file_path (str): The path to the uploaded PDF file.
openai_api_key (str): Your OpenAI API key for embeddings.
Returns:
db: The Chroma database containing the embedded documents.
"""
# Set up OpenAI API key
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
# Load the PDF file
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()
# Split text into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(pages)
# Create a Chroma database from the documents using OpenAI embeddings
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents, embeddings)
# Return the Chroma database
return db
from openai import OpenAI
def get_model_response(query, context):
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
prompt = f"""
You are a chatbot that is supposed to give response to user query's about a company's financials based on the following context.
You are given the following context:
{context}
You are asked to generate a short and accurate answer to the following question using the above context.
question: {query}
strictly do not hallucinate. Only use the above context to generate an answer. Please give your response in bullet points.
Remove any unwanted characters or symbols.
"""
client = OpenAI()
response = client.chat.completions.create(
model="gpt-4-1106-preview",
max_tokens=1024,
temperature=0,
messages=[
{"role": "system", "content": prompt}
]
)
model_response = response.choices[0].message.content
return model_response
# query = "How has the performance been in this year compared to last year?"
# docs = db.similarity_search(query)
# print(docs[0].page_content)
# context=docs[0].page_content
def get_income_statement(symbol='INFY'):
load_dotenv()
url = "https://www.alphavantage.co/query"
params = {
"function": "INCOME_STATEMENT",
"symbol": symbol,
"apikey": AV_API_KEY
}
response = requests.get(url, params=params)
if response.status_code == 200:
data = response.json()
if not data:
print(f"No data found for {symbol}")
return None
rev = {'dates': [], 'total_rev': [], 'ebitda': [], 'net_income': []}
for i in range(0, 9):
rev['dates'].append(data['annualReports'][i]['fiscalDateEnding'])
rev['total_rev'].append(int(data['annualReports'][i]['totalRevenue']) / 1_000_000)
rev['ebitda'].append(int(data['annualReports'][i]['ebitda']) / 1_000_000)
rev['net_income'].append(int(data['annualReports'][i]['netIncome']) / 1_000_000)
is_df = pd.DataFrame(rev)
is_df= is_df.sort_values(by=['dates'], ascending=True)
is_df[['total_rev', 'ebitda', 'net_income']] = is_df[['total_rev', 'ebitda', 'net_income']].round(0).astype(int)
return is_df
else:
print(f"Error fetching data: {response.status_code}")
return None
def get_balance_sheet(symbol='INFY'):
load_dotenv()
url = "https://www.alphavantage.co/query"
params = {
"function": "BALANCE_SHEET",
"symbol": symbol,
"apikey": AV_API_KEY
}
response = requests.get(url, params=params)
if response.status_code == 200:
bs_data = response.json()
if not bs_data:
print(f"No data found for {symbol}")
return None
bs = {'dates': [], 'debt': [], 'current_assets': [], 'cash_equivalents': []}
for i in range(0, 9):
bs['dates'].append(bs_data['annualReports'][i]['fiscalDateEnding'])
long_term_debt = bs_data['annualReports'][i].get('longTermDebt', '0')
bs['debt'].append(int(long_term_debt) / 1_000_000 if long_term_debt not in ['0', None, 'None', ''] else 0)
bs['current_assets'].append(int(bs_data['annualReports'][i]['totalCurrentAssets']) / 1_000_000)
bs['cash_equivalents'].append(int(bs_data['annualReports'][i]['cashAndCashEquivalentsAtCarryingValue']) / 1_000_000)
bs_df = pd.DataFrame(bs)
bs_df[['debt', 'current_assets', 'cash_equivalents']] = bs_df[['debt', 'current_assets', 'cash_equivalents']].round(0).astype(int)
bs_df= bs_df.sort_values(by=['dates'], ascending=True)
return bs_df
else:
print(f"Error fetching data: {response.status_code}")
return None
def get_ticker(company):
# Define the desired market
desired_market = 'India/Bombay'
# API URL to search for the company symbol
url = f'https://www.alphavantage.co/query?function=SYMBOL_SEARCH&keywords={company}&apikey={AV_API_KEY}'
# Make a GET request
r = requests.get(url)
# Parse the JSON response
data = r.json()
# Iterate over the bestMatches to find the symbol for the desired market
for match in data.get('bestMatches', []):
if match['4. region'] == desired_market :
symbol = match['1. symbol'].split('.')[0]
print(f"The symbol for {desired_market} is: {symbol}")
return symbol
elif match['4. region'] == 'United States' :
symbol = match['1. symbol']
print(f"The symbol for {desired_market} is: {symbol}")
return symbol
else:
print(f"No symbol found for the market: {desired_market}")
return None