Spaces:
Sleeping
Sleeping
| from langchain import OpenAI | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import CharacterTextSplitter | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_chroma import Chroma | |
| from dotenv import load_dotenv | |
| import os | |
| import requests | |
| import pandas as pd | |
# Pull settings from a local .env file (if present) into the process
# environment before any credentials are read.
load_dotenv()

# Credentials are read once, at import time, from these environment variables.
OPENAI_API_KEY = os.getenv('OPENAI_KEY')
AV_API_KEY = os.getenv('Alphavantage_key')

# Module-wide completion model, configured for deterministic output
# (temperature 0).
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
    model_name="gpt-3.5-turbo-instruct",
    max_tokens=-1,
)
def process_pdf(file_path):
    """Load a PDF, split it into text chunks, and index them in Chroma.

    The PDF is loaded page by page, re-split with a ``CharacterTextSplitter``
    (``chunk_size=1000``, no overlap), embedded with OpenAI embeddings and
    stored in a Chroma vector store.

    The OpenAI key comes from the module-level ``OPENAI_API_KEY``; it is
    exported as the ``OPENAI_API_KEY`` environment variable so that
    ``OpenAIEmbeddings()`` can pick it up.

    Args:
        file_path (str): The path to the uploaded PDF file.

    Returns:
        db: The Chroma database containing the embedded documents.
    """
    # Export the key only when one was configured: assigning None to
    # os.environ raises an opaque TypeError. Without a key, the embeddings
    # client is left to report the missing credential itself.
    if OPENAI_API_KEY is not None:
        os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

    # Load the PDF file
    loader = PyPDFLoader(file_path)
    pages = loader.load_and_split()

    # Split text into chunks
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(pages)

    # Create a Chroma database from the documents using OpenAI embeddings
    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embeddings)
| from openai import OpenAI | |
def get_model_response(query, context):
    """Ask the chat model to answer a question using only the given context.

    The context and question are embedded in a single prompt that is sent as
    the system message of a chat completion request (temperature 0, at most
    1024 completion tokens).

    Args:
        query (str): The user's question.
        context (str): Text the answer must be based on.

    Returns:
        The text content of the first completion choice.
    """
    load_dotenv()
    api_key = os.getenv('OPENAI_KEY')
    # Export the key only when it is set: assigning None to os.environ raises
    # an opaque TypeError. Without it, the OpenAI client falls back to
    # whatever OPENAI_API_KEY is already in the environment, or reports the
    # missing credential itself.
    if api_key is not None:
        os.environ['OPENAI_API_KEY'] = api_key

    prompt = f"""
    You are a chatbot that is supposed to give response to user query's about a company's financials based on the following context.
    You are given the following context:
    {context}
    You are asked to generate a short and accurate answer to the following question using the above context.
    question: {query}
    strictly do not hallucinate. Only use the above context to generate an answer. Please give your response in bullet points.
    Remove any unwanted characters or symbols.
    """

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        max_tokens=1024,
        temperature=0,
        messages=[
            {"role": "system", "content": prompt},
        ],
    )
    return response.choices[0].message.content
| # query = "How has the performance been in this year compared to last year?" | |
| # docs = db.similarity_search(query) | |
| # print(docs[0].page_content) | |
| # context=docs[0].page_content | |
def get_income_statement(symbol='INFY'):
    """Fetch annual income-statement figures for a symbol from Alpha Vantage.

    Up to the nine most recent annual reports returned by the API are used.
    Figures are converted to millions and rounded to whole numbers; a figure
    that is absent or reported as ``'None'`` is recorded as 0.

    Args:
        symbol (str): Ticker symbol to look up.

    Returns:
        A DataFrame with columns ``dates``, ``total_rev``, ``ebitda`` and
        ``net_income`` sorted by ascending date, or ``None`` when the request
        fails or the response contains no annual reports.
    """
    def to_millions(raw):
        # Missing figures may arrive as None, '' or the literal string
        # 'None'; int() would raise on those, so count them as zero.
        if raw in (None, 'None', ''):
            return 0.0
        return int(raw) / 1_000_000

    response = requests.get(
        "https://www.alphavantage.co/query",
        params={
            "function": "INCOME_STATEMENT",
            "symbol": symbol,
            "apikey": AV_API_KEY,
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    data = response.json()
    # A 200 response can still lack 'annualReports' (e.g. an error or notice
    # payload), so look the key up defensively instead of indexing.
    reports = data.get('annualReports') if isinstance(data, dict) else None
    if not reports:
        print(f"No data found for {symbol}")
        return None

    value_columns = ['total_rev', 'ebitda', 'net_income']
    # Slice rather than index 0..8 so that shorter histories do not raise.
    rows = [
        {
            'dates': report.get('fiscalDateEnding'),
            'total_rev': to_millions(report.get('totalRevenue')),
            'ebitda': to_millions(report.get('ebitda')),
            'net_income': to_millions(report.get('netIncome')),
        }
        for report in reports[:9]
    ]
    is_df = pd.DataFrame(rows, columns=['dates'] + value_columns)
    is_df = is_df.sort_values(by=['dates'], ascending=True)
    is_df[value_columns] = is_df[value_columns].round(0).astype(int)
    return is_df
def get_balance_sheet(symbol='INFY'):
    """Fetch annual balance-sheet figures for a symbol from Alpha Vantage.

    Up to the nine most recent annual reports returned by the API are used.
    Figures are converted to millions and rounded to whole numbers; a figure
    that is absent or reported as ``'None'`` is recorded as 0.

    Args:
        symbol (str): Ticker symbol to look up.

    Returns:
        A DataFrame with columns ``dates``, ``debt`` (long-term debt),
        ``current_assets`` and ``cash_equivalents`` sorted by ascending date,
        or ``None`` when the request fails or the response contains no
        annual reports.
    """
    def to_millions(raw):
        # Missing figures may arrive as None, '' or the literal string
        # 'None'; int() would raise on those, so count them as zero.
        if raw in (None, 'None', ''):
            return 0.0
        return int(raw) / 1_000_000

    response = requests.get(
        "https://www.alphavantage.co/query",
        params={
            "function": "BALANCE_SHEET",
            "symbol": symbol,
            "apikey": AV_API_KEY,
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    bs_data = response.json()
    # A 200 response can still lack 'annualReports' (e.g. an error or notice
    # payload), so look the key up defensively instead of indexing.
    reports = bs_data.get('annualReports') if isinstance(bs_data, dict) else None
    if not reports:
        print(f"No data found for {symbol}")
        return None

    value_columns = ['debt', 'current_assets', 'cash_equivalents']
    # Slice rather than index 0..8 so that shorter histories do not raise.
    rows = [
        {
            'dates': report.get('fiscalDateEnding'),
            'debt': to_millions(report.get('longTermDebt')),
            'current_assets': to_millions(report.get('totalCurrentAssets')),
            'cash_equivalents': to_millions(
                report.get('cashAndCashEquivalentsAtCarryingValue')
            ),
        }
        for report in reports[:9]
    ]
    bs_df = pd.DataFrame(rows, columns=['dates'] + value_columns)
    bs_df[value_columns] = bs_df[value_columns].round(0).astype(int)
    bs_df = bs_df.sort_values(by=['dates'], ascending=True)
    return bs_df
def get_ticker(company):
    """Look up a ticker symbol for a company name via Alpha Vantage search.

    The search results are scanned in the order returned and the first match
    listed in either 'India/Bombay' or 'United States' is used. For an
    India/Bombay listing the exchange suffix (everything from the first '.')
    is dropped from the symbol.

    Args:
        company (str): Company name or search keywords.

    Returns:
        The ticker symbol as a string, or ``None`` when no match in either
        region is found.
    """
    desired_market = 'India/Bombay'
    fallback_market = 'United States'

    # Pass the keywords through `params` so names containing spaces, '&' and
    # similar characters are URL-encoded instead of corrupting the query.
    r = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': 'SYMBOL_SEARCH',
            'keywords': company,
            'apikey': AV_API_KEY,
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    data = r.json()

    for match in data.get('bestMatches', []):
        region = match.get('4. region')
        symbol = match.get('1. symbol')
        if not symbol:
            continue
        if region == desired_market:
            symbol = symbol.split('.')[0]
        elif region != fallback_market:
            # A listing in some other region must not end the search; keep
            # looking through the remaining matches.
            continue
        # Report the region that actually matched, not always the desired one.
        print(f"The symbol for {region} is: {symbol}")
        return symbol

    print(f"No symbol found for the market: {desired_market}")
    return None