Spaces:

avimittal30
/

fundamental_analysis

Sleeping

File size: 6,395 Bytes

0e2d97d

from langchain import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
import os
import requests
import pandas as pd


# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Credentials come from the environment; each is None when its variable is unset.
OPENAI_API_KEY = os.getenv('OPENAI_KEY')
AV_API_KEY = os.getenv('Alphavantage_key')

# Module-level completion model, created once at import time.
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
    model_name="gpt-3.5-turbo-instruct",
    max_tokens=-1,
)

def process_pdf(file_path, chunk_size=1000, chunk_overlap=0):
    """
    Load a PDF, split it into text chunks, and index the chunks in a Chroma
    database using OpenAI embeddings.

    Args:
        file_path (str): The path to the uploaded PDF file.
        chunk_size (int): ``chunk_size`` passed to CharacterTextSplitter.
            Defaults to 1000.
        chunk_overlap (int): ``chunk_overlap`` passed to
            CharacterTextSplitter. Defaults to 0.

    Returns:
        db: The Chroma database containing the embedded documents.
    """
    # OpenAIEmbeddings() below is built without an explicit key, so export the
    # module-level key for it. Skip the export when OPENAI_KEY was not set:
    # assigning None into os.environ raises TypeError, and an OPENAI_API_KEY
    # that is already present in the environment should be left alone.
    if OPENAI_API_KEY:
        os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

    # Load the PDF file (already split by the loader's default splitter)
    loader = PyPDFLoader(file_path)
    pages = loader.load_and_split()

    # Split the loaded pages again into chunks of the requested size
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    documents = text_splitter.split_documents(pages)

    # Create a Chroma database from the documents using OpenAI embeddings
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(documents, embeddings)

    return db


from openai import OpenAI
def get_model_response(query, context):
    """
    Ask the chat model to answer a question using only the supplied context.

    Args:
        query (str): The user's question about a company's financials.
        context (str): Text the model is instructed to base its answer on.

    Returns:
        The content of the first choice in the chat completion response.
    """
    load_dotenv()
    api_key = os.getenv('OPENAI_KEY')
    # Only export the key when it is actually configured: assigning None into
    # os.environ raises TypeError, and an OPENAI_API_KEY already present in
    # the environment should be left untouched.
    if api_key:
        os.environ['OPENAI_API_KEY'] = api_key

    prompt = f"""
            You are a chatbot that is supposed to give response to user query's about a company's financials based on the following context.
            You are given the following context:
            {context}
            You are asked to generate a short and accurate answer to the following question using the above context.
            question: {query}
            strictly do not hallucinate. Only use the above context to generate an answer. Please give your response in bullet points.
            Remove any unwanted characters or symbols.
            """

    # The client is created without an explicit key, relying on the
    # environment variable exported above.
    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        max_tokens=1024,
        temperature=0,
        messages=[
            # The whole instruction, context and question go in one system message.
            {"role": "system", "content": prompt}
        ]
    )

    model_response = response.choices[0].message.content
    return model_response

# query = "How has the performance been in this year compared to last year?"
# docs = db.similarity_search(query)
# print(docs[0].page_content)
# context=docs[0].page_content


def get_income_statement(symbol='INFY', years=9):
    """
    Fetch annual income-statement figures for a symbol from Alpha Vantage.

    Args:
        symbol (str): Ticker symbol to query. Defaults to 'INFY'.
        years (int): Maximum number of annual reports to include, taken from
            the start of the API's ``annualReports`` list. Defaults to 9.
            Fewer rows are returned when fewer reports are available.

    Returns:
        pandas.DataFrame | None: Columns ``dates``, ``total_rev``, ``ebitda``
        and ``net_income`` (figures divided by 1,000,000 and rounded to
        integers), sorted by date ascending. Figures the API omits or reports
        as 'None' are recorded as 0. Returns None when the request does not
        return HTTP 200 or the response contains no annual reports.
    """
    load_dotenv()

    def _to_millions(raw):
        # A missing figure cannot go through int(); record it as 0 so the
        # integer cast of the columns below still works.
        if raw in (None, '', 'None'):
            return 0
        return int(raw) / 1_000_000

    url = "https://www.alphavantage.co/query"
    params = {
        "function": "INCOME_STATEMENT",
        "symbol": symbol,
        "apikey": AV_API_KEY
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, params=params, timeout=30)
    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    data = response.json()
    # A 200 response may still carry no 'annualReports' key (for example an
    # error or rate-limit payload), so look it up defensively.
    reports = data.get('annualReports') if isinstance(data, dict) else None
    if not reports:
        print(f"No data found for {symbol}")
        return None

    rev = {'dates': [], 'total_rev': [], 'ebitda': [], 'net_income': []}
    # Slicing (rather than indexing 0..8) tolerates short report histories.
    for report in reports[:years]:
        rev['dates'].append(report.get('fiscalDateEnding'))
        rev['total_rev'].append(_to_millions(report.get('totalRevenue')))
        rev['ebitda'].append(_to_millions(report.get('ebitda')))
        rev['net_income'].append(_to_millions(report.get('netIncome')))

    is_df = pd.DataFrame(rev)
    is_df = is_df.sort_values(by=['dates'], ascending=True)
    value_cols = ['total_rev', 'ebitda', 'net_income']
    is_df[value_cols] = is_df[value_cols].round(0).astype(int)
    return is_df



def get_balance_sheet(symbol='INFY', years=9):
    """
    Fetch annual balance-sheet figures for a symbol from Alpha Vantage.

    Args:
        symbol (str): Ticker symbol to query. Defaults to 'INFY'.
        years (int): Maximum number of annual reports to include, taken from
            the start of the API's ``annualReports`` list. Defaults to 9.
            Fewer rows are returned when fewer reports are available.

    Returns:
        pandas.DataFrame | None: Columns ``dates``, ``debt`` (long-term debt),
        ``current_assets`` and ``cash_equivalents`` (figures divided by
        1,000,000 and rounded to integers), sorted by date ascending. Figures
        the API omits or reports as 'None' are recorded as 0. Returns None
        when the request does not return HTTP 200 or the response contains no
        annual reports.
    """
    load_dotenv()

    def _to_millions(raw):
        # The original guarded only longTermDebt against missing values; the
        # same rule (missing -> 0) is now applied to every figure.
        if raw in (None, '', 'None'):
            return 0
        return int(raw) / 1_000_000

    url = "https://www.alphavantage.co/query"
    params = {
        "function": "BALANCE_SHEET",
        "symbol": symbol,
        "apikey": AV_API_KEY
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, params=params, timeout=30)
    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    bs_data = response.json()
    # A 200 response may still carry no 'annualReports' key (for example an
    # error or rate-limit payload), so look it up defensively.
    reports = bs_data.get('annualReports') if isinstance(bs_data, dict) else None
    if not reports:
        print(f"No data found for {symbol}")
        return None

    bs = {'dates': [], 'debt': [], 'current_assets': [], 'cash_equivalents': []}
    # Slicing (rather than indexing 0..8) tolerates short report histories.
    for report in reports[:years]:
        bs['dates'].append(report.get('fiscalDateEnding'))
        bs['debt'].append(_to_millions(report.get('longTermDebt')))
        bs['current_assets'].append(_to_millions(report.get('totalCurrentAssets')))
        bs['cash_equivalents'].append(
            _to_millions(report.get('cashAndCashEquivalentsAtCarryingValue'))
        )

    bs_df = pd.DataFrame(bs)
    value_cols = ['debt', 'current_assets', 'cash_equivalents']
    bs_df[value_cols] = bs_df[value_cols].round(0).astype(int)
    bs_df = bs_df.sort_values(by=['dates'], ascending=True)
    return bs_df



def get_ticker(company):
    """
    Look up a ticker symbol for a company name via Alpha Vantage SYMBOL_SEARCH.

    The first entry in ``bestMatches`` whose region is 'India/Bombay' or
    'United States' wins. For an India/Bombay match, everything from the
    first '.' in the symbol onwards is dropped; a United States symbol is
    returned unchanged.

    Args:
        company (str): Company name or keywords to search for.

    Returns:
        str | None: The matched symbol, or None when the request does not
        return HTTP 200 or no match is in an accepted region.
    """
    # Define the desired market
    desired_market = 'India/Bombay'

    # Pass the query as params so requests URL-encodes the company name;
    # names containing '&', '#' or spaces would otherwise corrupt the query.
    url = 'https://www.alphavantage.co/query'
    params = {
        'function': 'SYMBOL_SEARCH',
        'keywords': company,
        'apikey': AV_API_KEY,
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    r = requests.get(url, params=params, timeout=30)
    if r.status_code != 200:
        print(f"Error fetching data: {r.status_code}")
        return None

    # Parse the JSON response
    data = r.json()

    # Iterate over the bestMatches to find the first symbol in an accepted region
    for match in data.get('bestMatches', []):
        region = match.get('4. region')
        raw_symbol = match.get('1. symbol')
        if not raw_symbol:
            continue

        if region == desired_market:
            # Keep only the part of the symbol before the first '.'
            symbol = raw_symbol.split('.')[0]
        elif region == 'United States':
            symbol = raw_symbol
        else:
            continue

        # Report the region that actually matched (the original printed the
        # India/Bombay label even for United States matches).
        print(f"The symbol for {region} is: {symbol}")
        return symbol

    print(f"No symbol found for the market: {desired_market}")
    return None