Spaces:

sainathBelagavi
/

startupCaptain

Sleeping

App Files Files Community

startupCaptain / app.py

sainathBelagavi

Update app.py

896fecd verified almost 2 years ago

raw

history blame contribute delete

23.8 kB

	import streamlit as st
	from huggingface_hub import InferenceClient
	import wikipedia
	import re
	import requests
	from bs4 import BeautifulSoup
	import os
	import pickle
	from requests.exceptions import HTTPError
	from reportlab.lib.pagesizes import letter
	from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
	from reportlab.lib.styles import getSampleStyleSheet
	from reportlab.lib import colors
	import tempfile

	# Root URL under which the hosted inference endpoints live.
	base_url = "https://api-inference.huggingface.co/models/"
	# API token read from the environment; None when the variable is unset.
	API_KEY = os.getenv('HUGGINGFACE_API_KEY')

	# Display name -> full inference endpoint for every selectable model.
	model_links = {
	    "StartupCaptain🥇": f"{base_url}mistralai/Mistral-7B-Instruct-v0.2",
	}

	# Display name -> sidebar metadata (markdown description and logo path).
	model_info = {
	    "StartupCaptain🥇": dict(
	        description=(
	            "The StartupCaptain model is a Large Language Model (LLM) that's able to "
	            "predict the success potential of Indian startups based on various "
	            "factors as a Sucess full startup Founder.\n \n\n"
	            "This model can analyze startup data, including funding rounds, team "
	            "experience, industry, market size, user growth, and more to provide "
	            "insights into the startup's likelihood of success.\n"
	        ),
	        logo='./captain.jpg',
	    ),
	}

	def format_prompt(startup_details):
	    """Render startup details as a tagged text prompt for the model.

	    Every key/value pair becomes a ``Key: value`` line between
	    ``[STARTUP_DETAILS]`` and ``[/STARTUP_DETAILS]`` markers. The
	    ``funding_rounds`` entry is expanded into one bullet per round, with
	    ``N/A`` standing in for a missing type or amount. A closing instruction
	    sentence follows the block.

	    Args:
	        startup_details: Mapping of field name to value.

	    Returns:
	        The assembled prompt string.
	    """
	    parts = ["[STARTUP_DETAILS]\n"]
	    for field, content in startup_details.items():
	        label = field.capitalize()
	        if field != "funding_rounds":
	            parts.append(f"{label}: {content}\n")
	            continue
	        parts.append(f"{label}:\n")
	        parts.extend(
	            f"- Type: {entry.get('type', 'N/A')}, Amount: {entry.get('amount', 'N/A')}\n"
	            for entry in content
	        )
	    parts.append("[/STARTUP_DETAILS]\n")
	    parts.append("Fill in any missing details and provide a comprehensive analysis.")
	    return "".join(parts)

	def reset_conversation():
	    """Clear the stored chat and flag the session for a reset.

	    Empties the ``conversation`` and ``messages`` entries of Streamlit's
	    session state and sets ``chat_state`` to ``"reset"``.
	    """
	    session = st.session_state
	    session.conversation = []
	    session.messages = []
	    session.chat_state = "reset"

	def load_conversation_history():
	    """Load the persisted chat history from ``conversation_history.pickle``.

	    Returns:
	        The object stored in the history file, or an empty list when the
	        file does not exist or cannot be unpickled (empty, truncated or
	        otherwise corrupt).
	    """
	    history_file = "conversation_history.pickle"
	    # NOTE(security): unpickling can execute arbitrary code. This is only
	    # acceptable while the history file is written exclusively by this app.
	    try:
	        with open(history_file, "rb") as f:
	            return pickle.load(f)
	    except FileNotFoundError:
	        # First run: nothing has been saved yet.
	        return []
	    except (pickle.UnpicklingError, EOFError, AttributeError, ImportError, IndexError):
	        # A damaged history file must not stop the app from starting;
	        # begin with an empty conversation instead.
	        return []

	def save_conversation_history(conversation_history):
	    """Pickle ``conversation_history`` into ``conversation_history.pickle``.

	    The file is opened for binary writing, so any previous contents are
	    replaced.
	    """
	    target_path = "conversation_history.pickle"
	    with open(target_path, "wb") as sink:
	        pickle.Pickler(sink).dump(conversation_history)

	def extract_details_from_summary(summary):
	    """Pull the founding year and headquarters location out of a summary.

	    Args:
	        summary: Free-text description of the company. ``None`` or an empty
	            string yields only placeholder values.

	    Returns:
	        dict with the keys ``founded_year``, ``location``,
	        ``financial_size``, ``market_size`` and ``success_rate``. Only the
	        first two are derived from the text; everything that cannot be
	        determined is ``"N/A"``.
	    """
	    details = {
	        "founded_year": "N/A",
	        "location": "N/A",
	        "financial_size": "N/A",
	        "market_size": "N/A",
	        "success_rate": "N/A",
	    }
	    text = summary or ""

	    # A year is a standalone 4-digit number from 1800-2099. The word
	    # boundaries keep the pattern from matching the first four digits of a
	    # longer number such as "1000000 users".
	    year = r'\b((?:1[89]|20)\d{2})\b'
	    # Prefer a year that follows a founding verb within the same sentence;
	    # otherwise fall back to the first standalone year in the text.
	    year_match = re.search(
	        r'\b(?:founded|established|incorporated|launched)\b[^.]*?' + year,
	        text,
	        re.IGNORECASE,
	    ) or re.search(year, text)
	    if year_match:
	        details["founded_year"] = year_match.group(1)

	    # The location is the text between "headquartered in" and the next
	    # comma or full stop.
	    location_match = re.search(r'headquartered in (.*?)[,.]', text, re.IGNORECASE)
	    if location_match:
	        details["location"] = location_match.group(1).strip()

	    return details

	def scrape_startup_info(startup_name):
	    """Gather public information about a startup from several websites.

	    Wikipedia is queried first. When it yields no summary, the Crunchbase
	    organization page is scraped for founding year, industry and funding
	    rounds. The AngelList company page is then scraped for team members and
	    a tagline stored as ``user_growth``. Lookup failures are tolerated:
	    Wikipedia misses are ignored and scraping errors are shown with
	    ``st.error``.

	    Args:
	        startup_name: Company name as entered by the user.

	    Returns:
	        dict that always contains ``name``, ``summary``, ``founded_year``,
	        ``industry``, ``funding_rounds`` (list of dicts), ``team_members``,
	        ``user_growth``, ``financial_size``, ``market_size``,
	        ``success_rate`` and ``location``. Fields that could not be
	        determined hold ``"N/A"`` (``[]`` for ``funding_rounds``). Because
	        the placeholders are always filled in, the result is never empty.
	    """
	    from urllib.parse import quote

	    request_timeout = 10  # seconds; keeps a stalled site from hanging the app
	    # The name is recorded up front so it is present even when every
	    # lookup below fails.
	    startup_details = {'name': startup_name}
	    # User input goes into a URL path, so it is percent-encoded.
	    url_slug = quote(startup_name.replace(' ', '-'), safe='')

	    try:
	        startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
	        startup_details['summary'] = startup_summary
	        startup_details.update(extract_details_from_summary(startup_summary))
	    except (
	        wikipedia.exceptions.DisambiguationError,
	        wikipedia.exceptions.PageError,
	        ValueError,
	        requests.exceptions.RequestException,
	    ):
	        # No usable Wikipedia article; the fallbacks below take over.
	        pass

	    if 'summary' not in startup_details:
	        try:
	            crunchbase_url = f"https://www.crunchbase.com/organization/{url_slug}"
	            response = requests.get(crunchbase_url, timeout=request_timeout)
	            if response.status_code == 200:
	                soup = BeautifulSoup(response.content, "html.parser")

	                founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
	                if founded_year_elem:
	                    year_text = founded_year_elem.text.strip()
	                    # Keep the raw text when it is not a plain number rather
	                    # than abandoning the rest of the page.
	                    startup_details["founded_year"] = int(year_text) if year_text.isdigit() else year_text

	                industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
	                if industry_elem:
	                    startup_details["industry"] = industry_elem.text.strip()

	                funding_rounds = []
	                for round_elem in soup.select("div[data-field='funding_rounds'] ul li"):
	                    round_details = {}
	                    round_type = round_elem.select_one("span.component--field-formatter")
	                    if round_type:
	                        round_details["type"] = round_type.text.strip()
	                    round_amount = round_elem.select_one("span.component--field-formatter + span")
	                    if round_amount:
	                        round_details["amount"] = round_amount.text.strip()
	                    funding_rounds.append(round_details)
	                startup_details["funding_rounds"] = funding_rounds
	        except Exception as e:
	            # Best-effort source: report the problem and carry on.
	            st.error(f"Error scraping Crunchbase: {e}")

	    # NOTE(review): this step is treated as independent of the Crunchbase
	    # fallback, i.e. it runs whether or not Wikipedia produced a summary.
	    try:
	        angellist_url = f"https://angel.co/company/{url_slug}"
	        response = requests.get(angellist_url, timeout=request_timeout)
	        if response.status_code == 200:
	            soup = BeautifulSoup(response.content, "html.parser")

	            team_members = []
	            for member_elem in soup.select("div.team-member"):
	                member_name = member_elem.select_one("div.name")
	                if member_name:
	                    team_members.append(member_name.text.strip())
	            startup_details["team_members"] = team_members

	            user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
	            if user_growth_elem:
	                startup_details["user_growth"] = user_growth_elem.text.strip()
	    except Exception as e:
	        # Best-effort source: report the problem and carry on.
	        st.error(f"Error scraping AngelList: {e}")

	    # Fill in placeholders without overwriting anything found above, such
	    # as a location extracted from the Wikipedia summary.
	    placeholders = {
	        'summary': "N/A",
	        'founded_year': "N/A",
	        'industry': "N/A",
	        'funding_rounds': [],
	        'team_members': "N/A",
	        'user_growth': "N/A",
	        'financial_size': "N/A",
	        'market_size': "N/A",
	        'success_rate': "N/A",
	        'location': "N/A",
	    }
	    for field, placeholder in placeholders.items():
	        startup_details.setdefault(field, placeholder)

	    return startup_details

	def wrap_text(text, width):
	    """Greedily wrap ``text`` into lines of at most ``width`` characters.

	    Words are never split: a single word longer than ``width`` is placed on
	    a line of its own. Runs of whitespace in the input collapse to single
	    spaces.

	    Args:
	        text: The string to wrap.
	        width: Maximum line length in characters.

	    Returns:
	        The wrapped lines joined with ``"\\n"``; an empty string when
	        ``text`` contains no words.
	    """
	    lines = []
	    current_line = []
	    current_length = 0  # length of ' '.join(current_line), kept incrementally

	    for word in text.split():
	        # One extra character for the joining space unless the line is empty.
	        projected = current_length + (1 if current_line else 0) + len(word)
	        # Only break when something is already on the line; otherwise an
	        # over-long first word would produce an empty leading line.
	        if current_line and projected > width:
	            lines.append(' '.join(current_line))
	            current_line = [word]
	            current_length = len(word)
	        else:
	            current_line.append(word)
	            current_length = projected
	    if current_line:
	        lines.append(' '.join(current_line))

	    return "\n".join(lines)

	def generate_pdf_report(startup_details):
	    """Render the startup details as a one-table PDF and return its path.

	    Args:
	        startup_details: Mapping with the report fields (``name``,
	            ``summary``, ``financial_size``, ``market_size``,
	            ``success_rate``, ``founded_year``, ``location``,
	            ``funding_rounds``, ``team_members``, ``user_growth``). Missing
	            keys are rendered as ``N/A``.

	    Returns:
	        Path of a newly created temporary ``.pdf`` file. The file is not
	        deleted automatically; the caller owns it.

	    Raises:
	        Any exception raised while building the document is re-raised after
	        the temporary file has been removed.
	    """
	    def field(key):
	        return startup_details.get(key, "N/A")

	    team_members = field('team_members')
	    if isinstance(team_members, list):
	        team_members = ", ".join(str(member) for member in team_members)

	    data = [
	        ["Field", "Details"],
	        ["Startup Name", field('name')],
	        # Pre-wrap the summary so long text breaks inside the cell.
	        ["Summary", wrap_text(str(field('summary')), 50)],
	        ["Financial Size", field('financial_size')],
	        ["Market Size", field('market_size')],
	        ["Success Rate", field('success_rate')],
	        ["Founding Year", field('founded_year')],
	        ["Location", field('location')],
	        ["Funding Rounds", ""],
	    ]
	    # One continuation row per funding round, under the "Funding Rounds" row.
	    for round_details in startup_details.get('funding_rounds') or []:
	        data.append(["", f"- Type: {round_details.get('type', 'N/A')}, Amount: {round_details.get('amount', 'N/A')}"])
	    data += [
	        ["Team Members", team_members],
	        ["User Growth", field('user_growth')],
	    ]

	    table = Table(data, colWidths=[150, 350])
	    table.setStyle(TableStyle([
	        # Header row.
	        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
	        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
	        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
	        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	        ('FONTSIZE', (0, 0), (-1, 0), 12),
	        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
	        # Body rows.
	        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
	        ('GRID', (0, 0), (-1, -1), 1, colors.black),
	    ]))

	    styles = getSampleStyleSheet()
	    elements = [Paragraph("Startup Report", styles['Title']), table]

	    # Reserve a unique file name, then close the handle before the document
	    # is written: the PDF is written through the path, and a second open of
	    # a still-open temporary file fails on Windows.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	        pdf_path = tmp_file.name

	    try:
	        SimpleDocTemplate(pdf_path, pagesize=letter).build(elements)
	    except Exception:
	        # Do not leave an orphaned temporary file behind on failure.
	        try:
	            os.remove(pdf_path)
	        except OSError:
	            pass
	        raise

	    return pdf_path

	# Sidebar controls: model picker, sampling temperature and a reset button.
	models = list(model_links)
	selected_model = st.sidebar.selectbox("Select Model", models)
	temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
	st.sidebar.button('Reset Chat', on_click=reset_conversation)

	# Describe the chosen model underneath the controls.
	st.sidebar.write(f"You're now chatting with {selected_model}")
	st.sidebar.markdown(model_info[selected_model]['description'])
	st.sidebar.image(model_info[selected_model]['logo'])

	# Remember which model was picked; switching to another one clears the
	# messages kept in the session.
	if "prev_option" not in st.session_state:
	    st.session_state["prev_option"] = selected_model
	elif st.session_state["prev_option"] != selected_model:
	    st.session_state["messages"] = []
	    st.session_state["prev_option"] = selected_model

	# First-run defaults for the remaining session entries.
	if "chat_state" not in st.session_state:
	    st.session_state["chat_state"] = "normal"

	if "messages" not in st.session_state:
	    # Start from whatever was persisted by an earlier session.
	    st.session_state["messages"] = load_conversation_history()

	if "asked_questions" not in st.session_state:
	    # Normalised question text -> answer already given for it.
	    st.session_state["asked_questions"] = {}

	repo_id = model_links[selected_model]
	st.subheader(f'{selected_model}')

	# Label in the model's answer -> key it updates in the startup details.
	_ANALYSIS_LABELS = {
	    'Financial Size': 'financial_size',
	    'Market Size': 'market_size',
	    'Success Rate': 'success_rate',
	    'Location': 'location',
	}


	def _generate_response(prompt_text, model_name, temperature, max_new_tokens=3000):
	    """Stream a completion for ``prompt_text`` and return it as one string."""
	    # The token comes from the HUGGINGFACE_API_KEY environment variable;
	    # when it is unset (None) the client falls back to its own defaults.
	    client = InferenceClient(model=model_links[model_name], token=API_KEY)
	    stream = client.text_generation(
	        prompt_text,
	        temperature=temperature,
	        max_new_tokens=max_new_tokens,
	        stream=True,
	    )
	    pieces = []
	    for chunk in stream:
	        # Chunks are expected to be plain strings; dict-shaped chunks
	        # carrying a "text" entry are accepted as well.
	        if isinstance(chunk, dict) and "text" in chunk:
	            pieces.append(chunk["text"])
	        else:
	            pieces.append(chunk)
	    return "".join(pieces)


	def _merge_analysis_fields(startup_details, response):
	    """Copy labelled values from the model's answer into ``startup_details``.

	    A line mentioning ``Financial Size``, ``Market Size``, ``Success Rate``
	    or ``Location`` updates the matching key with the text after its first
	    ``': '`` (up to the next ``': '``, if any). When no funding rounds are
	    known yet, the ``- Type`` bullets directly below a ``Funding Rounds``
	    line are parsed into the ``funding_rounds`` list. Lines that do not fit
	    these shapes are skipped instead of raising.
	    """
	    lines = response.split('\n')
	    for index, line in enumerate(lines):
	        label = next((name for name in _ANALYSIS_LABELS if name in line), None)
	        if label is not None:
	            pieces = line.split(': ')
	            if len(pieces) > 1:
	                startup_details[_ANALYSIS_LABELS[label]] = pieces[1]
	        elif 'Funding Rounds' in line and not startup_details.get('funding_rounds'):
	            rounds = []
	            # The position comes from enumerate, so a repeated line cannot
	            # send the scan back to an earlier occurrence.
	            for funding_line in lines[index + 1:]:
	                if '- Type' not in funding_line:
	                    break
	                type_match = re.search(r'Type: (.*?),', funding_line)
	                amount_match = re.search(r'Amount: (.*)', funding_line)
	                rounds.append({
	                    'type': type_match.group(1) if type_match else 'N/A',
	                    'amount': amount_match.group(1) if amount_match else 'N/A',
	                })
	            startup_details['funding_rounds'] = rounds


	def _report_generation_error(error):
	    """Show a user-facing message for a failed generation request."""
	    if isinstance(error, ValueError):
	        if "Input validation error" in str(error):
	            st.error("Error: The input prompt is too long. Please try a shorter prompt.")
	        else:
	            st.error(f"An error occurred: {error}")
	    else:
	        st.error(f"An unexpected error occurred: {error}")


	def _handle_chat_prompt(prompt, model_name, temperature):
	    """Answer one chat prompt and record the exchange in the session."""
	    question_key = prompt.lower().strip()
	    answered = st.session_state.asked_questions

	    # A repeated question replays the stored answer without a new request.
	    if question_key in answered:
	        st.markdown(answered[question_key])
	        return

	    startup_details = None
	    if "predict success of" in prompt.lower():
	        # The name is everything after the phrase up to the first '?' or,
	        # when there is no question mark, the end of the prompt.
	        name_match = re.search(r'predict success of([^?]*)', prompt, re.IGNORECASE)
	        startup_name = name_match.group(1).strip() if name_match else ""
	        if not startup_name:
	            st.write("Please include the startup's name, for example: 'Predict success of Flipkart?'")
	            return
	        startup_details = scrape_startup_info(startup_name)
	        if not startup_details:
	            st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
	            return
	        prompt_text = format_prompt(startup_details)
	    else:
	        prompt_text = format_prompt({"question": prompt})

	    with st.chat_message("user"):
	        st.markdown(prompt)
	    st.session_state.messages.append({"role": "user", "content": prompt})

	    with st.chat_message("assistant"):
	        try:
	            response = _generate_response(prompt_text, model_name, temperature)
	            # The chat shows the raw answer; splitting it into individual
	            # fields is only needed for the PDF report.
	            if startup_details is not None:
	                st.markdown(f"Success Analysis for {startup_details['name']}\n\n{response}")
	            else:
	                st.markdown(response)
	            answered[question_key] = response
	        except Exception as error:
	            _report_generation_error(error)
	        else:
	            # Only successful answers are added to the persisted history.
	            st.session_state.messages.append({"role": "assistant", "content": response})
	            save_conversation_history(st.session_state.messages)


	def _render_report_section(input_key, button_key, model_name, temperature):
	    """Render one "company name + Download Report" form and serve the PDF."""
	    company_name = st.text_input('Enter the company name for the report:', key=input_key)
	    # The button is evaluated first so it is rendered even without a name.
	    if not (st.button('Download Report', key=button_key) and company_name):
	        return

	    startup_details = scrape_startup_info(company_name)
	    if not startup_details:
	        st.error(f"No information found for the company '{company_name}'.")
	        return

	    prompt_text = format_prompt(startup_details)
	    try:
	        response = _generate_response(prompt_text, model_name, temperature)
	        _merge_analysis_fields(startup_details, response)

	        pdf_path = generate_pdf_report(startup_details)
	        try:
	            with open(pdf_path, "rb") as pdf_file:
	                pdf_bytes = pdf_file.read()
	        finally:
	            # The bytes are in memory now, so the temporary file can go.
	            # Failing to delete it must not block the download.
	            try:
	                os.remove(pdf_path)
	            except OSError:
	                pass

	        st.download_button(
	            label="Download PDF",
	            data=pdf_bytes,
	            file_name=f"{company_name}_report.pdf",
	            mime="application/pdf",
	        )
	    except Exception as error:
	        _report_generation_error(error)


	if st.session_state.chat_state == "normal":
	    # Replay the conversation so far, then wait for the next prompt.
	    for message in st.session_state.messages:
	        with st.chat_message(message["role"]):
	            st.markdown(message["content"])

	    if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):
	        _handle_chat_prompt(prompt, selected_model, temp_values)

	elif st.session_state.chat_state == "reset":
	    st.session_state.chat_state = "normal"
	    # st.rerun is used when the installed Streamlit provides it; otherwise
	    # the older experimental name is called.
	    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
	    _rerun()

	# NOTE(review): two identical report forms are rendered (distinct widget
	# keys). Both are kept so the page layout is unchanged; confirm whether the
	# second one is intentional.
	_render_report_section('company_name_input', 'download_report_button', selected_model, temp_values)
	_render_report_section('company_name_input2', 'download_report_button2', selected_model, temp_values)