Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from huggingface_hub import InferenceClient | |
| import wikipedia | |
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import os | |
| import pickle | |
| from requests.exceptions import HTTPError | |
| from reportlab.lib.pagesizes import letter | |
| from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph | |
| from reportlab.lib.styles import getSampleStyleSheet | |
| from reportlab.lib import colors | |
| import tempfile | |
# Hugging Face Inference API configuration.
base_url = "https://api-inference.huggingface.co/models/"
# API key read from the environment; None when unset.
API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
# Display name -> fully-qualified inference endpoint URL.
model_links = {
    "StartupCaptain🥇": base_url + "mistralai/Mistral-7B-Instruct-v0.2",
}
# Sidebar metadata per model: markdown description and logo image path.
model_info = {
    "StartupCaptain🥇": {
        'description': """The StartupCaptain model is a Large Language Model (LLM) that's able to predict the success potential of Indian startups based on various factors as a Sucess full startup Founder.\n \n\nThis model can analyze startup data, including funding rounds, team experience, industry, market size, user growth, and more to provide insights into the startup's likelihood of success.\n""",
        'logo': './captain.jpg'
    },
}
def format_prompt(startup_details):
    """Build a [STARTUP_DETAILS]-delimited prompt string for the LLM.

    Args:
        startup_details: dict of startup attributes. The special key
            "funding_rounds" maps to a list of dicts with "type"/"amount";
            every other key is rendered as a single "Key: value" line.

    Returns:
        The formatted prompt, ending with an instruction to fill in gaps.
    """
    prompt = "[STARTUP_DETAILS]\n"
    for key, value in startup_details.items():
        # Funding rounds are a list of dicts, so render one bullet per round
        # instead of dumping the raw list repr.
        if key == "funding_rounds":
            prompt += f"{key.capitalize()}:\n"
            for round_details in value:
                prompt += f"- Type: {round_details.get('type', 'N/A')}, Amount: {round_details.get('amount', 'N/A')}\n"
        else:
            prompt += f"{key.capitalize()}: {value}\n"
    prompt += "[/STARTUP_DETAILS]\n"
    prompt += "Fill in any missing details and provide a comprehensive analysis."
    return prompt
def reset_conversation():
    """Callback for the sidebar 'Reset Chat' button: wipe the chat state
    and flag the UI so the next run performs a clean rerun."""
    state = st.session_state
    state.conversation = []
    state.messages = []
    state.chat_state = "reset"
def load_conversation_history():
    """Load previously persisted chat messages from disk.

    Returns:
        The unpickled list of message dicts, or [] when no history file
        exists or the file is unreadable/corrupt (a truncated pickle must
        not crash the app on startup).
    """
    history_file = "conversation_history.pickle"
    if not os.path.exists(history_file):
        return []
    try:
        with open(history_file, "rb") as f:
            # NOTE: pickle is only acceptable here because this file is
            # written locally by save_conversation_history(); never load
            # pickles from untrusted sources.
            return pickle.load(f)
    except (pickle.UnpicklingError, EOFError, OSError):
        # Corrupt or half-written history: start with a fresh conversation.
        return []
def save_conversation_history(conversation_history):
    """Persist the chat message list to disk so it survives app restarts."""
    with open("conversation_history.pickle", "wb") as fh:
        pickle.dump(conversation_history, fh)
def extract_details_from_summary(summary):
    """Pull structured fields out of a free-text Wikipedia summary.

    Args:
        summary: plain-text summary paragraph.

    Returns:
        dict with keys founded_year, location, financial_size, market_size,
        success_rate; fields that cannot be extracted stay "N/A".
    """
    details = {
        "founded_year": "N/A",
        "location": "N/A",
        "financial_size": "N/A",
        "market_size": "N/A",
        "success_rate": "N/A"
    }
    # Match a plausible standalone year (1800-2099). Word boundaries stop us
    # grabbing four digits out of a longer number such as "10000 employees".
    year_match = re.search(r'\b(1[89]\d{2}|20\d{2})\b', summary)
    if year_match:
        details["founded_year"] = year_match.group(1)
    # Lazily capture the location up to the first comma or period.
    location_match = re.search(r'headquartered in (.*?)[,.]', summary)
    if location_match:
        details["location"] = location_match.group(1)
    return details
def scrape_startup_info(startup_name):
    """Collect best-effort details about a startup from public sources.

    Tries Wikipedia first; when no summary is found, scrapes Crunchbase for
    founding year / industry / funding rounds. AngelList is scraped for team
    members and a growth tagline. Every expected key is backfilled with
    "N/A" (or [] for funding_rounds) so the returned dict is always complete
    and truthy.

    Args:
        startup_name: company name as entered by the user.

    Returns:
        dict with keys: name, summary, founded_year, industry,
        funding_rounds, team_members, user_growth, financial_size,
        market_size, success_rate, location.
    """
    startup_details = {}
    # --- Source 1: Wikipedia summary (preferred) ---
    try:
        startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
        startup_details['name'] = startup_name
        startup_details['summary'] = startup_summary
        extracted_details = extract_details_from_summary(startup_summary)
        startup_details.update(extracted_details)
    except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
        # Best-effort lookup: silently fall through to the scrapers below.
        pass
    # --- Source 2: Crunchbase, only when Wikipedia gave us nothing ---
    if 'summary' not in startup_details:
        try:
            crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
            # timeout= keeps a dead host from hanging the Streamlit script forever.
            response = requests.get(crunchbase_url, timeout=10)
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, "html.parser")
                startup_details["name"] = startup_name
                founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
                if founded_year_elem:
                    startup_details["founded_year"] = int(founded_year_elem.text.strip())
                industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
                if industry_elem:
                    startup_details["industry"] = industry_elem.text.strip()
                funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
                funding_rounds = []
                for round_elem in funding_rounds_elem:
                    round_details = {}
                    round_type = round_elem.select_one("span.component--field-formatter")
                    if round_type:
                        round_details["type"] = round_type.text.strip()
                    round_amount = round_elem.select_one("span.component--field-formatter + span")
                    if round_amount:
                        round_details["amount"] = round_amount.text.strip()
                    funding_rounds.append(round_details)
                startup_details["funding_rounds"] = funding_rounds
        except Exception as e:
            st.error(f"Error scraping Crunchbase: {e}")
    # --- Source 3: AngelList for team members and user-growth tagline ---
    try:
        angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
        response = requests.get(angellist_url, timeout=10)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, "html.parser")
            team_members_elem = soup.select("div.team-member")
            team_members = []
            for member_elem in team_members_elem:
                member_name = member_elem.select_one("div.name")
                if member_name:
                    team_members.append(member_name.text.strip())
            startup_details["team_members"] = team_members
            user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
            if user_growth_elem:
                startup_details["user_growth"] = user_growth_elem.text.strip()
    except Exception as e:
        st.error(f"Error scraping AngelList: {e}")
    # --- Backfill defaults so downstream code never hits KeyError ---
    # 'name' was previously only set when a source succeeded, which made
    # generate_pdf_report crash on a total scrape failure; always set it.
    if 'name' not in startup_details:
        startup_details['name'] = startup_name
    if 'summary' not in startup_details:
        startup_details['summary'] = "N/A"
    if 'founded_year' not in startup_details:
        startup_details['founded_year'] = "N/A"
    if 'industry' not in startup_details:
        startup_details['industry'] = "N/A"
    if 'funding_rounds' not in startup_details:
        startup_details['funding_rounds'] = []
    if 'team_members' not in startup_details:
        startup_details['team_members'] = "N/A"
    if 'user_growth' not in startup_details:
        startup_details['user_growth'] = "N/A"
    # These are reset unconditionally and later filled in from the LLM reply.
    # NOTE(review): this also clobbers the 'location' that
    # extract_details_from_summary may have found — confirm intended.
    startup_details['financial_size'] = "N/A"
    startup_details['market_size'] = "N/A"
    startup_details['success_rate'] = "N/A"
    startup_details['location'] = "N/A"
    return startup_details
def wrap_text(text, width):
    """Greedy word-wrap: break text into lines of at most `width` characters.

    Words longer than `width` are kept intact on their own line (never
    split). Fixes a defect in the original where a first word longer than
    `width` emitted a spurious leading empty line.

    Args:
        text: input string; split on any whitespace.
        width: maximum joined line length in characters.

    Returns:
        The wrapped text with lines joined by newlines; "" for empty input.
    """
    lines = []
    current_line = []
    for word in text.split():
        # Joined length of current_line + " " + word is
        # sum(word lengths) + one separating space per existing word.
        # Only wrap when the current line is non-empty, otherwise an
        # over-long first word would flush an empty line.
        if current_line and sum(len(w) for w in current_line) + len(current_line) + len(word) > width:
            lines.append(' '.join(current_line))
            current_line = [word]
        else:
            current_line.append(word)
    if current_line:
        lines.append(' '.join(current_line))
    return "\n".join(lines)
def generate_pdf_report(startup_details):
    """Render the scraped startup details into a single-table PDF report.

    Args:
        startup_details: complete details dict from scrape_startup_info().

    Returns:
        Path of a temporary .pdf file (not auto-deleted; caller owns it).
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        document = SimpleDocTemplate(tmp_file.name, pagesize=letter)
        stylesheet = getSampleStyleSheet()
        story = [Paragraph("Startup Report", stylesheet['Title'])]
        # Team members may arrive as a list (AngelList) or as the "N/A" string.
        team = startup_details['team_members']
        if isinstance(team, list):
            team = ", ".join(team)
        rows = [["Field", "Details"]]
        rows.append(["Startup Name", startup_details['name']])
        rows.append(["Summary", wrap_text(startup_details['summary'], 50)])
        rows.append(["Financial Size", startup_details['financial_size']])
        rows.append(["Market Size", startup_details['market_size']])
        rows.append(["Success Rate", startup_details['success_rate']])
        rows.append(["Founding Year", startup_details['founded_year']])
        rows.append(["Location", startup_details['location']])
        rows.append(["Funding Rounds", ""])
        for rnd in startup_details['funding_rounds']:
            rows.append(["", f"- Type: {rnd.get('type', 'N/A')}, Amount: {rnd.get('amount', 'N/A')}"])
        rows.append(["Team Members", team])
        rows.append(["User Growth", startup_details['user_growth']])
        grid = Table(rows, colWidths=[150, 350])
        grid.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 12),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        story.append(grid)
        document.build(story)
        return tmp_file.name
# --- Sidebar: model selection, temperature, reset button, model info ---
models = [key for key in model_links.keys()]  # selectable display names
selected_model = st.sidebar.selectbox("Select Model", models)
# Sampling temperature forwarded to every text_generation call below.
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
st.sidebar.button('Reset Chat', on_click=reset_conversation)  # Reset button
st.sidebar.write(f"You're now chatting with {selected_model}")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
# Wipe the visible history whenever the user switches models.
if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model
if st.session_state.prev_option != selected_model:
    st.session_state.messages = []
    st.session_state.prev_option = selected_model
# First-run session-state initialisation.
if "chat_state" not in st.session_state:
    st.session_state.chat_state = "normal"
if "messages" not in st.session_state:
    st.session_state.messages = load_conversation_history()
if "asked_questions" not in st.session_state:
    # Cache of normalised question -> previously generated answer.
    st.session_state.asked_questions = {}
repo_id = model_links[selected_model]  # NOTE(review): assigned but unused below
st.subheader(f'{selected_model}')
# --- Main chat panel: replay history, then handle the next user prompt ---
if st.session_state.chat_state == "normal":
    # Replay the stored conversation so it survives Streamlit reruns.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):
        question_key = prompt.lower().strip()  # cache key for repeat questions
        if "predict success of" in prompt.lower():
            # Success-prediction request: scrape startup data, then ask the LLM.
            if question_key in st.session_state.asked_questions:
                # Serve the cached answer without another inference call.
                response = st.session_state.asked_questions[question_key]
                st.markdown(response)
            else:
                # Expects phrasing like "predict success of <name>?".
                startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
                if startup_name_match:
                    startup_name = startup_name_match.group(1).strip()
                    startup_details = scrape_startup_info(startup_name)
                    if startup_details:
                        with st.chat_message("user"):
                            st.markdown(prompt)
                        st.session_state.messages.append({"role": "user", "content": prompt})
                        # NOTE(review): conversation_history and custom_instruction
                        # are built but never passed to the model — presumably they
                        # were meant to be part of the prompt; confirm before removing.
                        conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
                        custom_instruction = f"Based on the provided startup details or information and your knowledge of the industry, and as an experienced startup founder, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
                        formatted_text = format_prompt(startup_details)
                        with st.chat_message("assistant"):
                            client = InferenceClient(model=model_links[selected_model])
                            max_new_tokens = 3000
                            try:
                                # Stream the generation chunk by chunk.
                                output = client.text_generation(
                                    formatted_text,
                                    temperature=temp_values,
                                    max_new_tokens=max_new_tokens,
                                    stream=True
                                )
                                response = ""
                                for output_chunk in output:
                                    # Chunks may arrive as dicts or plain strings.
                                    if isinstance(output_chunk, dict) and "text" in output_chunk:
                                        response += output_chunk["text"]
                                    else:
                                        response += output_chunk
                                # Backfill scraped fields from "Field: value" lines
                                # found in the model's answer.
                                lines = response.split('\n')
                                for line in lines:
                                    if 'Financial Size' in line:
                                        startup_details['financial_size'] = line.split(': ')[1]
                                    elif 'Market Size' in line:
                                        startup_details['market_size'] = line.split(': ')[1]
                                    elif 'Success Rate' in line:
                                        startup_details['success_rate'] = line.split(': ')[1]
                                    elif 'Location' in line:
                                        startup_details['location'] = line.split(': ')[1]
                                    elif 'Funding Rounds' in line and len(startup_details['funding_rounds']) == 0:
                                        startup_details['funding_rounds'] = []
                                        # Collect the "- Type: ..., Amount: ..." bullets
                                        # that follow, stopping at the first non-bullet.
                                        for funding_line in lines[lines.index(line) + 1:]:
                                            if '- Type' in funding_line:
                                                round_details = {}
                                                round_details['type'] = re.search(r'Type: (.*?),', funding_line).group(1)
                                                round_details['amount'] = re.search(r'Amount: (.*)', funding_line).group(1)
                                                startup_details['funding_rounds'].append(round_details)
                                            else:
                                                break
                                st.markdown(f"Success Analysis for {startup_details['name']}\n\n{response}")
                                st.session_state.asked_questions[question_key] = response
                            except ValueError as e:
                                if "Input validation error" in str(e):
                                    st.error("Error: The input prompt is too long. Please try a shorter prompt.")
                                else:
                                    st.error(f"An error occurred: {e}")
                            except Exception as e:
                                st.error(f"An unexpected error occurred: {e}")
                            else:
                                # try/except/else: persist the answer only on success.
                                st.session_state.messages.append({"role": "assistant", "content": response})
                                save_conversation_history(st.session_state.messages)
                    else:
                        st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
        else:
            # Ordinary chat message (no success-prediction phrase).
            if question_key in st.session_state.asked_questions:
                response = st.session_state.asked_questions[question_key]
                st.markdown(response)
            else:
                with st.chat_message("user"):
                    st.markdown(prompt)
                st.session_state.messages.append({"role": "user", "content": prompt})
                # NOTE(review): conversation_history is built but unused here too.
                conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
                formatted_text = format_prompt({"question": prompt})
                with st.chat_message("assistant"):
                    client = InferenceClient(model=model_links[selected_model])
                    max_new_tokens = 3000
                    try:
                        output = client.text_generation(
                            formatted_text,
                            temperature=temp_values,
                            max_new_tokens=max_new_tokens,
                            stream=True
                        )
                        response = ""
                        for output_chunk in output:
                            if isinstance(output_chunk, dict) and "text" in output_chunk:
                                response += output_chunk["text"]
                            else:
                                response += output_chunk
                        st.markdown(response)
                        st.session_state.asked_questions[question_key] = response
                    except ValueError as e:
                        if "Input validation error" in str(e):
                            st.error("Error: The input prompt is too long. Please try a shorter prompt.")
                        else:
                            st.error(f"An error occurred: {e}")
                    except Exception as e:
                        st.error(f"An unexpected error occurred: {e}")
                    else:
                        # Persist only when generation succeeded.
                        st.session_state.messages.append({"role": "assistant", "content": response})
                        save_conversation_history(st.session_state.messages)
elif st.session_state.chat_state == "reset":
    # One extra rerun after the reset button so the UI rebuilds cleanly.
    st.session_state.chat_state = "normal"
    st.experimental_rerun()
def _render_report_download(input_key, button_key):
    """Render one 'company report' text input + download-button pair.

    Scrapes the named company, asks the LLM for an analysis, backfills any
    "Field: value" details the model supplies, and offers the result as a
    PDF download. Reads the module-level selected_model / temp_values /
    model_links set up by the sidebar.

    Args:
        input_key: Streamlit widget key for the text input.
        button_key: Streamlit widget key for the button.
    """
    company_name = st.text_input('Enter the company name for the report:', key=input_key)
    if st.button('Download Report', key=button_key) and company_name:
        startup_details = scrape_startup_info(company_name)
        if startup_details:
            formatted_text = format_prompt(startup_details)
            client = InferenceClient(model=model_links[selected_model])
            max_new_tokens = 3000
            try:
                # Stream the generation chunk by chunk.
                output = client.text_generation(
                    formatted_text,
                    temperature=temp_values,
                    max_new_tokens=max_new_tokens,
                    stream=True
                )
                response = ""
                for output_chunk in output:
                    # Chunks may arrive as dicts or plain strings.
                    if isinstance(output_chunk, dict) and "text" in output_chunk:
                        response += output_chunk["text"]
                    else:
                        response += output_chunk
                # Backfill scraped fields from "Field: value" lines in the answer.
                lines = response.split('\n')
                for line in lines:
                    if 'Financial Size' in line:
                        startup_details['financial_size'] = line.split(': ')[1]
                    elif 'Market Size' in line:
                        startup_details['market_size'] = line.split(': ')[1]
                    elif 'Success Rate' in line:
                        startup_details['success_rate'] = line.split(': ')[1]
                    elif 'Location' in line:
                        startup_details['location'] = line.split(': ')[1]
                    elif 'Funding Rounds' in line and len(startup_details['funding_rounds']) == 0:
                        startup_details['funding_rounds'] = []
                        # Consume the "- Type: ..., Amount: ..." bullets that follow.
                        for funding_line in lines[lines.index(line) + 1:]:
                            if '- Type' in funding_line:
                                round_details = {}
                                round_details['type'] = re.search(r'Type: (.*?),', funding_line).group(1)
                                round_details['amount'] = re.search(r'Amount: (.*)', funding_line).group(1)
                                startup_details['funding_rounds'].append(round_details)
                            else:
                                break
                pdf_file = generate_pdf_report(startup_details)
                with open(pdf_file, "rb") as file:
                    st.download_button(
                        label="Download PDF",
                        data=file,
                        file_name=f"{company_name}_report.pdf",
                        mime="application/pdf"
                    )
            except ValueError as e:
                if "Input validation error" in str(e):
                    st.error("Error: The input prompt is too long. Please try a shorter prompt.")
                else:
                    st.error(f"An error occurred: {e}")
            except Exception as e:
                st.error(f"An unexpected error occurred: {e}")
        else:
            st.error(f"No information found for the company '{company_name}'.")


# The original file copy-pasted this whole section twice; render both
# sections with the same helper, preserving the original widget keys so
# Streamlit session state is unchanged.
_render_report_download('company_name_input', 'download_report_button')
_render_report_download('company_name_input2', 'download_report_button2')