karcadan-unicorn committed on
Commit
c8440e8
·
verified ·
1 Parent(s): e425757

Upload 7 files

Browse files
Dockerfile CHANGED
@@ -17,4 +17,4 @@ WORKDIR $HOME/app
17
 
18
  COPY --chown=user . $HOME/app
19
 
20
- CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
17
 
18
  COPY --chown=user . $HOME/app
19
 
20
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
customgraph.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nest_asyncio
2
+ nest_asyncio.apply()
3
+
4
+ from scrapegraphai.graphs import SmartScraperMultiGraph
5
+ from scrapegraphai.nodes import FetchNode, ParseNode
6
+ from langchain.schema import Document
7
+
8
+ # Create a custom graph class
9
class CustomSmartScraperMultiGraph(SmartScraperMultiGraph):
    """Multi-source graph whose ``run`` merges local documents with one web page.

    ``run`` is overridden and does not delegate to the parent implementation:
    it concatenates the ``page_content`` of every ``Document`` found in
    ``self.source`` and appends the first parsed chunk of the first HTTP(S)
    URL found in ``self.source``.
    """

    def run(self):
        """Return the combined text of all documents and the first web source.

        Returns:
            The ``page_content`` of every ``Document`` source (in order),
            followed by the first parsed chunk of the first URL source. When
            there is no URL source only the document text is returned; when
            there is no ``Document`` only the web text is returned.
        """
        documents_text = "".join(
            source.page_content
            for source in self.source
            if isinstance(source, Document)
        )

        # Only one URL is fetched: the first string source starting with "http".
        first_url = next(
            (
                source
                for source in self.source
                if isinstance(source, str) and source.startswith("http")
            ),
            None,
        )
        if first_url is None:
            # Previously this path hit an unbound ``parsed_doc`` (NameError).
            return documents_text

        # The web text is appended exactly once, independent of how many
        # Document sources were supplied.
        return documents_text + self._fetch_first_chunk(first_url)

    def _fetch_first_chunk(self, url):
        """Fetch *url*, parse it, and return the first parsed chunk ("" if none)."""
        fetch_node = FetchNode(
            input="url | local_dir",
            output=["doc", "link_urls", "img_urls"],
            node_config={
                "verbose": True,
                "headless": True,
            },
        )
        fetched = fetch_node.execute({"url": url})

        parse_node = ParseNode(
            input="doc",
            output=["parsed_doc"],
            node_config={
                "chunk_size": 4096,
                "verbose": True,
            },
        )
        parsed = parse_node.execute({"doc": fetched["doc"]})

        chunks = parsed["parsed_doc"]
        # Guard against a page that produced no chunks at all.
        return chunks[0] if chunks else ""
45
+
46
+
47
def get_data(pdf_doc, web_url, openai_key):
    """Combine the text of a PDF with the content scraped from a website.

    Args:
        pdf_doc: Text extracted from the pitch-deck PDF.
        web_url: URL of the page to fetch and parse.
        openai_key: API key placed in the graph's ``llm`` configuration.

    Returns:
        Whatever ``CustomSmartScraperMultiGraph.run`` returns for the two
        sources (the web URL and the PDF text wrapped in a ``Document``).
    """
    llm_settings = {
        "api_key": openai_key,
        "model": "gpt-4o",
    }
    local_document = Document(
        page_content=pdf_doc,
        metadata={"source": "local_content"},
    )

    graph = CustomSmartScraperMultiGraph(
        prompt="give an indepth analysis",
        source=[web_url, local_document],
        config={"llm": llm_settings, "verbose": True},
    )
    return graph.run()
main.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi import File, UploadFile
from fastapi import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from kor.extraction import create_extraction_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI

import schema_indepth_analysis
import schema_quick_analysis
from scrape_parse_combine import scrape_parse_combine
13
+
14
+
15
def configure():
    """Initialise process configuration by delegating to ``load_dotenv``."""
    load_dotenv()
17
+
18
+ configure()
19
+ app = FastAPI()
20
+ openai_key = os.getenv("openai_key")
21
+
22
+ llm = ChatOpenAI(
23
+ model_name="gpt-4o",
24
+ temperature=0,
25
+ max_tokens=2000,
26
+ openai_api_key=openai_key
27
+ )
28
+
29
+ app.add_middleware(
30
+ CORSMiddleware,
31
+ allow_origins=["*"],
32
+ allow_credentials=True,
33
+ allow_methods=["*"],
34
+ allow_headers=["*"],
35
+ )
36
+
37
+
38
@app.get("/ping")
async def ping():
    """Liveness endpoint: always answers with a fixed greeting string."""
    alive_message = "Hello, I am alive"
    return alive_message
41
+
42
+ # Helper function. Upload a pdf_file and save it
43
def upload(file):
    """Save an uploaded file into the current working directory.

    Only the final path component of the client-supplied name is used, so a
    name such as ``../../etc/passwd`` cannot escape the working directory.

    Args:
        file: Upload object exposing ``filename`` and a binary file-like
            ``file`` attribute (``read``/``close``), as ``UploadFile`` does.

    Returns:
        str: Relative path ``./<name>`` of the saved copy.

    Raises:
        HTTPException: 400 when no usable file name is supplied; 500 when the
            upload cannot be read or written.
    """
    # Clients may send either separator style; normalise before stripping
    # every directory component from the untrusted name.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name)

    try:
        if safe_name in ("", ".", ".."):
            raise HTTPException(
                status_code=400,
                detail="The uploaded file has no usable file name",
            )
        contents = file.file.read()
        with open(safe_name, 'wb') as f:
            f.write(contents)
    except OSError as exc:
        # Previously a dict was returned here, which the caller then passed
        # on as if it were a path; fail the request explicitly instead.
        raise HTTPException(
            status_code=500,
            detail="There was an error uploading the file",
        ) from exc
    finally:
        file.file.close()

    return f"./{safe_name}"
57
+
58
+
59
@app.post("/quick_analysis")
async def quick_analysis(file: UploadFile = File(...)):
    """Extract the quick-analysis fields from an uploaded pitch-deck PDF.

    The PDF is saved via ``upload``, its pages are loaded and concatenated
    into one string, and the quick-analysis schema is run over that text.
    The full text and the extracted website URL are written to
    ``pdf_data.json`` for later use.

    Returns:
        dict: ``{"quick_analysis": <extracted data>}``.
    """
    pdf_path = upload(file)

    # Concatenate the text of every split page, in order, with no separator.
    split_pages = PyPDFLoader(pdf_path).load_and_split()
    doc_info = "".join(piece.page_content for piece in split_pages)

    extraction_chain = create_extraction_chain(
        llm, schema_quick_analysis.schema, encoder_or_encoder_class="json")
    doc_output = extraction_chain.invoke(doc_info)["data"]

    state_dict = {
        "pdf_doc": doc_info,
        "website_url": doc_output["startup_info"]["website_url"],
    }

    # Persist the state as JSON in the working directory.
    with open('pdf_data.json', 'w') as json_file:
        json.dump(state_dict, json_file)

    return {"quick_analysis": doc_output}
84
+
85
+
86
+
87
@app.post("/indepth_analysis")
async def indepth_analysis():
    """Run the in-depth extraction over the combined PDF and website text.

    Relies on ``pdf_data.json`` having been written by ``/quick_analysis``.
    ``scrape_parse_combine`` updates that file, after which its
    ``startup_info`` entry is fed to the in-depth extraction chain.

    Returns:
        dict: ``{"indepth_analysis": <extracted startup_info>}``.

    Raises:
        HTTPException: 409 when ``pdf_data.json`` does not exist yet, or when
            it contains no ``startup_info`` entry after scraping.
    """
    state_path = 'pdf_data.json'

    # Without the state file there is nothing to analyse; report that clearly
    # instead of failing with an unhandled FileNotFoundError.
    if not os.path.exists(state_path):
        raise HTTPException(
            status_code=409,
            detail="No pitch deck has been analysed yet; call /quick_analysis first",
        )

    scrape_parse_combine(openai_key)

    # Load JSON data from a file
    with open(state_path, 'r') as json_file:
        data = json.load(json_file)

    result = data.get("startup_info")
    if result is None:
        # scrape_parse_combine only sets this key when "pdf_doc" is present.
        raise HTTPException(
            status_code=409,
            detail="No combined startup information is available; call /quick_analysis first",
        )

    chain = create_extraction_chain(
        llm, schema_indepth_analysis.schema, encoder_or_encoder_class="json")

    doc_output = chain.invoke(result)["data"]

    return {"indepth_analysis": doc_output["startup_info"]}
104
+
schema_indepth_analysis.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from kor.nodes import Object, Text
2
+
3
+ team = Object(
4
+ id="team",
5
+ description="Information about the team",
6
+ attributes=[
7
+ Text(id="name", description="The name of the team member"),
8
+ Text(id="position", description="The position of the team member in the company")
9
+ ],
10
+
11
+ examples=[
12
+ (
13
+ "Diego Hoter Co-founder & CEO", # Text input
14
+ {
15
+ "name": "Diego Hoter",
16
+ "position": "Co-founder & CEO"
17
+ } # Dictionary with extracted attributes
18
+ )
19
+ # Add more examples in the same format if needed
20
+ ],
21
+ many=True
22
+ )
23
+
24
+ region = Object(
25
+ id="region",
26
+ description="Information about the regions where the startup are located",
27
+ attributes=[
28
+ Text(id="country", description="The country the startup is located"),
29
+ Text(id="city", description="The city the startup is located")
30
+ ],
31
+
32
+ examples=[
33
+ (
34
+ "We have a center in Sao Pablo, Brasil ", # Text input
35
+ {
36
+ "country": "Brasil",
37
+ "city": "Sao Pablo"
38
+ } # Dictionary with extracted attributes
39
+ )
40
+ # Add more examples in the same format if needed
41
+ ],
42
+ many=True
43
+ )
44
+
45
+ schema = Object(
46
+ id="startup_info",
47
+ description="Pitchdeck Information about a given startup.",
48
+ attributes=[
49
+ Text(
50
+ id="startup_overview",
51
+ description="A brief overview of the startup.",
52
+ examples=[("""
53
+ We verify sustainable agriculture at global scale! simple, scalable and auditable from farm to marketWork with ucrop.it to develop a program to incentivize farmers to adopt desired agricultural changes
54
+ Enabling farmers and AgFood companies to agree, trace, achieve & verify sustainability goals from Farm to market
55
+ The solution is to MRV* the use of land and the crops management for a net nature-positive impact
56
+ """,
57
+
58
+ "ucrop.it enables farmers and AgFood companies to trace, achieve, and verify sustainable agriculture practices from farm to market, ensuring nature-positive impacts and profitability through verified Crop Stories")],
59
+ ),
60
+ Text(
61
+ id="industry",
62
+ description="The industry or sector in which the startup operates.",
63
+ examples=[("We verify sustainable agriculture at global scale! simple, scalable and auditable from farm to market",
64
+ "Agriculture, AI, Data Analytics, Blockchain")],
65
+ ),
66
+ Text(
67
+ id="startup_name",
68
+ description="The Name of a startup",
69
+ examples=[
70
+ ("Work with ucrop.it to develop a program to incentivize farmers to adopt desired agricultural changes", "ucrop.it")],
71
+ ),
72
+ Text(
73
+ id="product_or_service",
74
+ description="The product or service the startup sells",
75
+ examples=[
76
+ ("They offer a SaaS platform for farmers", "SaaS platform")],
77
+ ),
78
+
79
+ team,
80
+
81
+
82
+ Text(
83
+ id="business_model",
84
+ description="The business model of the startup",
85
+ examples=[("""
86
+ 2024 (F) 2022 2023 Annual Recurring Revenues Gross Margin 9% 58% 74% 36% 50% 55% Churn rate <5% Average customer value$23K Revenues In Thousands B2B Customer contracts Our growth since 2020 has been exponential
87
+ """,
88
+ """
89
+ ucrop.it operates a B2B subscription-based model, charging for verified sustainable crop stories and land use assessments, with an average customer value of $23K and a gross margin of 55%.
90
+ """)],
91
+ ),
92
+
93
+ Text(
94
+ id="company_stage",
95
+ description="The current stage of the startup (e.g., early-stage, growth, established).",
96
+ examples=[
97
+ ("Business development in Australia targeting Asia Pacific Region preparing for Series B growth opportunity", "early-stage")],
98
+ ),
99
+
100
+ region,
101
+
102
# The id had a trailing space ("traction "), which leaked into the extracted key.
Text(
    id="traction",
    description="Key performance indicators or metrics that demonstrate the startup’s progress.",
    examples=[("""
2024 (F) 2022 2023 Annual Recurring Revenues Gross Margin 9% 58% 74% 36% 50% 55% Churn rate <5% Average customer value$23K Revenues In Thousands B2B Customer contracts Our growth since 2020 has been exponential
""",
               """
Exponential growth since 2020, with $1.4M in annual recurring revenues in 2023, 70+ corporate customers, and operations in 10 countries.
""")],
),
112
+
113
+ Text(
114
+ id="go_to_market_strategy",
115
+ description="The approach the startup uses to reach its target audience.",
116
+ examples=[("""
117
+ Market penetration Consolidate N. America operations. Co-Founder relocation (US) Business development leveraging commercial contracts in BrazilBusiness development in Australia targeting Asia Pacific Region preparing
118
+ for Series B growth opportunity Talent growth 15% ESOPS vesting equity rights for Validated talents Key talent acquisition Strategic Geographies Use of Funds Series A Ask""",
119
+ "The go-to-market strategy involves leveraging commercial contracts, co-founder relocation to the US, and business development in key regions.")],
120
+ ),
121
# Description typo ("TThe") fixed so it matches the quick-analysis schema.
Text(
    id="market_opportunity",
    description="The market gap or need that the startup aims to address.",
    examples=[("""
2050 $ 77 Bn*on a 7~10 Tr market About us Ag transition: The MRV market value for the sustainability will 4x by 2050 2030 $ 20 Bn * MRV price rate is 1% of the Tr market value 4 x Market penetration Consolidate N. America
operations. Co-Founder relocation (US) Business development leveraging commercial contracts in BrazilBusiness development in Australia targeting Asia Pacific Region preparing for Series B growth opportunity
""",
               "The MRV market for sustainability is projected to grow 4x by 2050, reaching $77 billion from $20 billion in 2030, with a focus on North America, Brazil, and the Asia Pacific region.")],
),
130
+ Text(
131
+ id="financial_projections",
132
+ description="Projected financial performance (revenue, expenses, profits) over a specific period.",
133
+ examples=[("DeepAgro aims to achieve $1 million in annual revenue within the next three years.",
134
+ "$1 million, for the next 3 years")],
135
+ ),
136
+ Text(
137
+ id="Investment_Round",
138
+ description="The type and amount of funding the startup is seeking.",
139
+ examples=[("Use of Funds Series A AsK $6 M ",
140
+ "Serie A funding with an ask 6M")],
141
+ ),
142
+ Text(
143
+ id="value_proposition",
144
+ description="The unique value the startup offers to its customers.",
145
+ examples=[("""
146
+ The startup addresses the following Problem: Farmers and AgFood companies need to monitor, report, and verify sustainable agricultural practices to ensure compliance and add value to the supply chain.
147
+ ucrop.it provides a platform for creating and sharing traceable and verifiable Sustainable Crop Stories™ of land use and crop management practices.
148
+
149
+ """,
150
+ "The company's platform offers a unique value proposition by enabling farmers and AgFood companies to monitor, report, and verify sustainable agricultural practices. This could attract a large customer base.")],
151
+ ),
152
+
153
+ ],
154
+ )
schema_quick_analysis.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from kor.nodes import Object, Text
2
+
3
+ team = Object(
4
+ id="team",
5
+ description="Information about the team",
6
+ attributes=[
7
+ Text(id="name", description="The name of the team member"),
8
+ Text(id="position",
9
+ description="The position of the team member in the company")
10
+ ],
11
+
12
+ examples=[
13
+ (
14
+ "Diego Hoter Co-founder & CEO", # Text input
15
+ {
16
+ "name": "Diego Hoter",
17
+ "position": "Co-founder & CEO"
18
+ } # Dictionary with extracted attributes
19
+ )
20
+ # Add more examples in the same format if needed
21
+ ],
22
+ many=True
23
+ )
24
+
25
+ region = Object(
26
+ id="region",
27
+ description="Information about the regions where the startup are located",
28
+ attributes=[
29
+ Text(id="country",
30
+ description="The country the startup is located"),
31
+ Text(id="city", description="The city the startup is located")
32
+ ],
33
+
34
+ examples=[
35
+ (
36
+ "We have a center in Sao Pablo, Brasil ", # Text input
37
+ {
38
+ "country": "Brasil",
39
+ "city": "Sao Pablo"
40
+ } # Dictionary with extracted attributes
41
+ )
42
+ # Add more examples in the same format if needed
43
+ ],
44
+ many=True
45
+ )
46
+
47
+ schema = Object(
48
+ id="startup_info",
49
+ description="Pitchdeck Information about a given startup.",
50
+ attributes=[
51
+ Text(
52
+ id="startup_overview",
53
+ description="A brief overview of the startup.",
54
+ examples=[("""
55
+ We verify sustainable agriculture at global scale! simple, scalable and auditable from farm to marketWork with ucrop.it to develop a program to incentivize farmers to adopt desired agricultural changes
56
+ Enabling farmers and AgFood companies to agree, trace, achieve & verify sustainability goals from Farm to market
57
+ The solution is to MRV* the use of land and the crops management for a net nature-positive impact
58
+ """,
59
+
60
+ "ucrop.it enables farmers and AgFood companies to trace, achieve, and verify sustainable agriculture practices from farm to market, ensuring nature-positive impacts and profitability through verified Crop Stories")],
61
+ ),
62
+ Text(
63
+ id="industry",
64
+ description="The industry or sector in which the startup operates.",
65
+ examples=[("We verify sustainable agriculture at global scale! simple, scalable and auditable from farm to market",
66
+ "Agriculture, AI, Data Analytics, Blockchain")],
67
+ ),
68
+ Text(
69
+ id="startup_name",
70
+ description="The Name of a startup",
71
+ examples=[
72
+ ("Work with ucrop.it to develop a program to incentivize farmers to adopt desired agricultural changes", "ucrop.it")],
73
+ ),
74
+ Text(
75
+ id="product_or_service",
76
+ description="The product or service the startup sells",
77
+ examples=[
78
+ ("They offer a SaaS platform for farmers", "SaaS platform")],
79
+ ),
80
+
81
+ team,
82
+
83
+
84
+ Text(
85
+ id="business_model",
86
+ description="The business model of the startup",
87
+ examples=[("""
88
+ 2024 (F) 2022 2023 Annual Recurring Revenues Gross Margin 9% 58% 74% 36% 50% 55% Churn rate <5% Average customer value$23K Revenues In Thousands B2B Customer contracts Our growth since 2020 has been exponential
89
+ """,
90
+ """
91
+ ucrop.it operates a B2B subscription-based model, charging for verified sustainable crop stories and land use assessments, with an average customer value of $23K and a gross margin of 55%.
92
+ """)],
93
+ ),
94
+
95
+ Text(
96
+ id="company_stage",
97
+ description="The current stage of the startup (e.g., early-stage, growth, established).",
98
+ examples=[
99
+ ("Business development in Australia targeting Asia Pacific Region preparing for Series B growth opportunity", "early-stage")],
100
+ ),
101
+
102
+ Text(
103
+ id="website_url",
104
+ description="The website link of the startup.",
105
+ # examples=[
106
+ # ("A Brighter Future for Agriculture\nwww.deepagro.com2ARTIFICIAL INTELLIGENCE ", "https://www.deepagro.com/")],
107
+ ),
108
+ region,
109
+
110
# The id had a trailing space ("traction "), which leaked into the extracted key.
Text(
    id="traction",
    description="Key performance indicators or metrics that demonstrate the startup’s progress.",
    examples=[("""
2024 (F) 2022 2023 Annual Recurring Revenues Gross Margin 9% 58% 74% 36% 50% 55% Churn rate <5% Average customer value$23K Revenues In Thousands B2B Customer contracts Our growth since 2020 has been exponential
""",
               """
Exponential growth since 2020, with $1.4M in annual recurring revenues in 2023, 70+ corporate customers, and operations in 10 countries.
""")],
),
120
+
121
+ Text(
122
+ id="go_to_market_strategy",
123
+ description="The approach the startup uses to reach its target audience.",
124
+ examples=[("""
125
+ Market penetration Consolidate N. America operations. Co-Founder relocation (US) Business development leveraging commercial contracts in BrazilBusiness development in Australia targeting Asia Pacific Region preparing
126
+ for Series B growth opportunity Talent growth 15% ESOPS vesting equity rights for Validated talents Key talent acquisition Strategic Geographies Use of Funds Series A Ask""",
127
+ "The go-to-market strategy involves leveraging commercial contracts, co-founder relocation to the US, and business development in key regions.")],
128
+ ),
129
+ Text(
130
+ id="market_opportunity",
131
+ description="The market gap or need that the startup aims to address.",
132
+ examples=[("""
133
+ 2050 $ 77 Bn*on a 7~10 Tr market About us Ag transition: The MRV market value for the sustainability will 4x by 2050 2030 $ 20 Bn * MRV price rate is 1% of the Tr market value 4 x Market penetration Consolidate N. America
134
+ operations. Co-Founder relocation (US) Business development leveraging commercial contracts in BrazilBusiness development in Australia targeting Asia Pacific Region preparing for Series B growth opportunity
135
+ """,
136
+ "The MRV market for sustainability is projected to grow 4x by 2050, reaching $77 billion from $20 billion in 2030, with a focus on North America, Brazil, and the Asia Pacific region.")],
137
+ ),
138
+ Text(
139
+ id="financial_projections",
140
+ description="Projected financial performance (revenue, expenses, profits) over a specific period.",
141
+ examples=[
142
+ ("DeepAgro aims to achieve $1 million in annual revenue within the next three years.", "$1 million, for the next 3 years")],
143
+ ),
144
+ Text(
145
+ id="Investment_Round",
146
+ description="The type and amount of funding the startup is seeking.",
147
+ examples=[("Use of Funds Series A AsK $6 M ",
148
+ "Serie A funding with an ask 6M")],
149
+ ),
150
+ Text(
151
+ id="value_proposition",
152
+ description="The unique value the startup offers to its customers.",
153
+ examples=[("""
154
+ The startup addresses the following Problem: Farmers and AgFood companies need to monitor, report, and verify sustainable agricultural practices to ensure compliance and add value to the supply chain.
155
+ ucrop.it provides a platform for creating and sharing traceable and verifiable Sustainable Crop Stories™ of land use and crop management practices.
156
+
157
+ """,
158
+ "The company's platform offers a unique value proposition by enabling farmers and AgFood companies to monitor, report, and verify sustainable agricultural practices. This could attract a large customer base.")],
159
+ ),
160
+
161
+ ],
162
+ )
scrape_parse_combine.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from customgraph import get_data
2
+ import json
3
+
4
+
5
def _ensure_http_scheme(url):
    """Return *url* stripped, with ``http://`` prepended unless it already has an HTTP(S) scheme."""
    url = url.strip()
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


def scrape_parse_combine(openai_key):
    """Enrich ``pdf_data.json`` with the combined PDF and website text.

    Reads ``pdf_data.json`` from the working directory. When it contains a
    ``pdf_doc`` entry, ``get_data`` is called with that text and the stored
    ``website_url``, and the result is stored under ``startup_info``. The
    (possibly updated) data is then written back to the same file.

    Args:
        openai_key: API key forwarded to ``get_data``.
    """
    # Load JSON data from a file
    with open('pdf_data.json', 'r') as json_file:
        data = json.load(json_file)

    if "pdf_doc" in data:
        # The extracted URL may or may not carry a scheme; prepending
        # "http://" unconditionally produced "http://https://..." links.
        data["startup_info"] = get_data(
            data["pdf_doc"],
            _ensure_http_scheme(data["website_url"]),
            openai_key,
        )

    with open('pdf_data.json', 'w') as json_file:
        json.dump(data, json_file)
17
+