gentrich committed on
Commit
3b60800
·
1 Parent(s): 874dcad

Add brochure code.

Browse files
Files changed (4) hide show
  1. .gitignore +184 -0
  2. app.py +63 -3
  3. requirements.txt +40 -0
  4. runnable_with_tools.py +55 -0
.gitignore ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Github's default gitignore for Python
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+ llms/
133
+ llms.bak/
134
+
135
+ # Spyder project settings
136
+ .spyderproject
137
+ .spyproject
138
+
139
+ # Rope project settings
140
+ .ropeproject
141
+
142
+ # mkdocs documentation
143
+ /site
144
+
145
+ # mypy
146
+ .mypy_cache/
147
+ .dmypy.json
148
+ dmypy.json
149
+
150
+ # Pyre type checker
151
+ .pyre/
152
+
153
+ # pytype static type analyzer
154
+ .pytype/
155
+
156
+ # Cython debug symbols
157
+ cython_debug/
158
+
159
+ # PyCharm
160
+ .idea/
161
+
162
+ # Added this to ignore models downloaded from HF
163
+ model_cache/
164
+ # Ignore finder files
165
+ .DS_Store
166
+ /.DS_Store
167
+
168
+ # Ignore Chroma vector database
169
+ vector_db/
170
+ products_vectorstore/
171
+
172
+ # And ignore any pickle files made during the course
173
+ *.pkl
174
+
175
+ # ignore gradio private files
176
+ .gradio
177
+ /.gradio
178
+
179
+ # ignore diagnostics reports
180
+ **/report.txt
181
+
182
+ # ignore optimized C++ code from being checked into repo
183
+ week4/optimized
184
+ week4/simple
app.py CHANGED
@@ -1,7 +1,67 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  demo.launch()
 
1
  import gradio as gr
2
 
3
+ from typing import TypedDict, Annotated, Optional
 
4
 
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ from dotenv import load_dotenv
8
+ from pydantic import BaseModel, Field
9
+
10
+ from runnable_with_tools import RunnableWithTools
11
+ from langchain_anthropic import ChatAnthropic
12
+ from langchain_core.prompts import ChatPromptTemplate
13
+ from langchain_core.tools import tool
14
+
15
class WebPage(BaseModel):
    """ Information about a web page, including its title, content and useful links """

    # Address of the page; the only required field.
    url: str = Field(
        description="URL of this web page",
    )

    # Page title; stays None when no title is supplied.
    title: Optional[str] = Field(
        description="The web page title",
        default=None,
    )

    # Page text; stays None when no content is supplied.
    content: Optional[str] = Field(
        description="Web page content",
        default=None,
    )

    # Links found on the page; each instance gets its own fresh list.
    links: list[str] = Field(
        description="the useful links on this web page",
        default_factory=list,
    )
21
+
22
@tool
def get_web_page(url: Annotated[str, 'the url of the web page']) -> Annotated[WebPage, 'information on the web page']:
    """ Retrieve information about a web page, including its title, content and useful links """
    # NOTE: the docstring above is deliberately left verbatim -- the @tool
    # decorator presumably publishes it to the model as the tool description.
    #
    # Behaviour:
    #   * fetches `url` with a bounded timeout so a stalled server cannot hang
    #     the whole brochure request;
    #   * on any request failure returns a WebPage whose content describes the
    #     failure instead of raising, so one bad link does not abort the run;
    #   * strips script/style/img/input elements before extracting body text;
    #   * resolves every href against `url` so that relative links can be
    #     passed straight back into this tool, and drops duplicates while
    #     keeping first-seen order.
    from urllib.parse import urljoin  # local import: only this tool needs it

    request_timeout_seconds = 30

    print(f'Retrieving {url}')
    try:
        response = requests.get(url, timeout=request_timeout_seconds)
    except requests.RequestException as exc:
        # Covers unreachable hosts, timeouts, and URLs requests cannot handle
        # (missing or unsupported scheme such as "mailto:").
        return WebPage(url=url, content=f'Could not retrieve this page: {exc}')

    soup = BeautifulSoup(response.content, 'html.parser')
    title = soup.title.string if soup.title else None

    content = None
    if soup.body:
        # Remove elements that carry no readable text for the brochure.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        content = soup.body.get_text(separator="\n", strip=True)

    # dict keys give order-preserving de-duplication.
    unique_links = {}
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href is None:
            continue
        try:
            absolute = urljoin(url, href)
        except ValueError:
            # Malformed href; keep it as written rather than lose it.
            absolute = href
        unique_links[absolute] = None

    return WebPage(url=url, title=title, content=content, links=list(unique_links))
36
+
37
# Registry of callable tools, keyed by the name used to look a tool up.
TOOLS = {"get_web_page": get_web_page}
40
+
41
def make_brochure(company_name: str, company_web_site: str) -> str:
    """Ask the tool-enabled model to write a brochure for a company.

    Builds a system + user prompt, fills in the company name and web site,
    and runs it through ``model_with_tools`` with up to 20 tool rounds.

    Args:
        company_name: Name of the company, inserted into the user prompt.
        company_web_site: URL of the company's web site, inserted into the
            user prompt.

    Returns:
        The text of the model's final message. When the message content is a
        list of content blocks rather than a plain string, the text blocks
        are joined with newlines so the caller always receives a ``str``.
    """
    system = """
    You are an assistant that can create a company brochure from the company's web site.
    You can do this in a few steps:
    1. Retrieve web page information from the provided company web site url
    2. Find out the links that are useful for making company brochure
    3. Retrieve web page information from these links
    4. Create a company brochure from information on these web pages
    """

    user = """
    Please create a company brochure for {company_name}. Its web site url is {company_web_site}
    """

    prompt_template = ChatPromptTemplate.from_messages([
        ('system', system),
        ('user', user)
    ])
    prompt = prompt_template.invoke({'company_name': company_name, 'company_web_site': company_web_site})
    reply = model_with_tools.invoke(prompt.to_messages(), max_depth=20).content

    if isinstance(reply, str):
        return reply

    # Content may be a list of blocks (plain strings or dicts carrying a
    # 'text' entry); keep only the textual parts so the declared ``str``
    # return type holds.
    parts = []
    for block in reply:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and 'text' in block:
            parts.append(block['text'])
    return '\n'.join(parts)
61
+
62
# Load settings from a .env file before any client is constructed
# (presumably this is where the Anthropic API key comes from -- confirm).
load_dotenv()

# Chat model, then the same model with the page-fetching tool bound and
# wrapped so that requested tool calls are actually executed.
model = ChatAnthropic(
    model="claude-3-5-haiku-20241022",
    temperature=0,
)
model_with_tools = RunnableWithTools(
    bound=model.bind_tools([get_web_page]),
    tools=TOOLS,
)

# Two text inputs (company name, web site) in, markdown brochure out.
demo = gr.Interface(
    fn=make_brochure,
    inputs=["text", "text"],
    outputs="markdown",
    flagging_mode="never",
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ jupyterlab
3
+ ipywidgets
4
+ requests
5
+ numpy
6
+ pandas
7
+ scipy
8
+ scikit-learn
9
+ matplotlib
10
+ gensim
11
+ transformers
12
+ tqdm
13
+ openai
14
+ gradio
15
+ langchain
16
+ tiktoken
17
+ faiss-cpu
18
+ langchain-openai
19
+ langchain-anthropic
20
+ langchain_chroma
21
+ langchain[docarray]
22
+ datasets
23
+ sentencepiece
24
+ matplotlib
25
+ google-generativeai
26
+ anthropic
27
+ scikit-learn
28
+ chromadb
29
+ plotly
30
+ jupyter-dash
31
+ beautifulsoup4
32
+ pydub
33
+ modal
34
+ ollama
35
+ accelerate
36
+ sentencepiece
37
+ bitsandbytes
38
+ psutil
39
+ setuptools
40
+ speedtest-cli
runnable_with_tools.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Any, Callable
2
+
3
+ from langchain_community.vectorstores.pgvector import BaseModel
4
+ from pydantic import ConfigDict
5
+
6
+ from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
7
+ from langchain_core.runnables import RunnableSerializable, RunnableConfig, Runnable
8
+ from langchain_core.runnables.utils import Input, Output
9
+ from langchain_core.tools import BaseTool
10
+
11
class RunnableWithTools(RunnableSerializable[Input, Output]):
    """Runnable that runs a tool-calling model and executes the tools it requests.

    Each round invokes ``bound``; if the reply is an ``AIMessage`` carrying
    tool calls, every call is executed via ``tools`` and the results are fed
    back to the model on the next round.
    """

    # Wrapped runnable that produces the messages (e.g. a model with tools bound).
    bound: Runnable[Input, Output]
    # Executable tools, looked up by lower-cased tool name.
    tools: dict[str, BaseTool]

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    def invoke(
        self,
        input: Input,
        config: Optional[RunnableConfig] = None,
        max_depth: Optional[int] = 3,
        **kwargs: Any
    ) -> Output:
        """Run the model/tool loop and return the last model message.

        Args:
            input: Conversation to start from. A list/tuple of messages is
                copied so the caller's sequence is not modified; an object
                exposing ``to_messages()`` or a plain string is converted to
                a message list.
            config: Runnable config forwarded to every ``bound.invoke`` call.
            max_depth: Maximum number of model calls. ``None`` means no
                limit. If the model is still requesting tools on the final
                allowed call, those tools are executed and that
                tool-requesting message is what gets returned.
            **kwargs: Forwarded to every ``bound.invoke`` call.

        Returns:
            The last message produced by ``bound``, or ``None`` when
            ``max_depth`` allows no call at all.
        """
        # Work on a private conversation so appended turns never leak into
        # the caller's list.
        if isinstance(input, (list, tuple)):
            messages = list(input)
        elif hasattr(input, 'to_messages'):
            messages = list(input.to_messages())
        elif isinstance(input, str):
            messages = [HumanMessage(content=input)]
        else:
            messages = input

        depth = 0
        message = None
        while max_depth is None or depth < max_depth:
            message = self.bound.invoke(messages, config, **kwargs)
            wants_tools = isinstance(message, AIMessage) and message.tool_calls and self.tools
            if not wants_tools:
                break

            # Record the model's request, then answer every tool call with
            # exactly one ToolMessage containing only that call's result.
            messages.append(message)
            for tool_call in message.tool_calls:
                messages.append(self._run_tool(tool_call))
            depth += 1

        return message

    def _run_tool(self, tool_call: Any) -> ToolMessage:
        """Execute one requested tool call and wrap the outcome in a ToolMessage."""
        # The module-level ``BaseModel`` is imported from a pgvector
        # vector-store module, not from pydantic, so pydantic is imported
        # here explicitly. NOTE(review): that module-level name looks like an
        # SQLAlchemy base and is no longer used by this class -- confirm and
        # clean up the import.
        from pydantic import BaseModel as PydanticModel

        call_id = tool_call['id']
        selected_tool = self.tools.get(tool_call['name'].lower())
        if selected_tool is None:
            # Still answer the call so the conversation stays well-formed.
            return ToolMessage(
                tool_call_id=call_id,
                content=f"Unknown tool: {tool_call['name']}",
            )

        result = selected_tool.invoke(tool_call)
        if isinstance(result, ToolMessage):
            # The tool already produced a ready-to-send reply for this call.
            return result
        if isinstance(result, PydanticModel):
            content = result.model_dump_json()
        else:
            content = str(result)
        return ToolMessage(tool_call_id=call_id, content=content)
55
+