dylanhogg committed on
Commit
44c8200
·
1 Parent(s): 671279a

Add address parser app

Browse files
Files changed (6) hide show
  1. .gitignore +173 -0
  2. Makefile +33 -0
  3. address_parser.py +87 -0
  4. requirements-dev.txt +4 -0
  5. requirements.txt +4 -0
  6. requirements_freeze.txt +75 -0
.gitignore ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data/
2
+ data*/
3
+ _data/
4
+ _data*/
5
+ .joblib_cache/
6
+ .joblib_cache*/
7
+
8
+ .idea/
9
+ .vscode/
10
+ log/
11
+ *.log
12
+ .DS_Store
13
+
14
+ # Byte-compiled / optimized / DLL files
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ build/
25
+ develop-eggs/
26
+ dist/
27
+ downloads/
28
+ eggs/
29
+ .eggs/
30
+ lib/
31
+ lib64/
32
+ parts/
33
+ sdist/
34
+ var/
35
+ wheels/
36
+ share/python-wheels/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+ MANIFEST
41
+
42
+ # PyInstaller
43
+ # Usually these files are written by a python script from a template
44
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
45
+ *.manifest
46
+ *.spec
47
+
48
+ # Installer logs
49
+ pip-log.txt
50
+ pip-delete-this-directory.txt
51
+
52
+ # Unit test / coverage reports
53
+ htmlcov/
54
+ .tox/
55
+ .nox/
56
+ .coverage
57
+ .coverage.*
58
+ .cache
59
+ nosetests.xml
60
+ coverage.xml
61
+ *.cover
62
+ *.py,cover
63
+ .hypothesis/
64
+ .pytest_cache/
65
+ cover/
66
+
67
+ # Translations
68
+ *.mo
69
+ *.pot
70
+
71
+ # Django stuff:
72
+ *.log
73
+ local_settings.py
74
+ db.sqlite3
75
+ db.sqlite3-journal
76
+
77
+ # Flask stuff:
78
+ instance/
79
+ .webassets-cache
80
+
81
+ # Scrapy stuff:
82
+ .scrapy
83
+
84
+ # Sphinx documentation
85
+ docs/_build/
86
+
87
+ # PyBuilder
88
+ .pybuilder/
89
+ target/
90
+
91
+ # Jupyter Notebook
92
+ .ipynb_checkpoints
93
+
94
+ # IPython
95
+ profile_default/
96
+ ipython_config.py
97
+
98
+ # pyenv
99
+ # For a library or package, you might want to ignore these files since the code is
100
+ # intended to run in multiple environments; otherwise, check them in:
101
+ # .python-version
102
+
103
+ # pipenv
104
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
105
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
106
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
107
+ # install all needed dependencies.
108
+ #Pipfile.lock
109
+
110
+ # poetry
111
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
112
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
113
+ # commonly ignored for libraries.
114
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
115
+ #poetry.lock
116
+
117
+ # pdm
118
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
119
+ #pdm.lock
120
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
121
+ # in version control.
122
+ # https://pdm.fming.dev/#use-with-ide
123
+ .pdm.toml
124
+
125
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
126
+ __pypackages__/
127
+
128
+ # Celery stuff
129
+ celerybeat-schedule
130
+ celerybeat.pid
131
+
132
+ # SageMath parsed files
133
+ *.sage.py
134
+
135
+ # Environments
136
+ .env
137
+ .venv
138
+ env/
139
+ venv/
140
+ ENV/
141
+ env.bak/
142
+ venv.bak/
143
+
144
+ # Spyder project settings
145
+ .spyderproject
146
+ .spyproject
147
+
148
+ # Rope project settings
149
+ .ropeproject
150
+
151
+ # mkdocs documentation
152
+ /site
153
+
154
+ # mypy
155
+ .mypy_cache/
156
+ .dmypy.json
157
+ dmypy.json
158
+
159
+ # Pyre type checker
160
+ .pyre/
161
+
162
+ # pytype static type analyzer
163
+ .pytype/
164
+
165
+ # Cython debug symbols
166
+ cython_debug/
167
+
168
+ # PyCharm
169
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
170
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
171
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
172
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
173
+ #.idea/
Makefile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Developer task runner for the address parser app.
# NOTE(review): recipe tabs were reconstructed from a flattened diff view.

# Recipes rely on `source`, which is a bash builtin; make's default /bin/sh
# (e.g. dash) does not provide it, so pin the recipe shell explicitly.
SHELL := bash

.DEFAULT_GOAL := help

# Every target below is a command, not a file; declaring them phony keeps a
# stray file or directory with the same name from masking the target.
.PHONY: venv which-python clean run black-check black ruff-check ruff test help

# Create .venv, install the dev requirements into it, and snapshot the
# resolved package versions into requirements_freeze.txt.
# Steps are chained with `&&` so a failed activation or install stops the recipe
# instead of continuing against the system interpreter.
venv:
	python3 -m venv .venv
	source .venv/bin/activate && pip install --upgrade pip && python3 -m pip install -r requirements-dev.txt
	source .venv/bin/activate && pip freeze > requirements_freeze.txt

# Print the python executable that is active inside .venv.
which-python:
	source .venv/bin/activate && which python

# Remove the virtual environment.
clean:
	rm -rf .venv

# Run the address parser app from inside .venv.
run:
	source .venv/bin/activate && python address_parser.py

# Report formatting differences without rewriting files.
black-check:
	source .venv/bin/activate && black . --check --verbose --line-length 120

# Reformat the code base in place.
black:
	source .venv/bin/activate && black . --line-length 120

# Lint without modifying files.
ruff-check:
	source .venv/bin/activate && ruff check .

# Lint and apply ruff's automatic fixes.
ruff:
	source .venv/bin/activate && ruff check . --fix

# Run pytest against the tests directory with ./src on PYTHONPATH.
# NOTE(review): no src/ or tests/ directory is visible alongside this file - confirm they exist.
test:
	source .venv/bin/activate && PYTHONPATH='./src' pytest -vv --capture=no tests

# Default goal: list the available targets by dumping make's database for this
# file and filtering it down to plain target names (excluding `help` itself).
help:
	@LC_ALL=C $(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | grep -E -v -e '^[^[:alnum:]]' -e '^$@$$'
address_parser.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline
3
+ import gradio as gr
4
+ import json
5
+
6
# Identifier of the model that the text-generation pipeline loads.
model_id = "dylanhogg/gnaf-structured-address-v0.1-75a1791-20250921-063650"

# Build the pipeline once at import time so every request reuses the same
# instance; weights are requested in bfloat16 and device placement is left
# to device_map="auto".
pipe = pipeline(task="text-generation", model=model_id, dtype=torch.bfloat16, device_map="auto")
15
+
16
+
17
def _extract_json(text: str) -> str:
    """Pretty-print the first JSON value that starts at the first '{' in ``text``.

    Returns the JSON re-serialised with a 2-space indent, the string
    "No JSON found in response" when ``text`` has no '{' followed by a '}',
    or an "Error parsing JSON: ..." message (including the raw text) when
    decoding fails.
    """
    start = text.find("{")
    if start == -1 or text.find("}", start) == -1:
        return "No JSON found in response"
    try:
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing chatter (even chatter containing braces) does not break parsing.
        parsed, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError as exc:
        return f"Error parsing JSON: {exc}\n\nRaw output:\n{text}"
    return json.dumps(parsed, indent=2)


def parse_address(user_address: str) -> tuple[str, str]:
    """Run the model on a free-text address and return its structured form.

    Args:
        user_address: The address text entered by the user.

    Returns:
        A ``(formatted_json, raw_output)`` pair. ``formatted_json`` is the
        pretty-printed JSON found in the model reply, or a human-readable
        message when no JSON could be extracted. ``raw_output`` is the
        unmodified text of the last generated message. Empty or
        whitespace-only input short-circuits to ``("No address provided", "")``
        without invoking the model.
    """
    if not user_address or not user_address.strip():
        return "No address provided", ""

    # The instruction is sent both as the system prompt and as the first line
    # of the user turn, exactly as the original prompt layout did.
    user_content = f"Translate a text address into structured json.\n{user_address}"
    messages = [
        {"role": "system", "content": "Translate a text address into structured json."},
        {"role": "user", "content": user_content},
    ]

    outputs = pipe(
        messages,
        max_new_tokens=256,
        do_sample=False,  # greedy decoding
    )

    # The last entry of "generated_text" is read as the reply
    # (presumably the assistant turn appended to the chat - confirm against the pipeline).
    response = outputs[0]["generated_text"]
    last_content = response[-1]["content"]

    return _extract_json(last_content), last_content
49
+
50
+
51
# Sample addresses offered as clickable examples beneath the form.
_example_addresses = [
    "48a Pirrama Rd Pyrmont NSW 2009",
    "Floor 3, 152-156 Clarence St, Sydney NSW 2000",
    "Aptt 16, 400 Bondi Rd, Bondi NSW 2026",
    "Unit 18/14-18 Flood St, Bondi, NSW 2026",
    "Lvl 15/333 George St Sydney NSW 2000",
    "Check out: 44 Ulm St, Maroubra NSW 2035",
    "44 Ulm St, Maroubra NSW 2035 is where it's at!",
]

# Assemble the Gradio UI: an input column (address box + button) next to an
# output column (structured JSON + raw model text), followed by the examples.
# NOTE(review): nesting was reconstructed from a flattened diff view - confirm the layout.
with gr.Blocks(title="Address Parser") as demo:
    gr.Markdown("# 🏠 Structured Address Parser")
    gr.Markdown("This model converts text addresses into structured JSON format.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Address",
                placeholder="Enter an address...",
                value="48a Pirrama Rd Pyrmont NSW 2009",
            )
            submit_btn = gr.Button("Parse Address", variant="primary")

        with gr.Column():
            json_output = gr.Textbox(label="Structured JSON", interactive=False, lines=10)
            raw_output = gr.Textbox(label="Raw Model Output", interactive=False, lines=5)

    gr.Examples(examples=_example_addresses, inputs=input_text)

    # Clicking the button and submitting the textbox both run the parser and
    # fill the two output boxes.
    for _trigger in (submit_btn.click, input_text.submit):
        _trigger(fn=parse_address, inputs=input_text, outputs=[json_output, raw_output])

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements-dev.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ -r requirements.txt
2
+ pytest
3
+ black
4
+ ruff
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ accelerate
requirements_freeze.txt ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.10.1
2
+ aiofiles==24.1.0
3
+ annotated-types==0.7.0
4
+ anyio==4.11.0
5
+ audioop-lts==0.2.2
6
+ black==25.9.0
7
+ Brotli==1.1.0
8
+ certifi==2025.10.5
9
+ charset-normalizer==3.4.4
10
+ click==8.3.0
11
+ fastapi==0.119.0
12
+ ffmpy==0.6.3
13
+ filelock==3.20.0
14
+ fsspec==2025.9.0
15
+ gradio==5.49.1
16
+ gradio_client==1.13.3
17
+ groovy==0.1.2
18
+ h11==0.16.0
19
+ hf-xet==1.1.10
20
+ httpcore==1.0.9
21
+ httpx==0.28.1
22
+ huggingface-hub==0.35.3
23
+ idna==3.11
24
+ iniconfig==2.1.0
25
+ Jinja2==3.1.6
26
+ markdown-it-py==4.0.0
27
+ MarkupSafe==3.0.3
28
+ mdurl==0.1.2
29
+ mpmath==1.3.0
30
+ mypy_extensions==1.1.0
31
+ networkx==3.5
32
+ numpy==2.3.4
33
+ orjson==3.11.3
34
+ packaging==25.0
35
+ pandas==2.3.3
36
+ pathspec==0.12.1
37
+ pillow==11.3.0
38
+ platformdirs==4.5.0
39
+ pluggy==1.6.0
40
+ psutil==7.1.0
41
+ pydantic==2.11.10
42
+ pydantic_core==2.33.2
43
+ pydub==0.25.1
44
+ Pygments==2.19.2
45
+ pytest==8.4.2
46
+ python-dateutil==2.9.0.post0
47
+ python-multipart==0.0.20
48
+ pytokens==0.2.0
49
+ pytz==2025.2
50
+ PyYAML==6.0.3
51
+ regex==2025.9.18
52
+ requests==2.32.5
53
+ rich==14.2.0
54
+ ruff==0.14.1
55
+ safehttpx==0.1.6
56
+ safetensors==0.6.2
57
+ semantic-version==2.10.0
58
+ setuptools==80.9.0
59
+ shellingham==1.5.4
60
+ six==1.17.0
61
+ sniffio==1.3.1
62
+ starlette==0.48.0
63
+ sympy==1.14.0
64
+ tokenizers==0.22.1
65
+ tomlkit==0.13.3
66
+ torch==2.9.0
67
+ tqdm==4.67.1
68
+ transformers==4.57.1
69
+ typer==0.19.2
70
+ typing-inspection==0.4.2
71
+ typing_extensions==4.15.0
72
+ tzdata==2025.2
73
+ urllib3==2.5.0
74
+ uvicorn==0.37.0
75
+ websockets==15.0.1