hugobowne committed on
Commit
9ca6b58
·
verified ·
1 Parent(s): 672451d

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +46 -6
  2. app.py +240 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,52 @@
1
  ---
2
- title: Llmops Database Mcp
3
- emoji: 🐠
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.1.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: LLMOps Database MCP Server
3
+ emoji: 🔍
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: "5.0.0"
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
+ tags:
12
+ - mcp-server
13
+ - llmops
14
+ - datasets
15
  ---
16
 
17
+ # LLMOps Database MCP Server
18
+
19
+ An MCP server for querying the [ZenML LLMOps Database](https://huggingface.co/datasets/zenml/llmops-database) - a collection of 1,100+ real-world LLMOps case studies.
20
+
21
+ ## Tools
22
+
23
+ - **search** - Search with optional filters (query, industry, company, year, tag)
24
+ - **get_case_study_details** - Get full details of a case study
25
+ - **get_statistics** - Database statistics
26
+ - **list_options** - Available filter values
27
+
28
+ ## Use as MCP Server
29
+
30
+ Add to your MCP client (Cursor, Claude Desktop, etc.):
31
+
32
+ ```json
33
+ {
34
+ "mcpServers": {
35
+ "llmops-database": {
36
+ "command": "npx",
37
+ "args": [
38
+ "mcp-remote",
39
+ "https://hugobowne-llmops-database-mcp.hf.space/gradio_api/mcp/"
40
+ ]
41
+ }
42
+ }
43
+ }
44
+ ```
45
+
46
+ ## Dataset
47
+
48
+ The [ZenML LLMOps Database](https://huggingface.co/datasets/zenml/llmops-database) contains:
49
+ - 1,100+ case studies of real-world LLM deployments
50
+ - Metadata: company, industry, year, source URL
51
+ - Tags: tools, techniques, applications
52
+ - Short and full summaries
app.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio MCP Server for querying the ZenML LLMOps Database.
3
+
4
+ Exposes 4 tools:
5
+ 1. search - flexible search with optional filters
6
+ 2. get_case_study_details - get full details of a case study
7
+ 3. get_statistics - database statistics
8
+ 4. list_options - available industries, companies, years
9
+ """
10
+
11
+ import gradio as gr
12
+ from datasets import load_dataset
13
+ import pandas as pd
14
+
15
# Fetch the dataset a single time at import so every tool call reuses the
# same in-memory DataFrame instead of hitting the Hub again.
print("Loading ZenML LLMOps Database...")
ds = load_dataset("zenml/llmops-database", split="train")
df = ds.to_pandas()
print(f"Loaded {df.shape[0]} case studies")
20
+
21
+
22
def search(
    query: str = None,
    industry: str = None,
    company: str = None,
    year: int = None,
    tag: str = None,
    limit: int = 20
) -> str:
    """
    Search the LLMOps database with optional filters. All parameters can be combined.

    Text filters are case-insensitive and matched as literal substrings, so
    characters such as '+', '(' or '?' are safe to use. Blank or
    whitespace-only text filters are ignored.

    Args:
        query: Text to search for in titles and summaries (e.g., 'RAG', 'fine-tuning', 'agents')
        industry: Filter by industry (e.g., 'Tech', 'Finance', 'Healthcare')
        company: Filter by company (e.g., 'meta', 'google', 'openai')
        year: Filter by year (e.g., 2023, 2024)
        tag: Filter by tag in any tag field (e.g., 'pytorch', 'monitoring', 'rag')
        limit: Maximum results to return (default 20)

    Returns:
        Matching case studies with title, company, industry, year, and summary
    """
    def _clean(value):
        # None and whitespace-only input both mean "filter not supplied".
        return value.strip() if isinstance(value, str) else ""

    def _contains(column, needle):
        # regex=False: the needle is user input and must be matched literally.
        # In the default regex mode, text such as "c++" or "(beta" raises
        # re.error, and characters like "." silently act as wildcards.
        return df[column].str.lower().str.contains(
            needle.lower(), regex=False, na=False
        )

    def _contains_any(columns, needle):
        # OR together the literal-substring match over several columns.
        combined = _contains(columns[0], needle)
        for column in columns[1:]:
            combined = combined | _contains(column, needle)
        return combined

    query = _clean(query)
    industry = _clean(industry)
    company = _clean(company)
    tag = _clean(tag)

    # Start with all rows; build the mask on df's own index so the boolean
    # selection below always aligns with the frame.
    mask = pd.Series(True, index=df.index)

    if query:
        mask &= _contains_any(["title", "short_summary", "full_summary"], query)

    if industry:
        mask &= _contains("industry", industry)

    if company:
        mask &= _contains("company", company)

    # A falsy year (None or 0) means "no year filter", as before.
    if year:
        mask &= df["year"] == year

    if tag:
        mask &= _contains_any(
            ["tools_tags", "techniques_tags", "application_tags", "extra_tags"],
            tag,
        )

    matches = df[mask]
    # UI widgets and JSON clients may deliver the limit as a float; head()
    # needs an integer. A limit of None keeps every match.
    results = matches if limit is None else matches.head(int(limit))

    if results.empty:
        filters = []
        if query:
            filters.append(f"query='{query}'")
        if industry:
            filters.append(f"industry='{industry}'")
        if company:
            filters.append(f"company='{company}'")
        if year:
            filters.append(f"year={year}")
        if tag:
            filters.append(f"tag='{tag}'")
        return f"No case studies found with filters: {', '.join(filters) if filters else 'none'}"

    # Collect the pieces and join once instead of repeated string +=.
    parts = [f"Found {len(results)} case studies:\n\n"]
    for _, row in results.iterrows():
        parts.append(f"## {row['title']}\n")
        parts.append(
            f"**Company:** {row['company']} | **Industry:** {row['industry']} | **Year:** {row['year']}\n"
        )
        parts.append(f"**Tags:** {row['application_tags']}\n")
        parts.append(f"**Summary:** {row['short_summary']}\n")
        parts.append(f"**Source:** {row['source_url']}\n\n")
        parts.append("---\n\n")

    return "".join(parts)
101
+
102
+
103
def get_case_study_details(title: str) -> str:
    """
    Get the full details of a specific case study by title.

    The title is matched case-insensitively as literal text. An exact title
    match is preferred; otherwise the first case study whose title contains
    the given text is returned.

    Args:
        title: The title (or part of the title) of the case study to retrieve

    Returns:
        Complete case study including full summary, all tags, and source URL
    """
    if not title or not title.strip():
        return "Please provide a title to search for"

    needle = title.strip().lower()
    titles = df["title"].str.lower()

    # Prefer an exact title so a short title that is also a substring of an
    # earlier row's title still resolves to the intended case study.
    results = df[titles.str.strip() == needle]
    if results.empty:
        # regex=False: titles often contain '?', '(' or '+'; in regex mode
        # those raise re.error or change what is matched.
        results = df[titles.str.contains(needle, regex=False, na=False)]

    if results.empty:
        return f"No case study found with title containing '{title}'"

    row = results.iloc[0]

    sections = [
        f"# {row['title']}\n\n",
        f"**Company:** {row['company']}\n",
        f"**Industry:** {row['industry']}\n",
        f"**Year:** {row['year']}\n",
        f"**Source:** {row['source_url']}\n\n",
        "## Tags\n",
        f"- **Application:** {row['application_tags']}\n",
        f"- **Tools:** {row['tools_tags']}\n",
        f"- **Techniques:** {row['techniques_tags']}\n",
        f"- **Extra:** {row['extra_tags']}\n\n",
        f"## Full Summary\n\n{row['full_summary']}\n",
    ]
    return "".join(sections)
137
+
138
+
139
def get_statistics() -> str:
    """
    Get statistics about the LLMOps Database.

    Returns:
        Markdown report with the total number of case studies, counts per
        industry, counts per year (ascending), and the 15 most frequent companies
    """
    per_industry = df["industry"].value_counts()
    per_year = df["year"].value_counts().sort_index()
    top_companies = df["company"].value_counts().head(15)

    lines = [
        "# LLMOps Database Statistics\n\n",
        f"**Total case studies:** {len(df)}\n\n",
        "## By Industry\n",
    ]
    lines.extend(f"- {name}: {total}\n" for name, total in per_industry.items())

    lines.append("\n## By Year\n")
    lines.extend(f"- {int(yr)}: {total}\n" for yr, total in per_year.items())

    lines.append("\n## Top 15 Companies\n")
    lines.extend(f"- {name}: {total}\n" for name, total in top_companies.items())

    return "".join(lines)
165
+
166
+
167
def list_options() -> str:
    """
    List available filter options (industries, top companies, years).
    Use this to know what values you can filter by in the search function.

    Returns:
        Markdown lists of the distinct industries, the distinct years in
        ascending order, and the 30 most frequent companies
    """
    industries = df["industry"].dropna().unique()
    years = sorted(df["year"].dropna().unique())
    companies = df["company"].value_counts().head(30).index

    lines = ["# Available Filter Options\n\n", "## Industries\n"]
    lines.extend(f"- {name}\n" for name in industries)

    lines.append("\n## Years\n")
    lines.extend(f"- {int(yr)}\n" for yr in years)

    lines.append("\n## Top 30 Companies\n")
    lines.extend(f"- {name}\n" for name in companies)

    return "".join(lines)
190
+
191
+
192
# Create the Gradio interface. Each function wired to an event below is also
# what the MCP server exposes as a tool once launched with mcp_server=True.
with gr.Blocks(title="LLMOps Database MCP Server") as demo:
    gr.Markdown("""
    # 🔍 ZenML LLMOps Database MCP Server

    Query the [ZenML LLMOps Database](https://huggingface.co/datasets/zenml/llmops-database) -
    a collection of 1,100+ real-world LLMOps case studies.

    **This app is an MCP server** - add it to your AI assistant (like Cursor) to query the database!
    """)

    with gr.Tab("Search"):
        gr.Markdown("### Search with optional filters (all can be combined)")
        with gr.Row():
            query_input = gr.Textbox(label="Text Search", placeholder="e.g., RAG, fine-tuning, agents")
            industry_input = gr.Textbox(label="Industry", placeholder="e.g., Tech, Finance, Healthcare")
        with gr.Row():
            company_input = gr.Textbox(label="Company", placeholder="e.g., meta, google, openai")
            # precision=0 makes the component deliver an int (or None when
            # empty) instead of a float such as 2023.0, matching search()'s
            # `year: int` parameter.
            year_input = gr.Number(label="Year", value=None, precision=0)
        with gr.Row():
            tag_input = gr.Textbox(label="Tag", placeholder="e.g., pytorch, monitoring, rag")
            limit_input = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Max Results")
        search_btn = gr.Button("Search")
        search_output = gr.Markdown()
        # The inputs list order must match search()'s positional parameters.
        search_btn.click(
            search,
            inputs=[query_input, industry_input, company_input, year_input, tag_input, limit_input],
            outputs=search_output
        )

    with gr.Tab("Details"):
        title_input = gr.Textbox(label="Case Study Title", placeholder="Enter part of the title")
        details_btn = gr.Button("Get Details")
        details_output = gr.Markdown()
        details_btn.click(get_case_study_details, inputs=[title_input], outputs=details_output)

    with gr.Tab("Statistics"):
        stats_btn = gr.Button("Get Statistics")
        stats_output = gr.Markdown()
        stats_btn.click(get_statistics, outputs=stats_output)

        # NOTE(review): nesting was reconstructed from a flattened diff view;
        # confirm the filter-options controls belong inside this tab.
        gr.Markdown("---")
        options_btn = gr.Button("List Filter Options")
        options_output = gr.Markdown()
        options_btn.click(list_options, outputs=options_output)

# Launch with MCP server enabled
if __name__ == "__main__":
    demo.launch(mcp_server=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio[mcp]>=5.0.0
2
+ datasets>=4.4.1
3
+ pandas