Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import TextLoader | |
# Shared client for the Hugging Face Inference API. The model id below is an
# open-source chat LLM; any other supported model id can be substituted.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
| # Sample knowledge base for Crustdata APIs | |
| docs = """ | |
| # Crustdata Dataset API | |
| ## Description | |
| The Crustdata Dataset API provides access to a wide variety of datasets across different domains. It allows users to search, filter, and retrieve datasets based on categories, tags, and other metadata. | |
| ## Key Endpoints | |
| ### 1. **GET /datasets** | |
| - **Description**: Retrieves a list of available datasets. | |
| - **Parameters**: | |
| - `category` (optional): Filter datasets by a specific category. | |
| - `tags` (optional): Filter datasets by tags (comma-separated). | |
| - `limit` (optional): Maximum number of datasets to return (default: 10). | |
| - **Example Request**: | |
| ```bash | |
| curl -X GET "https://api.crustdata.com/datasets?category=finance&tags=economy,stocks&limit=5" | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "datasets": [ | |
| { | |
| "id": "12345", | |
| "name": "Global Finance Dataset", | |
| "category": "finance", | |
| "tags": ["economy", "stocks"] | |
| }, | |
| ... | |
| ] | |
| } | |
| ``` | |
| ### 2. **GET /datasets/{id}** | |
| - **Description**: Retrieves detailed information about a specific dataset. | |
| - **Parameters**: | |
| - `id` (required): The unique identifier of the dataset. | |
| - **Example Request**: | |
| ```bash | |
| curl -X GET "https://api.crustdata.com/datasets/12345" | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "id": "12345", | |
| "name": "Global Finance Dataset", | |
| "description": "A comprehensive dataset on global financial markets.", | |
| "category": "finance", | |
| "tags": ["economy", "stocks"], | |
| "source": "World Bank" | |
| } | |
| ``` | |
| --- | |
| # Crustdata Discovery and Enrichment API | |
| ## Description | |
| The Crustdata Discovery and Enrichment API allows users to enrich their datasets by adding metadata, geolocation information, and other relevant attributes. | |
| ## Key Endpoints | |
| ### 1. **POST /enrich** | |
| - **Description**: Enriches input data with additional metadata based on the specified enrichment type. | |
| - **Parameters**: | |
| - `input_data` (required): A list of data entries to be enriched. | |
| - `enrichment_type` (required): The type of enrichment to apply. Supported types: | |
| - `geolocation` | |
| - `demographics` | |
| - **Example Request**: | |
| ```bash | |
| curl -X POST "https://api.crustdata.com/enrich" \ | |
| -H "Content-Type: application/json" \ | |
| -d '{ | |
| "input_data": [{"address": "123 Main St, Springfield"}], | |
| "enrichment_type": "geolocation" | |
| }' | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "enriched_data": [ | |
| { | |
| "address": "123 Main St, Springfield", | |
| "latitude": 37.12345, | |
| "longitude": -93.12345 | |
| } | |
| ] | |
| } | |
| ``` | |
| ### 2. **POST /search** | |
| - **Description**: Searches for relevant metadata or datasets based on user-provided criteria. | |
| - **Parameters**: | |
| - `query` (required): The search term or query string. | |
| - `filters` (optional): Additional filters to narrow down the search results. | |
| - **Example Request**: | |
| ```bash | |
| curl -X POST "https://api.crustdata.com/search" \ | |
| -H "Content-Type: application/json" \ | |
| -d '{ | |
| "query": "energy consumption", | |
| "filters": {"category": "energy"} | |
| }' | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "results": [ | |
| { | |
| "id": "67890", | |
| "name": "Energy Consumption Dataset", | |
| "category": "energy", | |
| "tags": ["consumption", "renewables"] | |
| } | |
| ] | |
| } | |
| ``` | |
| --- | |
| # General Notes | |
| - All endpoints require authentication using an API key. | |
| - API requests must include the `Authorization` header: | |
| ```plaintext | |
| Authorization: Bearer YOUR_API_KEY | |
| ``` | |
| - Response format: JSON | |
| - Base URL: `https://api.crustdata.com` | |
| """ | |
# Cut the documentation string into overlapping chunks (chunk_size=500,
# chunk_overlap=50) so that each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
doc_chunks = text_splitter.create_documents([docs])

# Embed every chunk with a sentence-transformers model and index the vectors
# in a FAISS store that `retrieve_context` queries at chat time.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
docsearch = FAISS.from_documents(doc_chunks, embeddings)
def retrieve_context(query):
    """Return the knowledge-base text most similar to ``query``.

    Runs a similarity search against the module-level ``docsearch`` store,
    keeps the two best-matching chunks, and returns their ``page_content``
    joined by a newline.
    """
    top_chunks = docsearch.similarity_search(query, k=2)
    return "\n".join(chunk.page_content for chunk in top_chunks)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a reply to ``message`` using the Hugging Face Inference API.

    The user's message is augmented with context retrieved from the knowledge
    base, appended to the prior conversation, and sent to
    ``client.chat_completion`` in streaming mode.

    Args:
        message: The latest user input.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty/falsy entries of a pair are skipped.
        system_message: Text sent as the ``system`` role message (it is also
            embedded at the top of the augmented prompt).
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Yields:
        The response text accumulated so far, once for every streamed chunk
        that carries new text.
    """
    # Retrieve relevant context from the knowledge base and fold it into the
    # final user turn.
    context = retrieve_context(message)
    prompt = f"{system_message}\n\nContext:\n{context}\n\nUser: {message}\nAssistant:"

    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": prompt})

    response = ""
    # The loop variable is named `chunk` so it does not shadow the `message`
    # parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices, and `delta.content` may be
        # None or empty; concatenating None onto a str would raise TypeError,
        # so such chunks are skipped.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response
# Extra controls rendered with the chat box. Their values are passed to
# `respond` after (message, history), in this order: system_message,
# max_tokens, temperature, top_p.
_extra_controls = [
    gr.Textbox(
        value="You are a technical assistant for Crustdata APIs.",
        label="System message",
    ),
    gr.Slider(
        minimum=1,
        maximum=2048,
        value=512,
        step=1,
        label="Max new tokens",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Chat UI wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_controls,
    title="Crustdata API Chatbot",
    description="Ask any technical questions about Crustdata’s Dataset and Discovery APIs.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)