Spaces:
Runtime error
Runtime error
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# Load a Hugging Face causal language model for question answering.
model_name = "HuggingFaceH4/zephyr-7b-beta"  # You can choose a lighter model if needed
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Cap only the *generated* tokens. `max_length` would count the prompt as
# well, and the retrieval prompt (the question plus retrieved chunks of up to
# 1000 characters each) can by itself exceed 512 tokens, leaving no room for
# an answer.
qa_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
)
| # Knowledge base for Crustdata APIs | |
# `docs` holds the whole knowledge base as one Markdown string: a reference for
# the Crustdata Dataset API and the Discovery and Enrichment API (endpoints,
# parameters, example requests/responses) plus general authentication notes.
# It is the only text handed to the splitter below, so it is the sole
# retrieval corpus for the QA chain.
| docs = """ | |
| # Crustdata Dataset API | |
| ## Description | |
| The Crustdata Dataset API provides access to a wide variety of datasets across different domains. It allows users to search, filter, and retrieve datasets based on categories, tags, and other metadata. | |
| ## Key Endpoints | |
| ### 1. **GET /datasets** | |
| - **Description**: Retrieves a list of available datasets. | |
| - **Parameters**: | |
| - `category` (optional): Filter datasets by a specific category. | |
| - `tags` (optional): Filter datasets by tags (comma-separated). | |
| - `limit` (optional): Maximum number of datasets to return (default: 10). | |
| - **Example Request**: | |
| ```bash | |
| curl -X GET "https://api.crustdata.com/datasets?category=finance&tags=economy,stocks&limit=5" | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "datasets": [ | |
| { | |
| "id": "12345", | |
| "name": "Global Finance Dataset", | |
| "category": "finance", | |
| "tags": ["economy", "stocks"] | |
| }, | |
| ... | |
| ] | |
| } | |
| ``` | |
| ### 2. **GET /datasets/{id}** | |
| - **Description**: Retrieves detailed information about a specific dataset. | |
| - **Parameters**: | |
| - `id` (required): The unique identifier of the dataset. | |
| - **Example Request**: | |
| ```bash | |
| curl -X GET "https://api.crustdata.com/datasets/12345" | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "id": "12345", | |
| "name": "Global Finance Dataset", | |
| "description": "A comprehensive dataset on global financial markets.", | |
| "category": "finance", | |
| "tags": ["economy", "stocks"], | |
| "source": "World Bank" | |
| } | |
| ``` | |
| --- | |
| # Crustdata Discovery and Enrichment API | |
| ## Description | |
| The Crustdata Discovery and Enrichment API allows users to enrich their datasets by adding metadata, geolocation information, and other relevant attributes. | |
| ## Key Endpoints | |
| ### 1. **POST /enrich** | |
| - **Description**: Enriches input data with additional metadata based on the specified enrichment type. | |
| - **Parameters**: | |
| - `input_data` (required): A list of data entries to be enriched. | |
| - `enrichment_type` (required): The type of enrichment to apply. Supported types: | |
| - `geolocation` | |
| - `demographics` | |
| - **Example Request**: | |
| ```bash | |
| curl -X POST "https://api.crustdata.com/enrich" \ | |
| -H "Content-Type: application/json" \ | |
| -d '{ | |
| "input_data": [{"address": "123 Main St, Springfield"}], | |
| "enrichment_type": "geolocation" | |
| }' | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "enriched_data": [ | |
| { | |
| "address": "123 Main St, Springfield", | |
| "latitude": 37.12345, | |
| "longitude": -93.12345 | |
| } | |
| ] | |
| } | |
| ``` | |
| ### 2. **POST /search** | |
| - **Description**: Searches for relevant metadata or datasets based on user-provided criteria. | |
| - **Parameters**: | |
| - `query` (required): The search term or query string. | |
| - `filters` (optional): Additional filters to narrow down the search results. | |
| - **Example Request**: | |
| ```bash | |
| curl -X POST "https://api.crustdata.com/search" \ | |
| -H "Content-Type: application/json" \ | |
| -d '{ | |
| "query": "energy consumption", | |
| "filters": {"category": "energy"} | |
| }' | |
| ``` | |
| - **Example Response**: | |
| ```json | |
| { | |
| "results": [ | |
| { | |
| "id": "67890", | |
| "name": "Energy Consumption Dataset", | |
| "category": "energy", | |
| "tags": ["consumption", "renewables"] | |
| } | |
| ] | |
| } | |
| ``` | |
| --- | |
| # General Notes | |
| - All endpoints require authentication using an API key. | |
| - API requests must include the `Authorization` header: | |
| ```plaintext | |
| Authorization: Bearer YOUR_API_KEY | |
| ``` | |
| - Response format: JSON | |
| - Base URL: `https://api.crustdata.com` | |
| """ | |
# Break the documentation into overlapping chunks sized for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
doc_chunks = text_splitter.create_documents(texts=[docs])

# Index the chunks in FAISS using a sentence-transformers embedding model.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
docsearch = FAISS.from_documents(documents=doc_chunks, embedding=embeddings)
# Create a QA chain.
# LangChain chains expect one of LangChain's own LLM wrappers rather than a
# raw transformers pipeline, so adapt `qa_pipeline` with HuggingFacePipeline
# before handing it to the chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=HuggingFacePipeline(pipeline=qa_pipeline),
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)
# Function to handle user queries
def answer_question(question: str) -> str:
    """Answer a question about the Crustdata APIs using the retrieval QA chain.

    Args:
        question: The text entered by the user.

    Returns:
        The generated answer text, or a short prompt when no question was
        given.
    """
    # Skip retrieval and generation entirely for blank input.
    if not question or not question.strip():
        return "Please enter a question."

    # The chain is built with return_source_documents=True, so it has two
    # output keys; `run()` only supports single-output chains and raises
    # ValueError here. Invoke the chain and pick out the answer text instead.
    response = qa_chain.invoke({"query": question})
    return response["result"]
# Build the Gradio UI: one free-text question box in, plain-text answer out.
chat_interface = gr.Interface(
    title="Crustdata API Chat",
    description="Ask any technical questions about Crustdata’s Dataset and Discovery APIs.",
    fn=answer_question,
    inputs=gr.Textbox(
        placeholder="Ask a question about Crustdata APIs...",
        lines=2,
    ),
    outputs="text",
)
# Launch the Gradio app only when this file is executed as a script, so that
# importing the module (e.g. from Gradio's reload mode or a test) does not
# start a blocking web server as a side effect.
if __name__ == "__main__":
    chat_interface.launch(share=True)