Spaces:

muhammadnoman76
/

cortex

Sleeping

App Files Files Community

muhammadnoman76 committed on Apr 30, 2025

Commit

f98f8ce

1 Parent(s): 8664e1a

update

Browse files

Files changed (5) hide show

Dockerfile +33 -0
README.md +11 -3
app.py +80 -0
packages.txt +5 -0
requirements.txt +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+FROM python:3.10-slim
+WORKDIR /code
+# Copy packages.txt and install system dependencies
+COPY packages.txt /root/packages.txt
+RUN apt-get update && \
+    xargs -r -a /root/packages.txt apt-get install -y && \
+    rm -rf /var/lib/apt/lists/*
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Install llama-cpp-python in a separate step (note: it is also listed in requirements.txt, so the step above already installs it)
+RUN pip install --no-cache-dir llama-cpp-python
+# Set Hugging Face cache directory to a writable location
+ENV HF_HOME=/code/.cache/huggingface
+RUN mkdir -p /code/.cache/huggingface && \
+    chmod -R 777 /code/.cache
+# Copy application code
+COPY . .
+# Ensure correct permissions for the working directory
+RUN chmod -R 777 /code
+# Expose port
+EXPOSE 7860
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,11 +1,19 @@
 ---
 title: Cortex
-emoji: 📈
-colorFrom: yellow
 colorTo: gray
 sdk: docker
 pinned: false
-license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Cortex
+emoji: 🐢
+colorFrom: indigo
 colorTo: gray
 sdk: docker
 pinned: false
+license: afl-3.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# LLM Streaming API
+This Space provides a FastAPI application that streams responses from the Cortex LLM model.
+- Visit `/docs` for FastAPI's interactive documentation, where the endpoint can be tried out
+- Send GET requests to `/stream` with an optional `task` query parameter; the response is streamed back as plain text

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+import asyncio
+from fastapi.middleware.cors import CORSMiddleware
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Download the GGUF file
+model_id = "muhammadnoman76/cortex_q4"
+gguf_filename = "unsloth.Q4_K_M.gguf"  # Replace with the correct filename
+model_path = hf_hub_download(
+    repo_id=model_id,
+    filename=gguf_filename,
+    local_dir=".",
+    local_dir_use_symlinks=False
+)
+# Alpaca-style prompt template. The single "{}" placeholder under "### Input:"
+# is filled with the user's task via str.format in stream_llm_response.
+alpaca_prompt = """
+Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+You are an intelligent agent that analyzes user requests and breaks them down into structured components. Your task is to:
+1. Identify the specific actions needed to complete the request
+2. Determine which intent-based tools would be appropriate (selecting only from the available intent list)
+3. Provide brief justifications for why each intent is relevant
+4. Define the high-level goals the request aims to accomplish
+5. Generate a concise instruction prompt summarizing how to fulfill the request
+Available intents = ["schedule", "email", "sms", "whatsapp", "web_search", "parse_document", "visualize_data", "analyze_data", "analyze_image", "gen_code", "gen_image", "calculate", "execute_code", "academic_search", "finance_news", "translation", "url", "database", "social_media"]
+Important notes:
+- Provide only the intent category (e.g., "email"), not specific tool names
+- If you identify a needed intent that isn't in the list above, include it with "(new)" notation
+- Be concise but thorough in your analysis
+- Focus on practical implementation rather than theoretical discussion
+### Input:
+{}
+### Response:
+"""
+# Load model from local file in the copied folder
+llm = Llama(
+    model_path= r'.//unsloth.Q4_K_M.gguf',
+    n_ctx=2048,
+    n_batch=512,
+    verbose=False
+)
+async def stream_llm_response(task_description: str):
+    prompt = alpaca_prompt.format(task_description)
+    stream = llm(
+        prompt,
+        max_tokens=2048,
+        stream=True,
+    )
+    for output in stream:
+        yield output["choices"][0]["text"]
+        await asyncio.sleep(0)
+@app.get("/stream")
+async def stream_response(task: str = "make an agent which send mail by searching top 5 website from google"):
+    return StreamingResponse(stream_llm_response(task), media_type="text/plain")
+# Local entry point: start uvicorn directly when this module is run as a script.
+# NOTE(review): this serves on port 8000, while the Dockerfile CMD starts
+# uvicorn on port 7860 — confirm the difference is intended.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

packages.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+build-essential
+cmake
+git
+libopenblas-dev
+libomp-dev

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi>=0.115.12
+uvicorn>=0.34.2
+pydantic>=2.11.4
+llama-cpp-python>=0.3.8
+huggingface_hub>=0.25.0