muhammadnoman76 commited on
Commit
f98f8ce
·
1 Parent(s): 8664e1a
Files changed (5) hide show
  1. Dockerfile +33 -0
  2. README.md +11 -3
  3. app.py +80 -0
  4. packages.txt +5 -0
  5. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /code
4
+
5
+ # Copy packages.txt and install system dependencies
6
+ COPY packages.txt /root/packages.txt
7
+ RUN apt-get update && \
8
+ xargs -r -a /root/packages.txt apt-get install -y && \
9
+ rm -rf /var/lib/apt/lists/*
10
+
11
+ # Install Python dependencies
12
+ COPY requirements.txt .
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ # Install llama-cpp-python separately to handle potential issues
16
+ RUN pip install --no-cache-dir llama-cpp-python
17
+
18
+ # Set Hugging Face cache directory to a writable location
19
+ ENV HF_HOME=/code/.cache/huggingface
20
+ RUN mkdir -p /code/.cache/huggingface && \
21
+ chmod -R 777 /code/.cache
22
+
23
+ # Copy application code
24
+ COPY . .
25
+
26
+ # Ensure correct permissions for the working directory
27
+ RUN chmod -R 777 /code
28
+
29
+ # Expose port
30
+ EXPOSE 7860
31
+
32
+ # Run the application
33
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,19 @@
1
  ---
2
  title: Cortex
3
- emoji: 📈
4
- colorFrom: yellow
5
  colorTo: gray
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Cortex
3
+ emoji: 🐢
4
+ colorFrom: indigo
5
  colorTo: gray
6
  sdk: docker
7
  pinned: false
8
+ license: afl-3.0
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+
14
+ # LLM Streaming API
15
+
16
+ This Space provides a FastAPI application that streams responses from the Cortex LLM model.
17
+
18
+ - Visit `/ui` for a simple interface to test the model
19
+ - Send POST requests to `/generate` with JSON body containing `task_description`, `max_tokens` (optional), and `temperature` (optional)
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.responses import StreamingResponse
3
+ from huggingface_hub import hf_hub_download
4
+ from llama_cpp import Llama
5
+ import asyncio
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+
8
+ app = FastAPI()
9
+
10
+ app.add_middleware(
11
+ CORSMiddleware,
12
+ allow_origins=["*"],
13
+ allow_credentials=True,
14
+ allow_methods=["*"],
15
+ allow_headers=["*"],
16
+ )
17
+
18
+ # Download the GGUF file
19
+ model_id = "muhammadnoman76/cortex_q4"
20
+ gguf_filename = "unsloth.Q4_K_M.gguf" # Replace with the correct filename
21
+ model_path = hf_hub_download(
22
+ repo_id=model_id,
23
+ filename=gguf_filename,
24
+ local_dir=".",
25
+ local_dir_use_symlinks=False
26
+ )
27
+
28
+ alpaca_prompt = """
29
+ Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
30
+
31
+ ### Instruction:
32
+ You are an intelligent agent that analyzes user requests and breaks them down into structured components. Your task is to:
33
+
34
+ 1. Identify the specific actions needed to complete the request
35
+ 2. Determine which intent-based tools would be appropriate (selecting only from the available intent list)
36
+ 3. Provide brief justifications for why each intent is relevant
37
+ 4. Define the high-level goals the request aims to accomplish
38
+ 5. Generate a concise instruction prompt summarizing how to fulfill the request
39
+
40
+ Available intents = ["schedule", "email", "sms", "whatsapp", "web_search", "parse_document", "visualize_data", "analyze_data", "analyze_image", "gen_code", "gen_image", "calculate", "execute_code", "academic_search", "finance_news", "translation", "url", "database", "social_media"]
41
+
42
+ Important notes:
43
+ - Provide only the intent category (e.g., "email"), not specific tool names
44
+ - If you identify a needed intent that isn't in the list above, include it with "(new)" notation
45
+ - Be concise but thorough in your analysis
46
+ - Focus on practical implementation rather than theoretical discussion
47
+
48
+ ### Input:
49
+ {}
50
+
51
+ ### Response:
52
+ """
53
+
54
+ # Load model from local file in the copied folder
55
+ llm = Llama(
56
+ model_path= r'.//unsloth.Q4_K_M.gguf',
57
+ n_ctx=2048,
58
+ n_batch=512,
59
+ verbose=False
60
+ )
61
+
62
+ async def stream_llm_response(task_description: str):
63
+ prompt = alpaca_prompt.format(task_description)
64
+ stream = llm(
65
+ prompt,
66
+ max_tokens=2048,
67
+ stream=True,
68
+ )
69
+
70
+ for output in stream:
71
+ yield output["choices"][0]["text"]
72
+ await asyncio.sleep(0)
73
+
74
+ @app.get("/stream")
75
+ async def stream_response(task: str = "make an agent which send mail by searching top 5 website from google"):
76
+ return StreamingResponse(stream_llm_response(task), media_type="text/plain")
77
+
78
+ if __name__ == "__main__":
79
+ import uvicorn
80
+ uvicorn.run(app, host="0.0.0.0", port=8000)
packages.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ build-essential
2
+ cmake
3
+ git
4
+ libopenblas-dev
5
+ libomp-dev
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi>=0.115.12
2
+ uvicorn>=0.34.2
3
+ pydantic>=2.11.4
4
+ llama-cpp-python>=0.3.8
5
+ huggingface_hub>=0.25.0