# Hugging Face Space page header captured with the source (not code):
# Spaces:
# Sleeping
# Sleeping
| from fastapi import FastAPI | |
| from fastapi.responses import StreamingResponse | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| import asyncio | |
| from fastapi.middleware.cors import CORSMiddleware | |
# ASGI application with fully permissive CORS so a browser UI hosted on a
# different origin can call the streaming endpoint.
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # invalid per the CORS spec (Starlette compensates by echoing the request
    # origin) — confirm whether credentialed requests are actually needed.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Download the quantized GGUF weights from the Hugging Face Hub into the
# working directory (skipped if the file is already cached locally).
model_id = "muhammadnoman76/cortex_q4"
gguf_filename = "unsloth.Q4_K_M.gguf"  # Replace with the correct filename
model_path = hf_hub_download(
    repo_id=model_id,
    filename=gguf_filename,
    local_dir=".",
    # NOTE(review): local_dir_use_symlinks is deprecated (and a no-op) in
    # recent huggingface_hub releases — drop once the pinned version is known.
    local_dir_use_symlinks=False
)
# Alpaca-style prompt template. The single "{}" placeholder under
# "### Input:" is filled with the user's task description via .format();
# the model's generation continues after "### Response:".
alpaca_prompt = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
You are an intelligent agent that analyzes user requests and breaks them down into structured components. Your task is to:
1. Identify the specific actions needed to complete the request
2. Determine which intent-based tools would be appropriate (selecting only from the available intent list)
3. Provide brief justifications for why each intent is relevant
4. Define the high-level goals the request aims to accomplish
5. Generate a concise instruction prompt summarizing how to fulfill the request
Available intents = ["schedule", "email", "sms", "whatsapp", "web_search", "parse_document", "visualize_data", "analyze_data", "analyze_image", "gen_code", "gen_image", "calculate", "execute_code", "academic_search", "finance_news", "translation", "url", "database", "social_media"]
Important notes:
- Provide only the intent category (e.g., "email"), not specific tool names
- If you identify a needed intent that isn't in the list above, include it with "(new)" notation
- Be concise but thorough in your analysis
- Focus on practical implementation rather than theoretical discussion
### Input:
{}
### Response:
"""
# Load the GGUF model through the llama.cpp bindings.
# Fix: use the path returned by hf_hub_download() instead of the hard-coded
# relative path r'.//unsloth.Q4_K_M.gguf' — the two can diverge (hub cache
# layout, renamed file), and the variable is already computed above.
llm = Llama(
    model_path=model_path,  # resolved by hf_hub_download
    n_ctx=2048,             # context window in tokens
    n_batch=512,            # prompt-evaluation batch size
    verbose=False,
)
async def stream_llm_response(task_description: str):
    """Yield plain-text chunks of the model's answer to *task_description*.

    The task is injected into the Alpaca prompt template, and the
    llama-cpp completion is consumed in streaming mode; each chunk's text
    piece is forwarded as it arrives.
    """
    filled_prompt = alpaca_prompt.format(task_description)
    completion_chunks = llm(
        filled_prompt,
        max_tokens=2048,
        stream=True,
    )
    # NOTE(review): llm() is synchronous — each token generation blocks the
    # event loop; the sleep(0) only yields control *between* tokens. Consider
    # run_in_executor / asyncio.to_thread if the server must stay responsive.
    for chunk in completion_chunks:
        yield chunk["choices"][0]["text"]
        await asyncio.sleep(0)
@app.get("/")
async def stream_response(task: str = "make an agent which send mail by searching top 5 website from google"):
    """Stream the LLM's plain-text response for *task* as it is generated.

    Fix: the handler carried no route decorator, so it was never registered
    on the FastAPI app and the API exposed no endpoints at all. Registered
    at "/" with *task* as a query parameter — adjust the path/method if the
    original deployment used different ones.
    """
    return StreamingResponse(stream_llm_response(task), media_type="text/plain")
# Script entry point: serve the ASGI app with uvicorn on all interfaces.
# (A Spaces/production deployment may instead launch `app` via its own runner.)
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)