Spaces:

Luisgust
/

Me

Paused

App Files Files Community

Me / main.py

Luisgust

Create main.py

3a2f68b verified over 1 year ago

raw

history blame contribute delete

2.05 kB

	import os
	import random
	from typing import List, Tuple
	from fastapi import FastAPI, Form, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from huggingface_hub import InferenceClient

	# FastAPI application object that the route handlers in this module register on.
	app = FastAPI()

	# CORS policy: any origin, method and header is accepted, with credentials enabled.
	# NOTE(review): the wildcard origin is a development setting -- change it to the
	# frontend's URL in production, especially while allow_credentials stays True.
	_cors_options = {
	    "allow_origins": ["*"],
	    "allow_credentials": True,
	    "allow_methods": ["*"],
	    "allow_headers": ["*"],
	}
	app.add_middleware(CORSMiddleware, **_cors_options)

	# Shared Hugging Face inference client, bound to the model used for generation.
	client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")

	def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
	    """Render a conversation plus a new message as one instruction-tagged prompt.

	    Args:
	        message: The new user message to be answered.
	        history: Earlier ``(user_prompt, bot_response)`` pairs, oldest first.

	    Returns:
	        A string that starts with ``<s>``, contains one
	        ``[INST] user [/INST] bot</s> `` segment per past turn, and ends with
	        ``[INST] message [/INST]``.
	    """
	    past_turns = "".join(
	        f"[INST] {user_turn} [/INST] {bot_turn}</s> "
	        for user_turn, bot_turn in history
	    )
	    return f"<s>{past_turns}[INST] {message} [/INST]"

	def _parse_history(raw: str) -> List[Tuple[str, str]]:
	    """Safely decode the ``history`` form field into a list of 2-item turns.

	    SECURITY: this field comes straight from the HTTP request. It used to be
	    passed to ``eval()``, which lets any caller execute arbitrary code on the
	    server. Only data-only parsers are used here: JSON first, then
	    ``ast.literal_eval`` so that Python-literal payloads such as
	    ``[('hi', 'hello')]`` keep working.

	    Args:
	        raw: The serialized history exactly as received from the form.

	    Returns:
	        A list of ``(user_prompt, bot_response)`` tuples; empty when ``raw``
	        is blank.

	    Raises:
	        ValueError: If ``raw`` cannot be parsed or is not a sequence of
	            2-item lists/tuples.
	    """
	    # Function-scope imports keep this block self-contained.
	    import ast
	    import json

	    text = raw.strip()
	    if not text:
	        return []

	    try:
	        parsed = json.loads(text)
	    except (ValueError, RecursionError):
	        # Not valid JSON: accept a plain Python literal, but never run code.
	        try:
	            parsed = ast.literal_eval(text)
	        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
	            raise ValueError(
	                "history must be a JSON (or Python literal) list of [user, bot] pairs"
	            ) from exc

	    if not isinstance(parsed, (list, tuple)):
	        raise ValueError("history must be a list of [user, bot] pairs")

	    turns = []
	    for item in parsed:
	        if not isinstance(item, (list, tuple)) or len(item) != 2:
	            raise ValueError("each history entry must be a [user, bot] pair")
	        turns.append((item[0], item[1]))
	    return turns


	@app.post("/generate/")
	async def generate(
	    prompt: str = Form(...),
	    history: str = Form(...),
	    temperature: float = Form(0.9),
	    max_new_tokens: int = Form(512),
	    top_p: float = Form(0.95),
	    repetition_penalty: float = Form(1.0),
	):
	    """Generate a model reply for ``prompt`` given the prior conversation.

	    Args:
	        prompt: The new user message.
	        history: Serialized list of ``[user, bot]`` pairs (JSON or Python
	            literal); blank means no prior turns.
	        temperature: Sampling temperature forwarded to the inference client.
	        max_new_tokens: Upper bound on generated tokens.
	        top_p: Nucleus-sampling probability mass.
	        repetition_penalty: Penalty forwarded to the inference client.

	    Returns:
	        A ``JSONResponse`` of the form ``{"response": <generated text>}``.

	    Raises:
	        HTTPException: 400 when ``history`` is malformed; 500 when prompt
	            formatting or the inference call fails.
	    """
	    # Validate client input outside the broad handler below so that a bad
	    # payload is reported as a client error (400), not a server error (500).
	    try:
	        chat_history = _parse_history(history)
	    except ValueError as exc:
	        raise HTTPException(status_code=400, detail=str(exc)) from exc

	    try:
	        formatted_prompt = format_prompt(prompt, chat_history)

	        generate_kwargs = dict(
	            temperature=temperature,
	            max_new_tokens=max_new_tokens,
	            top_p=top_p,
	            repetition_penalty=repetition_penalty,
	            do_sample=True,
	            # A fresh seed per request so repeated prompts can vary.
	            seed=random.randint(0, 10**7),
	        )

	        # NOTE(review): this is a synchronous client call inside an async
	        # handler; it presumably blocks the event loop while tokens arrive --
	        # consider a plain ``def`` endpoint or an async client.
	        stream = client.text_generation(
	            formatted_prompt,
	            **generate_kwargs,
	            stream=True,
	            details=True,
	            return_full_text=False,
	        )
	        # Each streamed item exposes its text at ``.token.text``.
	        output = "".join(chunk.token.text for chunk in stream)
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e

	    return JSONResponse(content={"response": output})