Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,10 +18,10 @@ app = FastAPI(
|
|
| 18 |
|
| 19 |
# Define valid models (replace with actual models supported by https://models.github.ai/inference)
|
| 20 |
VALID_MODELS = [
|
| 21 |
-
"deepseek/DeepSeek-V3-0324",
|
| 22 |
-
"gpt-3.5-turbo",
|
| 23 |
-
"llama-3",
|
| 24 |
-
"mistral-7b"
|
| 25 |
]
|
| 26 |
|
| 27 |
class GenerateRequest(BaseModel):
|
|
@@ -52,10 +52,13 @@ async def generate_ai_response(prompt: str, model: str, publisher: Optional[str]
|
|
| 52 |
raise HTTPException(status_code=400, detail=f"Invalid model. Valid models: {VALID_MODELS}")
|
| 53 |
|
| 54 |
logger.debug(f"Using endpoint: {endpoint}, publisher: {final_publisher}")
|
| 55 |
-
client = AsyncOpenAI(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
try:
|
| 58 |
-
# Include publisher in the request payload
|
| 59 |
stream = await client.chat.completions.create(
|
| 60 |
messages=[
|
| 61 |
{"role": "system", "content": "You are a helpful assistant named Orion, created by Abdullah Ali"},
|
|
@@ -64,8 +67,7 @@ async def generate_ai_response(prompt: str, model: str, publisher: Optional[str]
|
|
| 64 |
model=model,
|
| 65 |
temperature=1.0,
|
| 66 |
top_p=1.0,
|
| 67 |
-
stream=True
|
| 68 |
-
extra_body={"publisher": final_publisher} # Add publisher to extra_body
|
| 69 |
)
|
| 70 |
|
| 71 |
async for chunk in stream:
|
|
@@ -74,12 +76,16 @@ async def generate_ai_response(prompt: str, model: str, publisher: Optional[str]
|
|
| 74 |
|
| 75 |
except Exception as err:
|
| 76 |
logger.error(f"AI generation failed: {str(err)}")
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
@app.post("/generate", summary="Generate AI response", response_description="Streaming AI response")
|
| 81 |
async def generate_response(
|
| 82 |
-
model: str = Query("deepseek/DeepSeek-V3-0324", description="The AI model to use"),
|
| 83 |
prompt: Optional[str] = Query(None, description="The input text prompt for the AI"),
|
| 84 |
publisher: Optional[str] = Query(None, description="Publisher identifier (optional, defaults to DEFAULT_PUBLISHER env var)"),
|
| 85 |
request: Optional[GenerateRequest] = None
|
|
@@ -87,7 +93,7 @@ async def generate_response(
|
|
| 87 |
"""
|
| 88 |
Generate a streaming AI response based on the provided prompt, model, and publisher.
|
| 89 |
|
| 90 |
-
- **model**: The AI model to use (e.g., deepseek/DeepSeek-V3-0324)
|
| 91 |
- **prompt**: The input text prompt for the AI (query param or body)
|
| 92 |
- **publisher**: The publisher identifier (optional, defaults to DEFAULT_PUBLISHER env var)
|
| 93 |
"""
|
|
@@ -120,3 +126,4 @@ async def list_models():
|
|
| 120 |
|
| 121 |
def get_app():
|
| 122 |
return app
|
|
|
|
|
|
| 18 |
|
| 19 |
# Define valid models (replace with actual models supported by https://models.github.ai/inference)
|
| 20 |
VALID_MODELS = [
|
| 21 |
+
"DeepSeek-V3-0324", # Try without deepseek/ prefix
|
| 22 |
+
"gpt-3.5-turbo", # Placeholder
|
| 23 |
+
"llama-3", # Placeholder
|
| 24 |
+
"mistral-7b" # Placeholder
|
| 25 |
]
|
| 26 |
|
| 27 |
class GenerateRequest(BaseModel):
|
|
|
|
| 52 |
raise HTTPException(status_code=400, detail=f"Invalid model. Valid models: {VALID_MODELS}")
|
| 53 |
|
| 54 |
logger.debug(f"Using endpoint: {endpoint}, publisher: {final_publisher}")
|
| 55 |
+
client = AsyncOpenAI(
|
| 56 |
+
base_url=endpoint,
|
| 57 |
+
api_key=token,
|
| 58 |
+
default_headers={"X-Publisher": final_publisher} # Pass publisher as header
|
| 59 |
+
)
|
| 60 |
|
| 61 |
try:
|
|
|
|
| 62 |
stream = await client.chat.completions.create(
|
| 63 |
messages=[
|
| 64 |
{"role": "system", "content": "You are a helpful assistant named Orion, created by Abdullah Ali"},
|
|
|
|
| 67 |
model=model,
|
| 68 |
temperature=1.0,
|
| 69 |
top_p=1.0,
|
| 70 |
+
stream=True
|
|
|
|
| 71 |
)
|
| 72 |
|
| 73 |
async for chunk in stream:
|
|
|
|
| 76 |
|
| 77 |
except Exception as err:
|
| 78 |
logger.error(f"AI generation failed: {str(err)}")
|
| 79 |
+
# Check for specific AI server errors
|
| 80 |
+
error_msg = str(err)
|
| 81 |
+
if "unknown_model" in error_msg.lower():
|
| 82 |
+
raise HTTPException(status_code=400, detail=f"AI server error: {error_msg}")
|
| 83 |
+
yield f"Error: {error_msg}"
|
| 84 |
+
raise HTTPException(status_code=500, detail=f"AI generation failed: {error_msg}")
|
| 85 |
|
| 86 |
@app.post("/generate", summary="Generate AI response", response_description="Streaming AI response")
|
| 87 |
async def generate_response(
|
| 88 |
+
model: str = Query("DeepSeek-V3-0324", description="The AI model to use"),
|
| 89 |
prompt: Optional[str] = Query(None, description="The input text prompt for the AI"),
|
| 90 |
publisher: Optional[str] = Query(None, description="Publisher identifier (optional, defaults to DEFAULT_PUBLISHER env var)"),
|
| 91 |
request: Optional[GenerateRequest] = None
|
|
|
|
| 93 |
"""
|
| 94 |
Generate a streaming AI response based on the provided prompt, model, and publisher.
|
| 95 |
|
| 96 |
+
- **model**: The AI model to use (e.g., DeepSeek-V3-0324)
|
| 97 |
- **prompt**: The input text prompt for the AI (query param or body)
|
| 98 |
- **publisher**: The publisher identifier (optional, defaults to DEFAULT_PUBLISHER env var)
|
| 99 |
"""
|
|
|
|
| 126 |
|
| 127 |
def get_app():
|
| 128 |
return app
|
| 129 |
+
|