Spaces:

Fred808
/

808-GPT2

Paused

App Files Files Community

Fred808 committed on Jan 29, 2025

Commit

03ac765

verified ·

1 Parent(s): 7c1d81b

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -33

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 import os
 import logging
-import requests
 # Read the NVIDIA API key from environment variables
 api_key = os.getenv("NVIDIA_API_KEY")
@@ -16,12 +16,9 @@ app = FastAPI()
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# NVIDIA API configuration
-base_url = "https://integrate.api.nvidia.com/v1"
-headers = {
-    "Authorization": f"Bearer {api_key}",
-    "Content-Type": "application/json"
-}
 # Define request body schema
 class TextGenerationRequest(BaseModel):
@@ -38,35 +35,30 @@ async def generate_text(request: TextGenerationRequest):
         logger.info("Generating text with NVIDIA API...")
         # Prepare the payload for the NVIDIA API request
-        payload = {
-            "model": "meta/llama-3.1-405b-instruct",  # NVIDIA-specific model
-            "messages": [{"role": "user", "content": request.prompt}],
-            "temperature": request.temperature,
-            "top_p": request.top_p,
-            "max_tokens": request.max_new_tokens,
-            "stream": request.stream
-        }
-        # Send POST request to NVIDIA API (streaming enabled)
-        response = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, stream=True)
-        if response.status_code != 200:
-            raise HTTPException(status_code=response.status_code, detail=f"Error: {response.text}")
-        # Process the streaming response
         response_text = ""
-        for chunk in response.iter_lines():
-            if chunk:
-                data = chunk.decode("utf-8")
-                # Assuming the API response contains 'choices' and 'delta'
-                try:
-                    content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
                     if content:
                         response_text += content
-                        print(content, end="")  # Print the content to stream it out
-                except Exception as e:
-                    logger.error(f"Error processing chunk: {e}")
         return {"generated_text": response_text}
     except Exception as e:
@@ -76,7 +68,7 @@ async def generate_text(request: TextGenerationRequest):
 # Add a root endpoint for health checks
 @app.get("/")
 async def root():
-    return {"message": "Welcome to the NVIDIA Text Generation API!"}
 # Add a test endpoint
 @app.get("/test")

 from pydantic import BaseModel
 import os
 import logging
+import openai
 # Read the NVIDIA API key from environment variables
 api_key = os.getenv("NVIDIA_API_KEY")
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Configure OpenAI client to use NVIDIA's API (via OpenAI wrapper)
+openai.api_key = api_key  # Using the NVIDIA API key
+openai.api_base = "https://integrate.api.nvidia.com/v1"  # Set the NVIDIA base URL
 # Define request body schema
 class TextGenerationRequest(BaseModel):
         logger.info("Generating text with NVIDIA API...")
         # Prepare the payload for the NVIDIA API request
+        response = openai.ChatCompletion.create(
+            model="meta/llama-3.1-405b-instruct",  # Model for NVIDIA API
+            messages=[{"role": "user", "content": request.prompt}],
+            temperature=request.temperature,
+            top_p=request.top_p,
+            max_tokens=request.max_new_tokens,
+            stream=request.stream
+        )
         response_text = ""
+        if request.stream:
+            # Handle streaming response
+            for chunk in response:
+                if isinstance(chunk, dict):  # Ensure the chunk is a dictionary
+                    # Extract content from each chunk safely
+                    content = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
                     if content:
                         response_text += content
+                        print(content, end="")  # Print content as it is streamed
+                else:
+                    logger.error(f"Unexpected chunk format: {chunk}")  # Log if the chunk format is unexpected
+        else:
+            response_text = response["choices"][0]["message"]["content"]
         return {"generated_text": response_text}
     except Exception as e:
 # Add a root endpoint for health checks
 @app.get("/")
 async def root():
+    return {"message": "Welcome to the NVIDIA Text Generation API using OpenAI Wrapper!"}
 # Add a test endpoint
 @app.get("/test")