Spaces:

Maximofn
/

SmolLM2_backend_LocalModel

Build error

App Files Files Community

Maximofn commited on Mar 3, 2025

Commit

8e2c98e

1 Parent(s): 6fdc750

First commit

Browse files

Files changed (5) hide show

.gitignore +44 -0
Dockerfile +16 -0
README.md +88 -6
app.py +114 -0
requirements.txt +9 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,44 @@

+# Environment files
+.env
+.env.*
+# Python build artifacts and caches
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual environment directories
+venv/
+ENV/
+env/
+# IDE files
+.idea/
+.vscode/
+*.swp
+*.swo
+# Logs
+*.log
+logs/
+# Temporary / OS-generated files
+.DS_Store
+Thumbs.db

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+FROM python:3.13-slim
+# Create the unprivileged user (UID 1000) that owns the application files
+RUN useradd -m -u 1000 user
+WORKDIR /app
+# Install dependencies before copying the code so this layer is cached across code changes
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+EXPOSE 7860
+# Fail the build early when the HUGGINGFACE_TOKEN build secret is not provided
+RUN --mount=type=secret,id=HUGGINGFACE_TOKEN,mode=0444,required=true \
+    test -f /run/secrets/HUGGINGFACE_TOKEN && echo "Secret exists!"
+# Drop root privileges: the user above was created but never switched to
+USER user
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,12 +1,94 @@
 ---
-title: SmolLM2 Backend LocalModel
-emoji: 👁
-colorFrom: red
-colorTo: blue
 sdk: docker
 pinned: false
 license: apache-2.0
-short_description: 'SmolLM2 Backend - FastAPI, Langchain and Docker. LLM local  '
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: SmolLM2 Backend Local Model
+emoji: 📊
+colorFrom: yellow
+colorTo: red
 sdk: docker
 pinned: false
 license: apache-2.0
+short_description: Backend of SmolLM2 chatbot with local model
+app_port: 7860
 ---
+# SmolLM2 Backend Local Model
+This project implements a FastAPI service that uses LangChain and LangGraph to generate text with the Qwen2.5-72B-Instruct model, accessed through the HuggingFace Inference API.
+## Configuration
+### In HuggingFace Spaces
+This project is designed to run in HuggingFace Spaces. To configure it:
+1. Create a new Space in HuggingFace with SDK Docker
+2. Configure the `HUGGINGFACE_TOKEN` or `HF_TOKEN` environment variable in the Space configuration:
+   - Go to the "Settings" tab of your Space
+   - Scroll down to the "Repository secrets" section
+   - Add a new variable with the name `HUGGINGFACE_TOKEN` and your token as the value
+   - Save the changes
+### Local development
+For local development:
+1. Clone this repository
+2. Create a `.env` file in the project root with your HuggingFace token:
+   ```
+   HUGGINGFACE_TOKEN=your_token_here
+   ```
+3. Install the dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+## Local execution
+```bash
+uvicorn app:app --reload --port 7860
+```
+The API will be available at `http://localhost:7860`.
+## Endpoints
+### GET `/`
+Welcome endpoint that returns a greeting message.
+### POST `/generate`
+Endpoint to generate text using the language model.
+**Request parameters:**
+```json
+{
+  "query": "Your question here",
+  "thread_id": "optional_thread_identifier"
+}
+```
+**Response:**
+```json
+{
+  "generated_text": "Generated text by the model",
+  "thread_id": "thread identifier"
+}
+```
+## Docker
+To run the application in a Docker container:
+```bash
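+# Build the image (the Dockerfile requires the HUGGINGFACE_TOKEN build secret;
+# HUGGINGFACE_TOKEN must be set in your shell environment)
+docker build --secret id=HUGGINGFACE_TOKEN,env=HUGGINGFACE_TOKEN -t smollm2-backend .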
+# Run the container
+docker run -p 7860:7860 --env-file .env smollm2-backend
+```
+## API documentation
+The interactive API documentation is available at:
+- Swagger UI: `http://localhost:7860/docs`
+- ReDoc: `http://localhost:7860/redoc`

app.py ADDED Viewed

	@@ -0,0 +1,114 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from huggingface_hub import InferenceClient
+from langchain_core.messages import HumanMessage, AIMessage
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, MessagesState, StateGraph
+import os
+from dotenv import load_dotenv
+load_dotenv()
+# HuggingFace token
+HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN", os.getenv("HUGGINGFACE_TOKEN"))
+# Initialize the HuggingFace model
+model = InferenceClient(
+    model="Qwen/Qwen2.5-72B-Instruct",
+    api_key=os.getenv("HUGGINGFACE_TOKEN")
+)
+# Define the function that calls the model
+def call_model(state: MessagesState):
+    """
+    Call the model with the given messages
+    Args:
+        state: MessagesState
+    Returns:
+        dict: A dictionary containing the generated text and the thread ID
+    """
+    # Convert LangChain messages to HuggingFace format
+    hf_messages = []
+    for msg in state["messages"]:
+        if isinstance(msg, HumanMessage):
+            hf_messages.append({"role": "user", "content": msg.content})
+        elif isinstance(msg, AIMessage):
+            hf_messages.append({"role": "assistant", "content": msg.content})
+    # Call the API
+    response = model.chat_completion(
+        messages=hf_messages,
+        temperature=0.5,
+        max_tokens=64,
+        top_p=0.7
+    )
+    # Convert the response to LangChain format
+    ai_message = AIMessage(content=response.choices[0].message.content)
+    return {"messages": state["messages"] + [ai_message]}
+# Define the graph
+workflow = StateGraph(state_schema=MessagesState)
+# Define the node in the graph
+workflow.add_edge(START, "model")
+workflow.add_node("model", call_model)
+# Add memory
+memory = MemorySaver()
+graph_app = workflow.compile(checkpointer=memory)
+# Define the data model for the request
+class QueryRequest(BaseModel):
+    """Request body accepted by the POST /generate endpoint."""
+    # Text sent to the model as the user's message
+    query: str
+    # Thread identifier passed to the graph config; "default" when omitted
+    thread_id: str = "default"
+# Create the FastAPI application
+app = FastAPI(title="LangChain FastAPI", description="API to generate text using LangChain and LangGraph")
+# Welcome endpoint
+@app.get("/")
+async def api_home():
+    """Welcome endpoint"""
+    return {"detail": "Welcome to FastAPI, Langchain, Docker tutorial"}
+# Generate endpoint
+@app.post("/generate")
+async def generate(request: QueryRequest):
+    """
+    Endpoint to generate text using the language model
+    Args:
+        request: QueryRequest
+        query: str
+        thread_id: str = "default"
+    Returns:
+        dict: A dictionary containing the generated text and the thread ID
+    """
+    try:
+        # Configure the thread ID
+        config = {"configurable": {"thread_id": request.thread_id}}
+        # Create the input message
+        input_messages = [HumanMessage(content=request.query)]
+        # Invoke the graph
+        output = graph_app.invoke({"messages": input_messages}, config)
+        # Get the model response
+        response = output["messages"][-1].content
+        return {
+            "generated_text": response,
+            "thread_id": request.thread_id
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error al generar texto: {str(e)}")
+# When the file is run as a script, serve the app with uvicorn on port 7860
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+fastapi
+uvicorn
+requests
+pydantic>=2.0.0
+langchain
+langchain-huggingface
+langchain-core
+langgraph > 0.2.27
+python-dotenv
+# Imported directly by app.py (InferenceClient), so declare it explicitly
+huggingface_hub