Spaces:
Sleeping
Sleeping
new files
Browse files- Dockerfile +23 -0
- api.py +31 -0
- download_data.py +13 -0
- helper.py +49 -0
- requirements.txt +11 -0
- run.sh +2 -0
- sk_logreg.pkl +3 -0
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.11-slim

# All work happens in /app, a writable directory for non-root users.
WORKDIR /app

# Install Python dependencies first so this layer is cached
# independently of source-code changes.
COPY ./requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Bring in the application source.
COPY . /app

# The container entrypoint is a shell script; make sure it can execute.
RUN chmod +x /app/run.sh

# NLTK looks for its corpora under this path (download_data.py downloads
# into the same directory).
ENV NLTK_DATA=/tmp/nltk_data

# Chainlit serves on this port (matches run.sh).
EXPOSE 8000

# Launch the app via the entrypoint script.
CMD ["/app/run.sh"]
api.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, Form, HTTPException
import uvicorn

from model import LogisticRegressionModel
from helper import get_llm, classification_modeL_cache, llm_model_cache, prompt

app = FastAPI()


@app.get("/")
async def root():
    """Health-check endpoint confirming the API is up."""
    return {"message": "Sentiment Analysis API is running."}


@app.post("/chat", response_model=str)
async def chat_endpoint(message: str = Form(...)):
    """Classify the sentiment of *message* and return an LLM-written analysis.

    The scikit-learn classifier and the Gemini LLM are created lazily on the
    first request and cached in the module-level dicts shared via helper.py,
    so subsequent requests reuse the same instances.
    """
    # Lazily construct the classifier once.
    if "model" not in classification_modeL_cache:
        classification_modeL_cache["model"] = LogisticRegressionModel()

    # Lazily construct the LLM client once.
    if "llm" not in llm_model_cache:
        llm_model_cache["llm"] = get_llm()

    # predict() presumably returns per-class probabilities with [0][1] being
    # the positive-class probability -- TODO confirm against model.py (not
    # included in this change set).
    prediction = classification_modeL_cache["model"].predict(message)
    sentiment = "Positive" if prediction[0][1] > 0.5 else "Negative"

    result = llm_model_cache["llm"].invoke(
        prompt.format(text=message, prediction=sentiment))

    if result:
        return result.content
    # Bug fix: the original fell through and returned None, which fails the
    # response_model=str validation and surfaces as an opaque 500 error.
    raise HTTPException(status_code=502, detail="LLM returned no response.")


if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=7861)
download_data.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import nltk
import os

# Writable location for NLTK corpora (matches NLTK_DATA in the Dockerfile).
NLTK_DATA_DIR = "/tmp/nltk_data"
os.makedirs(NLTK_DATA_DIR, exist_ok=True)

# Fetch the corpora the sentiment pipeline needs, in order.
for corpus in ("twitter_samples", "stopwords"):
    nltk.download(corpus, download_dir=NLTK_DATA_DIR)

# Register the custom directory so NLTK can resolve the datasets at lookup time.
nltk.data.path.append(NLTK_DATA_DIR)
helper.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from dotenv import load_dotenv
load_dotenv()
import os
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Lazy-init caches shared with api.py. NOTE(review): the "modeL" spelling is
# part of the public name imported elsewhere -- do not rename casually.
classification_modeL_cache = {}

llm_model_cache = {}


def get_llm(model_name="gemini-2.5-flash-lite"):
    """Build a Gemini chat model configured for sentiment-analysis replies."""
    api_key = os.getenv("GOOGLE_API_KEY")
    llm = ChatGoogleGenerativeAI(
        model=model_name,
        temperature=0.6,
        top_p=0.8,
        top_k=1,
        max_tokens=None,
        max_retries=3,
        google_api_key=api_key,
    )
    return llm


# Prompt used by the /chat endpoint: pairs the user text with the
# classifier's verdict for the LLM to expand on.
prompt = PromptTemplate(
    input_variables=["text", "prediction"],
    template="""
You are Sen, a sentiment analysis assistant.

TEXT:
{text}

PREDICTION:
{prediction}

Given the above text and sentiment prediction, please analyze the user's content.
""")

# Batch-evaluation prompt: asks the LLM to label a list of tweets.
eval_prompt = PromptTemplate(
    input_variables=["text"],
    template="""
You are agent who analyze the sentiment in Twitter's tweets.
LIST OF TWEETS:
{text}

INSTRUCTIONS
For each tweet in LIST OF TWEETS, analyze the sentiment only without any explanation, in the format: "positive" or "negative"

EXAMPLE:
["positive", "negative", "positive", "negative", "positive"]
"""
)
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pandas
numpy
scikit-learn
nltk
langchain
langchain-core
langchain-google-genai
google-api-python-client
fastapi
uvicorn
python-dotenv
# run.sh launches the app with `chainlit run`; it was missing from this list.
chainlit
run.sh
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Container entrypoint (invoked by the Dockerfile CMD).
# exec replaces the shell so Chainlit receives container signals
# (e.g. docker stop) directly instead of sitting behind a bash parent.
# NOTE(review): chainlit.py is not among the files added here -- verify it
# exists and that the chainlit package is installed in the image.
exec chainlit run chainlit.py --host 0.0.0.0 --port 8000 --headless
sk_logreg.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c0a9258da8c61dc165d827a2037cc689d4bbc4b135b586eb8c628c3c8e2ba24
|
| 3 |
+
size 863
|