tlong-ds committed on
Commit
b2e1431
·
1 Parent(s): e2568f2
Files changed (7) hide show
  1. Dockerfile +23 -0
  2. api.py +31 -0
  3. download_data.py +13 -0
  4. helper.py +49 -0
  5. requirements.txt +11 -0
  6. run.sh +2 -0
  7. sk_logreg.pkl +3 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# All app files live under /app (a writable folder inside the container).
WORKDIR /app

# Install dependencies in their own layer so it is cached across code-only
# rebuilds.
COPY ./requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Copy the application source into the image.
COPY . /app

# The entry-point script must be executable.
RUN chmod +x /app/run.sh

# NLTK corpora are downloaded to a writable location (see download_data.py).
ENV NLTK_DATA=/tmp/nltk_data

# Chainlit serves on this port (see run.sh).
EXPOSE 8000

# Launch the app via the entry-point script.
CMD ["/app/run.sh"]
api.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Form
import uvicorn

from model import LogisticRegressionModel
from helper import get_llm, classification_modeL_cache, llm_model_cache, prompt

app = FastAPI()


@app.get("/")
async def root():
    """Health-check endpoint confirming the service is up."""
    return {"message": "Sentiment Analysis API is running."}


@app.post("/chat", response_model=str)
async def chat_endpoint(message: str = Form(...)):
    """Classify the sentiment of *message* and return an LLM analysis.

    The logistic-regression classifier and the LLM client are created
    lazily on the first request and cached in the module-level dicts
    from helper.py for all subsequent requests.
    """
    # Lazily construct and cache the classification model.
    if "model" not in classification_modeL_cache:
        classification_modeL_cache["model"] = LogisticRegressionModel()

    # Lazily construct and cache the LLM client.
    if "llm" not in llm_model_cache:
        llm_model_cache["llm"] = get_llm()

    prediction = classification_modeL_cache["model"].predict(message)
    # prediction[0][1] is taken as the positive-class probability —
    # TODO confirm against LogisticRegressionModel.predict's contract.
    sentiment = "Positive" if prediction[0][1] > 0.5 else "Negative"

    result = llm_model_cache["llm"].invoke(
        prompt.format(text=message, prediction=sentiment))

    if result:
        return result.content
    # Bug fix: the original fell through and returned None when the LLM
    # produced no result, which violates response_model=str and makes
    # FastAPI raise a response-validation error. Return the raw
    # classifier verdict as a fallback instead.
    return f"Sentiment: {sentiment}"


if __name__ == "__main__":
    # Local development entry point only; the Docker image serves the
    # Chainlit UI via run.sh instead (note the different port).
    uvicorn.run(app, host="127.0.0.1", port=7861)
download_data.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Fetch the NLTK corpora the app needs into a writable directory."""
import os

import nltk

# Use /tmp so downloads succeed even when the rest of the container
# filesystem is read-only.
NLTK_DATA_DIR = "/tmp/nltk_data"
os.makedirs(NLTK_DATA_DIR, exist_ok=True)

# Download each required corpus into that folder.
for corpus in ("twitter_samples", "stopwords"):
    nltk.download(corpus, download_dir=NLTK_DATA_DIR)

# Register the folder so NLTK data lookups can find the corpora.
nltk.data.path.append(NLTK_DATA_DIR)
helper.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Shared helpers: model caches, the LLM factory, and prompt templates."""
from dotenv import load_dotenv

# Load environment variables (notably GOOGLE_API_KEY) before anything
# reads them below.
load_dotenv()

import os

from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Cache for the scikit-learn classification model (filled under key "model").
classification_modeL_cache = {}
# Correctly-spelled alias for the misspelled name above (capital "L").
# The original name is kept so existing imports (e.g. api.py) keep working;
# both names refer to the same dict object.
classification_model_cache = classification_modeL_cache

# Cache for the LLM client (filled under key "llm").
llm_model_cache = {}


def get_llm(model_name="gemini-2.5-flash-lite"):
    """Return a configured Gemini chat client.

    Args:
        model_name: Gemini model identifier to use.

    The API key is read from the GOOGLE_API_KEY environment variable
    (loaded from .env above).
    """
    return ChatGoogleGenerativeAI(
        model=model_name,
        temperature=0.6,
        top_p=0.8,
        top_k=1,
        max_tokens=None,
        max_retries=3,
        google_api_key=os.getenv("GOOGLE_API_KEY"),
    )


# Prompt used by the /chat endpoint: asks the LLM to analyze the user's
# text given the classifier's sentiment prediction.
prompt = PromptTemplate(
    input_variables=["text", "prediction"],
    template="""
You are Sen, a sentiment analysis assistant.

TEXT:
{text}

PREDICTION:
{prediction}

Given the above text and sentiment prediction, please analyze the user's content.
""")

# Prompt used for batch evaluation: asks the LLM to label a list of
# tweets as "positive"/"negative" with no explanation.
eval_prompt = PromptTemplate(
    input_variables=["text"],
    template="""
You are agent who analyze the sentiment in Twitter's tweets.
LIST OF TWEETS:
{text}

INSTRUCTIONS
For each tweet in LIST OF TWEETS, analyze the sentiment only without any explanation, in the format: "positive" or "negative"

EXAMPLE:
["positive", "negative", "positive", "negative", "positive"]
"""
)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ nltk
5
+ langchain
6
+ langchain-core
7
+ langchain-google-genai
8
+ google-api-python-client
9
+ fastapi
10
+ uvicorn
11
+ python-dotenv
12
+ chainlit
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
#!/bin/bash
# Container entry point: serve the Chainlit UI on port 8000 (see Dockerfile).
# Fail fast on any error.
set -e
# `exec` replaces the shell with chainlit so that signals from the container
# runtime (e.g. SIGTERM on `docker stop`) reach the server directly instead
# of being swallowed by an intermediate shell process.
exec chainlit run chainlit.py --host 0.0.0.0 --port 8000 --headless
sk_logreg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0a9258da8c61dc165d827a2037cc689d4bbc4b135b586eb8c628c3c8e2ba24
3
+ size 863