Coderrs committed on
Commit
144a2a0
·
verified ·
1 Parent(s): fdbd9e7

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +18 -0
  2. app/__init__.py +0 -0
  3. app/main.py +134 -0
  4. docker-compose.yml +31 -0
  5. requirements.txt +9 -0
  6. uploads/.gitkeep +1 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a slim Python base image
FROM python:3.11-slim

# Install build tools in case a package needs to compile from source.
# Recommended packages are skipped and the apt package lists are removed in
# the same layer so they do not end up in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /code

# Copy the requirements file on its own first, so the dependency layer is only
# rebuilt when requirements.txt changes
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the application code
COPY ./app /code/app

# Document the port the server listens on (must match --port below)
EXPOSE 7860

# Command to run the Uvicorn server
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/main.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ from fastapi import FastAPI, HTTPException
4
+ from pydantic import BaseModel, Field
5
+ from typing import List
6
+
7
+ # Model and DB libraries
8
+ from llama_cpp import Llama
9
+ from huggingface_hub import hf_hub_download
10
+ import chromadb
11
+ from youtube_transcript_api import YouTubeTranscriptApi
12
+
13
# --- 1. Constants and Configuration ---
# Every setting can be overridden with an environment variable of the same
# name; the defaults are the values that were previously hard-coded.
MODEL_REPO = os.environ.get(
    "MODEL_REPO", "bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF"
)
GGUF_FILE = os.environ.get(
    "GGUF_FILE", "Phi-3.5-mini-instruct_Uncensored-Q4_K_M.gguf"
)  # Good balance
# Directory handed to chromadb.PersistentClient at startup. It only survives
# container re-creation if a volume is mounted at this path.
CHROMA_PATH = os.environ.get("CHROMA_PATH", "/app/chroma_db")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME", "chat_history")

# --- 2. Initialize FastAPI app ---
app = FastAPI(
    title="Enhanced RAG API with Memory",
    description="An API with chat history (ChromaDB) and YouTube analysis.",
    version="1.0",
)

# --- 3. Global Variables (will be loaded on startup) ---
# All three stay None until the startup handler has run.
llm: Llama = None
chroma_client: chromadb.Client = None
collection: chromadb.Collection = None
30
+
31
+ # --- 4. Startup Event: Load models and initialize DB ---
32
@app.on_event("startup")
def load_resources():
    """Load the LLM and open the ChromaDB collection when the app starts.

    Fills in the module-level ``llm``, ``chroma_client`` and ``collection``
    globals that the endpoints read.
    """
    global llm, chroma_client, collection

    # --- LLM: fetch the GGUF file from the Hugging Face Hub, then load it ---
    print("Downloading and loading LLM...")
    llm = Llama(
        model_path=hf_hub_download(repo_id=MODEL_REPO, filename=GGUF_FILE),
        n_ctx=4096,
        n_gpu_layers=-1,
        verbose=True,
    )
    print("LLM loaded.")

    # --- ChromaDB: persistent client rooted at CHROMA_PATH ---
    print("Initializing ChromaDB...")
    chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
    # Reuse the chat-history collection if it already exists, else create it
    collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)
    print("ChromaDB initialized.")

    print("API is ready to go! 🚀")
50
+
51
+ # --- 5. Pydantic Models for API requests ---
52
class ChatRequest(BaseModel):
    # Request body of POST /chat. Both fields are required.
    session_id: str = Field(
        ...,
        description="Unique identifier for a chat session.",
    )
    message: str = Field(
        ...,
        description="The user's message.",
    )
55
+
56
class YouTubeRequest(BaseModel):
    # Request body of POST /analyze_youtube. The single field is required.
    video_url: str = Field(
        ...,
        description="URL of the YouTube video to analyze.",
    )
58
+
59
+ # --- 6. API Endpoint for Chat with Memory ---
60
def _format_history(documents, metadatas):
    """Render stored exchanges as ``User: ...`` / ``AI: ...`` line pairs.

    ``documents`` are the stored AI replies and ``metadatas`` the matching
    metadata dicts, each holding the ``user_message`` that produced the reply.
    """
    return "\n".join(
        f"User: {meta['user_message']}\nAI: {doc}"
        for doc, meta in zip(documents, metadatas)
    )


@app.post("/chat")
def chat_with_memory(request: ChatRequest):
    """Answer a chat message, using stored exchanges of the same session as context."""
    # The globals are only populated by the startup handler; fail with a clear
    # status instead of a TypeError if they are not available.
    if llm is None or collection is None:
        raise HTTPException(status_code=503, detail="Service is not ready yet.")

    print(f"Received chat request for session: {request.session_id}")

    # Step 1: Retrieve relevant chat history from ChromaDB.
    # query() needs query_texts (or embeddings); without them the call raised
    # and the history was always dropped. Results are the stored exchanges of
    # this session most similar to the new message, not the most recent ones.
    try:
        history = collection.query(
            query_texts=[request.message],
            where={"session_id": request.session_id},
            n_results=5,
        )
        # One query text was sent, so the results for it are at index 0
        context = _format_history(history["documents"][0], history["metadatas"][0])
    except Exception as e:
        # Best effort: a history failure must not prevent answering
        print(f"Error querying ChromaDB: {e}")
        context = ""  # Start fresh if history fails

    # Step 2: Construct the prompt with history
    prompt_template = (
        "<s><|system|>\nYou are a helpful AI assistant. "
        "Use the chat history below to provide a relevant and coherent response.\n\n"
        "--- Chat History ---\n{chat_history}\n--- End History ---\n<|end|>\n"
        "<|user|>\n{user_message}<|end|>\n<|assistant|>"
    )
    prompt = prompt_template.format(chat_history=context, user_message=request.message)

    # Step 3: Generate a response from the LLM
    output = llm(prompt=prompt, max_tokens=256, stop=["<|end|>", "User:"], echo=False)
    ai_response = output["choices"][0]["text"].strip()

    # Step 4: Save the new exchange to ChromaDB.
    # The AI response is the document; the user message lives in the metadata.
    try:
        doc_id = str(uuid.uuid4())
        collection.add(
            ids=[doc_id],
            documents=[ai_response],
            metadatas=[{"session_id": request.session_id, "user_message": request.message}],
        )
        print(f"Saved new exchange to session {request.session_id}")
    except Exception as e:
        # Best effort: the reply is still returned if persisting it fails
        print(f"Error saving to ChromaDB: {e}")

    return {"session_id": request.session_id, "response": ai_response}
103
+
104
+ # --- 7. API Endpoint for YouTube Video Analysis ---
105
def _extract_video_id(video_url):
    """Return the video ID found in ``video_url``, or ``None`` if there is none.

    Recognizes ``...?v=<id>``, ``youtu.be/<id>`` and ``/shorts/<id>``,
    ``/embed/<id>``, ``/live/<id>``, ``/v/<id>`` paths. As a last resort it
    falls back to the original "text after ``v=``" rule so inputs that worked
    before keep working.
    """
    from urllib.parse import parse_qs, urlparse

    url = video_url.strip()
    parsed = urlparse(url)

    # Standard watch URL: take the real "v" query parameter
    v_values = parse_qs(parsed.query).get("v")
    if v_values and v_values[0]:
        return v_values[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]
    if host.endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        return segments[1]

    # Legacy behavior for strings urlparse cannot make sense of
    if "v=" in url:
        legacy_id = url.split("v=")[1].split("&")[0]
        if legacy_id:
            return legacy_id
    return None


def _fetch_transcript_text(video_id):
    """Return the transcript of ``video_id`` joined into a single string."""
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        # Older youtube-transcript-api: static call returning a list of dicts
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(item["text"] for item in entries)
    # NOTE(review): newer releases drop get_transcript in favor of an instance
    # fetch() that yields snippet objects with a .text attribute — confirm
    # against the installed version.
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


@app.post("/analyze_youtube")
def analyze_youtube_video(request: YouTubeRequest):
    """Summarize a YouTube video based on its transcript."""
    # The model is only loaded by the startup handler
    if llm is None:
        raise HTTPException(status_code=503, detail="Service is not ready yet.")

    # Extract video ID from URL; a malformed URL is a client error, not a 500
    video_id = _extract_video_id(request.video_url)
    if not video_id:
        raise HTTPException(
            status_code=400,
            detail="Could not extract a video ID from the provided URL.",
        )

    try:
        print(f"Fetching transcript for video ID: {video_id}")

        # Get transcript
        transcript = _fetch_transcript_text(video_id)
        print("Transcript fetched successfully.")

        # Create a prompt for summarization
        prompt = (
            f"<s><|system|>\nYou are an expert analyst. Summarize the key points of the following YouTube video transcript."
            f"<|end|>\n<|user|>\nTranscript: {transcript[:3000]}\n\nSummary:<|end|>\n<|assistant|>"  # Truncate to fit context
        )

        # Get summary from LLM
        output = llm(prompt, max_tokens=512, stop=["<|end|>"], echo=False)
        summary = output["choices"][0]["text"].strip()

        return {"video_url": request.video_url, "summary": summary}

    except HTTPException:
        # Do not rewrap deliberate HTTP errors as a generic 500
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
131
+
132
@app.get("/")
def read_root():
    # Root endpoint: returns a fixed status payload
    status_payload = {"status": "API is running."}
    return status_payload
docker-compose.yml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# docker-compose.yml

version: '3.8'

services:
  web:
    build: .
    container_name: rag_api_web
    ports:
      # The Dockerfile starts Uvicorn on port 7860 inside the container, so
      # host port 8000 must map to 7860 (8000:8000 reached nothing).
      - "8000:7860"
    volumes:
      - ./uploads:/app/uploads
      # app/main.py keeps its ChromaDB files under /app/chroma_db by default;
      # a named volume lets chat history survive container re-creation.
      - web_chroma_db:/app/chroma_db
    environment:
      # NOTE: app/main.py uses a local chromadb.PersistentClient and does not
      # read these two variables.
      - CHROMA_HOST=chroma
      - CHROMA_PORT=8000
    depends_on:
      - chroma
    restart: unless-stopped

  chroma:
    image: chromadb/chroma
    container_name: rag_api_chroma
    ports:
      - "8001:8000"
    volumes:
      - chroma_data:/chroma/chroma
    restart: unless-stopped

volumes:
  chroma_data:
    driver: local
  web_chroma_db:
    driver: local
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
llama-cpp-python
huggingface-hub
pydantic
# New additions
chromadb
youtube-transcript-api
# uuid is part of the Python standard library and is not installed from PyPI
uploads/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+ # ...existing code...