File size: 7,548 Bytes
20a938c
a7bcc23
5dff97b
 
 
 
a7bcc23
3963f4c
a7bcc23
 
 
5dff97b
bbfdb85
 
 
 
a7bcc23
31fb3ef
 
 
 
 
a7bcc23
 
5dff97b
 
bbfdb85
 
a7bcc23
31fb3ef
5dff97b
 
 
a7bcc23
 
6174325
a7bcc23
 
5dff97b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b11b737
5dff97b
a7bcc23
 
5dff97b
a7bcc23
 
 
 
 
 
5dff97b
 
 
a7bcc23
5dff97b
 
 
 
 
a7bcc23
 
 
 
6045ae0
a7bcc23
 
6174325
a7bcc23
 
5dff97b
 
 
 
a7bcc23
5dff97b
 
 
 
 
 
 
 
 
 
6174325
5dff97b
 
 
 
 
 
 
 
 
 
 
 
 
6174325
5dff97b
 
 
 
 
 
 
 
 
 
 
 
a7bcc23
 
 
 
 
 
5dff97b
a7bcc23
5dff97b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Path of the CSV meme catalogue (the same value is assigned again in the
# CONFIG section further down).
CSV_PATH = "memes_descriptions.csv"

from pydantic import BaseModel, Field
from enum import Enum
import outlines
import openai
import pandas as pd
import spaces
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import gradio as gr
from dotenv import load_dotenv
import os

# Load variables from a .env file, if present, so that the os.getenv() lookup
# for OPENROUTER_API_KEY below can see them.
load_dotenv()


# Structured-output schema: handed to the model as the output type in ask_llm
# and used there to validate the JSON it returns.
# (Documented with comments rather than a docstring so the generated JSON
# schema stays exactly as it was.)
class Meme(BaseModel):
    # Link of the selected meme; query_memes extracts a Google Drive file id
    # from it.
    link: str = Field(
        ...,
        description="The URL of the meme",
    )
    # Free-text explanation displayed next to the embedded video.
    description: str = Field(
        ...,
        description="The description of the meme",
    )

# --- CONFIG ---
# Model identifier handed to outlines.from_openai below.
MODEL_NAME = "qwen/qwen3-32b:free"
# CSV file holding the meme catalogue (columns: link, description).
CSV_PATH = "memes_descriptions.csv"

# API key read from the environment (populated by load_dotenv() above).
API_KEY = os.getenv("OPENROUTER_API_KEY")


# Build the OpenRouter client and the outlines model wrapper. Any failure is
# reported and both names are set to None, so that later `is None` checks work
# and no code path can hit an undefined `model`.
try:
    client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
    model = outlines.from_openai(client, MODEL_NAME)
except Exception as e:
    print(f"❌ Failed to initialize OpenRouter client: {e}")
    client = None
    model = None

# --- LOAD DATA ---
def load_data_and_create_vectorstore():
    """(Re)build the meme index from ``CSV_PATH``.

    Reads the CSV, turns each row into a ``Document`` (description as the
    content, link stored under the ``url`` metadata key), embeds the documents
    into a FAISS vector store and exposes a top-3 retriever.

    Results are published through the module-level globals ``documents``,
    ``vectorstore`` and ``retriever``. On any failure a message is printed and
    the globals are reset to ``[]``, ``None`` and ``None`` respectively; no
    exception propagates to the caller.
    """
    global documents, vectorstore, retriever
    try:
        df = pd.read_csv(CSV_PATH)
        # Normalize header names *before* addressing columns by name, so that
        # headers such as "Description " are covered by fillna and the lookups.
        df.columns = df.columns.str.strip().str.lower()
        df = df.fillna({"description": "", "link": ""})

        documents = [
            Document(
                # str() guards against numeric-looking cells parsed as numbers.
                page_content=str(description),
                metadata={"url": str(link).strip()},
            )
            for description, link in zip(df["description"], df["link"])
        ]

        embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
        vectorstore = FAISS.from_documents(documents, embedding_model)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        print("✅ Data loaded and vectorstore created.")
    except FileNotFoundError:
        print(f"❌ Data file not found at {CSV_PATH}")
        documents, vectorstore, retriever = [], None, None
    except Exception as e:
        # Covers missing columns, embedding-model download errors, empty data, ...
        print(f"❌ Error loading data or creating vectorstore: {e}")
        documents, vectorstore, retriever = [], None, None

# Build the index once at import time; this defines the module-level
# `documents`, `vectorstore` and `retriever` that the code below reads.
load_data_and_create_vectorstore()

# --- LLM ---
def ask_llm(question: str, docs: list) -> Meme:
    """Let the LLM choose the meme that best matches *question*.

    Args:
        question: The user's free-text request.
        docs: Candidate documents; each must expose ``page_content`` and a
            ``metadata`` mapping (its ``url`` entry is used when present).

    Returns:
        The validated ``Meme`` on success. NOTE: despite the annotation, a
        plain error-message string is returned when the client is not
        initialized or when the model call / JSON validation raises.
    """
    if client is None:
        return "❌ LLM client not initialized."

    # One text section per candidate, numbered from 1.
    sections = []
    for position, doc in enumerate(docs, start=1):
        url = doc.metadata.get('url', 'N/A')
        sections.append(f"Meme {position}:\nDescription: {doc.page_content}\nLink: {url}")
    context = "\n\n".join(sections)

    prompt = f"""You're a meme expert. The user will ask something, and your job is to select the most relevant meme from the following, keep the url link as it is :\n{context} \n\n === example generation === \n link='https://drive.google.com/file/d/1DrXjMZDn-fd7mJDKxZpIbYq1zuatLAQ2/view?usp=drive_link' description="This meme is perhaps the most relevant to situations where someone is trying to distance themselves from trouble, responsibility or confusion. The innocent defense, along with the exaggerated seriousness of the context, captures this human emotion perfectly.  This clip is particularly versatile, since it can be used for everything from dodging blame in a work environment to the cartoonish helplessness of dealing with everyday chaos.  The child's exclamation is a concise and universally understood expression of disassociation, making it an ideal visual accompaniment to anything from minor mishaps to adventurous tales of escape. It's a strong contender for embodying the act of avoiding situations with humor.  The rooting for the 'underdog' (or in this case, the underkid) provides both light-hearted commentary on life's inevitable awkward moments and a shared sense of solidarity in the face of confusion."
    \n\n User : {question}"""
    print(prompt)

    try:
        # The model is asked for output shaped like Meme; the reply is
        # then parsed and validated as JSON against that schema.
        raw_reply = model(prompt, Meme, stream=False)
        print(raw_reply)
        selected = Meme.model_validate_json(raw_reply)
        print(selected)
        return selected
    except Exception as err:
        return f"❌ LLM Error: {err}"

# --- MAIN QUERY ---
def _build_embed_html(raw_url: str) -> str:
    """Return the HTML snippet used to display the meme located at *raw_url*.

    Google Drive links (``.../file/d/<id>/...``) become a responsive preview
    iframe. Any other http(s) link becomes a plain hyperlink, and anything
    else an error paragraph. The URL comes from the LLM, so every value
    interpolated into the markup is HTML-escaped.
    """
    import html  # local import: keeps this block self-contained

    _, _, tail = raw_url.partition("/file/d/")
    file_id = tail
    # The id ends at the next path, query or fragment delimiter.
    for delimiter in "/?#":
        file_id = file_id.split(delimiter)[0]

    if file_id:
        embed_url = html.escape(f"https://drive.google.com/file/d/{file_id}/preview", quote=True)
        return f'''
    <div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
      <iframe src="{embed_url}"
      style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;"
      frameborder="0" allow="autoplay" allowfullscreen>
      </iframe>
    </div>
    '''

    # Not a Drive link (e.g. the placeholder links written by upload_meme).
    safe_url = html.escape(raw_url, quote=True)
    if raw_url.startswith(("http://", "https://")):
        return f'<p><a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_url}</a></p>'
    return f"<p>❌ Unsupported meme link: {safe_url}</p>"


@spaces.GPU(duration=90)
def query_memes(user_input: str):
    """Find the best meme for *user_input*.

    Retrieves candidate documents with the module-level ``retriever``, lets
    ``ask_llm`` pick one, and returns ``(html, description)`` for the two
    Gradio outputs. When the index is unavailable or the LLM step fails, the
    first element carries the error message and the second is empty.
    """
    import html  # local import: keeps this block self-contained

    if retriever is None:
        return "❌ RAG system not initialized due to errors.", ""

    src_docs = retriever.invoke(user_input)
    meme = ask_llm(user_input, src_docs)

    # ask_llm reports failures as a plain message string instead of a Meme;
    # surface it rather than crashing on attribute access.
    if isinstance(meme, str):
        return html.escape(meme), ""

    return _build_embed_html(meme.link), meme.description

# --- UPLOAD FUNCTION ---
def upload_meme(video_file, description):
    """Register a new meme in the CSV catalogue and rebuild the index.

    Args:
        video_file: The uploaded video, either a filesystem path (string or
            path-like) or an object exposing the path as ``.name``.
        description: Text describing the meme; must not be empty or blank.

    Returns:
        A status message for the UI. Failures are reported in the message
        and never raised.
    """
    if video_file is None or description is None or not str(description).strip():
        return "Please provide both a video file and a description."

    try:
        # This is a placeholder. In a real application, you would upload the video
        # to a hosting service (like Google Drive, S3, etc.) and get a shareable link.
        # For this example, we'll just create a dummy link.
        video_path = os.fspath(getattr(video_file, "name", video_file))
        dummy_link = f"https://dummy-hosting.com/videos/{os.path.basename(video_path)}"

        new_meme = pd.DataFrame({"link": [dummy_link], "description": [description]})

        csv_has_content = os.path.exists(CSV_PATH) and os.path.getsize(CSV_PATH) > 0
        if csv_has_content:
            # Write the new row in the column order the file already uses.
            header = pd.read_csv(CSV_PATH, nrows=0).columns.str.strip().str.lower()
            if not {"link", "description"}.issubset(header):
                raise ValueError(f"{CSV_PATH} is missing the 'link'/'description' columns")
            new_meme = new_meme.reindex(columns=header)

            # Make sure the last existing row is terminated, otherwise the
            # appended row would be glued onto it.
            with open(CSV_PATH, "rb+") as csv_file:
                csv_file.seek(-1, os.SEEK_END)
                if csv_file.read(1) not in (b"\n", b"\r"):
                    csv_file.write(b"\n")

        # Append to the CSV file (emit the header only when starting a new file).
        new_meme.to_csv(CSV_PATH, mode="a", header=not csv_has_content, index=False)

        # Reload data and update vectorstore
        load_data_and_create_vectorstore()

        return "Meme uploaded successfully!"

    except Exception as e:
        return f"Error uploading meme: {e}"


# --- GRADIO INTERFACE ---
if __name__ == "__main__":
    # The UI is only useful when both the retriever and the LLM client exist.
    if retriever is None or client is None:
        print("Gradio interface will not run due to RAG/LLM initialization errors.")

    else:
        with gr.Blocks(title="Moul Lmemes 🎬") as demo:
            gr.Markdown("# Moul Lmemes 🎬")
            gr.Markdown("Write what you are looking for and I will find the most relevant Moroccan meme for you 🔍")

            # Tab 1: free-text search -> embedded video + description.
            with gr.Tab("Search Memes"):
                search_input = gr.Textbox(label="Type something")
                search_button = gr.Button("Find Meme")
                search_output_video = gr.HTML(label="Top Meme Video")
                search_output_text = gr.Textbox(label="Results")

                search_button.click(
                    fn=query_memes,
                    inputs=search_input,
                    outputs=[search_output_video, search_output_text]
                )
                # With cache_examples=True the example queries are run through
                # query_memes and their outputs stored for reuse.
                gr.Examples(
                    examples=[["a man running"],["sharing"],["immigrant in france"]],
                    inputs=search_input,
                    outputs=[search_output_video, search_output_text],
                    fn=query_memes,
                    cache_examples=True,
                )

            # Tab 2: add a new meme (video + description) to the catalogue.
            with gr.Tab("Upload Meme"):
                upload_video = gr.Video(label="Upload Video")
                upload_description = gr.Textbox(label="Description")
                upload_button = gr.Button("Upload Meme")
                upload_output = gr.Textbox(label="Upload Status")

                upload_button.click(
                    fn=upload_meme,
                    inputs=[upload_video, upload_description],
                    outputs=upload_output
                )


        demo.launch(debug=True)