Spaces:
Sleeping
Sleeping
| import chromadb | |
| from sentence_transformers import SentenceTransformer | |
| from llama_cpp import Llama | |
| import gradio as gr | |
# Vector store: a ChromaDB client persisted on disk under ./chromadb_store.
chroma_client = chromadb.PersistentClient(
    path="./chromadb_store",
)
# Open the collection that holds the Curly Strings documents, creating it
# if it does not exist yet.
collection = chroma_client.get_or_create_collection(
    name="curly_strings_knowledge",
)

# Sentence-embedding model; the same model encodes both the stored documents
# and incoming queries.
embedder = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
)
# Curly Strings knowledge, stored in ChromaDB as vectors.
# Each entry is a dict with a stable "id" (used as the ChromaDB record id) and
# the markdown "text" that is embedded and stored as the document body.
knowledge_base = [
    {"id": doc_id, "text": doc_text}
    for doc_id, doc_text in (
        (
            "song_list",
            """
Here are some songs by Curly Strings:
1. **Kalakesed**
2. **Kus mu süda on ...**
3. **Vitsalaul**
4. **Viimases jaamas**
5. **Salaja**
6. **Üle ilma**
7. **Šveits**
8. **Kallimale**
9. **Üksteist peab hoidma**
10. **Suuda öelda ei**
11. **Annan käe**
12. **Tulbid ja Bonsai**
13. **Tüdruk Pika Kleidiga**
14. **Armasta mind (feat. Vaiko Eplik)**
15. **Minu, Pets, Margus ja Priit**
16. **Kauges külas**
17. **Tule ja jää**
18. **Kuutõbine**
19. **Omaenese ilus ja veas**
20. **Pulmad**
21. **Pillimeeste laul**
22. **Tehke ruumi!**
""",
        ),
        (
            "related_artists",
            """
If you enjoy Curly Strings, you might also like:
- **Trad.Attack!**
- **Eesti Raadio laululapsed**
- **Körsikud**
- **Karl-Erik Taukar**
- **Dag**
- **Sadamasild**
- **Kruuv**
- **Smilers**
- **Mari Jürjens**
- **Terminaator**
""",
        ),
        (
            "background",
            """
Curly Strings started in Estonia and became famous for their unique blend of folk and contemporary music.
They often perform at international festivals and are known for their emotional and poetic lyrics.
""",
        ),
    )
]
# Store knowledge in ChromaDB (only the entries that are not already stored).
# The lookup is restricted to the knowledge-base ids instead of loading the
# whole collection, and the comparison is per id, so a store that holds only
# some of the entries gets topped up rather than being skipped entirely.
knowledge_ids = [item["id"] for item in knowledge_base]
existing_data = collection.get(ids=knowledge_ids)
stored_ids = set(existing_data["ids"])
missing_items = [item for item in knowledge_base if item["id"] not in stored_ids]
if missing_items:
    missing_texts = [item["text"] for item in missing_items]
    # Encode every missing text in one batch call; .tolist() yields one
    # embedding (a list of floats) per text, in the same order as the texts.
    missing_embeddings = embedder.encode(missing_texts).tolist()
    collection.add(
        documents=missing_texts,
        embeddings=missing_embeddings,
        ids=[item["id"] for item in missing_items],
    )
# Load the Llama model from the GGUF file published in the given Hugging Face repo.
llm = Llama.from_pretrained(
    repo_id="krishna195/second_guff",
    filename="unsloth.Q4_K_M.gguf",
    # Set the context window explicitly. Without it llama-cpp-python falls back
    # to its small default (512 tokens per its API reference), which cannot hold
    # the retrieved documents plus the up-to-500-token completion that
    # chatbot_response requests.
    # NOTE(review): 2048 is assumed to be within this model's trained context
    # length - confirm against the model card.
    n_ctx=2048,
)
# Function to retrieve relevant knowledge from ChromaDB.
def retrieve_context(query, n_results=2):
    """Return the stored documents most similar to *query* as one string.

    The query is embedded with the module-level ``embedder`` and looked up in
    the module-level ChromaDB ``collection``.

    Args:
        query: The user's question text.
        n_results: How many nearest documents to request from ChromaDB.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The retrieved documents joined with newlines, or the string
        ``"No relevant data found."`` when no document text came back.
    """
    query_embedding = embedder.encode(query).tolist()
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

    # "documents" is a nested list (one inner list per query embedding).
    # Guard against the key being absent, being None, or containing a None
    # group, any of which would otherwise raise TypeError while flattening.
    document_groups = results.get("documents") or []
    retrieved_texts = [
        doc
        for group in document_groups
        for doc in (group or [])
        if isinstance(doc, str)
    ]
    return "\n".join(retrieved_texts) if retrieved_texts else "No relevant data found."
# Chatbot function with ChromaDB-backed retrieval (RAG).
def chatbot_response(user_input):
    """Answer *user_input* using context retrieved from ChromaDB.

    Args:
        user_input: The question typed by the user.

    Returns:
        The model's reply with surrounding whitespace stripped, or a short
        prompt asking for a question when *user_input* is empty or blank.
    """
    # Nothing to retrieve or answer for an empty/blank submission; skip the
    # embedding lookup and the model call entirely.
    if not user_input or not user_input.strip():
        return "Please enter a question about Curly Strings."

    context = retrieve_context(user_input)  # Relevant documents from ChromaDB

    # The retrieved context goes into the system prompt and the user's question
    # is the final turn. Supplying the context as an assistant message after
    # the question would end the conversation on an assistant turn and present
    # the context as something the model had already said.
    messages = [
        {
            "role": "system",
            "content": (
                "Use the knowledge retrieved to answer the user’s question.\n\n"
                f"Retrieved Context:\n{context}"
            ),
        },
        {"role": "user", "content": user_input},
    ]
    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.5,
        max_tokens=500,
        top_p=0.9,
        frequency_penalty=0.8,
    )
    return response["choices"][0]["message"]["content"].strip()
# Gradio callback
def chat_interface(user_input):
    """Pass the textbox contents to ``chatbot_response`` and return its reply."""
    reply = chatbot_response(user_input)
    return reply
# Single-textbox Gradio UI: the user's question goes in, the bot's plain-text
# answer comes out via chat_interface.
demo = gr.Interface(
    title="Curly Strings Chatbot",
    description="Chat with the bot about Curly Strings, their songs, and related artists.",
    fn=chat_interface,
    inputs=gr.Textbox(placeholder="Ask me about Curly Strings..."),
    outputs="text",
)

# Start the web app.
demo.launch()