krishna195 commited on
Commit
728d085
·
verified ·
1 Parent(s): b104573

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import chromadb
import gradio as gr
from sentence_transformers import SentenceTransformer
from llama_cpp import Llama

# Persistent ChromaDB client — vectors are stored on disk under ./chromadb_store
chroma_client = chromadb.PersistentClient(path="./chromadb_store")
# Knowledge-base collection for the band; created on first run if absent
collection = chroma_client.get_or_create_collection(name="curly_strings_knowledge")

# Local sentence-embedding model used to embed user queries for retrieval
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Quantized (Q4_K_M) GGUF LLaMA model pulled from the Hugging Face Hub
llm = Llama.from_pretrained(
    repo_id="krishna195/second_guff",
    filename="unsloth.Q4_K_M.gguf",
)
18
+
19
+ # ✅ File-Based Search Function
20
def search_in_file(query, file_path="merged_output.txt"):
    """Case-insensitively search a text file for lines containing *query*.

    Args:
        query: Substring to look for. Matching uses ``str.casefold`` so that
            non-ASCII case pairs also compare equal (more robust than
            ``lower()``).
        file_path: Path of the UTF-8 text file to scan.

    Returns:
        The matching lines (stripped) joined by newlines, or a human-readable
        message when nothing matches or the file does not exist.
    """
    # Fold the needle once, outside the loop.
    needle = query.casefold()
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            # Stream line by line instead of materializing the whole file
            # with readlines().
            matched_lines = [
                line.strip() for line in file if needle in line.casefold()
            ]
    except FileNotFoundError:
        return "File not found. Please check the file path."

    return "\n".join(matched_lines) if matched_lines else "No relevant data found in file."
32
+
33
# Context retrieval: vector store first, plain-text file as fallback.
def retrieve_context(query):
    """Return supporting text for *query*.

    Embeds the query with the local sentence-transformer, asks the ChromaDB
    collection for the two nearest documents, and returns them joined by
    newlines.  When the collection yields no usable string documents, the
    plain-text knowledge file is searched instead via ``search_in_file``.
    """
    embedding = embedder.encode(query).tolist()
    hits = collection.query(query_embeddings=[embedding], n_results=2)

    # Flatten the nested "documents" payload, keeping only real strings.
    docs = []
    for group in hits.get("documents", []):
        for item in group:
            if isinstance(item, str):
                docs.append(item)

    if docs:
        return "\n".join(docs)
    # Nothing usable came back from the vector store — fall back to the file.
    return search_in_file(query)
45
+
46
# Chatbot entry point: retrieve supporting context, then query the local LLM.
def chatbot_response(user_input):
    """Generate a reply to *user_input* grounded in retrieved band knowledge.

    Pulls context from ChromaDB (or the fallback text file) via
    ``retrieve_context``, injects it into the chat history, and asks the
    local GGUF LLaMA model for a completion.
    """
    context = retrieve_context(user_input)

    # NOTE(review): the retrieved context is injected as an *assistant* turn
    # placed after the user message — an unusual layout; confirm the
    # fine-tuned model was trained to consume context this way.
    messages = [
        {"role": "system", "content": """You are an expert on the Estonian folk band Curly Strings.
- Use the **retrieved knowledge** from ChromaDB or the file to answer.
- If a **song** is mentioned, provide its name and **suggest similar tracks**.
- If no match is found, say "I couldn’t find details, but here’s what I know."."""},
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": f"Retrieved Context:\n{context}"},
    ]

    # Low temperature plus a frequency penalty keeps answers focused and
    # discourages repetition; max_tokens caps response length.
    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.4,
        max_tokens=300,
        top_p=0.9,
        frequency_penalty=0.7,
    )

    # OpenAI-style response shape: first choice's message content.
    return response["choices"][0]["message"]["content"].strip()
68
+
69
# Gradio UI: a single question textbox wired to chatbot_response.
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Textbox(label="Ask me about Curly Strings 🎻"),
    outputs=gr.Textbox(label="Bot Response 🎶"),
    title="Curly Strings Chatbot",
    description="Ask about the Estonian folk band Curly Strings! Now also searches in 'merged_output.txt'.",
)

# Blocking call — starts the local web server for the app.
iface.launch()