SakshamSna committed on
Commit
7c377b6
·
1 Parent(s): c8f1108

added all files

Browse files
Files changed (3) hide show
  1. agent.py +87 -0
  2. app.py +46 -0
  3. requirements.txt +6 -0
agent.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz
3
+ import faiss
4
+ import torch
5
+ import sqlite3
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
7
+ from sentence_transformers import SentenceTransformer
8
+
9
class CodingAgent:
    """Small retrieval-augmented coding assistant.

    Combines three pieces of state:

    * a TinyLlama text-generation pipeline (``self.llm``) used to answer,
    * a sentence-embedding model plus a flat L2 FAISS index
      (``self.embedder`` / ``self.index``) holding chunks of ingested files,
      with the chunk texts kept in ``self.docs`` in index order,
    * a SQLite table ``memory`` (``self.conn``) that caches one response per
      exact query string.
    """

    def __init__(self):
        """Load the models, create an empty index and open ``memory.db``."""
        # Load TinyLlama (CPU-friendly)
        model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(model_id)
        self.llm = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=512,
            device=-1,
        )

        # Embedding model + FAISS index. The index is created with a fixed
        # dimension of 384, so the embedder's vectors must be that wide.
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
        self.index = faiss.IndexFlatL2(384)
        self.docs = []
        self.id_map = []

        # SQLite for session memory. check_same_thread=False lets the
        # connection be used from threads other than the creating one.
        self.conn = sqlite3.connect("memory.db", check_same_thread=False)
        self.conn.execute("""CREATE TABLE IF NOT EXISTS memory (id INTEGER PRIMARY KEY, query TEXT, response TEXT)""")

    def embed_chunks(self, texts):
        """Return the embedder's encoding of ``texts`` (a list of strings)."""
        return self.embedder.encode(texts)

    def ingest_file(self, filepath):
        """Chunk a PDF or Python file and add the chunks to the index.

        PDFs are split per page into runs of 300 whitespace-separated words;
        runs of 100 characters or fewer are dropped. Python files are split
        into runs of 20 lines. The extension check is case-insensitive.

        Args:
            filepath: Path of the file to ingest.

        Returns:
            ``"Added N chunks."`` on success (``N`` may be 0), or
            ``"Unsupported file format."`` for any other extension.
        """
        lowered = filepath.lower()
        chunks = []
        if lowered.endswith(".pdf"):
            doc = fitz.open(filepath)
            try:
                for page in doc:
                    words = page.get_text().split()
                    for start in range(0, len(words), 300):
                        chunk = " ".join(words[start:start + 300])
                        if len(chunk) > 100:
                            chunks.append(chunk)
            finally:
                # Release the PDF handle even if text extraction fails.
                doc.close()
        elif lowered.endswith(".py"):
            with open(filepath, 'r', encoding='utf-8') as f:
                lines = f.read().splitlines()
            chunks.extend(
                "\n".join(lines[start:start + 20])
                for start in range(0, len(lines), 20)
            )
        else:
            return "Unsupported file format."

        # Nothing to embed (empty file, or only very short PDF pages): skip
        # the index entirely rather than handing it an empty batch.
        if not chunks:
            return "Added 0 chunks."

        embeddings = self.embed_chunks(chunks)
        self.index.add(embeddings)
        first_id = len(self.docs)
        self.docs.extend(chunks)
        self.id_map.extend(range(first_id, first_id + len(chunks)))
        return f"Added {len(chunks)} chunks."

    def retrieve_context(self, query, top_k=3):
        """Return the stored chunks nearest to ``query``.

        Args:
            query: Text to search for.
            top_k: Maximum number of chunks to return.

        Returns:
            The matching chunks joined by blank lines, or ``""`` when the
            index is empty or ``top_k`` is not positive.
        """
        total = self.index.ntotal
        # Never ask for more neighbours than there are vectors.
        k = min(top_k, total)
        if k <= 0:
            return ""
        # Encode a one-element batch and pass it on unchanged so the index
        # receives a 2-D (1 x dim) array rather than a Python list.
        query_emb = self.embed_chunks([query])
        _, neighbours = self.index.search(query_emb, k)
        # Skip -1 padding; a negative index would silently wrap around in
        # self.docs.
        hits = [self.docs[i] for i in neighbours[0] if 0 <= i < len(self.docs)]
        return "\n\n".join(hits)

    def answer(self, query):
        """Answer ``query``, reusing a cached response when one exists.

        A response stored for exactly the same query string is returned with
        a ``"[From memory] "`` prefix. Otherwise a prompt is built from the
        retrieved context, the generated answer is stored and returned.

        NOTE(review): the cache is keyed on the query text alone, so a cached
        answer is reused even after new files have been ingested.
        """
        # Check memory
        cursor = self.conn.execute("SELECT response FROM memory WHERE query = ?", (query,))
        cached = cursor.fetchone()
        if cached:
            return f"[From memory] {cached[0]}"

        context = self.retrieve_context(query)
        prompt = f"You are a coding assistant. Answer the following:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"
        generated = self.llm(prompt)[0]['generated_text']
        if generated.startswith(prompt):
            # The pipeline echoes the prompt; drop exactly that prefix so a
            # completion that itself mentions "Answer:" is not truncated.
            result = generated[len(prompt):].strip()
        else:
            result = generated.split("Answer:")[-1].strip()

        self.conn.execute("INSERT INTO memory (query, response) VALUES (?, ?)", (query, result))
        self.conn.commit()
        return result

    def clear_context(self):
        """Delete every cached answer; the index and chunks are kept."""
        self.conn.execute("DELETE FROM memory")
        self.conn.commit()
        return "Cleared memory."

    def get_stats(self):
        """Return a two-line summary: cached answers and stored chunks."""
        cursor = self.conn.execute("SELECT COUNT(*) FROM memory")
        count = cursor.fetchone()[0]
        return f"Stored answers: {count}\nDocuments: {len(self.docs)}"
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from agent import CodingAgent
3
+
4
+ agent = CodingAgent()
5
+
6
def handle_query(message, history):
    """Answer a chat message and append the exchange to the chat history.

    Args:
        message: Text entered by the user.
        history: Existing list of ``(message, response)`` pairs, or ``None``.

    Returns:
        A ``(history, "")`` pair: the updated history for the chatbot and an
        empty string that clears the input textbox.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate a
    # missing history.
    updated = list(history or [])
    # Ignore blank submissions instead of sending them to the model.
    if not message or not message.strip():
        return updated, ""
    response = agent.answer(message)
    updated.append((message, response))
    return updated, ""
10
+
11
def upload_file(file):
    """Ingest an uploaded file into the agent and return its status message.

    Args:
        file: Value delivered by the file-upload component: an object with a
            ``name`` attribute holding the path, a plain path string, or
            ``None`` when nothing was selected.

    Returns:
        The status string from ``agent.ingest_file``, or
        ``"No file selected."`` when ``file`` is ``None``.
    """
    if file is None:
        return "No file selected."
    # Accept both file-like objects exposing .name and plain path strings.
    path = getattr(file, "name", file)
    return agent.ingest_file(path)
13
+
14
def clear_memory():
    """Clear the agent's stored answers and return its status message."""
    status = agent.clear_context()
    return status
16
+
17
def get_info():
    """Return the agent's statistics summary string."""
    stats = agent.get_stats()
    return stats
19
+
20
# Build the three-tab Gradio UI. The page title names the model that is
# actually loaded (TinyLlama), matching the Markdown header below.
with gr.Blocks(title="TinyLlama Coding Agent") as demo:
    gr.Markdown("# 🦙 TinyLlama Coding Agent\nSupports code Q&A + PDF/code file RAG")

    # Chat tab: the Send button and pressing Enter both run handle_query,
    # which returns the new history and clears the textbox.
    with gr.Tab("Chat"):
        chatbot = gr.Chatbot()
        with gr.Row():
            msg = gr.Textbox(placeholder="Ask a coding question")
            send = gr.Button("Send")
        send.click(handle_query, [msg, chatbot], [chatbot, msg])
        msg.submit(handle_query, [msg, chatbot], [chatbot, msg])

    # Upload tab: passes the selected file to upload_file and shows the
    # returned status text.
    with gr.Tab("Upload PDF / .py"):
        file_input = gr.File(label="Upload PDF or Python File", file_types=[".pdf", ".py"])
        upload_btn = gr.Button("Upload")
        output = gr.Textbox()
        upload_btn.click(upload_file, file_input, output)

    # System tab: show statistics or clear the stored answers.
    with gr.Tab("System"):
        info_btn = gr.Button("Get Info")
        clear_btn = gr.Button("Clear Memory")
        info_box = gr.Textbox()
        status_box = gr.Textbox()
        info_btn.click(get_info, outputs=info_box)
        clear_btn.click(clear_memory, outputs=status_box)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
4
+ sentence-transformers
5
+ faiss-cpu
6
+ PyMuPDF