knightscode139 committed on
Commit
85ac87c
·
verified ·
1 Parent(s): 864140d

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +4 -0
  2. requirements.txt +8 -0
  3. src/__init__.py +0 -0
  4. src/app.py +128 -0
  5. src/config.py +22 -0
app.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
"""Hugging Face Spaces entry point.

Importing src.app builds the Gradio `demo` (and connects to Pinecone /
loads the embedding model as an import side effect); we only need the
`demo` object here, so import it explicitly instead of via wildcard.
"""
from src.app import demo

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain==1.2.2
2
+ langchain-anthropic==1.3.1
3
+ langchain-pinecone==0.2.13
4
+ langchain-text-splitters==1.1.0
5
+ pinecone==7.3.0
6
+ python-dotenv==1.2.1
7
+ requests==2.32.5
8
+ sentence-transformers==5.2.0
src/__init__.py ADDED
File without changes
src/app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio portfolio assistant: a RAG agent answering questions about my
GitHub repositories using code chunks indexed in Pinecone and Claude as
the LLM. Connecting to Pinecone and loading the embedding model happen
at import time, so importing this module has network side effects."""
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from anthropic import Anthropic  # NOTE(review): appears unused below (Claude is created via init_chat_model) — confirm before removing
import gradio as gr
from langchain.agents import create_agent
from langchain.tools import tool
from langchain.chat_models import init_chat_model
from src.config import *  # PINECONE_API_KEY, PINECONE_INDEX_NAME, EMBEDDING_MODEL, ANTHROPIC_API_KEY

# Initialize Pinecone client and open the index that holds the code chunks.
print("Connecting to Pinecone...")
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)

# Load embedding model — must be the same model used when the index was built,
# otherwise query vectors won't match the stored embeddings.
print("Loading embedding model...")
embedding_model = SentenceTransformer(EMBEDDING_MODEL)

# Get portfolio stats; total_vectors is interpolated into the system prompt
# and the UI description further down.
stats = index.describe_index_stats()
total_vectors = stats['total_vector_count']
print(f"Connected! Index contains {total_vectors} vectors")
25
@tool(response_format="content_and_artifact")
def retrieve_code_context(query: str):
    """Search through my GitHub repositories to find relevant code and project information."""
    # Embed the query with the same model used at indexing time.
    vector = embedding_model.encode(query).tolist()

    # Fetch the 3 nearest code chunks, metadata included.
    response = index.query(
        vector=vector,
        top_k=3,
        include_metadata=True,
    )
    hits = response['matches']

    # Render each hit as a labelled snippet the LLM can cite by repo/file.
    snippets = [
        f"[Repo: {hit['metadata']['repo']}, File: {hit['metadata']['path']}, "
        f"Relevance: {hit['score']:.2f}]\n{hit['metadata']['text']}"
        for hit in hits
    ]

    # content_and_artifact: the joined text goes to the model, the raw
    # matches travel along as the artifact.
    return "\n\n---\n\n".join(snippets), hits
49
+
50
+
51
# Initialize Claude for LangChain (chat model used by the agent below).
print("Initializing Claude agent...")
model = init_chat_model(
    "claude-sonnet-4-20250514",
    model_provider="anthropic",
    api_key=ANTHROPIC_API_KEY
)

# Create RAG agent with retrieval tool. The system prompt interpolates
# total_vectors (computed at import time) so the agent can report index size.
tools = [retrieve_code_context]
system_prompt = f"""You are knightscode139's AI portfolio assistant. You have access to a tool that searches through {total_vectors} code chunks from his GitHub repositories.

CRITICAL RULES:
1. Use the search tool to find relevant code before answering technical questions
2. Answer in FIRST PERSON as knightscode139
3. ONLY state what is EXPLICITLY shown in the retrieved code
4. If code doesn't contain specific details, say "I don't see that in my code"
5. Be CONCISE (2-4 sentences unless asked for more detail)
6. Decline off-topic questions politely

When you retrieve code, cite the repo and file name naturally in your response."""

agent = create_agent(model, tools, system_prompt=system_prompt)
print("Agent ready!")
75
+
76
+
77
def answer_question(question, history):
    """Handle a user question with the RAG agent.

    Parameters
    ----------
    question : str
        The user's current message from the Gradio chat box.
    history : list[dict]
        Gradio chat history in "messages" format. Depending on the Gradio
        version, each entry's "content" is either a plain string or a list
        of content parts (dicts with a "text" key).

    Returns
    -------
    str
        The agent's final text reply, or an "Error: ..." message on failure.
    """
    try:
        # Convert Gradio history to plain role/content message dicts.
        # Bug fix: the previous code assumed content was always a nested
        # list (msg['content'][0]['text']) and raised on plain-string
        # content, turning every chat turn into an error reply.
        messages = []
        for msg in history:
            content = msg['content']
            if isinstance(content, (list, tuple)) and content:
                part = content[0]
                content = part.get('text', '') if isinstance(part, dict) else str(part)
            messages.append({"role": msg['role'], "content": content})

        # Add the current question as the latest user turn.
        messages.append({"role": "user", "content": question})

        # Stream agent events and keep the last plain-text message content
        # (tool-call messages have non-string content and are skipped).
        response_text = ""
        for event in agent.stream(
            {"messages": messages},
            stream_mode="values"
        ):
            last_message = event["messages"][-1]
            if hasattr(last_message, 'content') and isinstance(last_message.content, str):
                response_text = last_message.content

        return response_text

    except Exception as e:
        # Surface failures to the chat UI instead of crashing Gradio.
        return f"Error: {str(e)}. Please try again."
106
+
107
+
108
# Create Gradio ChatInterface.
# `fn` receives (message, history); examples render as clickable starter
# questions; the description interpolates total_vectors computed at import.
demo = gr.ChatInterface(
    fn=answer_question,
    title="🤖 knightscode139's GitHub Portfolio Assistant",
    description=f"""Ask questions about my code and projects! Powered by LangChain RAG Agent + Claude Sonnet 4.

**Currently indexed:** {total_vectors} code chunks from my GitHub repositories.

The agent can search through my code multiple times to give you accurate answers.""",
    examples=[
        "What projects do you have?",
        "How did you handle data preprocessing?",
        "Show me your experience with machine learning",
        "What accuracy did you achieve in your models?",
        "Do you have any NLP projects?"
    ],
)


# Entry point when run directly; app.py also imports `demo` and launches it.
if __name__ == "__main__":
    demo.launch()
src/config.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Central configuration: secrets pulled from the environment plus fixed
project settings shared by the indexing and app modules."""
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file (no-op if it is absent).
load_dotenv()

# GitHub Configuration
GITHUB_USERNAME = "knightscode139"
TOKEN_GITHUB = os.getenv("TOKEN_GITHUB")  # personal access token; None if unset

# Pinecone Configuration
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")  # None if unset — client init will fail downstream
PINECONE_INDEX_NAME = "github-repos"

# Anthropic Configuration
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

# OPENAI Configuration (currently unused)
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Embedding Model — must match the model used when the index was built.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"