PraneshJs committed on
Commit
d7c6008
·
verified ·
1 Parent(s): 32f48d9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +248 -0
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, tempfile, subprocess, shutil, fnmatch, json, requests, re, pickle, time, threading
2
+ import gradio as gr
3
+ from dotenv import load_dotenv
4
+
5
# ---------------------------------
# Setup
# ---------------------------------
load_dotenv()  # pull variables from a local .env file into the environment, if present
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # None when unset; API calls then fail with an auth error
MEMORY_FILE = "repogpt_memory.pkl"  # pickle file persisting chat history + repo context between runs
MEMORY_TTL = 600 # 10 minutes (600 seconds)
_last_active_time = time.time()  # refreshed on every user interaction; drives TTL-based memory expiry
14
# ---------------------------------
# Persistent Memory Management
# ---------------------------------
def save_memory(data):
    """Pickle *data* to MEMORY_FILE, stamping it with the current time.

    NOTE: mutates the caller's dict by setting the "_timestamp" key,
    which load_memory later uses for TTL expiry.
    """
    data["_timestamp"] = time.time()
    with open(MEMORY_FILE, "wb") as handle:
        pickle.dump(data, handle)
21
+
22
def load_memory():
    """Load persisted state from MEMORY_FILE, resetting it when expired.

    Returns a dict containing at least "chat_history" (list) and
    "repo_context" (str). State older than MEMORY_TTL seconds of
    inactivity is discarded and the file removed.
    """
    default = {"chat_history": [], "repo_context": ""}
    if not os.path.exists(MEMORY_FILE):
        return default
    try:
        with open(MEMORY_FILE, "rb") as f:
            data = pickle.load(f)
    except (OSError, pickle.PickleError, EOFError, AttributeError):
        # Corrupt or unreadable memory file: start fresh rather than crash.
        # (Narrowed from a blanket `except Exception` that hid real bugs.)
        return default
    # Expire memory if inactive > TTL.
    if time.time() - data.get("_timestamp", 0) > MEMORY_TTL:
        print("🧹 Memory expired (10 min inactive). Resetting RepoGPT state.")
        try:
            os.remove(MEMORY_FILE)
        except OSError:
            pass  # best-effort cleanup; stale file will be overwritten on next save
        return default
    return data
36
+
37
def clear_memory_periodically():
    """Background janitor: delete MEMORY_FILE after MEMORY_TTL of inactivity.

    Intended to run forever in a daemon thread, polling once a minute.
    Fix: the previous version `break`-ed out of the loop after the first
    expiry, so memory from any later session was never auto-cleared; the
    loop now keeps watching for the life of the process.
    """
    while True:
        time.sleep(60)
        if time.time() - _last_active_time > MEMORY_TTL:
            if os.path.exists(MEMORY_FILE):
                try:
                    os.remove(MEMORY_FILE)
                    print("🧹 Auto-cleared RepoGPT memory after 10 minutes of inactivity.")
                except OSError:
                    # File locked/already gone; retry on the next tick.
                    pass
50
+
51
# Restore any persisted session state, then start the background cleaner
# as a daemon thread so it never blocks interpreter shutdown.
MEMORY = load_memory()
threading.Thread(target=clear_memory_periodically, daemon=True).start()
53
+
54
# ---------------------------------
# Repo Handling
# ---------------------------------
def clone_repo(repo_url):
    """Shallow-clone *repo_url* into a fresh temp directory and return its path.

    Raises RuntimeError when git exits non-zero (bad URL, private repo,
    network failure, ...). Fix: the previous version ignored the exit code
    and returned an empty temp dir on failure, making a bad clone look like
    an empty repository. The temp dir is removed before raising; on success
    the caller owns (and must delete) the directory.
    """
    tmp_dir = tempfile.mkdtemp()
    result = subprocess.run(
        ["git", "clone", "--depth", "1", repo_url, tmp_dir],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    if result.returncode != 0:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        detail = result.stderr.decode(errors="ignore").strip()
        raise RuntimeError(f"git clone failed: {detail}")
    return tmp_dir
65
+
66
def read_repo_files(repo_dir):
    """Collect readable source/docs files under *repo_dir*.

    Returns a list of {"filename": str, "content": str} dicts for files
    matching the extension allow-list. Each content is capped at 4000
    characters to bound prompt size; unreadable files are skipped.
    Fixes: bare `except: pass` narrowed to OSError, and at most 4000
    chars are read instead of loading whole (possibly huge) files first.
    """
    patterns = ["*.py", "*.js", "*.jsx", "*.ts", "*.tsx", "*.html", "*.json",
                "*.go", "*.java", "*.vue", "*.md", "*.css"]
    repo_content = []
    for root, _, files in os.walk(repo_dir):
        for filename in files:
            if not any(fnmatch.fnmatch(filename, p) for p in patterns):
                continue
            filepath = os.path.join(root, filename)
            try:
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    content = f.read(4000)  # cap read at the truncation limit
            except OSError:
                continue  # permissions, broken symlink, etc. — skip silently
            repo_content.append({"filename": filename, "content": content})
    return repo_content
83
+
84
# ---------------------------------
# Tech Stack Auto-Detection
# ---------------------------------
def detect_tech_stack(repo_files):
    """Guess the repository's tech stack from keyword hits in file contents.

    Returns a human-readable summary line, or a fallback message when no
    keyword group matches.
    """
    corpus = " ".join(entry["content"] for entry in repo_files).lower()
    tech_keywords = {
        "python": ["def ", "import ", "flask", "django", "fastapi"],
        "javascript": ["function", "const ", "react", "vue", "next.js", "node"],
        "typescript": ["typescript", "tsconfig", ".tsx"],
        "java": ["public class", "springboot", "maven", "gradle"],
        "go": ["package main", "func main", "go mod"],
        "html": ["<html", "<body", "<div"],
        "css": ["color:", "background", "font-family"],
        "docker": ["from ", "dockerfile"],
        "database": ["sql", "select", "insert", "mongodb", "mongoose", "prisma"],
        "ai/ml": ["torch", "tensorflow", "sklearn", "transformers", "model"],
    }
    detected = sorted(
        tech
        for tech, keywords in tech_keywords.items()
        if any(word in corpus for word in keywords)
    )
    if detected:
        return "🧰 Detected Tech Stack: " + ", ".join(detected)
    return "No clear tech stack detected."
108
+
109
# ---------------------------------
# Code Search
# ---------------------------------
def search_code_snippets(repo_files, query, top_k=3):
    """Return up to *top_k* snippets (2 lines of context each side) containing *query*.

    Matching is case-insensitive; longer snippets are preferred. Returns ""
    when nothing matches.
    """
    needle = query.lower()
    hits = []
    for entry in repo_files:
        lines = entry["content"].splitlines()
        for idx, line in enumerate(lines):
            if needle not in line.lower():
                continue
            context = lines[max(0, idx - 2):min(len(lines), idx + 3)]
            hits.append((entry["filename"], "\n".join(context)))
    hits.sort(key=lambda pair: len(pair[1]), reverse=True)
    top = hits[:top_k]
    if not top:
        return ""
    return "\n\n".join(f"πŸ“„ {name}:\n{snippet}" for name, snippet in top)
127
+
128
# ---------------------------------
# OpenRouter API
# ---------------------------------
def call_openrouter(messages, model="google/gemma-3-27b-it:free"):
    """POST a chat-completion request to OpenRouter and return the reply text.

    Never raises: any network/HTTP/parse failure is folded into a
    "⚠️ API Error: ..." string so the chat UI always has something to show.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {"model": model, "messages": messages}
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=180)
        response.raise_for_status()
        body = response.json()
        return body["choices"][0]["message"]["content"]
    except Exception as e:
        return f"⚠️ API Error: {e}"
146
+
147
# ---------------------------------
# Chat Logic
# ---------------------------------
def create_system_prompt(repo_context=None):
    """Build the system message, optionally embedding truncated repo context.

    Returns an OpenAI-style {"role": "system", "content": ...} dict; at most
    the first 8000 characters of *repo_context* are included.
    """
    content = (
        "You are RepoGPT β€” a helpful AI assistant for GitHub repositories. "
        "You can explain code, summarize architecture, identify frameworks, search code, and generate ideas. "
        "Be conversational and beginner-friendly."
    )
    if repo_context:
        content = f"{content}\n\nRepository context:\n{repo_context[:8000]}"
    return {"role": "system", "content": content}
159
+
160
def load_repository(repo_url):
    """Clone *repo_url*, scan its files, and persist the context to memory.

    Returns a (status_message, context_text, repo_files) triple for the UI.
    Fix: repo_files is now always a list — the invalid-URL branch used to
    return "" for it, and iterating that str would crash
    search_code_snippets on the next chat turn.
    The cloned temp directory is always removed, success or failure.
    """
    global _last_active_time
    _last_active_time = time.time()
    if not repo_url or "github.com" not in repo_url:
        return "⚠️ Please provide a valid GitHub repository URL.", "", []
    repo_dir = None
    try:
        repo_dir = clone_repo(repo_url)
        repo_files = read_repo_files(repo_dir)
        stack_info = detect_tech_stack(repo_files)
        # Context shown in the UI lists at most the first 50 filenames.
        file_summary = "\n".join(f["filename"] for f in repo_files[:50])
        context = f"{stack_info}\n\nFiles found:\n{file_summary}"
        MEMORY["repo_context"] = "\n".join(f["content"] for f in repo_files)
        save_memory(MEMORY)
        return f"βœ… Repo loaded successfully!\n\n{stack_info}", context, repo_files
    except Exception as e:
        return f"❌ Failed to load repo: {e}", "", []
    finally:
        if repo_dir and os.path.exists(repo_dir):
            shutil.rmtree(repo_dir, ignore_errors=True)
180
+
181
def chat_with_repo(message, history, repo_context, repo_files):
    """Answer one chat turn, grounding the prompt in repo context and code hits.

    Persists the history before and after the model call, keeps the last six
    turns in the prompt, and appends matching code snippets when the question
    looks code-related.
    """
    global _last_active_time
    _last_active_time = time.time()

    MEMORY["chat_history"] = history
    save_memory(MEMORY)

    # Only run the code search when the question smells code-related.
    lowered = message.lower()
    code_triggers = ("function", "class", "api", "code", "def ", "explain")
    snippet = ""
    if any(trigger in lowered for trigger in code_triggers):
        snippet = search_code_snippets(repo_files, message)

    # NOTE(review): assumes tuple-style (user, bot) history pairs — confirm
    # against the gr.ChatInterface version in use.
    conversation = [create_system_prompt(repo_context)]
    for user_turn, bot_turn in history[-6:]:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": bot_turn})
    if snippet:
        message += f"\n\nRelevant Code Snippet:\n{snippet}"
    conversation.append({"role": "user", "content": message})

    reply = call_openrouter(conversation)
    MEMORY["chat_history"].append((message, reply))
    save_memory(MEMORY)
    return reply
204
+
205
# ---------------------------------
# Gradio UI
# ---------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="RepoGPT") as demo:
    gr.Markdown("""
# πŸ€– RepoGPT
**ChatGPT for GitHub Repositories**

🧩 Features:
- πŸ” Smart Code Search
- πŸ’Ύ Persistent Memory (auto-clears after 10 min)
- 🧠 Tech-Stack Detection
""")

    with gr.Row():
        repo_input = gr.Textbox(
            label="πŸ”— GitHub Repository URL",
            placeholder="https://github.com/yourname/yourproject",
        )
        load_btn = gr.Button("πŸ“‚ Load Repo")

    repo_status = gr.Markdown("ℹ️ No repository loaded yet.")
    # Seed the context from persisted memory so a restart keeps the last repo.
    repo_context_state = gr.State(MEMORY.get("repo_context", ""))
    repo_files_state = gr.State([])

    # load_repository already has the (url) -> (status, context, files)
    # shape, so wire it to the button directly — no wrapper needed.
    load_btn.click(
        load_repository,
        inputs=[repo_input],
        outputs=[repo_status, repo_context_state, repo_files_state],
    )

    # chat_with_repo's signature matches (message, history, *additional_inputs),
    # so it can be passed as-is instead of through a lambda.
    chatbot = gr.ChatInterface(
        fn=chat_with_repo,
        additional_inputs=[repo_context_state, repo_files_state],
        title="RepoGPT",
        description="Your AI companion for understanding and improving GitHub projects.",
        examples=[
            ["Summarize this repo."],
            ["Explain the backend structure."],
            ["Find where login is implemented."],
            ["What technologies are used here?"],
            ["Generate a LinkedIn post for this project."]
        ],
    )
245
+
246
if __name__ == "__main__":
    # Hosting platforms (e.g. HF Spaces, Heroku) inject PORT; default to 7860.
    server_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=server_port, share=False)