Not-Grim-Refer committed on
Commit
168366e
·
1 Parent(s): 2d53b7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -332
app.py CHANGED
@@ -1,335 +1,39 @@
1
- import gradio as gr
2
- import os
3
- import shutil
4
  import requests
5
- import zipfile
6
- from PyPDF2 import PdfFileReader, PdfFileWriter
7
- import PyPDF2
8
- from io import BytesIO
9
- from reportlab.lib.pagesizes import letter
10
- from reportlab.platypus import SimpleDocTemplate,Preformatted
11
- from reportlab.platypus import Image as RLImage
12
- from reportlab.platypus import Paragraph, Spacer
13
- from reportlab.lib.styles import getSampleStyleSheet
14
- from reportlab.lib.utils import ImageReader
15
- from PIL import Image
16
- import os
17
- from langchain.indexes.vectorstore import VectorstoreIndexCreator
18
- from langchain.chains import VectorDBQA,VectorDBQAWithSourcesChain
19
- from langchain import OpenAI
20
- from langchain.document_loaders import UnstructuredPDFLoader
21
- from langchain.vectorstores.faiss import FAISS
22
- from langchain.embeddings.openai import OpenAIEmbeddings
23
- from flask import send_file
24
- # from IPython.display import Markdown, display
25
-
26
-
27
- class REPOGPT:
28
- def __init__(self) -> None:
29
-
30
- self.repo_link = None
31
- self.api_key = None
32
-
33
- def init_agent(self, api_key, repo_link = None, load_vectorstore = None):
34
- try:
35
- os.remove('merged.pdf')
36
- except:
37
- pass
38
- self.repo_link = repo_link
39
- self.api_key = api_key
40
- self.load_vectorstore = load_vectorstore
41
- #assert if api key is valid
42
- assert self.api_key != None, "You need to provide an API key"
43
- self.REPOGPT_Initialized()
44
- return gr.update(visible = True),'Initialize Finished'
45
-
46
-
47
-
48
- def REPOGPT_Initialized(self,image_included = False):
49
-
50
-
51
- os.environ["OPENAI_API_KEY"] = self.api_key
52
- if self.load_vectorstore == None:
53
- loader = UnstructuredPDFLoader( self.create_repo_pdf(self.repo_link,image_included = image_included))
54
- # pages = loader.load_and_split()
55
- self.index = VectorstoreIndexCreator(vectorstore_cls = FAISS).from_loaders([loader])
56
- self.vectorstore = self.index.vectorstore
57
- print(' vectorstore created')
58
- else:
59
- embeddings = OpenAIEmbeddings()
60
- self.vectorstore = FAISS.load_local(self.load_vectorstore,embeddings =embeddings)
61
- print(' vectorstore loaded')
62
-
63
- self.qa = VectorDBQA.from_chain_type(llm =OpenAI(temperature=0, model_name="gpt-3.5-turbo"), chain_type = "stuff",vectorstore = self.vectorstore )
64
-
65
-
66
-
67
-
68
-
69
- def download_repo_zip(self, link, output_folder = "main.zip"):
70
- username = link.split('/')[3]
71
- repo = link.split('/')[4]
72
- zip_url = f"https://github.com/{username}/{repo}/archive/refs/heads/master.zip"
73
- self.zip_url = zip_url
74
- response = requests.get(zip_url)
75
- response.raise_for_status()
76
- #down load the zip file
77
- with open('main.zip', 'wb') as f:
78
- f.write(response.content)
79
- # return the name of the extracted folder
80
- # return self.extract_zip("main.zip", output_folder)
81
- # return BytesIO(response.content)
82
-
83
- def extract_zip(self, zip_file, destination_folder):
84
- with zipfile.ZipFile(zip_file) as zf:
85
- zf.extractall(destination_folder)
86
- #get the name of the extracted folder
87
- folder_name = zf.namelist()[0]
88
- return folder_name
89
-
90
- def convert_to_pdf(self, input_path, output_path):
91
- if input_path.endswith(".pdf"):
92
- # Create a new PDF with the file path heading
93
- buffer = BytesIO()
94
- doc = SimpleDocTemplate(buffer, pagesize=letter)
95
- styles = getSampleStyleSheet()
96
- elements = []
97
- heading = Paragraph(f"File path: {input_path}", styles["Heading2"])
98
- elements.append(heading)
99
- elements.append(Spacer(1, 12))
100
- doc.build(elements)
101
-
102
- # Read the newly created PDF with heading
103
- buffer.seek(0)
104
- new_pdf = PdfFileReader(buffer)
105
-
106
- # Read the input PDF
107
- with open(input_path, "rb") as f:
108
- input_pdf = PdfFileReader(f)
109
-
110
- # Merge the new PDF with heading and the input PDF
111
- pdf_writer = PdfFileWriter()
112
- for page_num in range(new_pdf.getNumPages()):
113
- pdf_writer.addPage(new_pdf.getPage(page_num))
114
-
115
- for page_num in range(input_pdf.getNumPages()):
116
- pdf_writer.addPage(input_pdf.getPage(page_num))
117
-
118
- # Save the merged PDF to the output file
119
- with open(output_path, "wb") as f:
120
- pdf_writer.write(f)
121
-
122
- elif input_path.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff")):
123
- img = Image.open(input_path)
124
- img_reader = ImageReader(img)
125
- img_width, img_height = img.size
126
- aspect_ratio = img_height / img_width
127
-
128
-
129
- max_pdf_width = letter[0] - 2 * 72 # 1 inch margin on each side
130
- max_pdf_height = letter[1] - 2 * 72 # 1 inch margin on top and bottom
131
-
132
- if img_width > max_pdf_width:
133
- img_width = max_pdf_width
134
- img_height = img_width * aspect_ratio
135
- if img_height > max_pdf_height:
136
- img_height = max_pdf_height
137
- img_width = img_height / aspect_ratio
138
- img_width = int(img_width)
139
- img_height = int(img_height)
140
- # Resize the image
141
- img = img.resize((int(img_width), int(img_height)))
142
-
143
- img = img.resize((int(img_width), int(img_height)))
144
-
145
- img.save(output_path, "PNG")
146
- # Create a new PDF with the image
147
- doc = SimpleDocTemplate(output_path, pagesize=letter)
148
- styles = getSampleStyleSheet()
149
-
150
- elements = []
151
- heading = Paragraph(f" {input_path}", styles["Heading2"])
152
- elements.append(heading)
153
- elements.append(Spacer(1, 12))
154
-
155
- img_rl = RLImage(input_path, width=img_width, height=img_height, kind='proportional')
156
- elements.append(img_rl)
157
-
158
- doc.build(elements)
159
-
160
  else:
161
- with open(input_path, "r") as f:
162
- content = f.read()
163
-
164
- doc = SimpleDocTemplate(output_path, pagesize=letter)
165
- styles = getSampleStyleSheet()
166
- elements = []
167
-
168
- # Add the file path heading
169
- heading = Paragraph(f"{input_path}", styles["Heading2"])
170
- elements.append(heading)
171
- elements.append(Spacer(1, 12))
172
-
173
- # Add the content as Preformatted text
174
- text = Preformatted(content, style=styles["Code"])
175
- elements.append(text)
176
-
177
- doc.build(elements)
178
-
179
- def merge_pdfs(self, pdf_files, output_path):
180
- pdf_writer = PyPDF2.PdfWriter()
181
- for pdf_file in pdf_files:
182
- with open(pdf_file, "rb") as f:
183
- try:
184
- pdf_reader = PyPDF2.PdfReader(f)
185
- if pdf_reader.is_encrypted:
186
- print(f"{pdf_file} is encrypted. Skipping.")
187
- continue
188
- except:
189
- print(f"{pdf_file} is not a valid PDF. Skipping.")
190
- continue
191
-
192
-
193
- for page_num in range(len(pdf_reader.pages)):
194
- pdf_writer.add_page(pdf_reader.pages[page_num])
195
-
196
- with open(output_path, "wb") as f:
197
- pdf_writer.write(f)
198
-
199
- def get_pdf(self):
200
- return self.merged_pdf_path
201
-
202
- def save_indexDB(self,save_path = 'indexDB.json'):
203
- self.vectorstore.save_local(save_path)
204
- print("indexDB saved at: ", save_path)
205
-
206
-
207
-
208
- def create_repo_pdf(self, repo_link, image_included = False, merged_pdf = "temp_merged.pdf"):
209
- self.merged_pdf_path = merged_pdf
210
- self.download_repo_zip(repo_link)
211
- folder_name = self.extract_zip('./main.zip', './')
212
- ingnore_list = ['__pycache__',]
213
- if not image_included:
214
- ingnore_list.append('.jpg')
215
- ingnore_list.append('.png')
216
- ingnore_list.append('.jpeg')
217
- ingnore_list.append('.gif')
218
- ingnore_list.append('.bmp')
219
- ingnore_list.append('.tiff')
220
-
221
- print('folder_name: ', folder_name)
222
- pdf_files = []
223
- for root, dirs, files in os.walk(folder_name):
224
- for file in files:
225
-
226
- input_file = os.path.join(root, file)
227
- #if the file contains any of the strings in the ignore list, skip it
228
- if any(x in input_file for x in ingnore_list):
229
- continue
230
- #create a temp folder to store the pdf files
231
- os.makedirs("temp", exist_ok=True)
232
- output_file = os.path.join("temp", os.path.splitext(file)[0] + ".pdf")
233
-
234
- try:
235
- self.convert_to_pdf(input_file, output_file)
236
- except:
237
- print("Error converting file: ", input_file)
238
- continue
239
- pdf_files.append(output_file)
240
-
241
-
242
-
243
- self.merge_pdfs(pdf_files, self.merged_pdf_path)
244
- #clean up the temp folder and downloaded zip file
245
- os.remove("main.zip")
246
- shutil.rmtree(folder_name)
247
- shutil.rmtree("temp")
248
-
249
- return self.merged_pdf_path
250
-
251
-
252
- def Answer_quetsion(self, question):
253
- return self.qa.run(question)
254
-
255
- def Answer_quetsion_with_source(self, question):
256
- return self.qa({"question": question}, return_only_outputs = True)
257
-
258
-
259
-
260
- def call_output(string = 'REPOGPT Initializing'):
261
- return string
262
-
263
- def download_file(filename = 'merged.pdf'):
264
- # filename = repogpt.get_pdf()
265
- return send_file(filename, as_attachment=True)
266
-
267
-
268
- repogpt = REPOGPT()
269
-
270
-
271
- with gr.Blocks() as demo:
272
- with gr.Row():
273
- gr.Markdown("<h3><center>REPOGPT</center></h3>")
274
- gr.Markdown(
275
- """This is a demo to the work [REPOGPT](https://github.com/wuchangsheng951/RepoGPT).<br>
276
- This space connects ChatGPT and RepoGPT is a Python library that allows you to search and answer questions about a GitHub repository's content.<br>
277
- """
278
- )
279
- with gr.Row():
280
- apikey = gr.Textbox(
281
- placeholder="Paste your OpenAI API key here to start Visual ChatGPT(sk-...) and press Enter ↵️",
282
- show_label=True,
283
- label = 'OpenAI API key',
284
- lines=1,
285
- type="password",
286
- )
287
- with gr.Row():
288
- repo_link = gr.Textbox(
289
- placeholder="Paste your repo_link and press Enter ↵️",
290
- label = 'repo_link like: https://github.com/wuchangsheng951/RepoGPT',
291
-
292
- show_label=True,
293
- lines=1,
294
- )
295
-
296
- with gr.Column(scale=0.7):
297
- Initialize = gr.Button("Initialize RepoGPT")
298
-
299
- output = gr.Textbox(label="Output Box")
300
-
301
- with gr.Row(visible=False) as input_raws:
302
- with gr.Column(scale=0.7):
303
- txt = gr.Textbox(show_label=False, placeholder="Enter your question").style(container=False)
304
-
305
- with gr.Column(scale=0.4):
306
- AQ = gr.Button("Ask a Question").style(container=False)
307
-
308
- # with gr.Row():
309
- # Download = gr.Button("Download PDF")
310
-
311
-
312
- gr.Examples(
313
- examples=["Whats the name of this repo?",
314
- "Whats this repo for?",
315
- "How can I use this. Example code ? Step by step",
316
- "how can I use this Experiment trackers ? Step by step",
317
- "how can I Performing gradient accumulation with Accelerate? Step by step?",
318
- "Make it like water-color painting",
319
- "What is the background color",
320
- "Describe this image",
321
- "please detect the depth of this image",
322
- "Can you use this depth image to generate a cute dog",
323
- ],
324
- inputs=txt
325
- )
326
-
327
- apikey.submit(repogpt.init_agent, [apikey,repo_link], [input_raws, output])
328
- Initialize.click(repogpt.init_agent, [apikey,repo_link], [input_raws, output])
329
- apikey.submit(call_output, [],[output])
330
- txt.submit(repogpt.Answer_quetsion, [txt], [output])
331
- AQ.click(repogpt.Answer_quetsion, [txt], [output])
332
- # Download.click(download_file, [], [Download])
333
-
334
 
335
- demo.launch()
 
 
1
+ import streamlit as st
 
 
2
  import requests
3
+ from github import Github
4
+
5
# Streamlit app: compare one file across every fork of a GitHub repository.
#
# The user supplies a repository URL and a file path. The file is downloaded
# from the base repository and from each fork, and the forks whose copy
# differs are listed, ordered by how many base lines they no longer contain.

# URL prefixes stripped from the user input to obtain "owner/repo".
GITHUB_URL_PREFIXES = ("https://github.com/", "http://github.com/")
# Upper bound (seconds) for each raw-file download so one stalled request
# cannot hang the whole page.
REQUEST_TIMEOUT_SECONDS = 30


def _github_client():
    """Return a PyGithub client configured from the environment.

    The token is read from the ``GITHUB_TOKEN`` environment variable. It must
    never be hard-coded in this file: a credential committed to a repository
    is public and has to be revoked. Without a token the client is
    unauthenticated, which still works for public repositories but is subject
    to much lower API rate limits.
    """
    import os  # local import: the file's import block does not provide os

    token = os.environ.get("GITHUB_TOKEN")
    return Github(token) if token else Github()


def _repo_full_name(url):
    """Reduce a GitHub URL (or a bare ``owner/repo``) to ``owner/repo``.

    Tolerates surrounding whitespace, a trailing slash, a ``.git`` suffix and
    extra path segments such as ``/tree/main``.
    """
    name = url.strip()
    for prefix in GITHUB_URL_PREFIXES:
        if name.startswith(prefix):
            name = name[len(prefix):]
            break
    name = name.strip("/")
    if name.endswith(".git"):
        name = name[: -len(".git")]
    # Anything after the second segment is a sub-path, not part of the name.
    return "/".join(name.split("/")[:2])


def _fetch_text(url):
    """Download ``url`` and return its body as text.

    Raises ``requests.HTTPError`` on a non-2xx response so an error page is
    never mistaken for file content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.text


def _lines_missing_from(base_text, other_text):
    """Return the lines of ``base_text`` that do not occur in ``other_text``.

    Order and duplicates of the base lines are preserved. The other file's
    lines are put in a set once instead of being re-split for every base line.
    """
    other_lines = set(other_text.splitlines())
    return [line for line in base_text.splitlines() if line not in other_lines]


st.title("GitHub Repo Forks File Comparison")

repo_url = st.text_input("GitHub Repository URL", value="")
file_name = st.text_input("File Name", value="")

if repo_url and file_name:
    try:
        repo = _github_client().get_repo(_repo_full_name(repo_url))
        # Materialize the fork listing once; it is needed both for the count
        # and for the comparison loop.
        forks = list(repo.get_forks())
        st.write(f"Original repo: {repo_url}")
        base_file = _fetch_text(repo.get_contents(file_name).download_url)
        st.write(f"Number of forks: {len(forks)}")

        # Each entry: (fork URL, number of missing base lines, those lines).
        diffs = []
        for fork in forks:
            # A failure in one fork (file missing, fork gone, HTTP error) is
            # reported and must not abort the comparison of the others.
            try:
                fork_file = _fetch_text(fork.get_contents(file_name).download_url)
                if fork_file != base_file:
                    diff_lines = _lines_missing_from(base_file, fork_file)
                    diffs.append((fork.html_url, len(diff_lines), diff_lines))
            except Exception as e:
                st.write(f"Error accessing file in fork {fork.html_url}: {e}")

        if diffs:
            st.write("Forks with differences in the specified file:")
            # Most-diverged forks first.
            diffs.sort(key=lambda entry: entry[1], reverse=True)
            for fork_url, missing_count, _ in diffs:
                st.write(f"{fork_url} - {missing_count} different lines")
        else:
            st.write("All forks have the same content in the specified file.")

    except Exception as e:
        # Top-level boundary of the page: surface the problem to the user
        # instead of crashing the Streamlit script.
        st.write(f"Error: {e}")