wuchangsheng951
committed on
Commit
·
99fb858
1
Parent(s):
15026db
Add application file
Browse files
app.py
CHANGED
|
@@ -15,12 +15,13 @@ from reportlab.lib.utils import ImageReader
|
|
| 15 |
from PIL import Image
|
| 16 |
import os
|
| 17 |
from langchain.indexes.vectorstore import VectorstoreIndexCreator
|
| 18 |
-
from langchain.chains import VectorDBQA
|
| 19 |
from langchain import OpenAI
|
| 20 |
from langchain.document_loaders import UnstructuredPDFLoader
|
| 21 |
from langchain.vectorstores.faiss import FAISS
|
| 22 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 23 |
from flask import send_file
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
class REPOGPT:
|
|
@@ -30,6 +31,10 @@ class REPOGPT:
|
|
| 30 |
self.api_key = None
|
| 31 |
|
| 32 |
def init_agent(self, api_key, repo_link = None, load_vectorstore = None):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
self.repo_link = repo_link
|
| 34 |
self.api_key = api_key
|
| 35 |
self.load_vectorstore = load_vectorstore
|
|
@@ -45,14 +50,15 @@ class REPOGPT:
|
|
| 45 |
|
| 46 |
os.environ["OPENAI_API_KEY"] = self.api_key
|
| 47 |
if self.load_vectorstore == None:
|
| 48 |
-
|
| 49 |
loader = UnstructuredPDFLoader( self.create_repo_pdf(self.repo_link,image_included = image_included))
|
| 50 |
-
pages = loader.load_and_split()
|
| 51 |
self.index = VectorstoreIndexCreator(vectorstore_cls = FAISS).from_loaders([loader])
|
| 52 |
self.vectorstore = self.index.vectorstore
|
|
|
|
| 53 |
else:
|
| 54 |
embeddings = OpenAIEmbeddings()
|
| 55 |
-
self.vectorstore = FAISS.load_local(
|
|
|
|
| 56 |
|
| 57 |
self.qa = VectorDBQA.from_chain_type(llm =OpenAI(temperature=0, model_name="gpt-3.5-turbo"), chain_type = "stuff",vectorstore = self.vectorstore )
|
| 58 |
|
|
@@ -63,7 +69,6 @@ class REPOGPT:
|
|
| 63 |
def download_repo_zip(self, link, output_folder = "main.zip"):
|
| 64 |
username = link.split('/')[3]
|
| 65 |
repo = link.split('/')[4]
|
| 66 |
-
# zip_url = f"https://github.com/{username}/{repo}/archive/refs/heads/main.zip"
|
| 67 |
zip_url = f"https://github.com/{username}/{repo}/archive/refs/heads/master.zip"
|
| 68 |
self.zip_url = zip_url
|
| 69 |
response = requests.get(zip_url)
|
|
@@ -71,6 +76,8 @@ class REPOGPT:
|
|
| 71 |
#down load the zip file
|
| 72 |
with open('main.zip', 'wb') as f:
|
| 73 |
f.write(response.content)
|
|
|
|
|
|
|
| 74 |
# return BytesIO(response.content)
|
| 75 |
|
| 76 |
def extract_zip(self, zip_file, destination_folder):
|
|
@@ -198,7 +205,7 @@ class REPOGPT:
|
|
| 198 |
|
| 199 |
|
| 200 |
|
| 201 |
-
def create_repo_pdf(self, repo_link, image_included = False, merged_pdf = "
|
| 202 |
self.merged_pdf_path = merged_pdf
|
| 203 |
self.download_repo_zip(repo_link)
|
| 204 |
folder_name = self.extract_zip('./main.zip', './')
|
|
@@ -211,6 +218,7 @@ class REPOGPT:
|
|
| 211 |
ingnore_list.append('.bmp')
|
| 212 |
ingnore_list.append('.tiff')
|
| 213 |
|
|
|
|
| 214 |
pdf_files = []
|
| 215 |
for root, dirs, files in os.walk(folder_name):
|
| 216 |
for file in files:
|
|
@@ -238,13 +246,15 @@ class REPOGPT:
|
|
| 238 |
shutil.rmtree(folder_name)
|
| 239 |
shutil.rmtree("temp")
|
| 240 |
|
| 241 |
-
return
|
| 242 |
|
| 243 |
|
| 244 |
def Answer_quetsion(self, question):
|
| 245 |
return self.qa.run(question)
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
-
repogpt = REPOGPT()
|
| 248 |
|
| 249 |
|
| 250 |
def call_output(string = 'REPOGPT Initializing'):
|
|
@@ -254,6 +264,10 @@ def download_file(filename = 'merged.pdf'):
|
|
| 254 |
# filename = repogpt.get_pdf()
|
| 255 |
return send_file(filename, as_attachment=True)
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
with gr.Blocks() as demo:
|
| 258 |
with gr.Row():
|
| 259 |
gr.Markdown("<h3><center>REPOGPT</center></h3>")
|
|
@@ -273,7 +287,7 @@ with gr.Blocks() as demo:
|
|
| 273 |
with gr.Row():
|
| 274 |
repo_link = gr.Textbox(
|
| 275 |
placeholder="Paste your repo_link and press Enter ↵️",
|
| 276 |
-
label = '
|
| 277 |
|
| 278 |
show_label=True,
|
| 279 |
lines=1,
|
|
@@ -298,12 +312,14 @@ with gr.Blocks() as demo:
|
|
| 298 |
gr.Examples(
|
| 299 |
examples=["Whats the name of this repo?",
|
| 300 |
"Whats this repo for?",
|
| 301 |
-
"How can I use this Example code ? Step by step",
|
| 302 |
"how can I use this Experiment trackers ? Step by step",
|
| 303 |
"how can I Performing gradient accumulation with Accelerate? Step by step?",
|
| 304 |
-
"
|
| 305 |
-
"
|
| 306 |
-
"
|
|
|
|
|
|
|
| 307 |
],
|
| 308 |
inputs=txt
|
| 309 |
)
|
|
|
|
| 15 |
from PIL import Image
|
| 16 |
import os
|
| 17 |
from langchain.indexes.vectorstore import VectorstoreIndexCreator
|
| 18 |
+
from langchain.chains import VectorDBQA,VectorDBQAWithSourcesChain
|
| 19 |
from langchain import OpenAI
|
| 20 |
from langchain.document_loaders import UnstructuredPDFLoader
|
| 21 |
from langchain.vectorstores.faiss import FAISS
|
| 22 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 23 |
from flask import send_file
|
| 24 |
+
from IPython.display import Markdown, display
|
| 25 |
|
| 26 |
|
| 27 |
class REPOGPT:
|
|
|
|
| 31 |
self.api_key = None
|
| 32 |
|
| 33 |
def init_agent(self, api_key, repo_link = None, load_vectorstore = None):
|
| 34 |
+
try:
|
| 35 |
+
os.remove('merged.pdf')
|
| 36 |
+
except:
|
| 37 |
+
pass
|
| 38 |
self.repo_link = repo_link
|
| 39 |
self.api_key = api_key
|
| 40 |
self.load_vectorstore = load_vectorstore
|
|
|
|
| 50 |
|
| 51 |
os.environ["OPENAI_API_KEY"] = self.api_key
|
| 52 |
if self.load_vectorstore == None:
|
|
|
|
| 53 |
loader = UnstructuredPDFLoader( self.create_repo_pdf(self.repo_link,image_included = image_included))
|
| 54 |
+
# pages = loader.load_and_split()
|
| 55 |
self.index = VectorstoreIndexCreator(vectorstore_cls = FAISS).from_loaders([loader])
|
| 56 |
self.vectorstore = self.index.vectorstore
|
| 57 |
+
print(' vectorstore created')
|
| 58 |
else:
|
| 59 |
embeddings = OpenAIEmbeddings()
|
| 60 |
+
self.vectorstore = FAISS.load_local(self.load_vectorstore,embeddings =embeddings)
|
| 61 |
+
print(' vectorstore loaded')
|
| 62 |
|
| 63 |
self.qa = VectorDBQA.from_chain_type(llm =OpenAI(temperature=0, model_name="gpt-3.5-turbo"), chain_type = "stuff",vectorstore = self.vectorstore )
|
| 64 |
|
|
|
|
| 69 |
def download_repo_zip(self, link, output_folder = "main.zip"):
|
| 70 |
username = link.split('/')[3]
|
| 71 |
repo = link.split('/')[4]
|
|
|
|
| 72 |
zip_url = f"https://github.com/{username}/{repo}/archive/refs/heads/master.zip"
|
| 73 |
self.zip_url = zip_url
|
| 74 |
response = requests.get(zip_url)
|
|
|
|
| 76 |
#down load the zip file
|
| 77 |
with open('main.zip', 'wb') as f:
|
| 78 |
f.write(response.content)
|
| 79 |
+
# return the name of the extracted folder
|
| 80 |
+
# return self.extract_zip("main.zip", output_folder)
|
| 81 |
# return BytesIO(response.content)
|
| 82 |
|
| 83 |
def extract_zip(self, zip_file, destination_folder):
|
|
|
|
| 205 |
|
| 206 |
|
| 207 |
|
| 208 |
+
def create_repo_pdf(self, repo_link, image_included = False, merged_pdf = "temp_merged.pdf"):
|
| 209 |
self.merged_pdf_path = merged_pdf
|
| 210 |
self.download_repo_zip(repo_link)
|
| 211 |
folder_name = self.extract_zip('./main.zip', './')
|
|
|
|
| 218 |
ingnore_list.append('.bmp')
|
| 219 |
ingnore_list.append('.tiff')
|
| 220 |
|
| 221 |
+
print('folder_name: ', folder_name)
|
| 222 |
pdf_files = []
|
| 223 |
for root, dirs, files in os.walk(folder_name):
|
| 224 |
for file in files:
|
|
|
|
| 246 |
shutil.rmtree(folder_name)
|
| 247 |
shutil.rmtree("temp")
|
| 248 |
|
| 249 |
+
return self.merged_pdf_path
|
| 250 |
|
| 251 |
|
| 252 |
def Answer_quetsion(self, question):
|
| 253 |
return self.qa.run(question)
|
| 254 |
+
|
| 255 |
+
def Answer_quetsion_with_source(self, question):
|
| 256 |
+
return self.qa({"question": question}, return_only_outputs = True)
|
| 257 |
|
|
|
|
| 258 |
|
| 259 |
|
| 260 |
def call_output(string = 'REPOGPT Initializing'):
|
|
|
|
| 264 |
# filename = repogpt.get_pdf()
|
| 265 |
return send_file(filename, as_attachment=True)
|
| 266 |
|
| 267 |
+
|
| 268 |
+
repogpt = REPOGPT()
|
| 269 |
+
|
| 270 |
+
|
| 271 |
with gr.Blocks() as demo:
|
| 272 |
with gr.Row():
|
| 273 |
gr.Markdown("<h3><center>REPOGPT</center></h3>")
|
|
|
|
| 287 |
with gr.Row():
|
| 288 |
repo_link = gr.Textbox(
|
| 289 |
placeholder="Paste your repo_link and press Enter ↵️",
|
| 290 |
+
label = 'repo_link',
|
| 291 |
|
| 292 |
show_label=True,
|
| 293 |
lines=1,
|
|
|
|
| 312 |
gr.Examples(
|
| 313 |
examples=["Whats the name of this repo?",
|
| 314 |
"Whats this repo for?",
|
| 315 |
+
"How can I use this. Example code ? Step by step",
|
| 316 |
"how can I use this Experiment trackers ? Step by step",
|
| 317 |
"how can I Performing gradient accumulation with Accelerate? Step by step?",
|
| 318 |
+
"Make it like water-color painting",
|
| 319 |
+
"What is the background color",
|
| 320 |
+
"Describe this image",
|
| 321 |
+
"please detect the depth of this image",
|
| 322 |
+
"Can you use this depth image to generate a cute dog",
|
| 323 |
],
|
| 324 |
inputs=txt
|
| 325 |
)
|