cogcorp committed on
Commit
aec4df8
·
0 Parent(s):

Duplicate from cogcorp/assignment1

Browse files
Files changed (5) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +89 -0
  4. logo.jpg +0 -0
  5. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ExpertChat
3
+ emoji: 👀
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.27.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: cogcorp/assignment1
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PyPDF2 import PdfReader
3
+ import zipfile
4
+ import os
5
+ import io
6
+ import nltk
7
+ import openai
8
+ import time
9
+
10
+
11
+ import pip
12
+ import subprocess
13
+ import sys
14
+
15
# Install the dependencies listed in requirements.txt at startup.
# NOTE(review): the third-party imports at the top of this file have already
# executed by the time this runs, so it cannot rescue a missing package; it is
# kept only to preserve the existing startup behaviour. Consider removing it.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

# Fetch only the tokenizer data used by nltk.word_tokenize instead of the
# entire NLTK collection ('all'), which is very large and was re-requested on
# every start. Newer NLTK releases look for 'punkt_tab', older ones for
# 'punkt'; nltk.download reports (rather than raises) on an unknown package,
# so requesting both is safe.
for _nltk_package in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_package, quiet=True)

# The OpenAI API key is read from the 'OpenAPI' environment variable
# (configure it as a secret; do not hard-code the key in this file).
openai.api_key = os.getenv('OpenAPI')
31
+
32
def call_openai_api(text, user_prompt, *, model="gpt-3.5-turbo", max_retries=5, retry_delay=1):
    """Ask the chat model to apply ``user_prompt`` to the reference ``text``.

    The request is retried on any exception. This is deliberately
    best-effort: if every attempt fails, the message of the last exception
    is returned in place of an answer instead of being raised, so the caller
    always receives a string.

    Args:
        text: Reference data placed first in the user message.
        user_prompt: Instructions appended after the reference data.
        model: Chat model name passed to the API.
        max_retries: Total number of attempts; values below 1 are treated
            as a single attempt.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        The content of the first choice's message, or ``str(exception)`` for
        the last failure when all attempts are exhausted.
    """
    # The payload does not change between attempts, so build it once.
    messages = [
        {"role": "system", "content": "You are a research assistant. Provide factual short answers to your prompts. The user will provide reference data followed by instructions."},
        {"role": "user", "content": f"{text}\n\n{user_prompt}"},
    ]
    attempts = max(1, max_retries)
    last_error = None
    for attempt in range(attempts):
        try:
            response = openai.ChatCompletion.create(model=model, messages=messages)
            return response['choices'][0]['message']['content']
        except Exception as e:  # best-effort boundary: report instead of raising
            last_error = e
            if attempt < attempts - 1:
                # Wait before retrying; no sleep after the final attempt.
                time.sleep(retry_delay)
    return str(last_error)
50
+
51
def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page of an in-memory PDF."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # Guard against a page yielding no text (None or empty), e.g. an
    # image-only page, so the join never fails on a non-string.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def _split_for_model(text, max_tokens=2000):
    """Split ``text`` into pieces of at most ``max_tokens`` NLTK word tokens.

    Text that already fits is returned unchanged as a single piece; longer
    text is re-joined from its tokens with single spaces, chunk by chunk.
    """
    tokens = nltk.word_tokenize(text)
    if len(tokens) <= max_tokens:
        return [text]
    return [
        ' '.join(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]


def pdf_to_text(file, user_prompt):
    """Run ``user_prompt`` against every PDF inside an uploaded Zip archive.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path of a
            Zip archive on disk, or ``None`` when nothing was uploaded.
        user_prompt: Instructions sent to the model with each piece of text.

    Returns:
        The model responses for all PDFs (one per chunk, in archive order)
        joined with newlines; an empty string if the archive holds no PDFs;
        or a short notice when no file was uploaded.
    """
    if file is None:
        return "No file uploaded. Please upload a Zip file containing PDF files."

    responses = []
    # The context manager closes the archive even if a PDF fails to parse.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for filename in archive.namelist():
            # Case-insensitive match so '.PDF' entries are not skipped.
            if not filename.lower().endswith('.pdf'):
                continue
            text = _extract_pdf_text(archive.read(filename))
            for chunk in _split_for_model(text):
                responses.append(call_openai_api(chunk, user_prompt))
    return '\n'.join(responses)
77
+
78
# Build the web UI: a Zip upload plus a free-text prompt go in, the combined
# model responses come out. The top-level gr.File / gr.Textbox components are
# used because the gr.inputs / gr.outputs namespaces are deprecated.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=[
        gr.File(label="PDF File (Upload a Zip file containing ONLY PDF files)"),
        gr.Textbox(label="User Prompt (Enter a prompt to guide the AI's responses)"),
    ],
    outputs=gr.Textbox(label="Cognitive Agent Response"),
    title="Ask An Expert Proof Of Concept",
    description="This app extracts knowledge from the uploaded Zip files. Using a Cognitive Agent you can interact with that knowledge.",
)
# share=False: serve locally / on the hosting platform without a public tunnel link.
iface.launch(share=False)
89
+
logo.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ PyMuPDF
2
+ openai
3
+ gradio
4
+ PyPDF2
5
+ nltk