Spaces:
Runtime error
Runtime error
Vilson Doda committed on
Commit ·
f972310
1
Parent(s): 4b08ee8
initial
Browse files- app.py +65 -0
- functions.py +64 -0
- requirements.txt +145 -0
app.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from langchain.llms import LlamaCpp
|
| 4 |
+
from langchain.embeddings import LlamaCppEmbeddings,OpenAIEmbeddings
|
| 5 |
+
from langchain.prompts import PromptTemplate
|
| 6 |
+
from langchain.chains import LLMChain
|
| 7 |
+
from langchain.document_loaders import TextLoader
|
| 8 |
+
from langchain.text_splitter import CharacterTextSplitter
|
| 9 |
+
from langchain.vectorstores import Chroma
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from langchain.chains import LLMChain, RetrievalQA
|
| 12 |
+
from langchain.vectorstores import Chroma, FAISS
|
| 13 |
+
from functions import *
|
| 14 |
+
from pydrive.auth import GoogleAuth
|
| 15 |
+
from pydrive.drive import GoogleDrive
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Authenticate against Google Drive and list the files visible to the account.
# NOTE(review): LocalWebserverAuth() presumably needs a browser on the same
# machine to finish the OAuth flow, which a headless host cannot provide --
# confirm how credentials are meant to be supplied in deployment.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()

drive = GoogleDrive(gauth)

file_list = drive.ListFile().GetList()
# Guard the lookup: indexing [0] unconditionally raised IndexError whenever
# the listing came back empty. `modello` is only used for the log line below.
modello = file_list[0]['title'] if file_list else None
print("MODELLO::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: ", modello)
|
| 26 |
+
|
| 27 |
+
# Prompt used for question answering: an instruction header, the retrieved
# context, then the user's question. `{context}` and `{question}` are the two
# placeholders filled in when the template is formatted.
prompt_template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "You are a helpful,highly skilled AI writer with world wide access license, whose purpose is to retrieve information from the web. If not possible, you must access the info on your Database. is If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "{context}\n"
    "Question: {question}\n"
    "Answer:"
)
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
|
| 33 |
+
|
| 34 |
+
#newline
|
| 35 |
+
|
| 36 |
+
# Share link of the model file on Google Drive.
# NOTE(review): this URL is passed as `model_path` below; LlamaCpp and
# LlamaCppEmbeddings presumably expect a file on local disk -- confirm that
# the model is downloaded first, or point this at the local copy.
drive_path = "https://drive.google.com/file/d/1ssWtdONQ_lEc870GATeiAhZHlsDzhLQ3/view?usp=sharing"

transcription = file_path = "output_path/file.txt"

# The same model file backs both text generation and embeddings.
llm = LlamaCpp(model_path=drive_path)
embeddings = LlamaCppEmbeddings(model_path=drive_path)
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Vector store opened from the "/Witc" persistence directory.
new_db = Chroma(persist_directory="/Witc", embedding_function=embeddings)

# Hand the custom prompt to the "stuff" chain. Previously `prompt` was built
# but never passed to RetrievalQA, so it had no effect on the answers.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=new_db.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
|
| 51 |
+
#retriever=retriever)
|
| 52 |
+
|
| 53 |
+
question = st.text_input("Ask me")

# Streamlit re-executes this script on every interaction, and the text box is
# empty until the user types something. Only run the (slow) chain when there
# is an actual question instead of querying with an empty string.
if question:
    start = today()
    result = qa({"query": question})
    end = today()

    st.write(result['result'])
    # Start/end timestamps of the query, written to the server log.
    print('inizio: ', start)
    print('fine: ', end)
|
| 64 |
+
|
| 65 |
+
|
functions.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
#from pypdf import PdfReader
|
| 3 |
+
|
| 4 |
+
from langchain.chains import LLMChain, RetrievalQA
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
#def pdf_to_text(pdf_file,name='Readme'):
|
| 18 |
+
# """
|
| 19 |
+
# Function that reads a pdf file and saves it to txt format.
|
| 20 |
+
# ---------------------------------------------------------
|
| 21 |
+
# Parameters:
|
| 22 |
+
# pdf_file: str.
|
| 23 |
+
# contains file name or path to the file in .pdf format
|
| 24 |
+
# name: str.
|
| 25 |
+
# contains the name to save the .txt file with
|
| 26 |
+
# Default: Readme.txt
|
| 27 |
+
# ---------------------------------------------------------
|
| 28 |
+
# Returns:
|
| 29 |
+
# testo: list.
|
| 30 |
+
# list with the text extracted on each page of the .pdf file
|
| 31 |
+
# ---------------------------------------------------------
|
| 32 |
+
# """
|
| 33 |
+
# testo=[]
|
| 34 |
+
# reader=PdfReader(pdf_file)
|
| 35 |
+
# for i in range(0,len(reader.pages)):
|
| 36 |
+
# text = reader.pages[i].extract_text()
|
| 37 |
+
# testo.append(text)
|
| 38 |
+
# try:
|
| 39 |
+
# with open(f'{name}.txt', 'w', encoding='utf-8') as f:
|
| 40 |
+
# f.writelines(testo)
|
| 41 |
+
# f.close()
|
| 42 |
+
# except Exception as e:
|
| 43 |
+
# print(f"passed on exception because {e}")
|
| 44 |
+
# return testo
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def today(fmt='%Y-%m-%d %H:%M:%S'):
    """
    Return the current local date and time as a formatted string.
    ---------------------------------------------------------
    Parameters:
        fmt: str.
            strftime format applied to the current timestamp
            Default: '%Y-%m-%d %H:%M:%S'
    ---------------------------------------------------------
    Returns:
        str.
            the current local time rendered with ``fmt``
    ---------------------------------------------------------
    """
    # Use a distinct local name: the original bound a local called `today`,
    # shadowing this function's own name inside its body.
    now = datetime.now()
    return now.strftime(fmt)
|
| 50 |
+
|
| 51 |
+
def ask(query, qa=None):
    """
    Run a question through a retrieval-QA chain and return the answer text.
    ---------------------------------------------------------
    Parameters:
        query: str.
            the question to ask
        qa: callable.
            an already-built chain (for example the object returned by
            RetrievalQA.from_chain_type) that accepts {"query": ...} and
            returns a mapping containing a 'result' entry
            Default: None
    ---------------------------------------------------------
    Returns:
        the value stored under 'result' in the chain's output
    ---------------------------------------------------------
    Raises:
        ValueError: when no chain is supplied
    ---------------------------------------------------------
    """
    # The original body called the RetrievalQA *class* with the input dict,
    # which tries to construct a chain instead of running one and therefore
    # always failed. The chain to run must be provided by the caller.
    if qa is None:
        raise ValueError("ask() needs a built retrieval chain passed as 'qa'")
    result = qa({"query": query})
    return result['result']
|
| 54 |
+
|
| 55 |
+
def write_text_file(content, file_path):
    """
    Write text to a file, replacing any existing content.
    ---------------------------------------------------------
    Parameters:
        content: str.
            the text to write
        file_path: str.
            path of the file to create or overwrite
    ---------------------------------------------------------
    Returns:
        bool.
            True when the file was written, False when writing failed
            (the error is printed, not raised)
    ---------------------------------------------------------
    """
    try:
        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding (non-ASCII text could fail or be mis-encoded).
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(content)
    except (OSError, TypeError, ValueError) as e:
        # OSError: path/permission problems; TypeError: non-string content;
        # ValueError: invalid path or text that cannot be encoded.
        print(f"Error occurred while writing the file: {e}")
        return False
    return True
|
| 63 |
+
|
| 64 |
+
import os

# Read the Google API key from the environment instead of committing it to the
# repository. Falls back to an empty string when the variable is not set.
# NOTE(review): the key that used to be hard-coded on this line was published
# with the commit and should be treated as exposed -- rotate it.
google_api_key = os.environ.get("GOOGLE_API_KEY", "")
|
requirements.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiohttp==3.8.5
|
| 2 |
+
aiosignal==1.3.1
|
| 3 |
+
altair==5.0.1
|
| 4 |
+
anyio==3.7.1
|
| 5 |
+
aspose-pdf==23.7.0
|
| 6 |
+
async-timeout==4.0.2
|
| 7 |
+
attrs==23.1.0
|
| 8 |
+
backoff==2.2.1
|
| 9 |
+
blinker==1.6.2
|
| 10 |
+
cachetools==5.3.1
|
| 11 |
+
certifi==2023.5.7
|
| 12 |
+
cffi==1.15.1
|
| 13 |
+
chardet==5.2.0
|
| 14 |
+
charset-normalizer==3.2.0
|
| 15 |
+
chroma-hnswlib==0.7.2
|
| 16 |
+
chromadb==0.3.29
|
| 17 |
+
click==8.1.6
|
| 18 |
+
clickhouse-connect==0.6.8
|
| 19 |
+
colorama==0.4.6
|
| 20 |
+
coloredlogs==15.0.1
|
| 21 |
+
comtypes==1.2.0
|
| 22 |
+
cryptography==41.0.3
|
| 23 |
+
dataclasses-json==0.5.14
|
| 24 |
+
decorator==5.1.1
|
| 25 |
+
diskcache==5.6.1
|
| 26 |
+
duckdb==0.8.1
|
| 27 |
+
exceptiongroup==1.1.2
|
| 28 |
+
faiss-cpu==1.7.4
|
| 29 |
+
fastapi==0.85.1
|
| 30 |
+
filetype==1.2.0
|
| 31 |
+
flatbuffers==23.5.26
|
| 32 |
+
frozenlist==1.4.0
|
| 33 |
+
gitdb==4.0.10
|
| 34 |
+
GitPython==3.1.32
|
| 35 |
+
google-api-core==2.11.1
|
| 36 |
+
google-api-python-client==2.97.0
|
| 37 |
+
google-auth==2.22.0
|
| 38 |
+
google-auth-httplib2==0.1.0
|
| 39 |
+
google-cloud-speech==2.21.0
|
| 40 |
+
googleapis-common-protos==1.59.1
|
| 41 |
+
greenlet==2.0.2
|
| 42 |
+
grpcio==1.56.0
|
| 43 |
+
grpcio-status==1.56.0
|
| 44 |
+
h11==0.14.0
|
| 45 |
+
hnswlib==0.7.0
|
| 46 |
+
httplib2==0.22.0
|
| 47 |
+
httptools==0.6.0
|
| 48 |
+
humanfriendly==10.0
|
| 49 |
+
idna==3.4
|
| 50 |
+
importlib-metadata==6.8.0
|
| 51 |
+
importlib-resources==6.0.0
|
| 52 |
+
Jinja2==3.1.2
|
| 53 |
+
joblib==1.3.1
|
| 54 |
+
jsonschema==4.18.6
|
| 55 |
+
jsonschema-specifications==2023.7.1
|
| 56 |
+
langchain==0.0.250
|
| 57 |
+
langsmith==0.0.18
|
| 58 |
+
llama-cpp-python==0.1.77
|
| 59 |
+
lxml==4.9.3
|
| 60 |
+
lz4==4.3.2
|
| 61 |
+
markdown-it-py==3.0.0
|
| 62 |
+
MarkupSafe==2.1.3
|
| 63 |
+
marshmallow==3.20.1
|
| 64 |
+
mdurl==0.1.2
|
| 65 |
+
monotonic==1.6
|
| 66 |
+
mpmath==1.3.0
|
| 67 |
+
multidict==6.0.4
|
| 68 |
+
mypy-extensions==1.0.0
|
| 69 |
+
nltk==3.8.1
|
| 70 |
+
numexpr==2.8.4
|
| 71 |
+
numpy==1.25.2
|
| 72 |
+
oauth2client==4.1.3
|
| 73 |
+
onnxruntime==1.15.1
|
| 74 |
+
openai==0.27.8
|
| 75 |
+
openapi-schema-pydantic==1.2.4
|
| 76 |
+
overrides==7.3.1
|
| 77 |
+
packaging==23.1
|
| 78 |
+
pandas==2.0.3
|
| 79 |
+
pdf2image==1.16.3
|
| 80 |
+
pdfminer.six==20221105
|
| 81 |
+
pdfplumber==0.10.2
|
| 82 |
+
Pillow==9.5.0
|
| 83 |
+
posthog==3.0.1
|
| 84 |
+
proto-plus==1.22.3
|
| 85 |
+
protobuf==4.23.4
|
| 86 |
+
pulsar-client==3.2.0
|
| 87 |
+
pyarrow==12.0.1
|
| 88 |
+
pyasn1==0.5.0
|
| 89 |
+
pyasn1-modules==0.3.0
|
| 90 |
+
pycparser==2.21
|
| 91 |
+
pydantic==1.10.12
|
| 92 |
+
pydeck==0.8.0
|
| 93 |
+
PyDrive==1.3.1
|
| 94 |
+
Pygments==2.15.1
|
| 95 |
+
Pympler==1.0.1
|
| 96 |
+
PyMuPDF==1.22.5
|
| 97 |
+
pyparsing==3.1.1
|
| 98 |
+
pypdf==3.14.0
|
| 99 |
+
pypdfium2==4.18.0
|
| 100 |
+
PyPika==0.48.9
|
| 101 |
+
pyreadline3==3.4.1
|
| 102 |
+
python-dateutil==2.8.2
|
| 103 |
+
python-dotenv==1.0.0
|
| 104 |
+
python-magic==0.4.27
|
| 105 |
+
pytz==2023.3
|
| 106 |
+
pytz-deprecation-shim==0.1.0.post0
|
| 107 |
+
PyYAML==6.0.1
|
| 108 |
+
referencing==0.30.0
|
| 109 |
+
regex==2023.6.3
|
| 110 |
+
requests==2.31.0
|
| 111 |
+
rich==13.5.2
|
| 112 |
+
rpds-py==0.9.2
|
| 113 |
+
rsa==4.9
|
| 114 |
+
six==1.16.0
|
| 115 |
+
smmap==5.0.0
|
| 116 |
+
sniffio==1.3.0
|
| 117 |
+
SpeechRecognition==3.10.0
|
| 118 |
+
SQLAlchemy==2.0.19
|
| 119 |
+
sqlite3-api==2.0.4
|
| 120 |
+
starlette==0.20.4
|
| 121 |
+
streamlit==1.25.0
|
| 122 |
+
sympy==1.12
|
| 123 |
+
tabulate==0.9.0
|
| 124 |
+
tenacity==8.2.2
|
| 125 |
+
tiktoken==0.4.0
|
| 126 |
+
tokenizers==0.13.3
|
| 127 |
+
toml==0.10.2
|
| 128 |
+
toolz==0.12.0
|
| 129 |
+
tornado==6.3.2
|
| 130 |
+
tqdm==4.65.0
|
| 131 |
+
typing-inspect==0.9.0
|
| 132 |
+
typing_extensions==4.7.1
|
| 133 |
+
tzdata==2023.3
|
| 134 |
+
tzlocal==4.3.1
|
| 135 |
+
unstructured==0.9.0
|
| 136 |
+
uritemplate==4.1.1
|
| 137 |
+
urllib3==1.26.16
|
| 138 |
+
uvicorn==0.23.2
|
| 139 |
+
validators==0.20.0
|
| 140 |
+
watchdog==3.0.0
|
| 141 |
+
watchfiles==0.19.0
|
| 142 |
+
websockets==11.0.3
|
| 143 |
+
yarl==1.9.2
|
| 144 |
+
zipp==3.16.2
|
| 145 |
+
zstandard==0.21.0
|