Vilson Doda committed on
Commit
f972310
·
1 Parent(s): 4b08ee8
Files changed (3) hide show
  1. app.py +65 -0
  2. functions.py +64 -0
  3. requirements.txt +145 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ from langchain.llms import LlamaCpp
4
+ from langchain.embeddings import LlamaCppEmbeddings,OpenAIEmbeddings
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain.chains import LLMChain
7
+ from langchain.document_loaders import TextLoader
8
+ from langchain.text_splitter import CharacterTextSplitter
9
+ from langchain.vectorstores import Chroma
10
+ from datetime import datetime
11
+ from langchain.chains import LLMChain, RetrievalQA
12
+ from langchain.vectorstores import Chroma, FAISS
13
+ from functions import *
14
+ from pydrive.auth import GoogleAuth
15
+ from pydrive.drive import GoogleDrive
16
+
17
+
18
import os  # local import: used only for the model download/cache path below

# --- Google Drive access ----------------------------------------------------
# Authenticate through PyDrive's local-webserver OAuth flow and open a Drive
# client with the resulting credentials.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()

drive = GoogleDrive(gauth)

# List the files visible to the authenticated account and log the first title.
# An empty listing used to crash with IndexError; report it in the UI instead.
file_list = drive.ListFile().GetList()
if not file_list:
    st.error("No files were found in the authenticated Google Drive account.")
    st.stop()
modello = file_list[0]['title']
print("MODELLO::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: ", modello)

# --- Prompt -----------------------------------------------------------------
prompt_template = """Use the following pieces of context to answer the question at the end.
You are a helpful,highly skilled AI writer with world wide access license, whose purpose is to retrieve information from the web. If not possible, you must access the info on your Database. is If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Share link of the model file on Google Drive.
drive_path = "https://drive.google.com/file/d/1ssWtdONQ_lEc870GATeiAhZHlsDzhLQ3/view?usp=sharing"

transcription = file_path = "output_path/file.txt"

# --- Model ------------------------------------------------------------------
# LlamaCpp / LlamaCppEmbeddings load weights from a local file, so a Drive
# share URL cannot be used as model_path. Download the shared file once
# (keyed by its Drive file id) and reuse the local copy on later runs.
model_file_id = drive_path.split("/d/")[1].split("/")[0]
model_dir = "models"
model_path = os.path.join(model_dir, model_file_id)
if not os.path.exists(model_path):
    os.makedirs(model_dir, exist_ok=True)
    # Download to a temporary name first so an interrupted transfer is never
    # mistaken for a complete model file on the next run.
    partial_path = model_path + ".part"
    drive.CreateFile({"id": model_file_id}).GetContentFile(partial_path)
    os.replace(partial_path, model_path)

llm = LlamaCpp(model_path=model_path)
embeddings = LlamaCppEmbeddings(model_path=model_path)
llm_chain = LLMChain(llm=llm, prompt=prompt)
new_db = Chroma(persist_directory="/Witc", embedding_function=embeddings)

# Retrieval QA chain over the persisted Chroma store ("stuff" chain type).
qa = RetrievalQA.from_chain_type(
    retriever=new_db.as_retriever(),
    llm=llm,
    chain_type="stuff",
)

# --- UI ---------------------------------------------------------------------
question = st.text_input("Ask me")

# Streamlit re-executes the script on every interaction; only query the model
# once the user has actually typed something.
if question:
    start = today()
    result = qa({"query": question})
    end = today()

    st.write(result['result'])
    # Log start/end timestamps of the query to the console.
    print('inizio: ', start)
    print('fine: ', end)
64
+
65
+
functions.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ #from pypdf import PdfReader
3
+
4
+ from langchain.chains import LLMChain, RetrievalQA
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+ #def pdf_to_text(pdf_file,name='Readme'):
18
+ # """
19
+ # Function that reads a pdf file and saves it to txt format.
20
+ # ---------------------------------------------------------
21
+ # Parameters:
22
+ # pdf_file: str.
23
+ # contains file name or path to the file in .pdf format
24
+ # name: str.
25
+ # contains the name to save the .txt file with
26
+ # Default: Readme.txt
27
+ # ---------------------------------------------------------
28
+ # Returns:
29
+ # testo: list.
30
+ # list with the text extracted on each page of the .pdf file
31
+ # ---------------------------------------------------------
32
+ # """
33
+ # testo=[]
34
+ # reader=PdfReader(pdf_file)
35
+ # for i in range(0,len(reader.pages)):
36
+ # text = reader.pages[i].extract_text()
37
+ # testo.append(text)
38
+ # try:
39
+ # with open(f'{name}.txt', 'w', encoding='utf-8') as f:
40
+ # f.writelines(testo)
41
+ # f.close()
42
+ # except Exception as e:
43
+ # print(f"passed on exception because {e}")
44
+ # return testo
45
+
46
+
47
def today():
    """Return the current local date and time as a 'YYYY-MM-DD HH:MM:SS' string."""
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    return datetime.today().strftime(timestamp_format)
50
+
51
def ask(query, chain=None):
    """Send a question to a retrieval QA chain and return the answer text.

    Parameters:
        query: str.
            The question to ask.
        chain: callable, optional.
            An already-built chain (for example the object returned by
            ``RetrievalQA.from_chain_type``). It is called with
            ``{"query": query}`` and must return a mapping that has a
            ``'result'`` key.

    Returns:
        The value stored under ``'result'`` in the chain's output.

    Raises:
        ValueError: if no chain is supplied.
    """
    # The previous implementation passed the inputs dict to the RetrievalQA
    # class itself, which constructs a chain rather than running one, so it
    # could never produce an answer. A configured chain instance is required.
    if chain is None:
        raise ValueError(
            "ask() needs a configured QA chain; pass it as the 'chain' argument."
        )
    result = chain({"query": query})
    return result['result']
54
+
55
def write_text_file(content, file_path, encoding='utf-8'):
    """Write ``content`` to ``file_path``, replacing any existing file.

    Parameters:
        content: str.
            The text to write.
        file_path: str.
            Destination path; the file is opened in 'w' mode.
        encoding: str.
            Text encoding used for the file. Defaults to UTF-8 so the output
            does not depend on the platform's default encoding.

    Returns:
        bool. True when the write succeeded, False when it failed (the error
        is printed rather than raised).
    """
    try:
        with open(file_path, 'w', encoding=encoding) as file:
            file.write(content)
    except (OSError, TypeError, ValueError, LookupError) as e:
        # OSError: path/permission problems; TypeError: non-str content or
        # path; ValueError: encoding errors; LookupError: unknown encoding.
        print(f"Error occurred while writing the file: {e}")
        return False
    return True
63
+
64
import os  # local import: only needed to read the key from the environment

# Secrets must not be committed to source control. The key is read from the
# GOOGLE_API_KEY environment variable (empty string when unset).
# NOTE(review): the key that was previously hard-coded on this line is now
# public in the repository history and should be revoked and rotated.
google_api_key = os.environ.get("GOOGLE_API_KEY", "")
requirements.txt ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.5
2
+ aiosignal==1.3.1
3
+ altair==5.0.1
4
+ anyio==3.7.1
5
+ aspose-pdf==23.7.0
6
+ async-timeout==4.0.2
7
+ attrs==23.1.0
8
+ backoff==2.2.1
9
+ blinker==1.6.2
10
+ cachetools==5.3.1
11
+ certifi==2023.5.7
12
+ cffi==1.15.1
13
+ chardet==5.2.0
14
+ charset-normalizer==3.2.0
15
+ chroma-hnswlib==0.7.2
16
+ chromadb==0.3.29
17
+ click==8.1.6
18
+ clickhouse-connect==0.6.8
19
+ colorama==0.4.6
20
+ coloredlogs==15.0.1
21
+ comtypes==1.2.0
22
+ cryptography==41.0.3
23
+ dataclasses-json==0.5.14
24
+ decorator==5.1.1
25
+ diskcache==5.6.1
26
+ duckdb==0.8.1
27
+ exceptiongroup==1.1.2
28
+ faiss-cpu==1.7.4
29
+ fastapi==0.85.1
30
+ filetype==1.2.0
31
+ flatbuffers==23.5.26
32
+ frozenlist==1.4.0
33
+ gitdb==4.0.10
34
+ GitPython==3.1.32
35
+ google-api-core==2.11.1
36
+ google-api-python-client==2.97.0
37
+ google-auth==2.22.0
38
+ google-auth-httplib2==0.1.0
39
+ google-cloud-speech==2.21.0
40
+ googleapis-common-protos==1.59.1
41
+ greenlet==2.0.2
42
+ grpcio==1.56.0
43
+ grpcio-status==1.56.0
44
+ h11==0.14.0
45
+ hnswlib==0.7.0
46
+ httplib2==0.22.0
47
+ httptools==0.6.0
48
+ humanfriendly==10.0
49
+ idna==3.4
50
+ importlib-metadata==6.8.0
51
+ importlib-resources==6.0.0
52
+ Jinja2==3.1.2
53
+ joblib==1.3.1
54
+ jsonschema==4.18.6
55
+ jsonschema-specifications==2023.7.1
56
+ langchain==0.0.250
57
+ langsmith==0.0.18
58
+ llama-cpp-python==0.1.77
59
+ lxml==4.9.3
60
+ lz4==4.3.2
61
+ markdown-it-py==3.0.0
62
+ MarkupSafe==2.1.3
63
+ marshmallow==3.20.1
64
+ mdurl==0.1.2
65
+ monotonic==1.6
66
+ mpmath==1.3.0
67
+ multidict==6.0.4
68
+ mypy-extensions==1.0.0
69
+ nltk==3.8.1
70
+ numexpr==2.8.4
71
+ numpy==1.25.2
72
+ oauth2client==4.1.3
73
+ onnxruntime==1.15.1
74
+ openai==0.27.8
75
+ openapi-schema-pydantic==1.2.4
76
+ overrides==7.3.1
77
+ packaging==23.1
78
+ pandas==2.0.3
79
+ pdf2image==1.16.3
80
+ pdfminer.six==20221105
81
+ pdfplumber==0.10.2
82
+ Pillow==9.5.0
83
+ posthog==3.0.1
84
+ proto-plus==1.22.3
85
+ protobuf==4.23.4
86
+ pulsar-client==3.2.0
87
+ pyarrow==12.0.1
88
+ pyasn1==0.5.0
89
+ pyasn1-modules==0.3.0
90
+ pycparser==2.21
91
+ pydantic==1.10.12
92
+ pydeck==0.8.0
93
+ PyDrive==1.3.1
94
+ Pygments==2.15.1
95
+ Pympler==1.0.1
96
+ PyMuPDF==1.22.5
97
+ pyparsing==3.1.1
98
+ pypdf==3.14.0
99
+ pypdfium2==4.18.0
100
+ PyPika==0.48.9
101
+ pyreadline3==3.4.1
102
+ python-dateutil==2.8.2
103
+ python-dotenv==1.0.0
104
+ python-magic==0.4.27
105
+ pytz==2023.3
106
+ pytz-deprecation-shim==0.1.0.post0
107
+ PyYAML==6.0.1
108
+ referencing==0.30.0
109
+ regex==2023.6.3
110
+ requests==2.31.0
111
+ rich==13.5.2
112
+ rpds-py==0.9.2
113
+ rsa==4.9
114
+ six==1.16.0
115
+ smmap==5.0.0
116
+ sniffio==1.3.0
117
+ SpeechRecognition==3.10.0
118
+ SQLAlchemy==2.0.19
119
+ sqlite3-api==2.0.4
120
+ starlette==0.20.4
121
+ streamlit==1.25.0
122
+ sympy==1.12
123
+ tabulate==0.9.0
124
+ tenacity==8.2.2
125
+ tiktoken==0.4.0
126
+ tokenizers==0.13.3
127
+ toml==0.10.2
128
+ toolz==0.12.0
129
+ tornado==6.3.2
130
+ tqdm==4.65.0
131
+ typing-inspect==0.9.0
132
+ typing_extensions==4.7.1
133
+ tzdata==2023.3
134
+ tzlocal==4.3.1
135
+ unstructured==0.9.0
136
+ uritemplate==4.1.1
137
+ urllib3==1.26.16
138
+ uvicorn==0.23.2
139
+ validators==0.20.0
140
+ watchdog==3.0.0
141
+ watchfiles==0.19.0
142
+ websockets==11.0.3
143
+ yarl==1.9.2
144
+ zipp==3.16.2
145
+ zstandard==0.21.0