Spaces:
Sleeping
Sleeping
TahaFawzyElshrif
committed on
Commit
·
2ebf9ad
1
Parent(s):
2e950a2
published first version
Browse files- Embedder/E5_Embeddedr.py +18 -0
- Embedder/Embedder.py +5 -0
- Embedder/__pycache__/E5_Embeddedr.cpython-311.pyc +0 -0
- Embedder/__pycache__/E5_Embeddedr.cpython-312.pyc +0 -0
- Embedder/__pycache__/Embedder.cpython-311.pyc +0 -0
- Embedder/__pycache__/Embedder.cpython-312.pyc +0 -0
- Models/GPT.py +25 -0
- Models/Gemini.py +32 -0
- Models/LLMModel.py +22 -0
- Models/Prompts.py +28 -0
- Models/Utils.py +16 -0
- Models/__pycache__/GPT.cpython-311.pyc +0 -0
- Models/__pycache__/GPT.cpython-312.pyc +0 -0
- Models/__pycache__/Gemini.cpython-311.pyc +0 -0
- Models/__pycache__/Gemini.cpython-312.pyc +0 -0
- Models/__pycache__/LLMModel.cpython-311.pyc +0 -0
- Models/__pycache__/LLMModel.cpython-312.pyc +0 -0
- Models/__pycache__/Prompts.cpython-312.pyc +0 -0
- Models/__pycache__/Utils.cpython-311.pyc +0 -0
- Models/__pycache__/Utils.cpython-312.pyc +0 -0
- OLAP_Conn/DuckConn.py +39 -0
- OLAP_Conn/OLAP_Connection.py +7 -0
- OLAP_Conn/__pycache__/DuckConn.cpython-312.pyc +0 -0
- OLAP_Conn/__pycache__/OLAP_Connection.cpython-312.pyc +0 -0
- RAG/RAG_Retrival.py +99 -0
- RAG/__pycache__/RAG_Retrival.cpython-312.pyc +0 -0
- app.py +20 -0
- requirements.txt +12 -0
Embedder/E5_Embeddedr.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer, util

from Embedder.Embedder import Embedder


class E5_Embeddedr(Embedder):
    """Embedder backed by the multilingual E5-small sentence-transformer."""

    def __init__(self):
        self.model_name = "intfloat/multilingual-e5-small"
        self.model = SentenceTransformer(self.model_name)
        # Ask the model for its output dimension (384 for this model)
        # instead of hard-coding the constant.
        self.embedding_size = self.model.get_sentence_embedding_dimension()

    def embed(self, text):
        """Embed a single text.

        The input is prefixed with "passage: " as the E5 family expects,
        and the returned vector is L2-normalized.
        """
        return self.model.encode(f"passage: {text}", normalize_embeddings=True)

# embed = E5_Embeddedr()
# embed.embed("مرحبا بك فى وى")
|
Embedder/Embedder.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Embedder:
    """Abstract base class for text embedders.

    Subclasses (e.g. E5_Embeddedr) implement embed() and return a
    vector for a single text.
    """

    def __init__(self) -> None:
        pass

    def embed(self, text):
        # Bug fix: the original signature `def embed(text)` was missing
        # `self`, so calling the method on an instance raised TypeError.
        pass
|
Embedder/__pycache__/E5_Embeddedr.cpython-311.pyc
ADDED
|
Binary file (1.31 kB). View file
|
|
|
Embedder/__pycache__/E5_Embeddedr.cpython-312.pyc
ADDED
|
Binary file (1.18 kB). View file
|
|
|
Embedder/__pycache__/Embedder.cpython-311.pyc
ADDED
|
Binary file (696 Bytes). View file
|
|
|
Embedder/__pycache__/Embedder.cpython-312.pyc
ADDED
|
Binary file (603 Bytes). View file
|
|
|
Models/GPT.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
import os
from Models.LLMModel import LLMModel

# Hugging Face's OpenAI-compatible inference router.
base_gpt_url = "https://router.huggingface.co/v1"


class GPT(LLMModel):
    """LLM backend that reaches GPT-style models through the HF router.

    Note: top_k and stop_sequences are not supported by this API, so only
    temperature and max_tokens from the base configuration are sent.
    """

    def __init__(self, model_name):
        super().__init__()
        self.model_name = model_name
        # Authentication relies on the HF_TOKEN environment variable.
        self.client = OpenAI(base_url=base_gpt_url, api_key=os.environ["HF_TOKEN"])

    def send_message(self, messages_json):
        """Send a chat-completion request and return the reply text."""
        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages_json,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )
        return completion.choices[0].message.content
|
| 25 |
+
|
Models/Gemini.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import google.generativeai as genai
from Models.LLMModel import LLMModel
import os


class Gemini(LLMModel):
    """LLM backend wrapping Google's Gemini generative models."""

    def __init__(self, model_name='gemini-1.5-flash'):
        super().__init__()
        self.model_name = model_name
        # The API key is read from the GEMINI_API_KEY environment variable.
        genai.configure(api_key=os.environ["GEMINI_API_KEY"])
        self.model = genai.GenerativeModel(self.model_name)
        self.set_config()

    def set_config(self, temperature=1, top_k=40, top_p=.85, stop_sequences=None, max_tokens=200):
        """Store the sampling settings and rebuild the genai GenerationConfig."""
        super().set_config(temperature, top_k, top_p, stop_sequences, max_tokens)
        self.config = genai.types.GenerationConfig(
            temperature=self.temperature,
            max_output_tokens=self.max_tokens,
            top_p=self.top_p,
            top_k=self.top_k,
            stop_sequences=self.stop_sequences,
        )

    def send_message(self, prompt):
        """Generate a response for *prompt* and return it as a string."""
        text = prompt if isinstance(prompt, str) else str(prompt)
        response = self.model.generate_content(text, generation_config=self.config)
        return str(response.text)
|
| 31 |
+
|
| 32 |
+
|
Models/LLMModel.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class LLMModel:
    """Base class holding common sampling configuration for LLM backends.

    Subclasses (Gemini, GPT) override send_message(); a given backend may
    ignore some of these parameters.
    """

    def __init__(self):
        self.model_name = ""
        self.temperature = 1
        self.top_k = 40
        self.top_p = .85
        self.stop_sequences = []
        self.max_tokens = 200

    def set_config(self, temperature=1, top_k=40, top_p=.85, stop_sequences=None, max_tokens=200):
        """
        Set the configuration for the model (Some Parameters may not work according to model)

        Bug fix: stop_sequences previously defaulted to a shared mutable
        list ([]), so mutating one instance's stop_sequences leaked into
        every other instance using the default. It now defaults to None
        and is normalized to a fresh list, matching the constructor.
        """
        self.temperature = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.stop_sequences = [] if stop_sequences is None else stop_sequences
        self.max_tokens = max_tokens

    def send_message(self, messages_json):
        """Send a message/prompt to the model; implemented by subclasses."""
        pass
|
| 22 |
+
|
Models/Prompts.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# System prompt: constrains the assistant to safe, step-by-step first-aid
# guidance in Arabic.
PromptHead = """أنت مساعد متخصص في الإسعافات الأولية والطوارئ الطبية.
وظيفتك هي تقديم نصائح وإرشادات آمنة وعملية لمواجهة حالات الطوارئ الصحية البسيطة، مثل الجروح، الحروق، الاختناق، النزيف، الإغماء، أو أي حادث منزلي أو خارجي.

عند الإجابة:
1. قدم خطوات واضحة ومرتبة (مثل خطوة 1، خطوة 2...).
2. لا تعطي تعليمات قد تكون خطيرة بدون تحذير واضح.
3. شجع المستخدم على الاتصال بالإسعاف أو الطبيب إذا كانت الحالة خطيرة.
4. لا تكتب معلومات طبية متقدمة أو تشخيصات، ركز فقط على الإسعافات الأولية.
5. استخدم لغة بسيطة وسهلة الفهم.

مثال على الاستجابة:
سؤال المستخدم: "كيف أوقف نزيف من جرح في الإصبع؟"
ردك:
1. نظف الجرح بلطف بالماء الجاري.
2. ضع قطعة شاش نظيفة على الجرح واضغط برفق لوقف النزيف.
3. إذا استمر النزيف أكثر من 10 دقائق، اتصل بالإسعاف فوراً.
4. غطِّ الجرح بضمادة نظيفة بعد توقف النزيف.
"""


# Idiom fix: named lambdas (PEP 8 / E731) converted to plain functions;
# the prompt strings are unchanged.
def get_summary_prompt(x):
    """Build a prompt asking for a one-to-two-line summary of *x*."""
    return "اعطنى ملخص فى سطر واحد او اثنين بالكثير للنص الاتى:\n" + x


def final_prompt(query, context):
    """Build the final RAG prompt: answer *query* using only *context*."""
    return (
        f"أجب على السؤال التالي: {query}\n"
        f"اعتمد فقط على المعلومات الواردة في النص التالي: {context}\n"
    )
|
Models/Utils.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from Models.Gemini import Gemini
|
| 2 |
+
from Models.GPT import GPT
|
| 3 |
+
|
| 4 |
+
# Idiom fix: named lambdas (PEP 8 / E731) converted to plain functions.
def message_user(x):
    """Wrap *x* as an OpenAI-style user chat message."""
    return {"role": "user", "content": x}


def message_system(x):
    """Wrap *x* as an OpenAI-style system chat message."""
    return {"role": "system", "content": x}
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Core Functions here ,if want more write in backend
|
| 9 |
+
def get_specific_model(model_name):
    """Map a model-name alias to a concrete LLMModel instance.

    NOTE(review): the "openai/gpt-oss-120b" / "gpt 120" aliases currently
    return the 20b model while any unknown name falls through to the 120b
    one — confirm this mapping is intentional.
    """
    if model_name in ("gemini-1.5-flash", "gemini"):
        return Gemini()
    if model_name in ("openai/gpt-oss-120b", "gpt 120"):
        return GPT("openai/gpt-oss-20b")
    return GPT("openai/gpt-oss-120b")
|
| 16 |
+
|
Models/__pycache__/GPT.cpython-311.pyc
ADDED
|
Binary file (1.69 kB). View file
|
|
|
Models/__pycache__/GPT.cpython-312.pyc
ADDED
|
Binary file (1.59 kB). View file
|
|
|
Models/__pycache__/Gemini.cpython-311.pyc
ADDED
|
Binary file (2.41 kB). View file
|
|
|
Models/__pycache__/Gemini.cpython-312.pyc
ADDED
|
Binary file (2.17 kB). View file
|
|
|
Models/__pycache__/LLMModel.cpython-311.pyc
ADDED
|
Binary file (1.38 kB). View file
|
|
|
Models/__pycache__/LLMModel.cpython-312.pyc
ADDED
|
Binary file (1.28 kB). View file
|
|
|
Models/__pycache__/Prompts.cpython-312.pyc
ADDED
|
Binary file (2.15 kB). View file
|
|
|
Models/__pycache__/Utils.cpython-311.pyc
ADDED
|
Binary file (2.12 kB). View file
|
|
|
Models/__pycache__/Utils.cpython-312.pyc
ADDED
|
Binary file (904 Bytes). View file
|
|
|
OLAP_Conn/DuckConn.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import duckdb
from OLAP_Conn.OLAP_Connection import OLAP_Connection
from sentence_transformers import util


class DuckConn(OLAP_Connection):
    """OLAP_Connection backed by an on-disk DuckDB database."""

    def __init__(self, path_duckdb="first_aid.duckdb"):
        super().__init__()
        self.path_duckdb = path_duckdb
        self.con = duckdb.connect(self.path_duckdb)

    def make_data_frame(self, data_, name):
        """Register dataframe *data_* under *name* and persist it as the
        `documents` table (created once; later calls keep the existing table).
        """
        self.con.register(name, data_)
        # NOTE(review): *name* is interpolated into SQL; it must come from
        # trusted code, never from user input.
        self.con.execute("CREATE TABLE IF NOT EXISTS documents AS SELECT * FROM " + name)
        self.con.commit()

    def get_relevant_docs(self, embedded_query, top_k=3, limit=100):
        """Return the *top_k* document texts most similar to *embedded_query*.

        *limit* is currently unused and kept only for interface
        compatibility.
        """
        # Name the columns explicitly (they match the dataframe built by the
        # RAG pipeline) instead of relying on SELECT * column order.
        docs = self.con.execute(
            "SELECT page_content, embeddings FROM documents;"
        ).fetchall()

        # Score every stored embedding against the query.
        scored_docs = []
        for page_content, embedding_doc in docs:
            # float() collapses the 1x1 similarity tensor to a plain number,
            # which makes the sort below robust and cheap.
            score = float(util.cos_sim(embedded_query, embedding_doc))
            scored_docs.append((page_content, score))

        # Highest similarity first.
        scored_docs.sort(key=lambda item: item[1], reverse=True)

        # Return top k results.
        return [doc[0] for doc in scored_docs[:top_k]]

    def close(self):
        """Commit pending work and close the connection."""
        self.con.commit()
        self.con.close()
|
OLAP_Conn/OLAP_Connection.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class OLAP_Connection:
    """Abstract interface for document stores (see DuckConn)."""

    def __init__(self):
        pass

    def make_data_frame(self, data_, name):
        """Persist dataframe *data_* under *name*; subclass hook."""
        pass

    def get_relevant_docs(self, embedded_query, top_k=3):
        """Return the top_k most relevant documents; subclass hook."""
        pass
|
OLAP_Conn/__pycache__/DuckConn.cpython-312.pyc
ADDED
|
Binary file (2.53 kB). View file
|
|
|
OLAP_Conn/__pycache__/OLAP_Connection.cpython-312.pyc
ADDED
|
Binary file (840 Bytes). View file
|
|
|
RAG/RAG_Retrival.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from tqdm import tqdm
|
| 2 |
+
from tqdm.notebook import tqdm as tqdmk
|
| 3 |
+
from langchain.document_loaders import PyPDFLoader
|
| 4 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain.llms import HuggingFaceHub
|
| 6 |
+
from langchain import PromptTemplate
|
| 7 |
+
from langchain.document_loaders import PyPDFLoader
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import duckdb
|
| 10 |
+
import numpy as np
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class RAG_Retrival:
    """Retrieval pipeline: load documents, chunk, embed, store, and query."""

    def __init__(self, db, model, embedder):
        self.conn = db            # OLAP_Connection-like store (e.g. DuckConn)
        self.model = model        # LLM instance, used elsewhere by the app
        self.embedder = embedder  # object exposing embed(text) -> vector

    def read_data(self, path_data):
        """Walk *path_data* and concatenate the text of every .txt/.pdf file."""
        # Count total files first for tqdm's total
        total_files = sum(len(files) for _, _, files in os.walk(path_data))
        all_text = ""
        with tqdm(total=total_files, desc="Reading files", unit="file") as pbar:
            for root, dirs, files in os.walk(path_data):
                for file in files:
                    full_path = os.path.join(root, file)
                    if full_path.endswith(".txt"):
                        all_text += self.load_text_file(full_path)
                    elif full_path.endswith(".pdf"):
                        all_text += self.load_pdf(full_path)

                    pbar.update(1)

        return all_text

    def load_text_file(self, path):
        """Read one whole text file as UTF-8 and return its contents.

        Bug fix: reads in a single call (the previous line-by-line `+=`
        loop was quadratic) and pins the encoding — the corpus is Arabic,
        so relying on the platform default encoding could crash or
        silently mangle text.
        """
        with open(path, 'r', encoding='utf-8') as file:
            return file.read()

    def load_pdf(self, pdf_folder):
        """Extract and join the text of every page of one PDF file."""
        loader = PyPDFLoader(pdf_folder)
        pages = loader.load_and_split()
        text = "\n".join([doc.page_content for doc in pages])
        return text

    def text_splitter(self, text, chunk_size=1000, chunk_overlap=100, is_separator_regex=False):
        """Split *text* into overlapping chunks, tagging each with a doc_id."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            is_separator_regex=is_separator_regex,
        )
        docs = text_splitter.create_documents([text])
        for i, d in enumerate(docs):
            d.metadata = {"doc_id": i}
        return docs

    def prepare_text_df(self, docs):
        """Embed every chunk and return a (page_content, embeddings) dataframe."""
        # Get the page_content from the documents and create a new list
        content_list = [doc.page_content for doc in docs]
        # Send one page_content at a time
        print("Making embedding...")
        embeddings = [self.embedder.embed(content) for content in tqdmk(content_list)]
        print("Finished embedding...")

        # Create a dataframe to ingest it to the database
        dataframe = pd.DataFrame({
            'page_content': content_list,
            'embeddings': embeddings})
        return dataframe

    def make_data_frame(self, path, chunk_size=1000, chunk_overlap=100, is_separator_regex=False):
        """End-to-end ingest: read -> split -> embed -> upload; returns the dataframe."""
        all_texts = self.read_data(path)
        docs = self.text_splitter(all_texts, chunk_size, chunk_overlap, is_separator_regex)
        dataframe = self.prepare_text_df(docs)
        self.upload_file(dataframe)
        return dataframe

    def upload_file(self, embedding_df, name='first_aid'):
        '''
        Upload data and close database to be commited
        '''
        self.conn.make_data_frame(embedding_df, name)
        self.conn.close()

    def query_relevant(self, user_query):
        """Embed *user_query* and return the most relevant stored documents."""
        embedded_query = self.embedder.embed(user_query)
        result = self.conn.get_relevant_docs(embedded_query)
        return result
|
RAG/__pycache__/RAG_Retrival.cpython-312.pyc
ADDED
|
Binary file (5.4 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Dict

app = FastAPI()

# Define the request body model
class Message(BaseModel):
    role: str
    content: str

@app.post("/chat")
async def chat(messages: List[Message]):
    """Placeholder chat endpoint: echoes a summary of the received messages.

    Bug fix: an empty message list no longer raises IndexError.
    """
    # Convert Pydantic objects to dict
    messages_data = [msg.dict() for msg in messages]

    if not messages_data:
        return {"status": "error", "response": "No messages provided."}

    # Example: send to model (here just a placeholder)
    response_text = f"Received {len(messages_data)} messages. First message: {messages_data[0]['content']}"

    return {"status": "success", "response": response_text}
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
sentence_transformers
|
| 4 |
+
google-generativeai
|
| 5 |
+
openai
|
| 6 |
+
duckdb
|
| 7 |
+
tqdm
|
| 8 |
+
langchain
|
| 9 |
+
langchain-community
|
| 10 |
+
pypdf
|
| 11 |
+
pandas
|
| 12 |
+
numpy
|