# Flask RAG chatbot application (paste/export residue removed above this line).

# Standard library
import os

# Third-party
from dotenv import load_dotenv
from flask import Flask, render_template, jsonify, request
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema import Document
from langchain_community.llms import HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from transformers import pipeline

# Local
from src.helper import download_embeddings
from src.prompt import *

# Cleaning function for model output
import re

# Patterns are compiled once at import time instead of on every call.
_HYPHEN_BREAK = re.compile(r'-\n')          # words split across a line break
_COLUMN_MARKER = re.compile(r'/C\d+')       # '/C12'-style PDF extraction artifacts
_MULTI_SPACE = re.compile(r' +')
# Publisher boilerplate from the source corpus, removed case-insensitively.
_BOILERPLATE_PATTERNS = [
    re.compile(r'Reproduced by permission[^\.\n]*[\.]?', re.IGNORECASE),
    re.compile(r'GALE ENCYCLOPEDIA OF MEDICINE[^\.\n]*[\.]?', re.IGNORECASE),
    re.compile(r'Researchers,? Inc[^\.\n]*[\.]?', re.IGNORECASE),
]


def clean_text(text):
    """Normalize raw model/retrieval output into a single clean line.

    Re-joins hyphenated line breaks, flattens newlines, strips PDF
    artifacts and publisher boilerplate, and collapses whitespace.

    Args:
        text: Raw text string produced by the RAG pipeline.

    Returns:
        The cleaned, stripped string.

    Note:
        Whitespace is collapsed AFTER artifact/boilerplate removal (the
        original collapsed first, which left double spaces wherever an
        artifact or credit line was deleted mid-sentence).
    """
    text = _HYPHEN_BREAK.sub('', text)      # "hy-\nphen" -> "hyphen"
    text = text.replace('\n', ' ')          # flatten remaining newlines
    text = _COLUMN_MARKER.sub('', text)
    for pattern in _BOILERPLATE_PATTERNS:
        text = pattern.sub('', text)
    text = _MULTI_SPACE.sub(' ', text)      # collapse runs of spaces last
    return text.strip()


app = Flask(__name__)

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

Pinecone_API_KEY = os.getenv("Pinecone_API_KEY")
# Validate BEFORE writing to os.environ: the original assigned first, so a
# missing key raised a confusing TypeError (os.environ values must be str)
# instead of this explicit ValueError.
if not Pinecone_API_KEY:
    raise ValueError("Missing Pinecone API key. Set it in Hugging Face secrets.")
os.environ["Pinecone_API_KEY"] = Pinecone_API_KEY


# Embedding model used to vectorize queries — presumably the same model that
# populated the index; verify against the ingestion script.
embeddings = download_embeddings()

index_name = "doctor-ai"
# Attach to the already-populated Pinecone index (ingestion happens elsewhere).
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

# Retrieve the top-3 most similar chunks for each query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# FLAN-T5 text2text pipeline on CPU (device=-1), wrapped for LangChain.
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=-1,
)
chat_model = HuggingFacePipeline(pipeline=generator)
# system_prompt comes from src.prompt; it must contain the placeholders the
# chain fills in ({context} from the retriever, {input} from the caller) —
# TODO confirm against src/prompt.py.
prompt = ChatPromptTemplate.from_template(system_prompt)

# "Stuff" all retrieved documents into one prompt, then put the retriever in
# front so callers only pass {"input": ...} and read back response["answer"].
question_answer_chain = create_stuff_documents_chain(chat_model, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


@app.route('/')
def index():
    """Serve the chat UI page."""
    template_name = 'chat.html'
    return render_template(template_name)

@app.route('/get', methods=["GET","POST"])
def chat():
    msg=request.form["msg"]
    input=msg
    print(input)
    response = rag_chain.invoke({"input": input})
    cleaned_answer = clean_text(response["answer"])
    print("Response:", cleaned_answer)
    return str(cleaned_answer)


if __name__ == '__main__':
    # 7860 is the Hugging Face Spaces convention; allow override via $PORT.
    # NOTE(review): debug=True enables the Werkzeug debugger/auto-reloader —
    # confirm this is disabled for any public deployment.
    app.run(host='0.0.0.0', port=int(os.environ.get("PORT", 7860)), debug=True)