MRasheq committed on
Commit 0f3adc8 · 1 Parent(s): 749a5fb

Second Commit
Files changed (2)
  1. app.py +136 -72
  2. requirements.txt +23 -1
app.py CHANGED
@@ -1,82 +1,146 @@
- import gradio as gr
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- # Load model and tokenizer
- model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
- tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
-
- # Move model to GPU if available
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model = model.to(device)
-
- def generate_response(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     # Format conversation history
-     messages = [{"role": "system", "content": system_message}]
-
-     for user_msg, assistant_msg in history:
-         if user_msg:
-             messages.append({"role": "user", "content": user_msg})
-         if assistant_msg:
-             messages.append({"role": "assistant", "content": assistant_msg})
-
-     messages.append({"role": "user", "content": message})
-
-     # Convert messages to model input format
-     conversation = ""
-     for msg in messages:
-         role = msg["role"]
-         content = msg["content"]
-         if role == "system":
-             conversation += f"System: {content}\n"
-         elif role == "user":
-             conversation += f"User: {content}\n"
-         elif role == "assistant":
-             conversation += f"Assistant: {content}\n"
-
-     # Tokenize input
-     inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=2048).to(device)
-
-     # Generate response
-     with torch.no_grad():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=max_tokens,
-             do_sample=True,
-             temperature=temperature,
-             top_p=top_p,
-             pad_token_id=tokenizer.eos_token_id,
-         )
-
-     # Decode and return response
-     response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
-     return response.strip()
-
- # Create Gradio interface
- demo = gr.ChatInterface(
-     generate_response,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
-     title="DeepSeek-R1 Chat",
-     description="A chat interface powered by the DeepSeek-R1 language model.",
- )
-
  if __name__ == "__main__":
-     demo.launch()
+ import os
+ from typing import List
+ from langchain.vectorstores.pgvector import PGVector
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.llms import HuggingFacePipeline
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import RetrievalQA
+ from sentence_transformers import SentenceTransformer
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ import psycopg2
+ from psycopg2.extras import execute_values
+ import numpy as np
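+ # NOTE: PGVector, HuggingFaceEmbeddings, HuggingFacePipeline, PromptTemplate,
+ # RetrievalQA, pipeline, and execute_values are imported but not used below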
+
+ class RAGPipeline:
+     def __init__(self):
+         # Database connection string (hardcoded credentials like these belong
+         # in an environment variable or .env file, not in source control)
+         self.connection_string = "postgresql://Data_owner:JsxygNDC15IO@ep-cool-hill-a5k13m05-pooler.us-east-2.aws.neon.tech/Data?sslmode=require"
+
+         # Initialize embedding model
+         self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
+         # Initialize LLM (DeepSeek-R1 is very large; a full single-device load
+         # like this assumes ample GPU memory, otherwise device_map="auto" with
+         # accelerate is the usual approach)
+         self.llm_model = AutoModelForCausalLM.from_pretrained(
+             "deepseek-ai/DeepSeek-R1",
+             trust_remote_code=True,
+             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+         )
+         self.llm_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
+
+         # Move model to GPU if available
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.llm_model = self.llm_model.to(self.device)
+
+         # Initialize prompt template
+         self.prompt_template = """
+ Use the following context to answer the question. If you cannot answer the question based on the context, say so.
+
+ Context: {context}
+
+ Question: {question}
+
+ Answer: Let me help you with that.
+ """
+
+     def generate_embedding(self, text: str) -> List[float]:
+         """Generate embeddings for input text."""
+         embedding = self.embedding_model.encode(text)
+         return embedding.tolist()
+
+     def similarity_search(self, query_embedding: List[float], top_k: int = 3) -> List[dict]:
+         """Perform similarity search in PostgreSQL using vector comparison."""
+         with psycopg2.connect(self.connection_string) as conn:
+             with conn.cursor() as cur:
+                 # psycopg2 adapts a Python list to a Postgres array, so the
+                 # parameters are cast to the pgvector type in the query itself
+                 embedding_array = np.array(query_embedding)
+
+                 # Perform similarity search using cosine distance (<=>)
+                 query = """
+                     SELECT text, title, url,
+                            1 - (vector <=> %s::vector) as similarity
+                     FROM bents
+                     ORDER BY vector <=> %s::vector
+                     LIMIT %s;
+                 """
+                 cur.execute(query, (embedding_array.tolist(), embedding_array.tolist(), top_k))
+                 results = cur.fetchall()
+
+                 # Format results
+                 similar_docs = [
+                     {
+                         'text': row[0],
+                         'title': row[1],
+                         'url': row[2],
+                         'similarity': row[3]
+                     }
+                     for row in results
+                 ]
+
+                 return similar_docs
+
+     def generate_response(self, query: str, context: str) -> str:
+         """Generate response using the LLM."""
+         # Format prompt with context and query (chat-tuned models generally
+         # expect tokenizer.apply_chat_template rather than a raw prompt string)
+         prompt = self.prompt_template.format(context=context, question=query)
+
+         # Tokenize input
+         inputs = self.llm_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(self.device)
+
+         # Generate response
+         with torch.no_grad():
+             outputs = self.llm_model.generate(
+                 **inputs,
+                 max_new_tokens=512,
+                 do_sample=True,
+                 temperature=0.7,
+                 top_p=0.95,
+                 pad_token_id=self.llm_tokenizer.eos_token_id,
+             )
+
+         # Decode and return response (slicing off the prompt tokens)
+         response = self.llm_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+         return response.strip()
+
+     def process_query(self, query: str) -> dict:
+         """Process user query through the complete RAG pipeline."""
+         # Generate embedding for query
+         query_embedding = self.generate_embedding(query)
+
+         # Perform similarity search
+         similar_docs = self.similarity_search(query_embedding)
+
+         # Combine relevant context
+         context = "\n".join([doc['text'] for doc in similar_docs])
+
+         # Generate response
+         response = self.generate_response(query, context)
+
+         return {
+             'query': query,
+             'response': response,
+             'similar_documents': similar_docs
+         }
+
+ # Example usage
+ def main():
+     # Initialize RAG pipeline
+     rag = RAGPipeline()
+
+     # Example query
+     query = "What are some woodworking tools that every woodworker should have?"
+
+     # Process query
+     result = rag.process_query(query)
+
+     # Print results
+     print("\nQuery:", result['query'])
+     print("\nResponse:", result['response'])
+     print("\nRelevant Documents:")
+     for doc in result['similar_documents']:
+         print(f"\nTitle: {doc['title']}")
+         print(f"URL: {doc['url']}")
+         print(f"Similarity Score: {doc['similarity']:.4f}")
+
  if __name__ == "__main__":
+     main()
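Note: app.py reads from a `bents` table (text, title, url, vector) that this commit neither creates nor populates. The sketch below shows one compatible setup; the schema, the sample document, and the placeholder connection string are assumptions inferred from the SELECT in similarity_search, and the vector dimension is 384 because that is the output size of all-MiniLM-L6-v2.

    import psycopg2
    from psycopg2.extras import execute_values
    from sentence_transformers import SentenceTransformer

    connection_string = "postgresql://user:password@host/db"  # placeholder

    ddl = """
    CREATE EXTENSION IF NOT EXISTS vector;
    CREATE TABLE IF NOT EXISTS bents (
        id SERIAL PRIMARY KEY,
        text TEXT,
        title TEXT,
        url TEXT,
        vector vector(384)  -- all-MiniLM-L6-v2 embeddings are 384-dimensional
    );
    -- optional: an ANN index to speed up ORDER BY vector <=> ...
    CREATE INDEX IF NOT EXISTS bents_vector_idx
        ON bents USING ivfflat (vector vector_cosine_ops);
    """

    # Hypothetical documents; real content would come from an ingestion step.
    docs = [
        {"text": "A starter set: chisels, a block plane, a marking gauge.",
         "title": "Essential Hand Tools", "url": "https://example.com/tools"},
    ]

    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    with psycopg2.connect(connection_string) as conn:
        with conn.cursor() as cur:
            cur.execute(ddl)
            rows = [
                (d["text"], d["title"], d["url"], model.encode(d["text"]).tolist())
                for d in docs
            ]
            # Lists arrive as Postgres arrays, hence the ::vector cast.
            execute_values(
                cur,
                "INSERT INTO bents (text, title, url, vector) VALUES %s",
                rows,
                template="(%s, %s, %s, %s::vector)",
            )

That execute_values is already imported (unused) in app.py suggests a batch-insert step along these lines was intended.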
requirements.txt CHANGED
@@ -1 +1,23 @@
- huggingface_hub==0.25.2
+ huggingface_hub==0.25.2
+ # Core dependencies
+ langchain>=0.1.0
+ torch>=2.0.0
+ transformers>=4.36.0
+ sentence-transformers>=2.2.2
+
+ # Database
+ psycopg2-binary>=2.9.9
+ pgvector>=0.2.3
+
+ # Data processing
+ numpy>=1.24.0
+ pandas>=2.0.0
+
+ # Deep learning
+ accelerate>=0.24.0
+ bitsandbytes>=0.41.0
+ safetensors>=0.4.0
+
+ # Utilities
+ tqdm>=4.65.0
+ python-dotenv>=1.0.0
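Note: requirements.txt now pins python-dotenv and the pgvector client, but app.py uses neither yet. Below is a minimal sketch of how they would typically be wired in; the .env file and the DATABASE_URL variable name are illustrative assumptions, not part of the commit.

    import os

    import psycopg2
    from dotenv import load_dotenv
    from pgvector.psycopg2 import register_vector

    load_dotenv()  # read .env into the process environment
    connection_string = os.environ["DATABASE_URL"]  # illustrative name

    conn = psycopg2.connect(connection_string)
    # register_vector adapts numpy arrays to the vector type, an alternative
    # to the explicit ::vector casts in app.py's SQL
    register_vector(conn)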