Michtiii committed on
Commit
45126e4
·
verified ·
1 Parent(s): 67f6c5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -136
app.py CHANGED
@@ -1,146 +1,194 @@
1
- # ==============================
2
- # 🚖 Uber Driver Recommendation System
3
- # Hugging Face Ready | Single File | Clean Version
4
- # ==============================
5
-
6
  import numpy as np
7
- import pandas as pd
8
  import gradio as gr
9
- from sklearn.model_selection import train_test_split
10
- from sklearn.ensemble import RandomForestRegressor
11
-
12
- # ------------------------------
13
- # 1. Generate Synthetic Data
14
- # ------------------------------
15
def generate_data(n=1000):
    """Build a synthetic ride dataset of n rows.

    The RNG seed is reset on every call, so repeated calls with the same n
    return identical frames. Columns: pickup_distance, trip_distance, fare,
    surge, rating.
    """
    np.random.seed(42)
    # Column order (and thus the order of RNG draws) is significant for
    # reproducibility — keep it stable.
    columns = {
        "pickup_distance": np.random.uniform(0.5, 10, n),
        "trip_distance": np.random.uniform(1, 20, n),
        "fare": np.random.uniform(50, 500, n),
        "surge": np.random.choice([1, 1.5, 2], n),
        "rating": np.random.uniform(3, 5, n),
    }
    return pd.DataFrame(columns)
25
-
26
- # ------------------------------
27
- # 2. Feature Engineering
28
- # ------------------------------
29
def feature_engineering(df):
    """Return a copy of df with two derived columns added.

    earning_per_km: fare per km of trip (+1 in the denominator avoids
    division by zero). efficiency: surge-weighted fare per total km driven
    (pickup + trip).
    """
    out = df.copy()  # never mutate the caller's frame
    out["earning_per_km"] = out["fare"] / (out["trip_distance"] + 1)
    total_distance = out["pickup_distance"] + out["trip_distance"]
    out["efficiency"] = out["fare"] * out["surge"] / total_distance
    return out
34
-
35
- # ------------------------------
36
- # 3. Train Model (Lightweight)
37
- # ------------------------------
38
def train_model():
    """Train a RandomForest ride scorer on synthetic data.

    Returns the fitted model. The model later scores frames produced by
    generate_rides() + feature_engineering(), so the training features must
    match those frames exactly: pickup_distance, trip_distance, fare, surge,
    earning_per_km, efficiency.
    """
    data = generate_data()
    data = feature_engineering(data)

    # NOTE(review): the target is the engineered "efficiency" column, which
    # also remains in X — the model essentially learns an identity mapping.
    # Kept as-is for demo purposes.
    data["reward"] = data["efficiency"]

    # Drop "rating" as well as the target: generate_rides() never produces a
    # rating column, and predicting with a different feature set than was
    # used in training raises a feature-mismatch error.
    X = data.drop(["reward", "rating"], axis=1)
    y = data["reward"]

    # random_state pins the split so retraining is reproducible.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=50, random_state=42)
    model.fit(X_train, y_train)

    return model

# Train once at import time; the Gradio callback reuses this global model.
model = train_model()
55
-
56
- # ------------------------------
57
- # 4. Generate Ride Options
58
- # ------------------------------
59
def generate_rides(base_pickup, base_trip, base_fare, base_surge):
    """Produce 5 candidate rides jittered around the given base values.

    Each value is clamped to its valid range (pickup >= 0.5 km, trip >= 1 km,
    fare >= 50, surge in [1, 2]). Returns a DataFrame with one row per ride.
    """
    candidates = []
    for _ in range(5):
        # Draw order matters for RNG reproducibility: pickup, trip, fare, surge.
        pickup = max(0.5, base_pickup + np.random.uniform(-1, 1))
        trip = max(1, base_trip + np.random.uniform(-2, 2))
        fare = max(50, base_fare + np.random.uniform(-50, 50))
        surge = min(2, max(1, base_surge + np.random.choice([0, 0.5])))
        candidates.append({
            "pickup_distance": pickup,
            "trip_distance": trip,
            "fare": fare,
            "surge": surge,
        })
    return pd.DataFrame(candidates)
69
-
70
- # ------------------------------
71
- # 5. Explainability
72
- # ------------------------------
73
def explain(row):
    """Return a comma-separated list of reasons a ride is attractive.

    Falls back to "Balanced Ride" when no individual criterion fires.
    Order of reasons is fixed: fare, pickup, surge, trip.
    """
    checks = [
        (row["fare"] > 300, "High Fare"),
        (row["pickup_distance"] < 3, "Close Pickup"),
        (row["surge"] > 1, "Surge Benefit"),
        (row["trip_distance"] > 10, "Long Trip"),
    ]
    reasons = [label for hit, label in checks if hit]
    return ", ".join(reasons) if reasons else "Balanced Ride"
86
-
87
- # ------------------------------
88
- # 6. Recommendation Engine
89
- # ------------------------------
90
def recommend(pickup, trip, fare, surge):
    """Score 5 jittered candidate rides and return the top 3 as formatted text.

    Uses the module-level trained `model` to predict a score per candidate.
    NOTE(review): `model` was trained on a frame that also contained a
    "rating" column, while these candidate frames do not — verify the
    feature sets actually match before relying on predict().
    """
    rides = generate_rides(pickup, trip, fare, surge)
    rides = feature_engineering(rides)

    scores = model.predict(rides)
    rides["score"] = scores

    # Keep only the 3 best-scoring candidates.
    rides = rides.sort_values(by="score", ascending=False).head(3)

    # Format Output
    output = ""
    for i, row in rides.iterrows():
        explanation = explain(row)

        output += (
            f"🚖 Ride Option\n"
            f"Score: {round(row['score'],2)}\n"
            f"Fare: ₹{round(row['fare'],2)}\n"
            f"Pickup Distance: {round(row['pickup_distance'],2)} km\n"
            f"Trip Distance: {round(row['trip_distance'],2)} km\n"
            f"Surge: {row['surge']}\n"
            f"Why: {explanation}\n"
            f"-----------------------------\n"
        )

    return output
116
-
117
- # ------------------------------
118
- # 7. Gradio UI (Clean & Minimal)
119
- # ------------------------------
120
- with gr.Blocks() as demo:
121
- gr.Markdown("## 🚖 Uber Driver Recommendation System")
122
- gr.Markdown("Smart ride selection based on AI scoring")
123
 
124
- with gr.Row():
125
- pickup = gr.Slider(0.5, 10, value=2, label="Pickup Distance (km)")
126
- trip = gr.Slider(1, 20, value=5, label="Trip Distance (km)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- with gr.Row():
129
- fare = gr.Slider(50, 500, value=200, label="Fare (₹)")
130
- surge = gr.Slider(1, 2, value=1, step=0.5, label="Surge")
131
 
132
- btn = gr.Button("Get Recommendation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- output = gr.Textbox(label="Top Ride Recommendations", lines=15)
 
135
 
136
- btn.click(
137
- fn=recommend,
138
- inputs=[pickup, trip, fare, surge],
139
- outputs=output
140
- )
141
 
142
- # ------------------------------
143
- # 8. Launch
144
- # ------------------------------
145
  if __name__ == "__main__":
146
  demo.launch()
 
1
+ import os
2
+ import faiss
 
 
 
3
  import numpy as np
 
4
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ from sentence_transformers import SentenceTransformer
7
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
+ from PyPDF2 import PdfReader
9
+
10
# -----------------------------
# CONFIG
# -----------------------------
DATA_PATH = "docs"                 # folder scanned for source documents
TOP_K = 3                          # number of chunks retrieved per query
LLM_MODEL = "google/flan-t5-base"  # seq2seq model used for generation

# -----------------------------
# LOAD MODELS
# -----------------------------
# Sentence-transformer used to embed both document chunks and queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Tokenizer + seq2seq LM pair for prompt-based answer generation.
# NOTE(review): loaded eagerly at import time — first startup downloads the
# weights; confirm that is acceptable for the deployment target.
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)
+
25
+ # -----------------------------
26
+ # FILE LOADER
27
+ # -----------------------------
28
def read_file(path):
    """Return the text content of a single file.

    .txt/.md are read as UTF-8; .pdf is extracted page-by-page via PyPDF2.
    Extension matching is case-insensitive, so ".TXT"/".PDF" work too.
    Any other extension yields "".
    """
    lower = path.lower()
    if lower.endswith((".txt", ".md")):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    if lower.endswith(".pdf"):
        reader = PdfReader(path)
        # join avoids quadratic += concatenation; extract_text() may return
        # None for pages with no extractable text.
        return "".join(page.extract_text() or "" for page in reader.pages)
    return ""
39
+
40
def load_docs(folder):
    """Read every supported file directly inside `folder`.

    Returns a list of non-empty text blobs. A missing (or non-directory)
    folder yields []. Individual unreadable files (corrupt PDF, bad
    encoding, ...) are skipped rather than aborting the whole load.
    """
    texts = []
    if not os.path.isdir(folder):
        return texts

    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        if not os.path.isfile(path):
            continue  # skip sub-directories and other non-file entries
        try:
            txt = read_file(path)
        except Exception:
            # best-effort: one bad file must not break document loading
            continue
        if txt.strip():
            texts.append(txt)
    return texts
54
+
55
+ # -----------------------------
56
+ # CHUNKING
57
+ # -----------------------------
58
+ def chunk_text(text, size=300, overlap=50):
59
+ words = text.split()
60
+ return [" ".join(words[i:i + size]) for i in range(0, len(words), size - overlap)]
61
+
62
+ # -----------------------------
63
+ # BUILD VECTOR DB
64
+ # -----------------------------
65
def build_index(docs):
    """Chunk all documents, embed the chunks, and build a flat L2 faiss index.

    Returns (index, chunks); (None, []) when there is nothing to index.
    """
    all_chunks = []
    for text in docs:
        all_chunks.extend(chunk_text(text))

    if not all_chunks:
        return None, []

    vectors = np.array(embedding_model.encode(all_chunks))
    vector_index = faiss.IndexFlatL2(vectors.shape[1])
    vector_index.add(vectors)
    return vector_index, all_chunks
80
+
81
+ # -----------------------------
82
+ # RETRIEVE
83
+ # -----------------------------
84
def retrieve(query, index, chunks, k=TOP_K):
    """Return up to k chunks most similar (L2 distance) to `query`.

    faiss pads result ids with -1 when the index holds fewer than k vectors;
    previously chunks[-1] would silently return the wrong chunk, so invalid
    ids are filtered out here.
    """
    q_embed = embedding_model.encode([query])
    _, ids = index.search(np.array(q_embed), k)
    return [chunks[i] for i in ids[0] if 0 <= i < len(chunks)]
88
+
89
+ # -----------------------------
90
+ # GENERATE ANSWER (WITH MEMORY)
91
+ # -----------------------------
92
def generate_answer(query, contexts, history):
    """Generate an answer with the seq2seq LM from retrieved context plus chat memory.

    contexts: list of retrieved chunk strings, joined into one context block.
    history: list of {"role", "content"} dicts; only the last 6 entries are
    folded into the prompt to bound its length.
    """
    context = "\n\n".join(contexts)

    # Flatten the recent conversation into "role: content" lines.
    history_text = ""
    for h in history[-6:]:
        history_text += f"{h['role']}: {h['content']}\n"

    prompt = f"""
You are an expert AI/ML assistant.

Conversation:
{history_text}

Context:
{context}

Question:
{query}

Answer clearly:
"""

    # truncation=True keeps the prompt within the model's maximum input size.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = llm_model.generate(**inputs, max_new_tokens=200)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
118
+
119
+ # -----------------------------
120
+ # TOOL RECOMMENDER (AGENT MODE)
121
+ # -----------------------------
122
def tool_recommender(query):
    """Ask the LM to suggest AI/ML tools for `query` (the "agent mode" path).

    Uses a fixed instruction prompt instead of retrieved document context;
    returns the decoded model output as plain text.
    """
    prompt = f"""
You are an AI architect.

Suggest best AI/ML tools for:

{query}

Give:
- Tools
- Why
- Use cases
"""

    # Shorter generation budget than the RAG answer path (150 vs 200 tokens).
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = llm_model.generate(**inputs, max_new_tokens=150)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
140
+
141
# -----------------------------
# INIT
# -----------------------------
# Load the corpus and build the vector index once at startup.
# `index` is None when the docs folder is missing or empty — rag_chat
# checks for that case before retrieving.
docs = load_docs(DATA_PATH)
index, chunks = build_index(docs)
146
+
147
+ # -----------------------------
148
+ # MAIN CHAT PIPELINE
149
+ # -----------------------------
150
def rag_chat(query, history):
    """Answer `query`, append both turns to `history`, and return it.

    history is a list of {"role", "content"} dicts (Gradio "messages"
    format); a None history starts a fresh conversation.
    """
    history = history or []

    # Without an index there is nothing to retrieve from.
    if index is None:
        history.append({"role": "user", "content": query})
        history.append({"role": "assistant", "content": "No documents found"})
        return history

    retrieved = retrieve(query, index, chunks)

    # Crude agent routing: tool-selection questions skip the RAG answerer.
    wants_tools = "recommend" in query.lower() or "best tool" in query.lower()
    if wants_tools:
        answer = tool_recommender(query)
    else:
        answer = generate_answer(query, retrieved, history)

    history.extend([
        {"role": "user", "content": query},
        {"role": "assistant", "content": answer},
    ])
    return history
170
+
171
+ # -----------------------------
172
+ # UI HANDLER
173
+ # -----------------------------
174
def respond(message, chat_history):
    """Gradio callback: run the RAG pipeline and clear the input textbox.

    Returns ("", updated_history) so the textbox empties while the chatbot
    component receives the new message list.
    """
    updated_history = rag_chat(message, chat_history or [])
    return "", updated_history
178
+
179
# -----------------------------
# UI (CHATGPT STYLE)
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## AI/ML Conversational RAG + Tool Recommender")

    # type="messages" matches the {"role", "content"} dicts built by rag_chat.
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="Ask about AI tools, ML, companies...")

    # Pressing Enter clears the textbox and refreshes the chat history.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# -----------------------------
# RUN
# -----------------------------
if __name__ == "__main__":
    demo.launch()