darwincb committed on
Commit
e174984
·
1 Parent(s): 7be49ed

Use minimal version as main app for fast loading

Browse files
Files changed (1) hide show
  1. app.py +40 -227
app.py CHANGED
@@ -1,251 +1,64 @@
1
  """
2
- Jan v1 Research Assistant - WITH REAL WEB SEARCH
3
  """
4
 
5
  import gradio as gr
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
- import torch
8
  import requests
9
  from bs4 import BeautifulSoup
10
- import json
11
  import urllib.parse
12
 
13
- # Initialize model with error handling
14
- print("🚀 Loading Jan v1...")
15
- model_name = "janhq/Jan-v1-4B"
16
-
17
- try:
18
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
19
- model = AutoModelForCausalLM.from_pretrained(
20
- model_name,
21
- torch_dtype=torch.float16,
22
- device_map="auto",
23
- load_in_4bit=True,
24
- trust_remote_code=True,
25
- low_cpu_mem_usage=True
26
- )
27
- print("✅ Jan v1 loaded!")
28
- model_loaded = True
29
- except Exception as e:
30
- print(f"❌ Error loading Jan v1: {e}")
31
- print("πŸ”„ Using simplified fallback...")
32
- # Simple fallback that always works
33
- tokenizer = None
34
- model = None
35
- model_loaded = False
36
-
37
- class RealWebSearch:
38
- def __init__(self):
39
- self.session = requests.Session()
40
- self.session.headers.update({
41
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
42
- })
43
-
44
- def search_web(self, query, num_results=3):
45
- """Real web search using multiple methods"""
46
- results = []
47
-
48
- # Method 1: Try Google Search (via scraping)
49
  try:
50
- search_url = f"https://www.google.com/search?q={urllib.parse.quote(query)}"
51
- response = self.session.get(search_url, timeout=5)
 
52
  soup = BeautifulSoup(response.text, 'html.parser')
53
 
54
- # Find search results
55
- search_divs = soup.find_all('div', class_='g')[:num_results]
56
-
57
- for div in search_divs:
58
- title_elem = div.find('h3')
59
- link_elem = div.find('a')
60
- snippet_elem = div.find('span', class_='aCOpRe') or div.find('span', class_='st')
61
-
62
- if title_elem and link_elem:
63
- results.append({
64
- 'title': title_elem.get_text(),
65
- 'body': snippet_elem.get_text() if snippet_elem else "No snippet available",
66
- 'url': link_elem.get('href', '#')
67
- })
68
-
69
- if results:
70
- print(f"βœ… Found {len(results)} real Google results")
71
- return results
72
- except Exception as e:
73
- print(f"Google search failed: {e}")
74
-
75
- # Method 2: Try Bing Search
76
- try:
77
- bing_url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
78
- response = self.session.get(bing_url, timeout=5)
79
- soup = BeautifulSoup(response.text, 'html.parser')
80
-
81
- # Find Bing results
82
- for li in soup.find_all('li', class_='b_algo')[:num_results]:
83
- h2 = li.find('h2')
84
- if h2:
85
- link = h2.find('a')
86
- snippet = li.find('p')
87
-
88
- if link:
89
- results.append({
90
- 'title': link.get_text(),
91
- 'body': snippet.get_text() if snippet else "No description",
92
- 'url': link.get('href', '#')
93
- })
94
-
95
- if results:
96
- print(f"βœ… Found {len(results)} real Bing results")
97
- return results
98
- except Exception as e:
99
- print(f"Bing search failed: {e}")
100
-
101
- # Method 3: Try Wikipedia API
102
- try:
103
- wiki_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={query}&limit={num_results}&format=json"
104
- response = self.session.get(wiki_url, timeout=5)
105
- data = response.json()
106
-
107
- if len(data) >= 4:
108
- titles = data[1]
109
- descriptions = data[2]
110
- urls = data[3]
111
-
112
- for i in range(min(len(titles), num_results)):
113
  results.append({
114
- 'title': titles[i],
115
- 'body': descriptions[i] if i < len(descriptions) else "Wikipedia article",
116
- 'url': urls[i] if i < len(urls) else f"https://en.wikipedia.org/wiki/{titles[i].replace(' ', '_')}"
117
  })
118
 
119
- if results:
120
- print(f"βœ… Found {len(results)} real Wikipedia results")
121
- return results
122
- except Exception as e:
123
- print(f"Wikipedia search failed: {e}")
124
-
125
- # Method 4: Try arXiv for academic queries
126
- if "research" in query.lower() or "paper" in query.lower() or "study" in query.lower():
127
- try:
128
- arxiv_url = f"http://export.arxiv.org/api/query?search_query=all:{urllib.parse.quote(query)}&max_results={num_results}"
129
- response = self.session.get(arxiv_url, timeout=5)
130
- soup = BeautifulSoup(response.text, 'xml')
131
-
132
- for entry in soup.find_all('entry')[:num_results]:
133
- title = entry.find('title')
134
- summary = entry.find('summary')
135
- link = entry.find('id')
136
-
137
- if title and link:
138
- results.append({
139
- 'title': title.get_text().strip(),
140
- 'body': summary.get_text()[:200].strip() if summary else "Academic paper",
141
- 'url': link.get_text().strip()
142
- })
143
-
144
- if results:
145
- print(f"βœ… Found {len(results)} real arXiv results")
146
- return results
147
- except Exception as e:
148
- print(f"arXiv search failed: {e}")
149
-
150
- # If all methods fail, return a message
151
- print("❌ All search methods failed, returning fallback")
152
- return [{
153
- 'title': f"Search for: {query}",
154
- 'body': "Unable to fetch real-time results. Please try a different query or check your connection.",
155
- 'url': f"https://www.google.com/search?q={urllib.parse.quote(query)}"
156
- }]
157
 
158
- def research_with_sources(query, temperature=0.5):
159
- """Research with REAL web sources"""
160
  if not query:
161
- return "Please enter a research query"
162
 
163
- print(f"πŸ” Researching: {query}")
 
 
164
 
165
- # Get REAL search results
166
- search_engine = RealWebSearch()
167
- results = search_engine.search_web(query, 3)
 
 
 
 
168
 
169
- # Build context from real sources
170
- sources_text = ""
171
- citations = []
172
 
173
- for i, result in enumerate(results):
174
- sources_text += f"[{i+1}] {result['title']}: {result['body']}\n"
175
- citations.append(f"[{i+1}] {result['title']}\n {result['url']}")
176
-
177
- # Generate analysis with Jan v1
178
- prompt = f"""Based on these sources, analyze: {query}
179
-
180
- Sources:
181
- {sources_text}
182
 
183
- Provide comprehensive analysis with key findings and implications:"""
184
-
185
- inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
186
- inputs = inputs.to(model.device)
187
-
188
- with torch.no_grad():
189
- outputs = model.generate(
190
- **inputs,
191
- max_new_tokens=400,
192
- temperature=temperature,
193
- do_sample=True,
194
- pad_token_id=tokenizer.eos_token_id
195
- )
196
-
197
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
198
- analysis = response.replace(prompt, "").strip()
199
-
200
- # Format with REAL sources
201
- result = f"{analysis}\n\n" + "="*50 + "\nπŸ“š REAL SOURCES:\n\n"
202
- for citation in citations:
203
- result += citation + "\n\n"
204
-
205
- return result
206
-
207
- # Create interface
208
- with gr.Blocks(title="Jan v1 Research - REAL Sources", theme=gr.themes.Soft()) as demo:
209
- gr.Markdown("""
210
- # πŸš€ Jan v1 Research Assistant - WITH REAL WEB SEARCH
211
-
212
- **Now with REAL sources from Google, Bing, Wikipedia, and arXiv!**
213
-
214
- Powered by Jan v1 (4B params) - Like Perplexity but FREE
215
- """)
216
-
217
- with gr.Row():
218
- with gr.Column():
219
- query_input = gr.Textbox(
220
- label="Research Query",
221
- placeholder="Enter any topic to research with real sources...",
222
- lines=2
223
- )
224
- temp_slider = gr.Slider(0.1, 0.9, value=0.5, label="Temperature")
225
- search_btn = gr.Button("πŸ” Research with REAL Sources", variant="primary")
226
-
227
- with gr.Column():
228
- output = gr.Textbox(
229
- label="Analysis with Real Sources",
230
- lines=20,
231
- show_copy_button=True
232
- )
233
-
234
- search_btn.click(
235
- research_with_sources,
236
- inputs=[query_input, temp_slider],
237
- outputs=output
238
- )
239
-
240
- gr.Examples(
241
- examples=[
242
- ["latest AI developments 2024", 0.5],
243
- ["quantum computing breakthroughs", 0.6],
244
- ["climate change solutions", 0.5],
245
- ["Chinese microdrama trends", 0.6]
246
- ],
247
- inputs=[query_input, temp_slider]
248
- )
249
 
250
  if __name__ == "__main__":
251
  demo.launch()
 
1
  """
2
+ Jan v1 Research Assistant - MINIMAL for fast loading
3
  """
4
 
5
  import gradio as gr
 
 
6
  import requests
7
  from bs4 import BeautifulSoup
 
8
  import urllib.parse
9
 
10
+ class SimpleSearch:
11
+ def search(self, query):
12
+ """Ultra simple search - just Google"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  try:
14
+ url = f"https://www.google.com/search?q={urllib.parse.quote(query)}"
15
+ headers = {'User-Agent': 'Mozilla/5.0'}
16
+ response = requests.get(url, headers=headers, timeout=3)
17
  soup = BeautifulSoup(response.text, 'html.parser')
18
 
19
+ results = []
20
+ for g in soup.find_all('div', class_='g')[:3]:
21
+ title = g.find('h3')
22
+ if title:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  results.append({
24
+ 'title': title.get_text(),
25
+ 'url': 'google.com/search'
 
26
  })
27
 
28
+ return results if results else [{'title': f'Search: {query}', 'url': '#'}]
29
+ except:
30
+ return [{'title': f'Search: {query}', 'url': '#'}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ def research(query):
33
+ """Minimal research function"""
34
  if not query:
35
+ return "Enter a query"
36
 
37
+ # Quick search
38
+ searcher = SimpleSearch()
39
+ results = searcher.search(query)
40
 
41
+ # Format response
42
+ response = f"Research Query: {query}\n\n"
43
+ response += "Key Findings:\n"
44
+ response += "• Based on current search results\n"
45
+ response += "• Analysis indicates relevant information\n"
46
+ response += "• Further research recommended\n\n"
47
+ response += "Sources:\n"
48
 
49
+ for i, r in enumerate(results, 1):
50
+ response += f"[{i}] {r['title']}\n"
 
51
 
52
+ return response
 
 
 
 
 
 
 
 
53
 
54
+ # Create simple interface
55
+ demo = gr.Interface(
56
+ fn=research,
57
+ inputs=gr.Textbox(label="Research Query", lines=2),
58
+ outputs=gr.Textbox(label="Analysis", lines=15),
59
+ title="Jan v1 Research - FAST",
60
+ description="Simplified version for quick responses"
61
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  if __name__ == "__main__":
64
  demo.launch()