darwincb committed on
Commit
3fcfd23
·
1 Parent(s): 4b392a8

Simplify to CPU version for initial testing

Browse files
Files changed (6) hide show
  1. MANUAL_UPLOAD.md +38 -0
  2. app-simple.py +186 -0
  3. app.py +93 -313
  4. push-to-hf.sh +35 -0
  5. requirements-simple.txt +5 -0
  6. requirements.txt +1 -6
MANUAL_UPLOAD.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Instrucciones para subir Jan v1 manualmente
2
+
3
+ Ya que el token no tiene permisos de escritura, puedes:
4
+
5
+ ## Opción 1: Copiar y pegar directamente en Hugging Face
6
+
7
+ 1. Ve a: https://huggingface.co/spaces/darwincb/jan-v1-research/tree/main
8
+ 2. Click en "Files and versions"
9
+ 3. Click en "app.py"
10
+ 4. Click en el ícono de lápiz (Edit)
11
+ 5. Borra todo y pega el contenido del archivo: `/Users/darwinborges/jan-v1-research/app.py`
12
+ 6. Commit message: "Add Jan v1 Research Assistant"
13
+ 7. Click "Commit changes to main"
14
+
15
+ 8. Vuelve a "Files and versions"
16
+ 9. Click en "+ Add file" > "Create a new file"
17
+ 10. Nombre: `requirements.txt`
18
+ 11. Pega el contenido del archivo: `/Users/darwinborges/jan-v1-research/requirements.txt`
19
+ 12. Click "Commit new file to main"
20
+
21
+ ## Opción 2: Obtener token con permisos de escritura
22
+
23
+ 1. Ve a: https://huggingface.co/settings/tokens
24
+ 2. Crea nuevo token con permisos "write"
25
+ 3. Ejecuta:
26
+ ```bash
27
+ cd /Users/darwinborges/jan-v1-research
28
+ huggingface-cli login --token TU_NUEVO_TOKEN
29
+ git push origin main
30
+ ```
31
+
32
+ ## IMPORTANTE después de subir:
33
+
34
+ ⚠️ Ve a Settings del Space y selecciona:
35
+ - Hardware: **GPU T4 medium**
36
+ - Sleep time: 1 hour (para ahorrar costos)
37
+
38
+ El modelo Jan v1 (4B params) NO funcionará sin GPU.
app-simple.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Jan v1 Research Assistant - Simplified Version for CPU
3
+ Works without GPU - returns a fixed analysis template (no model or API calls yet)
4
+ """
5
+
6
+ import gradio as gr
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ import json
10
+ from datetime import datetime
11
+
12
def scrape_url(url: str) -> str:
    """Fetch a web page and return its visible text, truncated to 4000 characters.

    Args:
        url: Address of the page to download.

    Returns:
        The page text with <script>/<style> content removed and all runs of
        whitespace collapsed to single spaces, cut to at most 4000 characters.
        This function never raises: on any failure (network error, timeout,
        HTTP error status, parse error) it returns a string starting with
        "Error scraping URL: " so the UI can display it directly.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Without this check a 404/500 error page would be scraped and
        # returned as if it were the requested article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Script and style bodies are not visible text; drop them before extraction.
        for tag in soup(["script", "style"]):
            tag.decompose()

        # split() with no argument discards empty pieces and every kind of
        # whitespace, so the join yields single-space separated text.
        text = ' '.join(soup.get_text().split())

        return text[:4000]  # Limit to 4000 chars
    except Exception as e:
        # Deliberately broad: this is the UI boundary and callers expect a
        # displayable message instead of an exception.
        return f"Error scraping URL: {str(e)}"
33
+
34
def research_assistant_simple(query: str, context: str = "") -> str:
    """Build a templated Markdown research report for a query.

    No model or remote inference API is called: apart from the echoed query
    and the context excerpt, the report is fixed text.

    Args:
        query: The research question, echoed under "## Query".
        context: Background text or a URL. A value starting with "http" is
            fetched with scrape_url() and the scraped text (or its error
            message) is used as the context instead.

    Returns:
        The report as a Markdown string. The context excerpt is limited to
        500 characters and ends with "..." only when text was actually cut.
    """
    # Tolerate None and pasted leading/trailing whitespace so URL detection works.
    context = (context or "").strip()
    if context.startswith('http'):
        context = scrape_url(context)

    if not context:
        summary = "No context provided"
    elif len(context) > 500:
        summary = context[:500] + "..."
    else:
        summary = context

    analysis = f"""
# Research Analysis

## Query
{query}

## Context Summary
{summary}

## Analysis Framework

### 1. Key Findings
- The context provides information about the topic
- Further analysis would require examining specific aspects
- Consider multiple perspectives on this subject

### 2. Critical Questions
- What are the primary assumptions?
- What evidence supports the main claims?
- What alternative viewpoints exist?

### 3. Research Directions
- Investigate primary sources
- Compare with related studies
- Examine historical context

### 4. Limitations
- Limited context provided
- Single source analysis
- Requires deeper investigation

### 5. Next Steps
- Gather additional sources
- Conduct comparative analysis
- Validate key claims

---
*Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
"""

    return analysis
85
+
86
# Static Markdown copy shown at the top of the page.
_HEADER_MD = """
# 🔬 Jan v1 Research Assistant (Simplified Version)

This is a CPU-compatible version with limited features.
For full Jan v1 (4B params) capabilities, GPU hardware is required.

### Available Features:
- 🌐 Web scraping and text extraction
- 📝 Structured research framework
- 🔍 Context analysis
"""

# Static Markdown copy shown in the "Instructions" tab.
_INSTRUCTIONS_MD = """
## 📋 How to Enable Full Jan v1

This Space is currently running in simplified mode without the actual Jan v1 model.

To enable full capabilities:

1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
2. **Select Hardware**: GPU T4 medium ($0.60/hour)
3. **Save changes**
4. **Wait 5 minutes** for rebuild

### Current Limitations (CPU mode):
- ❌ No actual Jan v1 model (4B params needs GPU)
- ❌ No AI-powered analysis
- ✅ Web scraping works
- ✅ Structured framework available

### With GPU Enabled:
- ✅ Full Jan v1 model (91.1% accuracy)
- ✅ AI-powered research analysis
- ✅ Entity extraction
- ✅ Multi-source comparison
- ✅ Research question generation

### Alternative Free Options:
- **Google Colab**: Run the full model for free
- **Kaggle Notebooks**: 30 hours free GPU/week
- **Local with Jan App**: If you have 8GB+ VRAM
"""

# Three-tab Gradio app: templated analysis, raw web scraper, and static instructions.
with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Tab("Research Analysis"):
        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column():
                query = gr.Textbox(
                    label="Research Query",
                    placeholder="What would you like to research?",
                    lines=2,
                )
                context = gr.Textbox(
                    label="Context (paste text or URL)",
                    placeholder="Paste article text or enter URL to analyze",
                    lines=5,
                )
                analyze_btn = gr.Button("🔍 Analyze", variant="primary")

            # Right column: the generated report.
            with gr.Column():
                output = gr.Textbox(label="Analysis Results", lines=15)

        analyze_btn.click(
            fn=research_assistant_simple,
            inputs=[query, context],
            outputs=output,
        )

    with gr.Tab("Web Scraper"):
        with gr.Row():
            with gr.Column():
                url_input = gr.Textbox(
                    label="URL to Scrape",
                    placeholder="https://example.com/article",
                    lines=1,
                )
                scrape_btn = gr.Button("🌐 Extract Text", variant="primary")

            with gr.Column():
                scrape_output = gr.Textbox(label="Extracted Text", lines=10)

        scrape_btn.click(fn=scrape_url, inputs=url_input, outputs=scrape_output)

    with gr.Tab("Instructions"):
        gr.Markdown(_INSTRUCTIONS_MD)

if __name__ == "__main__":
    # Listen on all interfaces at port 7860; no public share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
app.py CHANGED
@@ -1,37 +1,13 @@
1
  """
2
- Jan v1 Research Assistant for Hugging Face Spaces
3
- Optimized for research tasks and source analysis
4
  """
5
 
6
  import gradio as gr
7
- from transformers import AutoModelForCausalLM, AutoTokenizer
8
- import torch
9
  import requests
10
  from bs4 import BeautifulSoup
11
  import json
12
  from datetime import datetime
13
- from typing import List, Dict, Optional
14
- import hashlib
15
-
16
- # Initialize model
17
- print("🚀 Loading Jan v1 model...")
18
- model_name = "janhq/Jan-v1-4B"
19
- tokenizer = AutoTokenizer.from_pretrained(model_name)
20
- model = AutoModelForCausalLM.from_pretrained(
21
- model_name,
22
- torch_dtype=torch.bfloat16,
23
- device_map="auto",
24
- load_in_8bit=True # Reduce memory usage
25
- )
26
- print("✅ Model loaded successfully!")
27
-
28
- # Cache for responses
29
- response_cache = {}
30
-
31
- def get_cache_key(query: str, context: str) -> str:
32
- """Generate cache key for query+context"""
33
- combined = f"{query}|{context}"
34
- return hashlib.md5(combined.encode()).hexdigest()
35
 
36
  def scrape_url(url: str) -> str:
37
  """Scrape and extract text from URL"""
@@ -55,348 +31,152 @@ def scrape_url(url: str) -> str:
55
  except Exception as e:
56
  return f"Error scraping URL: {str(e)}"
57
 
58
- def research_assistant(
59
- query: str,
60
- context: str = "",
61
- temperature: float = 0.6,
62
- use_cache: bool = True,
63
- research_mode: str = "comprehensive"
64
- ) -> str:
65
  """
66
- Main research assistant function
67
  """
68
- # Check cache
69
- cache_key = get_cache_key(query, context)
70
- if use_cache and cache_key in response_cache:
71
- return "📌 [Cached] " + response_cache[cache_key]
72
 
73
- # Build prompt based on research mode
74
- if research_mode == "comprehensive":
75
- prompt = f"""You are an expert research analyst. Provide comprehensive analysis.
76
-
77
- Context/Sources:
78
- {context if context else "No specific context provided"}
79
-
80
- Research Query:
81
- {query}
82
-
83
- Provide your analysis with:
84
- 1. Key Findings & Insights
85
- 2. Supporting Evidence
86
- 3. Critical Analysis
87
- 4. Confidence Level
88
- 5. Suggested Follow-up Questions
89
- 6. Potential Limitations
90
-
91
- Analysis:"""
92
 
93
- elif research_mode == "fact_extraction":
94
- prompt = f"""Extract and verify factual information.
95
 
96
- Source Material:
97
- {context}
98
-
99
- Task: {query}
100
 
101
- Extract:
102
- - Factual claims with confidence scores (0-100%)
103
- - Key entities and relationships
104
- - Dates, numbers, and statistics
105
- - Contradictions or inconsistencies
106
 
107
- Facts:"""
108
-
109
- elif research_mode == "source_comparison":
110
- prompt = f"""Compare and contrast multiple sources.
111
 
112
- Sources:
113
- {context}
 
 
114
 
115
- Comparison Task: {query}
 
 
 
116
 
117
- Analyze:
118
- - Common themes
119
- - Contradictions
120
- - Unique perspectives
121
- - Reliability assessment
122
- - Synthesis
123
 
124
- Comparison:"""
125
-
126
- else: # quick_summary
127
- prompt = f"""Provide a quick summary.
128
 
129
- Content: {context}
130
- Task: {query}
 
 
131
 
132
- Summary:"""
133
-
134
- # Tokenize and generate
135
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
136
-
137
- with torch.no_grad():
138
- outputs = model.generate(
139
- **inputs,
140
- max_new_tokens=1024,
141
- temperature=temperature,
142
- top_p=0.95,
143
- top_k=20,
144
- do_sample=True,
145
- pad_token_id=tokenizer.eos_token_id
146
- )
147
-
148
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
149
- # Remove the prompt from response
150
- response = response.replace(prompt, "").strip()
151
-
152
- # Cache the response
153
- if use_cache:
154
- response_cache[cache_key] = response
155
-
156
- return response
157
-
158
- def process_multiple_sources(sources_text: str, query: str, temperature: float = 0.6) -> str:
159
- """Process multiple sources (URLs or text)"""
160
- sources = sources_text.strip().split('\n')
161
- combined_context = ""
162
- source_count = 0
163
-
164
- for source in sources[:5]: # Limit to 5 sources
165
- source = source.strip()
166
- if not source:
167
- continue
168
-
169
- source_count += 1
170
- if source.startswith('http'):
171
- content = scrape_url(source)
172
- combined_context += f"\n\n--- Source {source_count} (URL: {source[:50]}...) ---\n{content[:800]}"
173
- else:
174
- combined_context += f"\n\n--- Source {source_count} (Text) ---\n{source[:800]}"
175
-
176
- if not combined_context:
177
- return "No valid sources provided"
178
 
179
- return research_assistant(
180
- query=query,
181
- context=combined_context,
182
- temperature=temperature,
183
- research_mode="source_comparison"
184
- )
185
-
186
- def extract_entities(text: str) -> str:
187
- """Extract key entities from text"""
188
- return research_assistant(
189
- query="Extract all people, organizations, locations, dates, and key concepts",
190
- context=text,
191
- temperature=0.3,
192
- research_mode="fact_extraction"
193
- )
194
-
195
- def generate_research_questions(topic: str, context: str = "") -> str:
196
- """Generate research questions for a topic"""
197
- return research_assistant(
198
- query=f"Generate 10 specific, actionable research questions about: {topic}",
199
- context=context,
200
- temperature=0.7,
201
- research_mode="comprehensive"
202
- )
203
 
204
  # Create Gradio interface
205
- with gr.Blocks(title="Jan v1 Research Assistant", theme=gr.themes.Soft()) as demo:
206
  gr.Markdown("""
207
- # 🔬 Jan v1 Research Assistant
208
 
209
- Powered by Jan-v1-4B (91.1% accuracy) - Optimized for research and analysis
 
210
 
211
- ### Features:
212
- - 🌐 Web scraping and analysis
213
- - 📊 Multi-source comparison
214
- - 🔍 Entity extraction
215
- - ❓ Research question generation
216
- - 💾 Response caching
217
  """)
218
 
219
- with gr.Tab("Single Source Analysis"):
220
  with gr.Row():
221
  with gr.Column():
222
- single_query = gr.Textbox(
223
  label="Research Query",
224
  placeholder="What would you like to research?",
225
  lines=2
226
  )
227
- single_context = gr.Textbox(
228
  label="Context (paste text or URL)",
229
  placeholder="Paste article text or enter URL to analyze",
230
  lines=5
231
  )
232
- single_mode = gr.Radio(
233
- ["comprehensive", "fact_extraction", "quick_summary"],
234
- label="Analysis Mode",
235
- value="comprehensive"
236
- )
237
- single_temp = gr.Slider(0.1, 1.0, value=0.6, label="Temperature")
238
- single_cache = gr.Checkbox(label="Use cache", value=True)
239
- single_btn = gr.Button("🔍 Analyze", variant="primary")
240
 
241
  with gr.Column():
242
- single_output = gr.Textbox(
243
  label="Analysis Results",
244
  lines=15
245
  )
246
 
247
- def analyze_single(query, context, mode, temp, cache):
248
- # Check if context is URL
249
- if context.startswith('http'):
250
- context = scrape_url(context)
251
-
252
- return research_assistant(
253
- query=query,
254
- context=context,
255
- temperature=temp,
256
- use_cache=cache,
257
- research_mode=mode
258
- )
259
-
260
- single_btn.click(
261
- analyze_single,
262
- inputs=[single_query, single_context, single_mode, single_temp, single_cache],
263
- outputs=single_output
264
- )
265
-
266
- with gr.Tab("Multi-Source Comparison"):
267
- with gr.Row():
268
- with gr.Column():
269
- multi_sources = gr.Textbox(
270
- label="Sources (one per line, URLs or text)",
271
- placeholder="https://example.com/article1\nhttps://example.com/article2\nOr paste text directly",
272
- lines=6
273
- )
274
- multi_query = gr.Textbox(
275
- label="Comparison Query",
276
- placeholder="What aspects should I compare?",
277
- lines=2
278
- )
279
- multi_temp = gr.Slider(0.1, 1.0, value=0.6, label="Temperature")
280
- multi_btn = gr.Button("🔄 Compare Sources", variant="primary")
281
-
282
- with gr.Column():
283
- multi_output = gr.Textbox(
284
- label="Comparison Results",
285
- lines=15
286
- )
287
-
288
- multi_btn.click(
289
- process_multiple_sources,
290
- inputs=[multi_sources, multi_query, multi_temp],
291
- outputs=multi_output
292
  )
293
 
294
- with gr.Tab("Entity Extraction"):
295
  with gr.Row():
296
  with gr.Column():
297
- entity_input = gr.Textbox(
298
- label="Text or URL",
299
- placeholder="Paste text or URL to extract entities from",
300
- lines=8
301
  )
302
- entity_btn = gr.Button("🏷️ Extract Entities", variant="primary")
303
 
304
  with gr.Column():
305
- entity_output = gr.Textbox(
306
- label="Extracted Entities",
307
  lines=10
308
  )
309
 
310
- def extract_entities_wrapper(text):
311
- if text.startswith('http'):
312
- text = scrape_url(text)
313
- return extract_entities(text)
314
-
315
- entity_btn.click(
316
- extract_entities_wrapper,
317
- inputs=entity_input,
318
- outputs=entity_output
319
  )
320
 
321
- with gr.Tab("Research Question Generator"):
322
- with gr.Row():
323
- with gr.Column():
324
- rq_topic = gr.Textbox(
325
- label="Research Topic",
326
- placeholder="Enter your research topic",
327
- lines=2
328
- )
329
- rq_context = gr.Textbox(
330
- label="Additional Context (optional)",
331
- placeholder="Any specific focus areas or constraints",
332
- lines=4
333
- )
334
- rq_btn = gr.Button("💡 Generate Questions", variant="primary")
335
-
336
- with gr.Column():
337
- rq_output = gr.Textbox(
338
- label="Research Questions",
339
- lines=12
340
- )
341
-
342
- rq_btn.click(
343
- generate_research_questions,
344
- inputs=[rq_topic, rq_context],
345
- outputs=rq_output
346
- )
347
-
348
- with gr.Tab("API Integration"):
349
  gr.Markdown("""
350
- ### 🔌 Integrate with your Research App
351
 
352
- Once deployed, you can call this Space via API:
353
 
354
- ```javascript
355
- // JavaScript/TypeScript
356
- const response = await fetch('https://[your-username].hf.space/api/predict', {
357
- method: 'POST',
358
- headers: { 'Content-Type': 'application/json' },
359
- body: JSON.stringify({
360
- data: [
361
- "Your research query",
362
- "Context or URL",
363
- "comprehensive", // mode
364
- 0.6, // temperature
365
- true // use cache
366
- ]
367
- })
368
- });
369
- const result = await response.json();
370
- ```
371
 
372
- ```python
373
- # Python
374
- import requests
 
375
 
376
- response = requests.post(
377
- 'https://[your-username].hf.space/api/predict',
378
- json={
379
- "data": [
380
- "Your research query",
381
- "Context or URL",
382
- "comprehensive",
383
- 0.6,
384
- True
385
- ]
386
- }
387
- )
388
- result = response.json()
389
- ```
 
 
 
390
  """)
391
-
392
- gr.Markdown("""
393
- ---
394
- ### 💡 Tips:
395
- - Lower temperature (0.1-0.3) for factual extraction
396
- - Higher temperature (0.7-0.9) for creative research questions
397
- - Cache is cleared when Space restarts
398
- - URLs are automatically scraped and analyzed
399
- """)
400
 
401
  if __name__ == "__main__":
402
  demo.launch(
 
1
  """
2
+ Jan v1 Research Assistant - Simplified Version for CPU
3
+ Works without GPU - returns a fixed analysis template (no model or API calls yet)
4
  """
5
 
6
  import gradio as gr
 
 
7
  import requests
8
  from bs4 import BeautifulSoup
9
  import json
10
  from datetime import datetime
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def scrape_url(url: str) -> str:
13
  """Scrape and extract text from URL"""
 
31
  except Exception as e:
32
  return f"Error scraping URL: {str(e)}"
33
 
34
+ def research_assistant_simple(query: str, context: str = "") -> str:
 
 
 
 
 
 
35
  """
36
+ Simplified research assistant using Hugging Face Inference API
37
  """
38
+ # For now, return a structured analysis template
39
+ # This can be replaced with actual API calls to Jan v1 when available
 
 
40
 
41
+ if context.startswith('http'):
42
+ context = scrape_url(context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ analysis = f"""
45
+ # Research Analysis
46
 
47
+ ## Query
48
+ {query}
 
 
49
 
50
+ ## Context Summary
51
+ {context[:500] if context else "No context provided"}...
 
 
 
52
 
53
+ ## Analysis Framework
 
 
 
54
 
55
+ ### 1. Key Findings
56
+ - The context provides information about the topic
57
+ - Further analysis would require examining specific aspects
58
+ - Consider multiple perspectives on this subject
59
 
60
+ ### 2. Critical Questions
61
+ - What are the primary assumptions?
62
+ - What evidence supports the main claims?
63
+ - What alternative viewpoints exist?
64
 
65
+ ### 3. Research Directions
66
+ - Investigate primary sources
67
+ - Compare with related studies
68
+ - Examine historical context
 
 
69
 
70
+ ### 4. Limitations
71
+ - Limited context provided
72
+ - Single source analysis
73
+ - Requires deeper investigation
74
 
75
+ ### 5. Next Steps
76
+ - Gather additional sources
77
+ - Conduct comparative analysis
78
+ - Validate key claims
79
 
80
+ ---
81
+ *Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
82
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ return analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  # Create Gradio interface
87
+ with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
88
  gr.Markdown("""
89
+ # 🔬 Jan v1 Research Assistant (Simplified Version)
90
 
91
+ This is a CPU-compatible version with limited features.
92
+ For full Jan v1 (4B params) capabilities, GPU hardware is required.
93
 
94
+ ### Available Features:
95
+ - 🌐 Web scraping and text extraction
96
+ - 📝 Structured research framework
97
+ - 🔍 Context analysis
 
 
98
  """)
99
 
100
+ with gr.Tab("Research Analysis"):
101
  with gr.Row():
102
  with gr.Column():
103
+ query = gr.Textbox(
104
  label="Research Query",
105
  placeholder="What would you like to research?",
106
  lines=2
107
  )
108
+ context = gr.Textbox(
109
  label="Context (paste text or URL)",
110
  placeholder="Paste article text or enter URL to analyze",
111
  lines=5
112
  )
113
+ analyze_btn = gr.Button("🔍 Analyze", variant="primary")
 
 
 
 
 
 
 
114
 
115
  with gr.Column():
116
+ output = gr.Textbox(
117
  label="Analysis Results",
118
  lines=15
119
  )
120
 
121
+ analyze_btn.click(
122
+ research_assistant_simple,
123
+ inputs=[query, context],
124
+ outputs=output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  )
126
 
127
+ with gr.Tab("Web Scraper"):
128
  with gr.Row():
129
  with gr.Column():
130
+ url_input = gr.Textbox(
131
+ label="URL to Scrape",
132
+ placeholder="https://example.com/article",
133
+ lines=1
134
  )
135
+ scrape_btn = gr.Button("🌐 Extract Text", variant="primary")
136
 
137
  with gr.Column():
138
+ scrape_output = gr.Textbox(
139
+ label="Extracted Text",
140
  lines=10
141
  )
142
 
143
+ scrape_btn.click(
144
+ scrape_url,
145
+ inputs=url_input,
146
+ outputs=scrape_output
 
 
 
 
 
147
  )
148
 
149
+ with gr.Tab("Instructions"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  gr.Markdown("""
151
+ ## 📋 How to Enable Full Jan v1
152
 
153
+ This Space is currently running in simplified mode without the actual Jan v1 model.
154
 
155
+ To enable full capabilities:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ 1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
158
+ 2. **Select Hardware**: GPU T4 medium ($0.60/hour)
159
+ 3. **Save changes**
160
+ 4. **Wait 5 minutes** for rebuild
161
 
162
+ ### Current Limitations (CPU mode):
163
+ - ❌ No actual Jan v1 model (4B params needs GPU)
164
+ - ❌ No AI-powered analysis
165
+ - ✅ Web scraping works
166
+ - ✅ Structured framework available
167
+
168
+ ### With GPU Enabled:
169
+ - ✅ Full Jan v1 model (91.1% accuracy)
170
+ - ✅ AI-powered research analysis
171
+ - ✅ Entity extraction
172
+ - ✅ Multi-source comparison
173
+ - ✅ Research question generation
174
+
175
+ ### Alternative Free Options:
176
+ - **Google Colab**: Run the full model for free
177
+ - **Kaggle Notebooks**: 30 hours free GPU/week
178
+ - **Local with Jan App**: If you have 8GB+ VRAM
179
  """)
 
 
 
 
 
 
 
 
 
180
 
181
  if __name__ == "__main__":
182
  demo.launch(
push-to-hf.sh ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Push the current repository to the darwincb/jan-v1-research Hugging Face Space.
#
# Prompts for a Hugging Face access token (write permission required), puts it
# in the origin URL only for the duration of the push, and always restores the
# credential-free URL afterwards. Exits non-zero when the token is empty or
# the push fails.

CLEAN_URL="https://huggingface.co/spaces/darwincb/jan-v1-research"

echo "🚀 Pushing Jan v1 Research Assistant to Hugging Face..."
echo ""
echo "Necesitas tu token de Hugging Face."
echo "Puedes obtenerlo en: https://huggingface.co/settings/tokens"
echo ""
# -s keeps the token off the screen; -r stops backslashes from being mangled.
read -r -s -p "Pega tu token de Hugging Face aquí: " HF_TOKEN
echo ""

if [ -z "$HF_TOKEN" ]; then
    echo "❌ Token vacío. Abortando."
    exit 1
fi

# Strip the token from the remote URL on every exit path (success, failure,
# Ctrl-C) so it is never left behind in .git/config.
trap 'git remote set-url origin "$CLEAN_URL"' EXIT

# Put the token in the remote URL for this push only
git remote set-url origin "https://darwincb:${HF_TOKEN}@huggingface.co/spaces/darwincb/jan-v1-research"

# Push
echo "📤 Subiendo archivos..."
if git push origin main; then
    echo "✅ ¡Éxito! Jan v1 Research Assistant subido a Hugging Face"
    echo "🔗 Ve a: https://huggingface.co/spaces/darwincb/jan-v1-research"
    echo ""
    echo "⚠️ IMPORTANTE: Ve a Settings y selecciona 'GPU T4 medium' para que funcione"
else
    echo "❌ Error al hacer push. Verifica tu token."
    # Report the failure to the caller instead of exiting 0.
    exit 1
fi
requirements-simple.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Simplified requirements for CPU version
2
+ gradio==4.19.2
3
+ beautifulsoup4==4.12.3
4
+ requests==2.31.0
5
+ lxml==5.1.0
requirements.txt CHANGED
@@ -1,10 +1,5 @@
1
- # Jan v1 Research Assistant Requirements
2
- transformers==4.36.2
3
- torch==2.1.2
4
  gradio==4.19.2
5
- accelerate==0.25.0
6
- bitsandbytes==0.42.0
7
- sentencepiece==0.1.99
8
  beautifulsoup4==4.12.3
9
  requests==2.31.0
10
  lxml==5.1.0
 
1
+ # Simplified requirements for CPU version
 
 
2
  gradio==4.19.2
 
 
 
3
  beautifulsoup4==4.12.3
4
  requests==2.31.0
5
  lxml==5.1.0