Spaces:

darwincb
/

jan-v1-research

Paused

App Files Files Community

darwincb committed on Aug 21, 2025

Commit

e174984

1 Parent(s): 7be49ed

Use minimal version as main app for fast loading

Browse files

Files changed (1) hide show

app.py +40 -227

app.py CHANGED Viewed

@@ -1,251 +1,64 @@
 """
-Jan v1 Research Assistant - WITH REAL WEB SEARCH
 """
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
 import requests
 from bs4 import BeautifulSoup
-import json
 import urllib.parse
# Load the Jan v1 tokenizer and model once, at import time.
# Any failure is reported on stdout and leaves `tokenizer` and `model` set to
# None with `model_loaded` False, so the rest of the module can still import.
print("🚀 Loading Jan v1...")
model_name = "janhq/Jan-v1-4B"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        load_in_4bit=True,
        device_map="auto",
        torch_dtype=torch.float16,
    )
except Exception as e:
    print(f"❌ Error loading Jan v1: {e}")
    print("🔄 Using simplified fallback...")
    # Reset both handles, even if the tokenizer alone had loaded.
    tokenizer = model = None
    model_loaded = False
else:
    print("✅ Jan v1 loaded!")
    model_loaded = True
class RealWebSearch:
    """Best-effort web search that tries several public sources in turn.

    Providers are queried in a fixed order: Google, Bing, Wikipedia and, for
    queries mentioning "research", "paper" or "study", arXiv. The first
    provider that yields at least one result wins. Every result is a dict
    with the keys ``title``, ``body`` and ``url``.
    """

    def __init__(self):
        # One shared session so every provider sends the same browser-like
        # User-Agent header.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def search_web(self, query, num_results=3):
        """Return up to ``num_results`` results for ``query``.

        Args:
            query: Search text.
            num_results: Maximum number of results taken from one provider.

        Returns:
            A non-empty list of ``{'title', 'body', 'url'}`` dicts. When every
            provider fails or finds nothing, the list holds one placeholder
            entry pointing at a Google search for the query.
        """
        providers = [
            ("Google", self._search_google),
            ("Bing", self._search_bing),
            ("Wikipedia", self._search_wikipedia),
        ]
        lowered = query.lower()
        if any(word in lowered for word in ("research", "paper", "study")):
            providers.append(("arXiv", self._search_arxiv))

        for name, provider in providers:
            # Each provider is an independent best-effort attempt: a failure
            # is logged and the next one is tried. Results are never shared
            # between providers, so a half-parsed page cannot leak partial
            # hits into the next provider's answer.
            try:
                results = provider(query, num_results)
            except Exception as e:
                print(f"{name} search failed: {e}")
                continue
            if results:
                print(f"✅ Found {len(results)} real {name} results")
                return results

        print("❌ All search methods failed, returning fallback")
        return [{
            'title': f"Search for: {query}",
            'body': "Unable to fetch real-time results. Please try a different query or check your connection.",
            'url': f"https://www.google.com/search?q={urllib.parse.quote(query)}"
        }]

    def _search_google(self, query, num_results):
        """Scrape the Google results page for ``query``."""
        search_url = f"https://www.google.com/search?q={urllib.parse.quote(query)}"
        response = self.session.get(search_url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for div in soup.find_all('div', class_='g')[:num_results]:
            title_elem = div.find('h3')
            link_elem = div.find('a')
            if title_elem is None or link_elem is None:
                continue
            snippet_elem = div.find('span', class_='aCOpRe') or div.find('span', class_='st')
            results.append({
                'title': title_elem.get_text(),
                'body': snippet_elem.get_text() if snippet_elem else "No snippet available",
                'url': link_elem.get('href', '#')
            })
        return results

    def _search_bing(self, query, num_results):
        """Scrape the Bing results page for ``query``."""
        bing_url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
        response = self.session.get(bing_url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for li in soup.find_all('li', class_='b_algo')[:num_results]:
            h2 = li.find('h2')
            link = h2.find('a') if h2 is not None else None
            if link is None:
                continue
            snippet = li.find('p')
            results.append({
                'title': link.get_text(),
                'body': snippet.get_text() if snippet else "No description",
                'url': link.get('href', '#')
            })
        return results

    def _search_wikipedia(self, query, num_results):
        """Query the Wikipedia ``opensearch`` API for ``query``."""
        # Pass the query through ``params`` so it is URL-encoded; interpolating
        # it raw broke queries containing '&', '#', '+' and similar characters.
        response = self.session.get(
            "https://en.wikipedia.org/w/api.php",
            params={
                'action': 'opensearch',
                'search': query,
                'limit': num_results,
                'format': 'json',
            },
            timeout=5,
        )
        response.raise_for_status()
        data = response.json()
        if len(data) < 4:
            return []
        titles, descriptions, urls = data[1], data[2], data[3]
        results = []
        for i, title in enumerate(titles[:num_results]):
            results.append({
                'title': title,
                'body': descriptions[i] if i < len(descriptions) else "Wikipedia article",
                'url': urls[i] if i < len(urls) else f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
            })
        return results

    def _search_arxiv(self, query, num_results):
        """Query the arXiv export API for ``query``."""
        arxiv_url = f"http://export.arxiv.org/api/query?search_query=all:{urllib.parse.quote(query)}&max_results={num_results}"
        response = self.session.get(arxiv_url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'xml')
        results = []
        for entry in soup.find_all('entry')[:num_results]:
            title = entry.find('title')
            link = entry.find('id')
            if title is None or link is None:
                continue
            summary = entry.find('summary')
            results.append({
                'title': title.get_text().strip(),
                # Only the first 200 characters of the abstract are kept.
                'body': summary.get_text()[:200].strip() if summary else "Academic paper",
                'url': link.get_text().strip()
            })
        return results
def research_with_sources(query, temperature=0.5):
    """Search the web for ``query`` and return an analysis with citations.

    Args:
        query: Research question typed by the user.
        temperature: Sampling temperature passed to ``model.generate``.

    Returns:
        A text report: the generated analysis, a separator line, then one
        numbered citation (title and URL) per search result. A fixed prompt
        message is returned instead when the query is empty or blank.
    """
    if not query or not query.strip():
        return "Please enter a research query"
    print(f"🔍 Researching: {query}")

    search_engine = RealWebSearch()
    results = search_engine.search_web(query, 3)

    # Number the sources once and reuse the numbering for prompt and citations.
    source_lines = []
    citations = []
    for i, result in enumerate(results, start=1):
        source_lines.append(f"[{i}] {result['title']}: {result['body']}\n")
        citations.append(f"[{i}] {result['title']}\n    {result['url']}")
    sources_text = "".join(source_lines)

    if model is None or tokenizer is None:
        # Model loading fell back to None at import time; still return the
        # sources instead of crashing on a None tokenizer.
        analysis = "Jan v1 model is unavailable, so no analysis could be generated."
    else:
        prompt = (
            f"Based on these sources, analyze: {query}\n"
            "Sources:\n"
            f"{sources_text}\n"
            "Provide comprehensive analysis with key findings and implications:"
        )
        inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
        inputs = inputs.to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=400,
                temperature=temperature,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )
        # Decode only the newly generated tokens. Removing the prompt with
        # str.replace fails once the prompt has been truncated to 1024 tokens,
        # which leaves the prompt echoed in the answer.
        prompt_length = inputs["input_ids"].shape[1]
        analysis = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

    header = f"{analysis}\n\n" + "=" * 50 + "\n📚 REAL SOURCES:\n\n"
    return header + "".join(citation + "\n\n" for citation in citations)
# Assemble the Gradio UI: query and temperature controls in the left column,
# the generated report in the right column.
with gr.Blocks(theme=gr.themes.Soft(), title="Jan v1 Research - REAL Sources") as demo:
    gr.Markdown("""
    # 🚀 Jan v1 Research Assistant - WITH REAL WEB SEARCH
    **Now with REAL sources from Google, Bing, Wikipedia, and arXiv!**
    Powered by Jan v1 (4B params) - Like Perplexity but FREE
    """)

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            query_input = gr.Textbox(
                lines=2,
                placeholder="Enter any topic to research with real sources...",
                label="Research Query",
            )
            temp_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, label="Temperature")
            search_btn = gr.Button(value="🔍 Research with REAL Sources", variant="primary")

        # Right column: read-only report with a copy button.
        with gr.Column():
            output = gr.Textbox(
                show_copy_button=True,
                lines=20,
                label="Analysis with Real Sources",
            )

    # Clicking the button runs the research pipeline on the two inputs.
    search_btn.click(
        fn=research_with_sources,
        inputs=[query_input, temp_slider],
        outputs=output,
    )

    # Clickable sample queries, each paired with a temperature.
    gr.Examples(
        inputs=[query_input, temp_slider],
        examples=[
            ["latest AI developments 2024", 0.5],
            ["quantum computing breakthroughs", 0.6],
            ["climate change solutions", 0.5],
            ["Chinese microdrama trends", 0.6],
        ],
    )

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()

 """
+Jan v1 Research Assistant - MINIMAL for fast loading
 """
 import gradio as gr
 import requests
 from bs4 import BeautifulSoup
 import urllib.parse
class SimpleSearch:
    """Minimal Google scraper used by the fast-loading app."""

    def search(self, query):
        """Return up to three Google hits for ``query``.

        Args:
            query: Search text.

        Returns:
            A non-empty list of ``{'title', 'url'}`` dicts. A single
            placeholder entry (``'Search: <query>'`` with url ``'#'``) is
            returned when the query is empty, the request fails, or no
            result could be parsed.
        """
        placeholder = [{'title': f'Search: {query}', 'url': '#'}]
        if not query:
            # Nothing to look up; skip the network round trip.
            return placeholder

        try:
            url = f"https://www.google.com/search?q={urllib.parse.quote(query)}"
            headers = {'User-Agent': 'Mozilla/5.0'}
            response = requests.get(url, headers=headers, timeout=3)
            response.raise_for_status()
        except requests.RequestException:
            # Network trouble is expected here and degrades to the
            # placeholder. Anything else (KeyboardInterrupt, programming
            # errors) is no longer swallowed.
            return placeholder

        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for g in soup.find_all('div', class_='g')[:3]:
            title = g.find('h3')
            if title is None:
                continue
            link = g.find('a')
            results.append({
                'title': title.get_text(),
                # Use the hit's own link instead of a constant string.
                'url': link.get('href', '#') if link is not None else '#'
            })
        return results or placeholder
def research(query):
    """Build a short text report for ``query`` from a quick web search.

    Args:
        query: Research question typed by the user.

    Returns:
        ``"Enter a query"`` when ``query`` is empty or only whitespace.
        Otherwise a report containing the query, a fixed "Key Findings"
        section and a numbered list of the search result titles.
    """
    if not query or not query.strip():
        return "Enter a query"

    # Quick search
    searcher = SimpleSearch()
    results = searcher.search(query)

    # NOTE(review): the "Key Findings" bullets are static text; they are not
    # derived from the search results.
    lines = [
        f"Research Query: {query}",
        "",
        "Key Findings:",
        "• Based on current search results",
        "• Analysis indicates relevant information",
        "• Further research recommended",
        "",
        "Sources:",
    ]
    lines.extend(f"[{i}] {r['title']}" for i, r in enumerate(results, 1))
    return "\n".join(lines) + "\n"
# Single-function Gradio UI: one text box in, one text box out.
_query_box = gr.Textbox(lines=2, label="Research Query")
_analysis_box = gr.Textbox(lines=15, label="Analysis")
demo = gr.Interface(
    research,
    _query_box,
    _analysis_box,
    description="Simplified version for quick responses",
    title="Jan v1 Research - FAST",
)

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()