Spaces:
Runtime error
Runtime error
| import { Express } from "express"; | |
| import { createServer, Server } from "http"; | |
| import { z } from "zod"; | |
| import { storage } from "./storage"; | |
| import { searchRequestSchema } from "@shared/schema"; | |
| import OpenAI from "openai"; | |
/**
 * Subset of a repository object from the GitHub repository-search response
 * that this module reads when building search documents.
 */
interface GitHubRepo {
  id: number;
  /** Short repository name, without the owner prefix. */
  name: string;
  /** Repository name in `owner/name` form. */
  full_name: string;
  /** `null` when the repository has no description. */
  description: string | null;
  html_url: string;
  stargazers_count: number;
  /** `null` when GitHub reports no primary language for the repository. */
  language: string | null;
  topics: string[];
  /** Timestamp string exactly as returned by the API. */
  created_at: string;
  /** Timestamp string exactly as returned by the API. */
  updated_at: string;
}
// Module-level OpenAI client, created once when this module is loaded; the API
// key is taken from the OPENAI_API_KEY environment variable.
// NOTE(review): presumably the SDK constructor rejects a missing key, which
// would fail at import time rather than per request — confirm the variable is
// set in every deployment.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Searches the web through the DuckDuckGo Instant Answer API.
 *
 * The instant answer (abstract) comes first when the response carries one,
 * followed by related topics, until `maxResults` entries have been collected.
 * Related-topic entries without both a text and a URL are skipped and do not
 * count against the limit.
 *
 * This is a best-effort lookup: a non-OK HTTP status or any thrown error
 * (network failure, invalid JSON) is logged and produces an empty array.
 *
 * @param query - Free-text search terms; URL-encoded before being sent.
 * @param maxResults - Upper bound on the number of entries returned. Values
 *   below 1 (or NaN) return an empty array without issuing a request.
 * @returns Entries shaped `{ title, content, url, source, type }`, where `type`
 *   is `'instant_answer'` or `'related_topic'`.
 */
async function searchWeb(query: string, maxResults: number = 10): Promise<any[]> {
  // A non-positive budget must yield nothing. Feeding it into `slice(0, n)` as a
  // negative end index would instead return every topic except the last few.
  const budget = Math.floor(maxResults);
  if (!(budget >= 1)) {
    return [];
  }

  try {
    const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
    const response = await fetch(searchUrl);
    if (!response.ok) {
      console.error('DuckDuckGo API error:', response.status);
      return [];
    }

    const data = await response.json();
    const results: Array<{ title: string; content: string; url: string; source: string; type: string }> = [];

    // Instant answer: only usable when it has both a body and a link.
    if (data.AbstractText && data.AbstractURL) {
      results.push({
        title: data.Heading || query,
        content: data.AbstractText,
        url: data.AbstractURL,
        source: data.AbstractSource || 'Web Search',
        type: 'instant_answer'
      });
    }

    // Related topics: walk the whole list and stop once the budget is filled, so
    // unusable entries never consume a result slot.
    const relatedTopics: unknown[] = Array.isArray(data.RelatedTopics) ? data.RelatedTopics : [];
    for (const entry of relatedTopics) {
      if (results.length >= budget) {
        break;
      }
      const { Text: text, FirstURL: firstUrl } = (entry ?? {}) as { Text?: unknown; FirstURL?: unknown };
      if (typeof text !== 'string' || typeof firstUrl !== 'string' || !text || !firstUrl) {
        continue;
      }
      results.push({
        // The part before the first " - " serves as the title; fall back to a
        // 60-character prefix if that part is empty.
        title: text.split(' - ')[0] || text.substring(0, 60),
        content: text,
        url: firstUrl,
        source: 'DuckDuckGo',
        type: 'related_topic'
      });
    }

    return results;
  } catch (error) {
    console.error('Web search error:', error);
    return [];
  }
}
/**
 * Converts one web search hit into the document shape returned by the search
 * endpoint.
 *
 * @param result - Hit carrying `title`, `content`, `url`, `source` and `type`.
 * @param rank - Zero-based position; drives `relevanceScore` (0.15 lower per
 *   step, never below 0.4) and the one-based `rank` field.
 * @param query - Search text, echoed back as `searchQuery`.
 * @returns Document object with `sourceType` set to `'web'`.
 */
function transformWebResultToDocument(result: any, rank: number, query: string): any {
  const body = result.content;

  // Snippets are capped at 200 characters, with an ellipsis marking the cut.
  let snippet = body;
  if (body.length > 200) {
    snippet = body.substring(0, 200) + '...';
  }

  const id = `web_${Date.now()}_${rank}`;
  const metadata = {
    search_type: result.type,
    fetched_at: new Date().toISOString()
  };
  const relevanceScore = Math.max(0.4, 1 - (rank * 0.15));
  // Randomised placeholder in the range [0.1, 0.3), not a measured duration.
  const retrievalTime = Math.random() * 0.2 + 0.1;
  // Token estimate: four characters per token.
  const tokenCount = Math.floor(body.length / 4);

  return {
    id,
    title: result.title,
    content: body,
    snippet,
    source: result.source,
    sourceType: 'web',
    url: result.url,
    metadata,
    relevanceScore,
    rank: rank + 1,
    searchQuery: query,
    retrievalTime,
    tokenCount
  };
}
/**
 * Searches GitHub repositories for `query`, most-starred first.
 *
 * Query handling:
 * - "<topic> by <author>" searches `<topic> user:<author>` (author with
 *   whitespace removed). If that finds nothing, a series of looser fallback
 *   queries is tried in order and the first non-empty response wins; hits that
 *   mention the author in owner login, full name or description are preferred.
 * - Queries mentioning "data structures" or "algorithm" get those phrases added
 *   and are restricted to Python.
 * - Everything else is restricted to Python.
 *
 * The `Authorization` header is only sent when `GITHUB_TOKEN` is set.
 * This is a best-effort lookup: a non-OK status on the primary search or any
 * thrown error is logged and produces an empty array.
 *
 * @param query - Free-text search terms.
 * @param maxResults - Page size requested from GitHub (`per_page`).
 * @returns Raw repository objects from the API response's `items` array.
 */
async function searchGitHubRepos(query: string, maxResults: number = 10): Promise<any[]> {
  // `\b` keeps the pattern from firing inside words: without it "ruby on rails"
  // parses as author "on rails" with topic "ru".
  const byAuthorPattern = /\bby\s+([a-zA-Z0-9_-]+(?:\s+[a-zA-Z0-9_-]+)*)/i;

  const headers: Record<string, string> = {
    'Accept': 'application/vnd.github.v3+json',
    'User-Agent': 'Knowledge-Base-Browser'
  };
  // Never send "token undefined": omit the header when no token is configured.
  const token = process.env.GITHUB_TOKEN;
  if (token) {
    headers['Authorization'] = `token ${token}`;
  }

  // Single place that issues a repository search request.
  const runSearch = (searchTerms: string) =>
    fetch(
      `https://api.github.com/search/repositories?q=${encodeURIComponent(searchTerms)}&sort=stars&order=desc&per_page=${maxResults}`,
      { headers }
    );

  try {
    const lowerQuery = query.toLowerCase();
    const authorName = query.match(byAuthorPattern)?.[1]?.trim();
    const topicPart = authorName ? query.replace(byAuthorPattern, '').trim() : '';
    // The `user:` qualifier is built from the author name with whitespace removed.
    const authorLogin = authorName ? authorName.replace(/\s+/g, '') : '';

    let searchQuery: string;
    if (authorName) {
      searchQuery = `${topicPart} user:${authorLogin}`.trim();
    } else if (lowerQuery.includes('data structures') || lowerQuery.includes('algorithm')) {
      searchQuery = `${query} "data structures" OR "algorithms" language:python`;
    } else {
      searchQuery = `${query} language:python`;
    }

    console.log('GitHub search query:', searchQuery);
    const response = await runSearch(searchQuery);
    if (!response.ok) {
      console.error('GitHub API error:', response.status, response.statusText);
      return [];
    }

    const data = await response.json();
    const primaryItems: any[] = Array.isArray(data.items) ? data.items : [];
    if (primaryItems.length > 0 || !authorName) {
      return primaryItems;
    }

    // The author-scoped search found nothing: widen step by step. Empty and
    // duplicate queries are dropped so no request is wasted, and the `user:`
    // form is not retried because it is the query that just came back empty.
    const fallbackQueries = Array.from(
      new Set(
        [
          `"${authorName}" ${topicPart}`,
          `${topicPart} "${authorName}"`,
          `${authorName} ${topicPart}`,
          topicPart
        ]
          .map((candidate) => candidate.trim())
          .filter((candidate) => candidate.length > 0)
      )
    );

    // Match on both the name as typed and its whitespace-free form.
    const needles = Array.from(new Set([authorName.toLowerCase(), authorLogin.toLowerCase()]));
    const mentionsAuthor = (repo: any): boolean =>
      [repo?.owner?.login, repo?.full_name, repo?.description].some(
        (field) =>
          typeof field === 'string' &&
          needles.some((needle) => field.toLowerCase().includes(needle))
      );

    for (const fallbackQuery of fallbackQueries) {
      console.log('Trying fallback query:', fallbackQuery);
      const fallbackResponse = await runSearch(fallbackQuery);
      if (!fallbackResponse.ok) {
        continue;
      }
      const fallbackData = await fallbackResponse.json();
      const fallbackItems: any[] = Array.isArray(fallbackData.items) ? fallbackData.items : [];
      if (fallbackItems.length === 0) {
        continue;
      }
      // Prefer hits tied to the requested author; otherwise return them all.
      const authorFilteredResults = fallbackItems.filter(mentionsAuthor);
      return authorFilteredResults.length > 0 ? authorFilteredResults : fallbackItems;
    }

    return primaryItems;
  } catch (error) {
    console.error('Error fetching GitHub repos:', error);
    return [];
  }
}
/**
 * Converts a GitHub repository object into the document shape returned by the
 * search endpoint.
 *
 * Missing pieces are tolerated: an absent description becomes
 * "No description available", an absent language is shown as "Unknown" in the
 * content text, and absent topics are treated as an empty list.
 *
 * @param repo - Repository object from the GitHub search response.
 * @param rank - Zero-based position; drives `relevanceScore` (0.1 lower per
 *   step, never below 0.5) and the one-based `rank` field.
 * @param query - Search text, echoed back as `searchQuery`.
 * @returns Document object with `sourceType` set to `'code'`.
 */
function transformGitHubRepoToDocument(repo: GitHubRepo, rank: number, query: string): any {
  const description = repo.description || 'No description available';
  // Snippets are capped at 200 characters, with an ellipsis marking the cut.
  const snippet = description.length > 200
    ? description.substring(0, 200) + '...'
    : description;

  // Guard against responses that omit `topics`, which would otherwise throw on join().
  const topics = Array.isArray(repo.topics) ? repo.topics : [];
  // Avoid rendering the literal text "null" for repositories without a language.
  const languageLabel = repo.language || 'Unknown';

  return {
    id: repo.id,
    title: `${repo.name} - ${repo.full_name}`,
    content: `${description}\n\nRepository: ${repo.full_name}\nLanguage: ${languageLabel}\nStars: ${repo.stargazers_count}\nTopics: ${topics.join(', ')}\nCreated: ${repo.created_at}\nLast Updated: ${repo.updated_at}`,
    snippet,
    source: `GitHub Repository`,
    sourceType: 'code',
    url: repo.html_url,
    metadata: {
      stars: repo.stargazers_count,
      // Raw API value (may be null) so consumers can tell "unknown" from a name.
      language: repo.language,
      topics,
      created_at: repo.created_at,
      updated_at: repo.updated_at
    },
    relevanceScore: Math.max(0.5, 1 - (rank * 0.1)),
    rank: rank + 1,
    searchQuery: query,
    // Randomised placeholder in the range [0.1, 0.4), not a measured duration.
    retrievalTime: Math.random() * 0.3 + 0.1,
    // Token estimate: four characters per token, assuming 100 characters when
    // there is no description.
    tokenCount: Math.floor((repo.description?.length || 100) / 4)
  };
}
/**
 * Registers the JSON API routes on `app` and wraps the app in an HTTP server.
 *
 * Routes:
 * - `POST /api/search` — validates the body with `searchRequestSchema`, searches
 *   local storage, and when fewer than three local hits come back adds GitHub
 *   results (code-like queries, only if `GITHUB_TOKEN` is set) and web results.
 *   The merged list is sorted by `relevanceScore` and cut to the requested
 *   limit. Responds 400 on schema validation errors, 500 otherwise.
 * - `POST /api/explain` — asks the OpenAI chat API for a short spoken-style
 *   explanation of a document; requires `title` and `snippet` in the body.
 * - `GET /api/documents` — pages through stored documents using the `limit`
 *   (default 50) and `offset` (default 0) query parameters.
 *
 * @param app - Express application to attach the routes to.
 * @returns The `http.Server` created around `app`; it is not started here.
 */
export async function registerRoutes(app: Express): Promise<Server> {
  // Below this many local hits, external sources are consulted as well.
  const minResults = 3;
  // Substrings that mark a query as code-related and worth a GitHub search.
  const codeKeywords = ['python', 'data structures', 'algorithm', 'repository', 'code'];

  // Enhanced search with web fallback
  app.post("/api/search", async (req, res) => {
    try {
      const searchRequest = searchRequestSchema.parse(req.body);
      const startTime = Date.now();

      // First, search local storage
      const localResults = await storage.searchDocuments(searchRequest);
      let allDocuments = localResults.results || [];

      if (allDocuments.length < minResults) {
        console.log(`Local search returned ${allDocuments.length} results, fetching external sources...`);
        const localCount = allDocuments.length;
        const lowerQuery = searchRequest.query.toLowerCase();
        const isCodeQuery = codeKeywords.some((keyword) => lowerQuery.includes(keyword));

        // External sources are queried in parallel; ranks continue after the local hits.
        const searchPromises: Promise<any[]>[] = [];
        if (isCodeQuery && process.env.GITHUB_TOKEN) {
          searchPromises.push(
            searchGitHubRepos(searchRequest.query, Math.min(5, searchRequest.limit))
              .then((repos) => repos.map((repo, index) =>
                transformGitHubRepoToDocument(repo, index + localCount, searchRequest.query)
              ))
          );
        }

        // Always include web search for broader coverage. The budget is kept at
        // 1 or more: with a small limit, `limit - localCount` can reach zero or
        // go negative, which is not a meaningful result count.
        const webBudget = Math.max(1, Math.min(5, searchRequest.limit - localCount));
        searchPromises.push(
          searchWeb(searchRequest.query, webBudget)
            .then((webResults) => webResults.map((result, index) =>
              transformWebResultToDocument(result, index + localCount, searchRequest.query)
            ))
        );

        const externalResults = await Promise.all(searchPromises);

        // Combine and sort all results by relevance, then enforce the limit.
        allDocuments = [...allDocuments, ...externalResults.flat()]
          .sort((a, b) => b.relevanceScore - a.relevanceScore)
          .slice(0, searchRequest.limit);
      }

      const searchTime = (Date.now() - startTime) / 1000;
      res.json({
        results: allDocuments,
        totalCount: allDocuments.length,
        searchTime,
        query: searchRequest.query,
        queryId: Date.now()
      });
    } catch (error) {
      if (error instanceof z.ZodError) {
        res.status(400).json({ message: "Invalid search request", errors: error.errors });
      } else {
        console.error('Search error:', error);
        res.status(500).json({ message: "Internal server error" });
      }
    }
  });

  // AI explanation endpoint
  app.post("/api/explain", async (req, res) => {
    try {
      const { title, snippet } = req.body;
      if (!title || !snippet) {
        return res.status(400).json({ message: "Title and snippet are required" });
      }

      const prompt = `Explain this document in a clear, conversational way suitable for audio playback:
Title: ${title}
Content: ${snippet}
Provide a brief, engaging explanation (2-3 sentences) that would be pleasant to listen to. Focus on the key concepts and practical value.`;

      const response = await openai.chat.completions.create({
        model: "gpt-4o", // the newest OpenAI model is "gpt-4o" which was released May 13, 2024. do not change this unless explicitly requested by the user
        messages: [{ role: "user", content: prompt }],
        max_tokens: 150,
        temperature: 0.7,
      });

      const explanation = response.choices[0].message.content;
      res.json({ explanation });
    } catch (error) {
      console.error('AI explanation error:', error);
      res.status(500).json({ message: "Failed to generate explanation" });
    }
  });

  // Paged listing of stored documents
  app.get("/api/documents", async (req, res) => {
    try {
      // Explicit radix; missing, non-numeric or out-of-range values fall back to
      // the defaults instead of reaching storage as NaN or negative numbers.
      const requestedLimit = parseInt(req.query.limit as string, 10);
      const requestedOffset = parseInt(req.query.offset as string, 10);
      const limit = requestedLimit > 0 ? requestedLimit : 50;
      const offset = requestedOffset >= 0 ? requestedOffset : 0;

      const documents = await storage.getDocuments(limit, offset);
      res.json(documents);
    } catch (error) {
      // Log the cause so a 500 here is diagnosable.
      console.error('Document fetch error:', error);
      res.status(500).json({ message: "Failed to fetch documents" });
    }
  });

  const httpServer = createServer(app);
  return httpServer;
}