From e25557190c23e4e27a79bc9dc3499868301cc706 Mon Sep 17 00:00:00 2001 From: Raven Date: Sat, 3 Aug 2024 16:07:39 -0400 Subject: [PATCH] scrape each result with 200 chars response --- backend-server/backend-server.js | 86 ++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/backend-server/backend-server.js b/backend-server/backend-server.js index 46ef39c..41a982f 100644 --- a/backend-server/backend-server.js +++ b/backend-server/backend-server.js @@ -109,7 +109,7 @@ function trimConversationHistory(messages, maxLength, tolerance) { } // Function to scrape web page -async function scrapeWebPage(url) { +async function scrapeWebPage(url, length) { console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`); try { const res = await fetch(url); @@ -127,7 +127,7 @@ async function scrapeWebPage(url) { response += `Description: ${pageDescription}\n`; } if (pageContent) { - const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH || 2000; + const MAX_CONTENT_LENGTH = length || process.env.MAX_CONTENT_LENGTH || 2000; let plainTextContent = $('
').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' '); const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/; const isCode = codePattern.test(plainTextContent); @@ -424,44 +424,54 @@ app.post('/api/v1/chat', async (req, res) => { } } - const searchRegex = /\bsearch\s+(.+)\b/i; - const searchMatch = userMessage.match(searchRegex); - - if (searchMatch) { - const searchQuery = searchMatch[1]; - console.log(`${getTimestamp()} [INFO] Detected search query in user message: ${searchQuery}`); - - try { - googleIt({ 'query': searchQuery }).then(results => { - let searchResponse = `Search Query: ${searchQuery}\n`; - searchResponse += `Top Google search results:\n`; - - results.forEach((result, index) => { - searchResponse += `${index + 1}. ${result.title} - ${result.link}\n`; - }); - - const lastMessageIndex = conversationHistory[ip].length - 1; - if (lastMessageIndex >= 0) { - conversationHistory[ip][lastMessageIndex].content += "\n" + searchResponse; - console.log(`${getTimestamp()} [INFO] Processed search query: ${searchQuery}, response: ${searchResponse}`); - } else { - console.error(`${getTimestamp()} [ERROR] Conversation history is unexpectedly empty for: ${ip}`); - } - }).catch(err => { - console.error(`${getTimestamp()} [ERROR] Failed to perform Google search: ${searchQuery}`, err); - return res.status(500).json({ - message: "An error occurred while performing Google search", - error: err.message - }); - }); - } catch (err) { - console.error(`${getTimestamp()} [ERROR] An unexpected error occurred:`, err); + const searchRegex = /\b[Ss]earch\s+(.+)\b/; +const searchMatch = userMessage.match(searchRegex); + +if (searchMatch) { + const searchQuery = searchMatch[1]; + console.log(`${getTimestamp()} [INFO] Detected search query in user message: ${searchQuery}`); + + try { + googleIt({ 'query': searchQuery }).then(async results => { + let searchResponse = `Search Query: ${searchQuery}\n`; + searchResponse += `Top Google search results:\n`; + + for (let i = 0; i < results.length; i++) { + const result = results[i]; + searchResponse += `${i + 1}. ${result.title} - ${result.link}\n`; + + try { + const scrapeResult = await scrapeWebPage(result.link, 200); + searchResponse += `Scraped Data: ${scrapeResult}\n`; + } catch (scrapeErr) { + console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${result.link}`, scrapeErr); + searchResponse += `Failed to scrape URL: ${result.link}\n`; + } + } + + const lastMessageIndex = conversationHistory[ip].length - 1; + if (lastMessageIndex >= 0) { + conversationHistory[ip][lastMessageIndex].content += "\n" + searchResponse; + console.log(`${getTimestamp()} [INFO] Processed search query: ${searchQuery}, response: ${searchResponse}`); + } else { + console.error(`${getTimestamp()} [ERROR] Conversation history is unexpectedly empty for: ${ip}`); + } + }).catch(err => { + console.error(`${getTimestamp()} [ERROR] Failed to perform Google search: ${searchQuery}`, err); return res.status(500).json({ - message: "An unexpected error occurred", - error: err.message + message: "An error occurred while performing Google search", + error: err.message }); - } - } + }); + } catch (err) { + console.error(`${getTimestamp()} [ERROR] An unexpected error occurred:`, err); + return res.status(500).json({ + message: "An unexpected error occurred", + error: err.message + }); + } +} + // End Plugins ---