Scrape each search result with a 200-character response limit

This commit is contained in:
Raven 2024-08-03 16:07:39 -04:00
parent 5e19d00a80
commit e25557190c

View File

@ -109,7 +109,7 @@ function trimConversationHistory(messages, maxLength, tolerance) {
} }
// Function to scrape web page // Function to scrape web page
async function scrapeWebPage(url) { async function scrapeWebPage(url, length) {
console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`); console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
try { try {
const res = await fetch(url); const res = await fetch(url);
@ -127,7 +127,7 @@ async function scrapeWebPage(url) {
response += `Description: ${pageDescription}\n`; response += `Description: ${pageDescription}\n`;
} }
if (pageContent) { if (pageContent) {
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH || 2000; const MAX_CONTENT_LENGTH = length || process.env.MAX_CONTENT_LENGTH || 2000;
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' '); let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/; const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
const isCode = codePattern.test(plainTextContent); const isCode = codePattern.test(plainTextContent);
@ -424,44 +424,54 @@ app.post('/api/v1/chat', async (req, res) => {
} }
} }
const searchRegex = /\bsearch\s+(.+)\b/i; const searchRegex = /\b[Ss]earch\s+(.+)\b/;
const searchMatch = userMessage.match(searchRegex); const searchMatch = userMessage.match(searchRegex);
if (searchMatch) { if (searchMatch) {
const searchQuery = searchMatch[1]; const searchQuery = searchMatch[1];
console.log(`${getTimestamp()} [INFO] Detected search query in user message: ${searchQuery}`); console.log(`${getTimestamp()} [INFO] Detected search query in user message: ${searchQuery}`);
try { try {
googleIt({ 'query': searchQuery }).then(results => { googleIt({ 'query': searchQuery }).then(async results => {
let searchResponse = `Search Query: ${searchQuery}\n`; let searchResponse = `Search Query: ${searchQuery}\n`;
searchResponse += `Top Google search results:\n`; searchResponse += `Top Google search results:\n`;
results.forEach((result, index) => { for (let i = 0; i < results.length; i++) {
searchResponse += `${index + 1}. ${result.title} - ${result.link}\n`; const result = results[i];
}); searchResponse += `${i + 1}. ${result.title} - ${result.link}\n`;
const lastMessageIndex = conversationHistory[ip].length - 1; try {
if (lastMessageIndex >= 0) { const scrapeResult = await scrapeWebPage(result.link, 200);
conversationHistory[ip][lastMessageIndex].content += "\n" + searchResponse; searchResponse += `Scraped Data: ${scrapeResult}\n`;
console.log(`${getTimestamp()} [INFO] Processed search query: ${searchQuery}, response: ${searchResponse}`); } catch (scrapeErr) {
} else { console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${result.link}`, scrapeErr);
console.error(`${getTimestamp()} [ERROR] Conversation history is unexpectedly empty for: ${ip}`); searchResponse += `Failed to scrape URL: ${result.link}\n`;
} }
}).catch(err => { }
console.error(`${getTimestamp()} [ERROR] Failed to perform Google search: ${searchQuery}`, err);
return res.status(500).json({ const lastMessageIndex = conversationHistory[ip].length - 1;
message: "An error occurred while performing Google search", if (lastMessageIndex >= 0) {
error: err.message conversationHistory[ip][lastMessageIndex].content += "\n" + searchResponse;
}); console.log(`${getTimestamp()} [INFO] Processed search query: ${searchQuery}, response: ${searchResponse}`);
}); } else {
} catch (err) { console.error(`${getTimestamp()} [ERROR] Conversation history is unexpectedly empty for: ${ip}`);
console.error(`${getTimestamp()} [ERROR] An unexpected error occurred:`, err); }
}).catch(err => {
console.error(`${getTimestamp()} [ERROR] Failed to perform Google search: ${searchQuery}`, err);
return res.status(500).json({ return res.status(500).json({
message: "An unexpected error occurred", message: "An error occurred while performing Google search",
error: err.message error: err.message
}); });
} });
} } catch (err) {
console.error(`${getTimestamp()} [ERROR] An unexpected error occurred:`, err);
return res.status(500).json({
message: "An unexpected error occurred",
error: err.message
});
}
}
// End Plugins --- // End Plugins ---