Scrape each search result, limiting each scraped response to 200 characters
This commit is contained in:
parent
5e19d00a80
commit
e25557190c
@ -109,7 +109,7 @@ function trimConversationHistory(messages, maxLength, tolerance) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Function to scrape web page
|
// Function to scrape web page
|
||||||
async function scrapeWebPage(url) {
|
async function scrapeWebPage(url, length) {
|
||||||
console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
|
console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
@ -127,7 +127,7 @@ async function scrapeWebPage(url) {
|
|||||||
response += `Description: ${pageDescription}\n`;
|
response += `Description: ${pageDescription}\n`;
|
||||||
}
|
}
|
||||||
if (pageContent) {
|
if (pageContent) {
|
||||||
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH || 2000;
|
const MAX_CONTENT_LENGTH = length || process.env.MAX_CONTENT_LENGTH || 2000;
|
||||||
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
|
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
|
||||||
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
|
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
|
||||||
const isCode = codePattern.test(plainTextContent);
|
const isCode = codePattern.test(plainTextContent);
|
||||||
@ -424,44 +424,54 @@ app.post('/api/v1/chat', async (req, res) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchRegex = /\bsearch\s+(.+)\b/i;
|
const searchRegex = /\b[Ss]earch\s+(.+)\b/;
|
||||||
const searchMatch = userMessage.match(searchRegex);
|
const searchMatch = userMessage.match(searchRegex);
|
||||||
|
|
||||||
if (searchMatch) {
|
if (searchMatch) {
|
||||||
const searchQuery = searchMatch[1];
|
const searchQuery = searchMatch[1];
|
||||||
console.log(`${getTimestamp()} [INFO] Detected search query in user message: ${searchQuery}`);
|
console.log(`${getTimestamp()} [INFO] Detected search query in user message: ${searchQuery}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
googleIt({ 'query': searchQuery }).then(results => {
|
googleIt({ 'query': searchQuery }).then(async results => {
|
||||||
let searchResponse = `Search Query: ${searchQuery}\n`;
|
let searchResponse = `Search Query: ${searchQuery}\n`;
|
||||||
searchResponse += `Top Google search results:\n`;
|
searchResponse += `Top Google search results:\n`;
|
||||||
|
|
||||||
results.forEach((result, index) => {
|
for (let i = 0; i < results.length; i++) {
|
||||||
searchResponse += `${index + 1}. ${result.title} - ${result.link}\n`;
|
const result = results[i];
|
||||||
});
|
searchResponse += `${i + 1}. ${result.title} - ${result.link}\n`;
|
||||||
|
|
||||||
const lastMessageIndex = conversationHistory[ip].length - 1;
|
try {
|
||||||
if (lastMessageIndex >= 0) {
|
const scrapeResult = await scrapeWebPage(result.link, 200);
|
||||||
conversationHistory[ip][lastMessageIndex].content += "\n" + searchResponse;
|
searchResponse += `Scraped Data: ${scrapeResult}\n`;
|
||||||
console.log(`${getTimestamp()} [INFO] Processed search query: ${searchQuery}, response: ${searchResponse}`);
|
} catch (scrapeErr) {
|
||||||
} else {
|
console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${result.link}`, scrapeErr);
|
||||||
console.error(`${getTimestamp()} [ERROR] Conversation history is unexpectedly empty for: ${ip}`);
|
searchResponse += `Failed to scrape URL: ${result.link}\n`;
|
||||||
}
|
}
|
||||||
}).catch(err => {
|
}
|
||||||
console.error(`${getTimestamp()} [ERROR] Failed to perform Google search: ${searchQuery}`, err);
|
|
||||||
return res.status(500).json({
|
const lastMessageIndex = conversationHistory[ip].length - 1;
|
||||||
message: "An error occurred while performing Google search",
|
if (lastMessageIndex >= 0) {
|
||||||
error: err.message
|
conversationHistory[ip][lastMessageIndex].content += "\n" + searchResponse;
|
||||||
});
|
console.log(`${getTimestamp()} [INFO] Processed search query: ${searchQuery}, response: ${searchResponse}`);
|
||||||
});
|
} else {
|
||||||
} catch (err) {
|
console.error(`${getTimestamp()} [ERROR] Conversation history is unexpectedly empty for: ${ip}`);
|
||||||
console.error(`${getTimestamp()} [ERROR] An unexpected error occurred:`, err);
|
}
|
||||||
|
}).catch(err => {
|
||||||
|
console.error(`${getTimestamp()} [ERROR] Failed to perform Google search: ${searchQuery}`, err);
|
||||||
return res.status(500).json({
|
return res.status(500).json({
|
||||||
message: "An unexpected error occurred",
|
message: "An error occurred while performing Google search",
|
||||||
error: err.message
|
error: err.message
|
||||||
});
|
});
|
||||||
}
|
});
|
||||||
}
|
} catch (err) {
|
||||||
|
console.error(`${getTimestamp()} [ERROR] An unexpected error occurred:`, err);
|
||||||
|
return res.status(500).json({
|
||||||
|
message: "An unexpected error occurred",
|
||||||
|
error: err.message
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// End Plugins ---
|
// End Plugins ---
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user