Как разобрать OpenAIStream, чтобы не было пробелов и списки были правильно отформатированы?
У меня настроен API OpenAI для потоковой передачи ответа, и он работает! Проблема в том, что в результатах есть лишние пробелы, и мое форматирование не помогает устранить проблему с пробелами и не анализирует списки. Вот код. Как правильно разобрать эти «куски» после того, как они все получены?
Компонент сообщений этого приложения React:
const handleSubmit = async (event) => {
event.preventDefault();
if (!inputText) return;
const userMessage = {
author: {
id: user.id,
username: user.username
},
text: `${selectedOption} ${inputText}`,
};
setMessages((prevMessages) => [...prevMessages, userMessage]);
scrollToBottom();
localStorage.setItem("messages", JSON.stringify(messages));
let messageChunks = [];
const systemMessage = {
author: {
id: 'system',
username: 'System'
},
text: '',
};
setMessages((prevMessages) => [...prevMessages, systemMessage]);
generateText({
username: `Name: ` + user.username,
inputText: `${selectedOption} ${inputText}`,
},
setShowModal,
(chunk, isInitialChunk, isFinalChunk) => {
messageChunks.push(chunk.trim());
// Update the last system message with the new chunk
setMessages((prevMessages) => {
const lastSystemMessageIndex = prevMessages.length - 1;
const updatedSystemMessage = {
...prevMessages[lastSystemMessageIndex],
text: prevMessages[lastSystemMessageIndex].text + ' ' + chunk.trim(),
};
return [
...prevMessages.slice(0, lastSystemMessageIndex),
updatedSystemMessage,
...prevMessages.slice(lastSystemMessageIndex + 1),
];
});
if (isFinalChunk) {
const formattedMessage = formatText(messageChunks.join(' ').replace(/\s+/g, ' '));
console.log("Formatted text: ", formattedMessage);
setMessages((prevMessages) => {
const lastSystemMessageIndex = prevMessages.length - 1;
const updatedSystemMessage = {
...prevMessages[lastSystemMessageIndex],
text: formattedMessage || prevMessages[lastSystemMessageIndex].text, // Use formattedMessage if it's defined, otherwise use the original text
};
const updatedMessages = [
...prevMessages.slice(0, lastSystemMessageIndex),
updatedSystemMessage,
...prevMessages.slice(lastSystemMessageIndex + 1),
];
return updatedMessages;
});
}
},
true,
true,
true,
false,
false
);
setInputText("");
};
const formatText = (text) => {
let formattedText = text
.replace(/\s+/g, ' ')
.replace(/\s+([.,!?:;])/g, '$1')
.replace(/([.,!?:;])\s+/g, '$1 ');
const numberedListRegex = /(?:\s|^)(\d+)\.\s+([^\d\s][^\n]*)(?:\n|$)/gm;
let match;
const listItems = [];
let lastIndex = 0;
while ((match = numberedListRegex.exec(formattedText)) !== null) {
if (match.index !== lastIndex) {
const textBetweenMatches = formattedText
.slice(lastIndex, match.index)
.replace(/\s{2,}/g, ' ')
.trim();
if (textBetweenMatches) {
listItems.push(textBetweenMatches);
}
lastIndex = numberedListRegex.lastIndex;
}
// Add match[1] before match[2] to include the number in the list item
listItems.push(`<li>${match[1]}. ${match[2].trim().replace(/\s{2,}/g, ' ')}</li>`);
}
if (lastIndex !== formattedText.length) {
listItems.push(formattedText.slice(lastIndex).replace(/\s{2,}/g, ' ').trim());
}
if (listItems.length > 1) {
formattedText = `<ol>${listItems.join('')}</ol>`;
}
formattedText = he.decode(formattedText);
return formattedText;
};
/// OpenAIStream:
const stream = new ReadableStream({
async start(controller) {
// callback
function onParse(event: ParsedEvent | ReconnectInterval) {
if (event.type === "event") {
const data = event.data;
if (data === "[DONE]") {
controller.close();
return;
}
try {
const json = JSON.parse(data);
const text = (json.choices[0].delta?.content || ""); // Remove newline
if (counter < 2 && text.includes("\n")) {
return;
}
const queue = encoder.encode(text);
controller.enqueue(queue);
counter++;
} catch (e) {
// maybe parse error
controller.error(e);
}
}
}
const parser = createParser(onParse);
const reader = res.body.getReader();
while (true) {
const {
value,
done
} = await reader.read();
if (done) break;
parser.feed(decoder.decode(value));
}
},
});
return stream;
}
//// openai's aPI call:
const generateText = async (
params,
setShowModal,
onUpdate,
isSystemMessage = false,
useSecret = false,
includeRecentMessages = false,
waitForCompletion = false,
skipModeration = false
) => {
console.log("variables passed:", params);
try {
const isFlagged = skipModeration ? false : await moderateText(params, setShowModal);
if (!isFlagged) {
const content = Object.values(params).join(" ");
const messages = [{
role: "user",
content: content
}, ];
if (isSystemMessage) {
const secret = useSecret ? process.env.REACT_APP_SECRET : "";
messages.unshift({
role: "system",
content: "generate content for audience of 6-17 year olds, do not refer to ages or young " + secret
});
}
if (includeRecentMessages && params.recentMessages && params.recentMessages.length > 0) {
prompt += "\n\nPrevious Messages:\n";
prompt += params.recentMessages.map(({
author,
text
}) => `${author.username}: ${text}`).join("\n");
prompt += "\n";
}
const payload = {
model: "gpt-3.5-turbo",
messages: messages,
user: params.username,
temperature: 0,
max_tokens: 2000,
};
console.log(payload)
if (waitForCompletion) {
const response = await openai.post("/chat/completions", payload);
console.log(response.data.choices[0].message.content)
return response.data.choices[0].message.content;
} else {
const streamPayload = {
...payload,
stream: true
};
const stream = await OpenAIStream(streamPayload, apiKey);
const reader = stream.getReader();
const decoder = new TextDecoder();
let isFirstChunk = true;
while (true) {
const {
value,
done
} = await reader.read();
if (done) break;
const text = decoder.decode(value);
onUpdate(text.trim(), isFirstChunk, false);
isFirstChunk = false;
}
onUpdate("", false, true);
}