AWS Lambda Proxy For AI Services
AI Service Providers like OpenAI and AWS offer Large Language Models, Speech To Text, and Text To Speech services.
I prefer to build AI-based applications that run strictly in-browser and that interact directly with the APIs of these services.
But sometimes in-browser isn't enough, and I need a lightweight backend.
To keep it as lightweight and as easy-to-manage as possible, I currently use a single AWS Lambda with an exposed function URL.
It has the following features:
- Protects calls with a faux bearer token, a simple password.
- Proxies calls to OpenAI endpoints: models, completions, transcriptions, and speech.
- Exposes OpenAI-style endpoints for AWS Bedrock & Polly: models, completions, and speech.
- Supports streaming response for both text and audio.
Prerequisites & Lambda Configuration
- Navigate to Bedrock in the AWS Console, and request access to LLMs (usually takes only a minute or two for requests to go through)
- Create an AWS Lambda with:
- A Node.js 20.x runtime.
- A function URL.
- An invoke mode of RESPONSE_STREAM.
- A CORS setting that allows all (*) for origin, headers, methods, and credentials.
- An IAM Execution Role with policies for Bedrock (AmazonBedrockFullAccess) and Polly (AmazonPollyFullAccess).
- Environment variables for PASSWORD and OPENAI_API_KEY.
Code
#
// Import AWS SDK clients and dependencies, which should already be available in the Lambda environment.
import { BedrockRuntimeClient, ConverseStreamCommand, ConverseCommand } from "@aws-sdk/client-bedrock-runtime";
import { ListFoundationModelsCommand, BedrockClient } from "@aws-sdk/client-bedrock";
import { PollyClient, SynthesizeSpeechCommand } from "@aws-sdk/client-polly";
// Import Readable for normalizing streams and pipeline for piping with guaranteed
// cleanup of the destination (currently only used for Polly audio streaming)
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';
// Import fetch for handling HTTP/2, which built-in Node.js fetch does not yet fully support
// For simplicity's sake, I built node-fetch locally and literally uploaded the file to the lambda
import fetch from './node-fetch.mjs';
// Pull in PASSWORD from environment variable to protect API endpoints with a faux bearer token
const PASSWORD = process?.env?.PASSWORD;
// Pull in OPENAI_API_KEY from environment variable to use OpenAI API
const OPENAI_API_KEY = process?.env?.OPENAI_API_KEY;
// Define API endpoints in the style of OpenAI
// Prefix with /provider/ to differentiate between different providers
const apiEndpoints = {
'/open-ai/models': {
method: ['GET'],
handler: proxyTo('https://api.openai.com/v1/models', OPENAI_API_KEY),
protection: 'password'
},
'/open-ai/chat/completions': {
method: ['POST'],
handler: proxyTo('https://api.openai.com/v1/chat/completions', OPENAI_API_KEY),
protection: 'password'
},
'/open-ai/transcriptions': {
method: ['POST'],
handler: proxyTo('https://api.openai.com/v1/audio/transcriptions', OPENAI_API_KEY),
protection: 'password'
},
'/open-ai/audio/speech': {
method: ['POST'],
handler: proxyTo('https://api.openai.com/v1/audio/speech', OPENAI_API_KEY, true),
protection: 'password'
},
'/bedrock/chat/completions': {
method: ['POST'],
handler: bedrockConverse(),
protection: 'password'
},
'/bedrock/models': {
method: ['GET'],
handler: bedrockListModels(),
protection: 'password'
},
'/bedrock/audio/speech': {
method: ['POST'],
handler: pollySpeak(),
protection: 'password'
},
};
// Use the arcane streamifyResponse function to handle streaming responses from the Lambda
export const handler = awslambda.streamifyResponse(async (event, responseStream, _context) => {
try {
await processAPIRequest(event, responseStream);
} catch (error) {
errorResponse(responseStream, 500, error.message);
}
});
// Route to the appropriate API endpoint based on the path and method
// Enforce password protection if necessary
async function processAPIRequest(event, responseStream) {
const path = event?.rawPath;
const method = event?.requestContext?.http?.method;
const endpoint = apiEndpoints[path];
if (!endpoint || !endpoint.method.includes(method)) {
errorResponse(responseStream, 404, 'Not Found');
return;
}
if (endpoint.protection === 'password') {
const authHeader = event.headers.authorization;
if (authHeader !== 'Bearer ' + PASSWORD) {
errorResponse(responseStream, 401, 'Unauthorized - Invalid password');
return;
}
}
await endpoint.handler(event, responseStream);
}
// Handle interactions with Bedrock LLMs via AWS SDKs, reformatting responses to match OpenAI API specs
function bedrockConverse() {
return async (event, responseStream) => {
const client = new BedrockRuntimeClient({ region: "us-east-1" });
const { model, messages, stream, inferenceConfig } = JSON.parse(event.body);
const formattedMessages = messages.map(({ role, content }) => ({ role, content: [{ text: content }] }));
try {
if(stream) {
const command = new ConverseStreamCommand({
modelId: model,
messages: formattedMessages,
inferenceConfig: inferenceConfig || { maxTokens: 4096, temperature: 0.5, topP: 0.9 },
});
const response = await client.send(command);
for await (const item of response.stream) {
if (item.contentBlockDelta) {
const responseText = item.contentBlockDelta.delta?.text;
const partialCompletion = reformatPlainTextToOpenAIPartialCompletion(responseText, model);
responseStream.write(partialCompletion);
}
}
responseStream.write('data: [DONE]');
} else {
const command = new ConverseCommand({
modelId: model,
messages: formattedMessages,
inferenceConfig: inferenceConfig || { maxTokens: 4096, temperature: 0.5, topP: 0.9 },
});
const response = await client.send(command);
const reformatted = reformatBedrockCompletionToOpenAI(response, model);
successResponse(responseStream, reformatted);
}
} catch (err) {
console.log(`ERROR: Can't invoke '${model}'. Reason: ${err}`);
errorResponse(responseStream, 500, `Error invoking model: ${err.message}`);
} finally {
responseStream.end();
}
};
}
// Handle listing of Bedrock models via AWS SDKs, reformatting responses to match OpenAI API specs
function bedrockListModels() {
return async (event, responseStream) => {
const client = new BedrockClient({ region: "us-east-1" });
const input = { byOutputModality: "TEXT" };
try {
const response = await client.send(new ListFoundationModelsCommand(input));
const reformatted = reformatBedrockListToOpenAI(response);
successResponse(responseStream, reformatted);
} catch (err) {
errorResponse(responseStream, 500, `Error listing models: ${err.message}`);
}
};
}
// Handle speech synthesis via AWS SDKs, streaming the audio response
function pollySpeak() {
return async (event, responseStream) => {
const body = event.body ? JSON.parse(event.body) : event;
const { input } = body;
const client = new PollyClient({ region: "us-east-1" });
const parameters = {
Engine: 'generative',
OutputFormat: "mp3",
Text: input,
TextType: "text",
VoiceId: 'Ruth'
};
const command = new SynthesizeSpeechCommand(parameters);
const pollyResponse = await client.send(command);
const pollyStream = pollyResponse.AudioStream;
responseStream.write(JSON.stringify({
statusCode: 200,
headers: {
'Content-Type': 'audio/mpeg',
}
}));
responseStream.write('\n');
Readable.fromWeb(Readable.toWeb(pollyStream)).pipe(responseStream);
};
}
// Reformat Bedrock LLM response to match OpenAI API specs - non-streaming
function reformatBedrockCompletionToOpenAI(bedrockResponse, modelId) {
return {
id: bedrockResponse.$metadata.requestId,
object: "chat.completion",
created: Math.floor(Date.now() / 1000),
model: modelId,
choices: [
{
index: 0,
message: {
role: bedrockResponse.output.message.role,
content: bedrockResponse.output.message.content[0].text
},
finish_reason: bedrockResponse.stopReason === "end_turn" ? "stop" : bedrockResponse.stopReason
}
],
usage: {
prompt_tokens: bedrockResponse.usage.inputTokens,
completion_tokens: bedrockResponse.usage.outputTokens,
total_tokens: bedrockResponse.usage.totalTokens
}
};
}
// Reformat bedrock LLM response to match OpenAI API specs - streaming
function reformatPlainTextToOpenAIPartialCompletion(text, modelId){
const json = {
id: "partial-completion",
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: modelId,
choices: [
{
index: 0,
delta: {
content: text
},
finish_reason: null
}
]
};
const jsonString = JSON.stringify(json);
return 'data: ' + jsonString + '\n';
}
// Reformat Bedrock LLM model list to match OpenAI API specs
function reformatBedrockListToOpenAI(bedrockResponse) {
return {
object: "list",
data: bedrockResponse.modelSummaries.map(model => ({
id: model.modelId,
object: "model",
created: Math.floor(Date.now() / 1000),
owned_by: model.providerName || "system"
}))
};
}
// When proxying, filter out headers that could cause issues
function filterHeaders(headers) {
const allowedHeaders = ['content-type', 'content-length', 'accept', 'accept-encoding', 'accept-language'];
return Object.fromEntries(
Object.entries(headers)
.filter(([key]) => allowedHeaders.includes(key))
);
}
// Proxy requests to external APIs like OpenAI, adding an API key if necessary
// A bit of a mess, honestly, but it works
function proxyTo(url, apiKey) {
return async (event, responseStream) => {
const allowedHeaders = filterHeaders(event.headers);
const { method } = event.requestContext.http;
const isMultipart = allowedHeaders['content-type']?.includes('multipart');
const requestBody = isMultipart ? handleMultipartRequest(event.body) : event.body;
try {
const response = await fetch(url, {
method,
headers: {
...allowedHeaders,
Authorization: `Bearer ${apiKey}`,
},
body: requestBody,
});
let stream = false;
if(method === 'POST' && requestBody){
const requestJson = JSON.parse(requestBody);
stream = requestJson.stream;
}
// If url includes audio, stream is true
if(url.includes('audio')){
stream = true;
}
if (stream) {
console.log('streaming response');
for await (const chunk of response.body) {
responseStream.write(chunk);
}
responseStream.end();
} else {
console.log('non-streaming response');
const responseContentType = response.headers.get('content-type');
if (responseContentType?.includes('audio/mpeg')) {
const audioBody = await response.arrayBuffer();
responseStream.write(Buffer.from(audioBody).toString('base64'));
responseStream.end();
} else if (!response.ok) {
errorResponse(responseStream, response.status, response.statusText);
} else {
const jsonResponse = await response.json();
successResponse(responseStream, jsonResponse);
}
}
} catch (err) {
errorResponse(responseStream, 500, `Error proxying request: ${err.message}`);
}
};
}
// For transcription requests
function handleMultipartRequest(body) {
return typeof Buffer !== 'undefined' ? Buffer.from(body, 'base64') : body;
}
// Provide an error response
function errorResponse(responseStream, code, message) {
responseStream.setContentType('application/json');
responseStream.write(JSON.stringify({
statusCode: code,
body: {
"error": message
}
}));
responseStream.end();
}
// Provide a successful response
function successResponse(responseStream, body) {
responseStream.setContentType('application/json');
responseStream.write(JSON.stringify({
...body
}));
responseStream.end();
}