Enhanced image analysis capabilities with improved handlers and default model setup
This commit is contained in:
16
src/index.ts
16
src/index.ts
@@ -5,14 +5,17 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|||||||
|
|
||||||
import { ToolHandlers } from './tool-handlers.js';
|
import { ToolHandlers } from './tool-handlers.js';
|
||||||
|
|
||||||
|
// Define the default model to use when none is specified
|
||||||
|
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||||
|
|
||||||
class OpenRouterMultimodalServer {
|
class OpenRouterMultimodalServer {
|
||||||
private server: Server;
|
private server: Server;
|
||||||
private toolHandlers!: ToolHandlers; // Using definite assignment assertion
|
private toolHandlers!: ToolHandlers; // Using definite assignment assertion
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
// Get API key and default model from environment variables
|
// Retrieve API key and default model from environment variables
|
||||||
const apiKey = process.env.OPENROUTER_API_KEY;
|
const apiKey = process.env.OPENROUTER_API_KEY;
|
||||||
const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL;
|
const defaultModel = process.env.DEFAULT_MODEL || DEFAULT_MODEL;
|
||||||
|
|
||||||
// Check if API key is provided
|
// Check if API key is provided
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
@@ -55,11 +58,10 @@ class OpenRouterMultimodalServer {
|
|||||||
console.error('Using API key from environment variable');
|
console.error('Using API key from environment variable');
|
||||||
console.error('Note: To use OpenRouter Multimodal, add the API key to your environment variables:');
|
console.error('Note: To use OpenRouter Multimodal, add the API key to your environment variables:');
|
||||||
console.error(' OPENROUTER_API_KEY=your-api-key');
|
console.error(' OPENROUTER_API_KEY=your-api-key');
|
||||||
if (process.env.OPENROUTER_DEFAULT_MODEL) {
|
|
||||||
console.error(` Using default model: ${process.env.OPENROUTER_DEFAULT_MODEL}`);
|
const modelDisplay = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
|
||||||
} else {
|
console.error(` Using default model: ${modelDisplay}`);
|
||||||
console.error(' No default model set. You will need to specify a model in each request.');
|
console.error('Server is ready to process tool calls. Waiting for input...');
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/sea
|
|||||||
import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
|
import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
|
||||||
import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
|
import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
|
||||||
import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
|
import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
|
||||||
|
import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
|
||||||
|
|
||||||
export class ToolHandlers {
|
export class ToolHandlers {
|
||||||
private server: Server;
|
private server: Server;
|
||||||
@@ -51,7 +52,7 @@ export class ToolHandlers {
|
|||||||
tools: [
|
tools: [
|
||||||
// Chat Completion Tool
|
// Chat Completion Tool
|
||||||
{
|
{
|
||||||
name: 'chat_completion',
|
name: 'mcp_openrouter_chat_completion',
|
||||||
description: 'Send a message to OpenRouter.ai and get a response',
|
description: 'Send a message to OpenRouter.ai and get a response',
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
@@ -127,10 +128,34 @@ export class ToolHandlers {
|
|||||||
maxContextTokens: 200000
|
maxContextTokens: 200000
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Single Image Analysis Tool
|
||||||
|
{
|
||||||
|
name: 'mcp_openrouter_analyze_image',
|
||||||
|
description: 'Analyze an image using OpenRouter vision models',
|
||||||
|
inputSchema: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
image_path: {
|
||||||
|
type: 'string',
|
||||||
|
description: 'Path to the image file to analyze (must be an absolute path)',
|
||||||
|
},
|
||||||
|
question: {
|
||||||
|
type: 'string',
|
||||||
|
description: 'Question to ask about the image',
|
||||||
|
},
|
||||||
|
model: {
|
||||||
|
type: 'string',
|
||||||
|
description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required: ['image_path'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
// Multi-Image Analysis Tool
|
// Multi-Image Analysis Tool
|
||||||
{
|
{
|
||||||
name: 'multi_image_analysis',
|
name: 'mcp_openrouter_multi_image_analysis',
|
||||||
description: 'Analyze one or more images with a prompt and receive detailed responses',
|
description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
properties: {
|
properties: {
|
||||||
@@ -269,14 +294,21 @@ export class ToolHandlers {
|
|||||||
|
|
||||||
this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||||
switch (request.params.name) {
|
switch (request.params.name) {
|
||||||
case 'chat_completion':
|
case 'mcp_openrouter_chat_completion':
|
||||||
return handleChatCompletion({
|
return handleChatCompletion({
|
||||||
params: {
|
params: {
|
||||||
arguments: request.params.arguments as unknown as ChatCompletionToolRequest
|
arguments: request.params.arguments as unknown as ChatCompletionToolRequest
|
||||||
}
|
}
|
||||||
}, this.openai, this.defaultModel);
|
}, this.openai, this.defaultModel);
|
||||||
|
|
||||||
case 'multi_image_analysis':
|
case 'mcp_openrouter_analyze_image':
|
||||||
|
return handleAnalyzeImage({
|
||||||
|
params: {
|
||||||
|
arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
|
||||||
|
}
|
||||||
|
}, this.openai, this.defaultModel);
|
||||||
|
|
||||||
|
case 'mcp_openrouter_multi_image_analysis':
|
||||||
return handleMultiImageAnalysis({
|
return handleMultiImageAnalysis({
|
||||||
params: {
|
params: {
|
||||||
arguments: request.params.arguments as unknown as MultiImageAnalysisToolRequest
|
arguments: request.params.arguments as unknown as MultiImageAnalysisToolRequest
|
||||||
|
|||||||
@@ -4,10 +4,13 @@ import sharp from 'sharp';
|
|||||||
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
||||||
import OpenAI from 'openai';
|
import OpenAI from 'openai';
|
||||||
import fetch from 'node-fetch';
|
import fetch from 'node-fetch';
|
||||||
|
import { findSuitableFreeModel } from './multi-image-analysis.js';
|
||||||
|
|
||||||
|
// Default model for image analysis
|
||||||
|
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||||
|
|
||||||
export interface AnalyzeImageToolRequest {
|
export interface AnalyzeImageToolRequest {
|
||||||
image_path?: string;
|
image_path: string;
|
||||||
image_url?: string;
|
|
||||||
question?: string;
|
question?: string;
|
||||||
model?: string;
|
model?: string;
|
||||||
}
|
}
|
||||||
@@ -83,6 +86,70 @@ async function processImage(buffer: Buffer): Promise<string> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts the image at the given path to a base64 string
|
||||||
|
*/
|
||||||
|
async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
|
||||||
|
try {
|
||||||
|
// Ensure the image path is absolute
|
||||||
|
if (!path.isAbsolute(imagePath)) {
|
||||||
|
throw new McpError(
|
||||||
|
ErrorCode.InvalidParams,
|
||||||
|
'Image path must be absolute'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the file exists
|
||||||
|
try {
|
||||||
|
await fs.access(imagePath);
|
||||||
|
} catch (error) {
|
||||||
|
throw new McpError(
|
||||||
|
ErrorCode.InvalidParams,
|
||||||
|
`File not found: ${imagePath}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the file as a buffer
|
||||||
|
const buffer = await fs.readFile(imagePath);
|
||||||
|
|
||||||
|
// Determine MIME type from file extension
|
||||||
|
const extension = path.extname(imagePath).toLowerCase();
|
||||||
|
let mimeType: string;
|
||||||
|
|
||||||
|
switch (extension) {
|
||||||
|
case '.png':
|
||||||
|
mimeType = 'image/png';
|
||||||
|
break;
|
||||||
|
case '.jpg':
|
||||||
|
case '.jpeg':
|
||||||
|
mimeType = 'image/jpeg';
|
||||||
|
break;
|
||||||
|
case '.webp':
|
||||||
|
mimeType = 'image/webp';
|
||||||
|
break;
|
||||||
|
case '.gif':
|
||||||
|
mimeType = 'image/gif';
|
||||||
|
break;
|
||||||
|
case '.bmp':
|
||||||
|
mimeType = 'image/bmp';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
mimeType = 'application/octet-stream';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert buffer to base64
|
||||||
|
const base64 = buffer.toString('base64');
|
||||||
|
|
||||||
|
return { base64, mimeType };
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error converting image to base64:', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handler for analyzing a single image
|
||||||
|
*/
|
||||||
export async function handleAnalyzeImage(
|
export async function handleAnalyzeImage(
|
||||||
request: { params: { arguments: AnalyzeImageToolRequest } },
|
request: { params: { arguments: AnalyzeImageToolRequest } },
|
||||||
openai: OpenAI,
|
openai: OpenAI,
|
||||||
@@ -91,71 +158,62 @@ export async function handleAnalyzeImage(
|
|||||||
const args = request.params.arguments;
|
const args = request.params.arguments;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Validate image source
|
// Validate inputs
|
||||||
const imagePath = args.image_path;
|
if (!args.image_path) {
|
||||||
const imageUrl = args.image_url;
|
throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
|
||||||
|
|
||||||
if (!imagePath && !imageUrl) {
|
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'Either image_path or image_url must be provided');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize the path/url
|
if (!args.question) {
|
||||||
let imageSource: string;
|
throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
|
||||||
|
|
||||||
if (imageUrl) {
|
|
||||||
// Use the provided URL directly
|
|
||||||
imageSource = imageUrl;
|
|
||||||
} else if (imagePath) {
|
|
||||||
// For backward compatibility, try to handle the image_path
|
|
||||||
if (path.isAbsolute(imagePath)) {
|
|
||||||
// For absolute paths, use as a local file path
|
|
||||||
imageSource = imagePath;
|
|
||||||
} else {
|
|
||||||
// For relative paths, show a better error message
|
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute or use image_url with file:// prefix');
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// This shouldn't happen due to the check above, but TypeScript doesn't know that
|
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'No image source provided');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch and process the image
|
console.error(`Processing image: ${args.image_path}`);
|
||||||
const imageBuffer = await fetchImageAsBuffer(imageSource);
|
|
||||||
console.error(`Successfully read image buffer of size: ${imageBuffer.length}`);
|
|
||||||
|
|
||||||
// Process the image (resize if needed)
|
// Convert the image to base64
|
||||||
const base64Image = await processImage(imageBuffer);
|
const { base64, mimeType } = await imageToBase64(args.image_path);
|
||||||
|
|
||||||
// Select model
|
// Create the content array for the OpenAI API
|
||||||
const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
|
const content = [
|
||||||
|
|
||||||
// Prepare message with image
|
|
||||||
const messages = [
|
|
||||||
{
|
{
|
||||||
role: 'user',
|
type: 'text',
|
||||||
content: [
|
text: args.question
|
||||||
{
|
},
|
||||||
type: 'text',
|
{
|
||||||
text: args.question || "What's in this image?"
|
type: 'image_url',
|
||||||
},
|
image_url: {
|
||||||
{
|
url: `data:${mimeType};base64,${base64}`
|
||||||
type: 'image_url',
|
}
|
||||||
image_url: {
|
|
||||||
url: `data:image/jpeg;base64,${base64Image}`
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
console.error('Sending request to OpenRouter...');
|
// Select model with priority:
|
||||||
|
// 1. User-specified model
|
||||||
|
// 2. Default model from environment
|
||||||
|
// 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
|
||||||
|
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
|
||||||
|
|
||||||
// Call OpenRouter API
|
// If a model is specified but not our default free model, verify it exists
|
||||||
|
if (model !== DEFAULT_FREE_MODEL) {
|
||||||
|
try {
|
||||||
|
await openai.models.retrieve(model);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Specified model ${model} not found, falling back to auto-selection`);
|
||||||
|
model = await findSuitableFreeModel(openai);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.error(`Making API call with model: ${model}`);
|
||||||
|
|
||||||
|
// Make the API call
|
||||||
const completion = await openai.chat.completions.create({
|
const completion = await openai.chat.completions.create({
|
||||||
model,
|
model,
|
||||||
messages: messages as any,
|
messages: [{
|
||||||
|
role: 'user',
|
||||||
|
content
|
||||||
|
}] as any
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Return the analysis result
|
||||||
return {
|
return {
|
||||||
content: [
|
content: [
|
||||||
{
|
{
|
||||||
@@ -163,9 +221,13 @@ export async function handleAnalyzeImage(
|
|||||||
text: completion.choices[0].message.content || '',
|
text: completion.choices[0].message.content || '',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
metadata: {
|
||||||
|
model: completion.model,
|
||||||
|
usage: completion.usage
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error analyzing image:', error);
|
console.error('Error in image analysis:', error);
|
||||||
|
|
||||||
if (error instanceof McpError) {
|
if (error instanceof McpError) {
|
||||||
throw error;
|
throw error;
|
||||||
@@ -179,6 +241,10 @@ export async function handleAnalyzeImage(
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
isError: true,
|
isError: true,
|
||||||
|
metadata: {
|
||||||
|
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
|
||||||
|
error_message: error instanceof Error ? error.message : String(error)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,26 @@ import fetch from 'node-fetch';
|
|||||||
import sharp from 'sharp';
|
import sharp from 'sharp';
|
||||||
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
||||||
import OpenAI from 'openai';
|
import OpenAI from 'openai';
|
||||||
|
import path from 'path';
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import { tmpdir } from 'os';
|
||||||
|
// Remove uuid import as we'll use a simple random string generator instead
|
||||||
|
// import { v4 as uuidv4 } from 'uuid';
|
||||||
|
|
||||||
|
// Default model for image analysis
|
||||||
|
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||||
|
|
||||||
|
// Image processing constants
|
||||||
|
const MAX_DIMENSION = 800;
|
||||||
|
const JPEG_QUALITY = 80;
|
||||||
|
const MAX_RETRY_ATTEMPTS = 3;
|
||||||
|
const RETRY_DELAY = 1000; // ms
|
||||||
|
|
||||||
|
// Simple random ID generator to replace uuid
|
||||||
|
function generateRandomId(): string {
|
||||||
|
return Math.random().toString(36).substring(2, 15) +
|
||||||
|
Math.random().toString(36).substring(2, 15);
|
||||||
|
}
|
||||||
|
|
||||||
export interface MultiImageAnalysisToolRequest {
|
export interface MultiImageAnalysisToolRequest {
|
||||||
images: Array<{
|
images: Array<{
|
||||||
@@ -13,45 +33,139 @@ export interface MultiImageAnalysisToolRequest {
|
|||||||
model?: string;
|
model?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sleep function for retry mechanisms
|
||||||
|
*/
|
||||||
|
const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get MIME type from file extension or data URL
|
||||||
|
*/
|
||||||
|
function getMimeType(url: string): string {
|
||||||
|
if (url.startsWith('data:')) {
|
||||||
|
const match = url.match(/^data:([^;]+);/);
|
||||||
|
return match ? match[1] : 'application/octet-stream';
|
||||||
|
}
|
||||||
|
|
||||||
|
const extension = path.extname(url.split('?')[0]).toLowerCase();
|
||||||
|
|
||||||
|
switch (extension) {
|
||||||
|
case '.png': return 'image/png';
|
||||||
|
case '.jpg':
|
||||||
|
case '.jpeg': return 'image/jpeg';
|
||||||
|
case '.webp': return 'image/webp';
|
||||||
|
case '.gif': return 'image/gif';
|
||||||
|
case '.bmp': return 'image/bmp';
|
||||||
|
case '.svg': return 'image/svg+xml';
|
||||||
|
default: return 'application/octet-stream';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch image from various sources: data URLs, file paths, or remote URLs
|
||||||
|
*/
|
||||||
async function fetchImageAsBuffer(url: string): Promise<Buffer> {
|
async function fetchImageAsBuffer(url: string): Promise<Buffer> {
|
||||||
try {
|
try {
|
||||||
// Handle data URLs
|
// Handle data URLs
|
||||||
if (url.startsWith('data:')) {
|
if (url.startsWith('data:')) {
|
||||||
const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
|
const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
|
||||||
if (!matches || matches.length !== 3) {
|
if (!matches || matches.length !== 3) {
|
||||||
throw new Error('Invalid data URL');
|
throw new Error('Invalid data URL format');
|
||||||
}
|
}
|
||||||
return Buffer.from(matches[2], 'base64');
|
return Buffer.from(matches[2], 'base64');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle file URLs
|
// Handle file URLs with file:// protocol
|
||||||
if (url.startsWith('file://')) {
|
if (url.startsWith('file://')) {
|
||||||
const filePath = url.replace('file://', '');
|
const filePath = url.replace('file://', '');
|
||||||
const fs = await import('fs/promises');
|
try {
|
||||||
return await fs.readFile(filePath);
|
return await fs.readFile(filePath);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Error reading file at ${filePath}:`, error);
|
||||||
|
throw new Error(`Failed to read file: ${filePath}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle absolute and relative file paths
|
||||||
|
if (url.startsWith('/') || url.startsWith('./') || url.startsWith('../') || /^[A-Za-z]:\\/.test(url)) {
|
||||||
|
try {
|
||||||
|
return await fs.readFile(url);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Error reading file at ${url}:`, error);
|
||||||
|
throw new Error(`Failed to read file: ${url}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle http/https URLs
|
// Handle http/https URLs
|
||||||
const response = await fetch(url);
|
if (url.startsWith('http://') || url.startsWith('https://')) {
|
||||||
if (!response.ok) {
|
for (let attempt = 0; attempt < MAX_RETRY_ATTEMPTS; attempt++) {
|
||||||
throw new Error(`HTTP error! status: ${response.status}`);
|
try {
|
||||||
|
// Use AbortController for timeout instead of timeout option
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeoutId = setTimeout(() => controller.abort(), 15000);
|
||||||
|
|
||||||
|
const response = await fetch(url, {
|
||||||
|
signal: controller.signal,
|
||||||
|
headers: {
|
||||||
|
'User-Agent': 'OpenRouter-MCP-Server/1.0'
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Clear the timeout to prevent memory leaks
|
||||||
|
clearTimeout(timeoutId);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`HTTP error! status: ${response.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Buffer.from(await response.arrayBuffer());
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Error fetching URL (attempt ${attempt + 1}/${MAX_RETRY_ATTEMPTS}): ${url}`, error);
|
||||||
|
|
||||||
|
if (attempt < MAX_RETRY_ATTEMPTS - 1) {
|
||||||
|
// Exponential backoff with jitter
|
||||||
|
const delay = RETRY_DELAY * Math.pow(2, attempt) * (0.5 + Math.random() * 0.5);
|
||||||
|
await sleep(delay);
|
||||||
|
} else {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return Buffer.from(await response.arrayBuffer());
|
|
||||||
|
// If we get here, the URL format is unsupported
|
||||||
|
throw new Error(`Unsupported URL format: ${url}`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error fetching image from ${url}:`, error);
|
console.error(`Error fetching image from ${url}:`, error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TypeScript requires a return statement here, but this is unreachable
|
||||||
|
return Buffer.from([]);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function processImage(buffer: Buffer): Promise<string> {
|
/**
|
||||||
|
* Process and optimize image for API consumption
|
||||||
|
*/
|
||||||
|
async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
|
||||||
try {
|
try {
|
||||||
// Get image metadata
|
// Create a temporary directory for processing if needed
|
||||||
const metadata = await sharp(buffer).metadata();
|
const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
|
||||||
|
await fs.mkdir(tempDir, { recursive: true });
|
||||||
|
|
||||||
// Calculate dimensions to keep base64 size reasonable
|
// Get image info
|
||||||
const MAX_DIMENSION = 800;
|
let sharpInstance = sharp(buffer);
|
||||||
const JPEG_QUALITY = 80;
|
const metadata = await sharpInstance.metadata();
|
||||||
|
|
||||||
|
// Skip processing for small images
|
||||||
|
if (metadata.width && metadata.height &&
|
||||||
|
metadata.width <= MAX_DIMENSION &&
|
||||||
|
metadata.height <= MAX_DIMENSION &&
|
||||||
|
(mimeType === 'image/jpeg' || mimeType === 'image/webp')) {
|
||||||
|
return buffer.toString('base64');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resize larger images
|
||||||
if (metadata.width && metadata.height) {
|
if (metadata.width && metadata.height) {
|
||||||
const largerDimension = Math.max(metadata.width, metadata.height);
|
const largerDimension = Math.max(metadata.width, metadata.height);
|
||||||
if (largerDimension > MAX_DIMENSION) {
|
if (largerDimension > MAX_DIMENSION) {
|
||||||
@@ -59,44 +173,65 @@ async function processImage(buffer: Buffer): Promise<string> {
|
|||||||
? { width: MAX_DIMENSION }
|
? { width: MAX_DIMENSION }
|
||||||
: { height: MAX_DIMENSION };
|
: { height: MAX_DIMENSION };
|
||||||
|
|
||||||
const resizedBuffer = await sharp(buffer)
|
sharpInstance = sharpInstance.resize(resizeOptions);
|
||||||
.resize(resizeOptions)
|
|
||||||
.jpeg({ quality: JPEG_QUALITY })
|
|
||||||
.toBuffer();
|
|
||||||
|
|
||||||
return resizedBuffer.toString('base64');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no resizing needed, just convert to JPEG
|
// Convert to JPEG for consistency and small size
|
||||||
const jpegBuffer = await sharp(buffer)
|
const processedBuffer = await sharpInstance
|
||||||
.jpeg({ quality: JPEG_QUALITY })
|
.jpeg({ quality: JPEG_QUALITY })
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
|
|
||||||
return jpegBuffer.toString('base64');
|
return processedBuffer.toString('base64');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error processing image:', error);
|
console.error('Error processing image:', error);
|
||||||
throw error;
|
|
||||||
|
// If sharp processing fails, return the original buffer
|
||||||
|
// This is a fallback to ensure we don't completely fail on processing errors
|
||||||
|
console.error('Returning original image without processing');
|
||||||
|
return buffer.toString('base64');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find a suitable free model with vision capabilities
|
/**
|
||||||
async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
* Find a suitable free model with vision capabilities, defaulting to Qwen
|
||||||
|
*/
|
||||||
|
export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
||||||
try {
|
try {
|
||||||
// Query available models with 'free' in their name
|
// First try with an exact match for our preferred model
|
||||||
const modelsResponse = await openai.models.list();
|
const preferredModel = DEFAULT_FREE_MODEL;
|
||||||
if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
|
|
||||||
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
|
try {
|
||||||
|
// Check if our preferred model is available
|
||||||
|
const modelInfo = await openai.models.retrieve(preferredModel);
|
||||||
|
if (modelInfo && modelInfo.id) {
|
||||||
|
console.error(`Using preferred model: ${preferredModel}`);
|
||||||
|
return preferredModel;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Preferred model ${preferredModel} not available, searching for alternatives...`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter models with 'free' in ID and multimodal capabilities
|
// Query available models
|
||||||
const freeModels = modelsResponse.data
|
const modelsResponse = await openai.models.list();
|
||||||
.filter(model => model.id.includes('free'))
|
if (!modelsResponse?.data || modelsResponse.data.length === 0) {
|
||||||
|
console.error('No models found, using default fallback model');
|
||||||
|
return DEFAULT_FREE_MODEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First, try to find free vision models
|
||||||
|
const freeVisionModels = modelsResponse.data
|
||||||
|
.filter(model => {
|
||||||
|
const modelId = model.id.toLowerCase();
|
||||||
|
return modelId.includes('free') &&
|
||||||
|
(modelId.includes('vl') || modelId.includes('vision') || modelId.includes('claude') ||
|
||||||
|
modelId.includes('gemini') || modelId.includes('gpt-4') || modelId.includes('qwen'));
|
||||||
|
})
|
||||||
.map(model => {
|
.map(model => {
|
||||||
// Try to extract context length from the model object
|
// Extract context length if available
|
||||||
let contextLength = 0;
|
let contextLength = 0;
|
||||||
try {
|
try {
|
||||||
const modelAny = model as any; // Cast to any to access non-standard properties
|
const modelAny = model as any;
|
||||||
if (typeof modelAny.context_length === 'number') {
|
if (typeof modelAny.context_length === 'number') {
|
||||||
contextLength = modelAny.context_length;
|
contextLength = modelAny.context_length;
|
||||||
} else if (modelAny.context_window) {
|
} else if (modelAny.context_window) {
|
||||||
@@ -112,21 +247,26 @@ async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
if (freeModels.length === 0) {
|
if (freeVisionModels.length > 0) {
|
||||||
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback if no free models found
|
// Sort by context length and pick the one with the largest context window
|
||||||
|
freeVisionModels.sort((a, b) => b.contextLength - a.contextLength);
|
||||||
|
const selectedModel = freeVisionModels[0].id;
|
||||||
|
console.error(`Selected free vision model: ${selectedModel} with context length: ${freeVisionModels[0].contextLength}`);
|
||||||
|
return selectedModel;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by context length and pick the one with the largest context window
|
// If no free vision models found, fallback to our default
|
||||||
freeModels.sort((a, b) => b.contextLength - a.contextLength);
|
console.error('No free vision models found, using default fallback model');
|
||||||
console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
|
return DEFAULT_FREE_MODEL;
|
||||||
|
|
||||||
return freeModels[0].id;
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error finding suitable free model:', error);
|
console.error('Error finding suitable model:', error);
|
||||||
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
|
return DEFAULT_FREE_MODEL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process and analyze multiple images using OpenRouter
|
||||||
|
*/
|
||||||
export async function handleMultiImageAnalysis(
|
export async function handleMultiImageAnalysis(
|
||||||
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
||||||
openai: OpenAI,
|
openai: OpenAI,
|
||||||
@@ -150,43 +290,65 @@ export async function handleMultiImageAnalysis(
|
|||||||
text: args.prompt
|
text: args.prompt
|
||||||
}];
|
}];
|
||||||
|
|
||||||
|
// Track successful and failed images for reporting
|
||||||
|
const successfulImages = [];
|
||||||
|
const failedImages = [];
|
||||||
|
|
||||||
// Process each image
|
// Process each image
|
||||||
for (const image of args.images) {
|
for (const [index, image] of args.images.entries()) {
|
||||||
try {
|
try {
|
||||||
|
console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
|
||||||
|
|
||||||
|
// Get MIME type
|
||||||
|
const mimeType = getMimeType(image.url);
|
||||||
|
|
||||||
// Fetch and process the image
|
// Fetch and process the image
|
||||||
const imageBuffer = await fetchImageAsBuffer(image.url);
|
const imageBuffer = await fetchImageAsBuffer(image.url);
|
||||||
const base64Image = await processImage(imageBuffer);
|
const base64Image = await processImage(imageBuffer, mimeType);
|
||||||
|
|
||||||
|
// Use JPEG as the output format for consistency
|
||||||
|
const outputMimeType = 'image/jpeg';
|
||||||
|
|
||||||
// Add to content
|
// Add to content
|
||||||
content.push({
|
content.push({
|
||||||
type: 'image_url',
|
type: 'image_url',
|
||||||
image_url: {
|
image_url: {
|
||||||
url: `data:image/jpeg;base64,${base64Image}`
|
url: `data:${outputMimeType};base64,${base64Image}`
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
successfulImages.push(image.url);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error processing image ${image.url}:`, error);
|
console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
|
||||||
|
failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
|
||||||
// Continue with other images if one fails
|
// Continue with other images if one fails
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no images were successfully processed
|
// If no images were successfully processed
|
||||||
if (content.length === 1) {
|
if (content.length === 1) {
|
||||||
throw new Error('Failed to process any of the provided images');
|
const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
|
||||||
|
throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select model with priority:
|
// Select model with priority:
|
||||||
// 1. User-specified model
|
// 1. User-specified model
|
||||||
// 2. Default model from environment
|
// 2. Default model from environment
|
||||||
// 3. Free model with vision capabilities (selected automatically)
|
// 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
|
||||||
let model = args.model || defaultModel;
|
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
|
||||||
|
|
||||||
if (!model) {
|
// If a model is specified but not our default free model, verify it exists
|
||||||
model = await findSuitableFreeModel(openai);
|
if (model !== DEFAULT_FREE_MODEL) {
|
||||||
console.error(`Using auto-selected model: ${model}`);
|
try {
|
||||||
|
await openai.models.retrieve(model);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Specified model ${model} not found, falling back to auto-selection`);
|
||||||
|
model = await findSuitableFreeModel(openai);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.error(`Making API call with model: ${model}`);
|
console.error(`Making API call with model: ${model}`);
|
||||||
|
console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
|
||||||
|
|
||||||
// Make the API call
|
// Make the API call
|
||||||
const completion = await openai.chat.completions.create({
|
const completion = await openai.chat.completions.create({
|
||||||
@@ -197,13 +359,32 @@ export async function handleMultiImageAnalysis(
|
|||||||
}] as any
|
}] as any
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Format the response
|
||||||
|
let responseText = completion.choices[0].message.content || '';
|
||||||
|
|
||||||
|
// Add information about failed images if any
|
||||||
|
if (failedImages.length > 0) {
|
||||||
|
const formattedErrors = args.markdown_response !== false
|
||||||
|
? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
|
||||||
|
: `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
|
||||||
|
|
||||||
|
responseText += formattedErrors;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the analysis result
|
||||||
return {
|
return {
|
||||||
content: [
|
content: [
|
||||||
{
|
{
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: completion.choices[0].message.content || '',
|
text: responseText,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
metadata: {
|
||||||
|
model: completion.model,
|
||||||
|
usage: completion.usage,
|
||||||
|
successful_images: successfulImages.length,
|
||||||
|
failed_images: failedImages.length
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error in multi-image analysis:', error);
|
console.error('Error in multi-image analysis:', error);
|
||||||
@@ -220,6 +401,10 @@ export async function handleMultiImageAnalysis(
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
isError: true,
|
isError: true,
|
||||||
|
metadata: {
|
||||||
|
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
|
||||||
|
error_message: error instanceof Error ? error.message : String(error)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user