diff --git a/src/index.ts b/src/index.ts
index 3ac49d3..585a315 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -5,14 +5,17 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
 import { ToolHandlers } from './tool-handlers.js';
 
+// Define the default model to use when none is specified
+const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
+
 class OpenRouterMultimodalServer {
   private server: Server;
   private toolHandlers!: ToolHandlers; // Using definite assignment assertion
 
   constructor() {
-    // Get API key and default model from environment variables
+    // Retrieve API key and default model from environment variables
     const apiKey = process.env.OPENROUTER_API_KEY;
-    const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL;
+    const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
 
     // Check if API key is provided
     if (!apiKey) {
@@ -55,11 +58,10 @@ class OpenRouterMultimodalServer {
     console.error('Using API key from environment variable');
     console.error('Note: To use OpenRouter Multimodal, add the API key to your environment variables:');
     console.error('  OPENROUTER_API_KEY=your-api-key');
-    if (process.env.OPENROUTER_DEFAULT_MODEL) {
-      console.error(`  Using default model: ${process.env.OPENROUTER_DEFAULT_MODEL}`);
-    } else {
-      console.error('  No default model set. You will need to specify a model in each request.');
-    }
+
+    const modelDisplay = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
+    console.error(`  Using default model: ${modelDisplay}`);
+
     console.error('Server is ready to process tool calls. Waiting for input...');
   }
 }
diff --git a/src/tool-handlers.ts b/src/tool-handlers.ts
index 943332c..6811ed1 100644
--- a/src/tool-handlers.ts
+++ b/src/tool-handlers.ts
@@ -16,6 +16,7 @@ import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/sea
 import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
 import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
 import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
+import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
 
 export class ToolHandlers {
   private server: Server;
@@ -51,7 +52,7 @@ export class ToolHandlers {
       tools: [
         // Chat Completion Tool
         {
-          name: 'chat_completion',
+          name: 'mcp_openrouter_chat_completion',
           description: 'Send a message to OpenRouter.ai and get a response',
           inputSchema: {
             type: 'object',
@@ -127,10 +128,34 @@
           maxContextTokens: 200000
         },
 
+        // Single Image Analysis Tool
+        {
+          name: 'mcp_openrouter_analyze_image',
+          description: 'Analyze an image using OpenRouter vision models',
+          inputSchema: {
+            type: 'object',
+            properties: {
+              image_path: {
+                type: 'string',
+                description: 'Path to the image file to analyze (must be an absolute path)',
+              },
+              question: {
+                type: 'string',
+                description: 'Question to ask about the image',
+              },
+              model: {
+                type: 'string',
+                description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
+              },
+            },
+            required: ['image_path'],
+          },
+        },
+
         // Multi-Image Analysis Tool
         {
-          name: 'multi_image_analysis',
-          description: 'Analyze one or more images with a prompt and receive detailed responses',
+          name: 'mcp_openrouter_multi_image_analysis',
+          description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
           inputSchema: {
             type: 'object',
             properties: {
@@ -269,14 +294,21 @@ export class ToolHandlers {
     this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
       switch (request.params.name) {
-        case 'chat_completion':
+        case 'mcp_openrouter_chat_completion':
           return handleChatCompletion({
             params: {
               arguments: request.params.arguments as unknown as ChatCompletionToolRequest
             }
           }, this.openai, this.defaultModel);
 
-        case 'multi_image_analysis':
+        case 'mcp_openrouter_analyze_image':
+          return handleAnalyzeImage({
+            params: {
+              arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
+            }
+          }, this.openai, this.defaultModel);
+
+        case 'mcp_openrouter_multi_image_analysis':
           return handleMultiImageAnalysis({
             params: {
               arguments: request.params.arguments as unknown as MultiImageAnalysisToolRequest
diff --git a/src/tool-handlers/analyze-image.ts b/src/tool-handlers/analyze-image.ts
index 7f5061d..13e72ef 100644
--- a/src/tool-handlers/analyze-image.ts
+++ b/src/tool-handlers/analyze-image.ts
@@ -4,10 +4,13 @@ import sharp from 'sharp';
 import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
 import OpenAI from 'openai';
 import fetch from 'node-fetch';
+import { findSuitableFreeModel } from './multi-image-analysis.js';
+
+// Default model for image analysis
+const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
 
 export interface AnalyzeImageToolRequest {
-  image_path?: string;
-  image_url?: string;
+  image_path: string;
   question?: string;
   model?: string;
 }
@@ -83,6 +86,70 @@ async function processImage(buffer: Buffer): Promise<string> {
   }
 }
 
+/**
+ * Converts the image at the given path to a base64 string
+ */
+async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
+  try {
+    // Ensure the image path is absolute
+    if (!path.isAbsolute(imagePath)) {
+      throw new McpError(
+        ErrorCode.InvalidParams,
+        'Image path must be absolute'
+      );
+    }
+
+    // Check if the file exists
+    try {
+      await fs.access(imagePath);
+    } catch (error) {
+      throw new McpError(
+        ErrorCode.InvalidParams,
+        `File not found: ${imagePath}`
+      );
+    }
+
+    // Read the file as a buffer
+    const buffer = await fs.readFile(imagePath);
+
+    // Determine MIME type from file extension
+    const extension = path.extname(imagePath).toLowerCase();
+    let mimeType: string;
+
+    switch (extension) {
+      case '.png':
+        mimeType = 'image/png';
+        break;
+      case '.jpg':
+      case '.jpeg':
+        mimeType = 'image/jpeg';
+        break;
+      case '.webp':
+        mimeType = 'image/webp';
+        break;
+      case '.gif':
+        mimeType = 'image/gif';
+        break;
+      case '.bmp':
+        mimeType = 'image/bmp';
+        break;
+      default:
+        mimeType = 'application/octet-stream';
+    }
+
+    // Convert buffer to base64
+    const base64 = buffer.toString('base64');
+
+    return { base64, mimeType };
+  } catch (error) {
+    console.error('Error converting image to base64:', error);
+    throw error;
+  }
+}
+
+/**
+ * Handler for analyzing a single image
+ */
 export async function handleAnalyzeImage(
   request: { params: { arguments: AnalyzeImageToolRequest } },
   openai: OpenAI,
@@ -91,71 +158,62 @@ export async function handleAnalyzeImage(
   const args = request.params.arguments;
 
   try {
-    // Validate image source
-    const imagePath = args.image_path;
-    const imageUrl = args.image_url;
-
-    if (!imagePath && !imageUrl) {
-      throw new McpError(ErrorCode.InvalidParams, 'Either image_path or image_url must be provided');
+    // Validate inputs
+    if (!args.image_path) {
+      throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
     }
 
-    // Normalize the path/url
-    let imageSource: string;
-
-    if (imageUrl) {
-      // Use the provided URL directly
-      imageSource = imageUrl;
-    } else if (imagePath) {
-      // For backward compatibility, try to handle the image_path
-      if (path.isAbsolute(imagePath)) {
-        // For absolute paths, use as a local file path
-        imageSource = imagePath;
-      } else {
-        // For relative paths, show a better error message
-        throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute or use image_url with file:// prefix');
-      }
-    } else {
-      // This shouldn't happen due to the check above, but TypeScript doesn't know that
-      throw new McpError(ErrorCode.InvalidParams, 'No image source provided');
+    if (!args.question) {
+      throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
     }
 
-    // Fetch and process the image
-    const imageBuffer = await fetchImageAsBuffer(imageSource);
-    console.error(`Successfully read image buffer of size: ${imageBuffer.length}`);
+    console.error(`Processing image: ${args.image_path}`);
 
-    // Process the image (resize if needed)
-    const base64Image = await processImage(imageBuffer);
+    // Convert the image to base64
+    const { base64, mimeType } = await imageToBase64(args.image_path);
 
-    // Select model
-    const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
-
-    // Prepare message with image
-    const messages = [
+    // Create the content array for the OpenAI API
+    const content = [
       {
-        role: 'user',
-        content: [
-          {
-            type: 'text',
-            text: args.question || "What's in this image?"
-          },
-          {
-            type: 'image_url',
-            image_url: {
-              url: `data:image/jpeg;base64,${base64Image}`
-            }
-          }
-        ]
+        type: 'text',
+        text: args.question
+      },
+      {
+        type: 'image_url',
+        image_url: {
+          url: `data:${mimeType};base64,${base64}`
+        }
       }
     ];
 
-    console.error('Sending request to OpenRouter...');
+    // Select model with priority:
+    // 1. User-specified model
+    // 2. Default model from environment
+    // 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
+    let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
 
-    // Call OpenRouter API
+    // If a model is specified but not our default free model, verify it exists
+    if (model !== DEFAULT_FREE_MODEL) {
+      try {
+        await openai.models.retrieve(model);
+      } catch (error) {
+        console.error(`Specified model ${model} not found, falling back to auto-selection`);
+        model = await findSuitableFreeModel(openai);
+      }
+    }
+
+    console.error(`Making API call with model: ${model}`);
+
+    // Make the API call
     const completion = await openai.chat.completions.create({
       model,
-      messages: messages as any,
+      messages: [{
+        role: 'user',
+        content
+      }] as any
    });
 
+    // Return the analysis result
     return {
       content: [
         {
@@ -163,9 +221,13 @@ export async function handleAnalyzeImage(
           text: completion.choices[0].message.content || '',
         },
       ],
+      metadata: {
+        model: completion.model,
+        usage: completion.usage
+      }
     };
   } catch (error) {
-    console.error('Error analyzing image:', error);
+    console.error('Error in image analysis:', error);
 
     if (error instanceof McpError) {
       throw error;
@@ -179,6 +241,10 @@ export async function handleAnalyzeImage(
         },
       ],
       isError: true,
+      metadata: {
+        error_type: error instanceof Error ? error.constructor.name : 'Unknown',
+        error_message: error instanceof Error ? error.message : String(error)
+      }
     };
   }
 }
diff --git a/src/tool-handlers/multi-image-analysis.ts b/src/tool-handlers/multi-image-analysis.ts
index 2b7b693..850c45e 100644
--- a/src/tool-handlers/multi-image-analysis.ts
+++ b/src/tool-handlers/multi-image-analysis.ts
@@ -2,6 +2,26 @@ import fetch from 'node-fetch';
 import sharp from 'sharp';
 import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
 import OpenAI from 'openai';
+import path from 'path';
+import { promises as fs } from 'fs';
+import { tmpdir } from 'os';
+// Remove uuid import as we'll use a simple random string generator instead
+// import { v4 as uuidv4 } from 'uuid';
+
+// Default model for image analysis
+const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
+
+// Image processing constants
+const MAX_DIMENSION = 800;
+const JPEG_QUALITY = 80;
+const MAX_RETRY_ATTEMPTS = 3;
+const RETRY_DELAY = 1000; // ms
+
+// Simple random ID generator to replace uuid
+function generateRandomId(): string {
+  return Math.random().toString(36).substring(2, 15) +
+         Math.random().toString(36).substring(2, 15);
+}
 
 export interface MultiImageAnalysisToolRequest {
   images: Array<{
@@ -13,45 +33,139 @@ export interface MultiImageAnalysisToolRequest {
   model?: string;
 }
 
+/**
+ * Sleep function for retry mechanisms
+ */
+const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+/**
+ * Get MIME type from file extension or data URL
+ */
+function getMimeType(url: string): string {
+  if (url.startsWith('data:')) {
+    const match = url.match(/^data:([^;]+);/);
+    return match ? match[1] : 'application/octet-stream';
+  }
+
+  const extension = path.extname(url.split('?')[0]).toLowerCase();
+
+  switch (extension) {
+    case '.png': return 'image/png';
+    case '.jpg':
+    case '.jpeg': return 'image/jpeg';
+    case '.webp': return 'image/webp';
+    case '.gif': return 'image/gif';
+    case '.bmp': return 'image/bmp';
+    case '.svg': return 'image/svg+xml';
+    default: return 'application/octet-stream';
+  }
+}
+
+/**
+ * Fetch image from various sources: data URLs, file paths, or remote URLs
+ */
 async function fetchImageAsBuffer(url: string): Promise<Buffer> {
   try {
     // Handle data URLs
     if (url.startsWith('data:')) {
       const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
       if (!matches || matches.length !== 3) {
-        throw new Error('Invalid data URL');
+        throw new Error('Invalid data URL format');
       }
       return Buffer.from(matches[2], 'base64');
     }
 
-    // Handle file URLs
+    // Handle file URLs with file:// protocol
     if (url.startsWith('file://')) {
       const filePath = url.replace('file://', '');
-      const fs = await import('fs/promises');
-      return await fs.readFile(filePath);
+      try {
+        return await fs.readFile(filePath);
+      } catch (error) {
+        console.error(`Error reading file at ${filePath}:`, error);
+        throw new Error(`Failed to read file: ${filePath}`);
+      }
+    }
+
+    // Handle absolute and relative file paths
+    if (url.startsWith('/') || url.startsWith('./') || url.startsWith('../') || /^[A-Za-z]:\\/.test(url)) {
+      try {
+        return await fs.readFile(url);
+      } catch (error) {
+        console.error(`Error reading file at ${url}:`, error);
+        throw new Error(`Failed to read file: ${url}`);
+      }
     }
 
     // Handle http/https URLs
-    const response = await fetch(url);
-    if (!response.ok) {
-      throw new Error(`HTTP error! status: ${response.status}`);
+    if (url.startsWith('http://') || url.startsWith('https://')) {
+      for (let attempt = 0; attempt < MAX_RETRY_ATTEMPTS; attempt++) {
+        try {
+          // Use AbortController for timeout instead of timeout option
+          const controller = new AbortController();
+          const timeoutId = setTimeout(() => controller.abort(), 15000);
+
+          const response = await fetch(url, {
+            signal: controller.signal,
+            headers: {
+              'User-Agent': 'OpenRouter-MCP-Server/1.0'
+            }
+          });
+
+          // Clear the timeout to prevent memory leaks
+          clearTimeout(timeoutId);
+
+          if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+          }
+
+          return Buffer.from(await response.arrayBuffer());
+        } catch (error) {
+          console.error(`Error fetching URL (attempt ${attempt + 1}/${MAX_RETRY_ATTEMPTS}): ${url}`, error);
+
+          if (attempt < MAX_RETRY_ATTEMPTS - 1) {
+            // Exponential backoff with jitter
+            const delay = RETRY_DELAY * Math.pow(2, attempt) * (0.5 + Math.random() * 0.5);
+            await sleep(delay);
+          } else {
+            throw error;
+          }
+        }
+      }
     }
-    return Buffer.from(await response.arrayBuffer());
+
+    // If we get here, the URL format is unsupported
+    throw new Error(`Unsupported URL format: ${url}`);
   } catch (error) {
     console.error(`Error fetching image from ${url}:`, error);
     throw error;
   }
+
+  // TypeScript requires a return statement here, but this is unreachable
+  return Buffer.from([]);
 }
 
-async function processImage(buffer: Buffer): Promise<string> {
+/**
+ * Process and optimize image for API consumption
+ */
+async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
   try {
-    // Get image metadata
-    const metadata = await sharp(buffer).metadata();
+    // Create a temporary directory for processing if needed
+    const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
+    await fs.mkdir(tempDir, { recursive: true });
 
-    // Calculate dimensions to keep base64 size reasonable
-    const MAX_DIMENSION = 800;
-    const JPEG_QUALITY = 80;
+    // Get image info
+    let sharpInstance = sharp(buffer);
+    const metadata = await sharpInstance.metadata();
 
+    // Skip processing for small images
+    if (metadata.width && metadata.height &&
+        metadata.width <= MAX_DIMENSION &&
+        metadata.height <= MAX_DIMENSION &&
+        (mimeType === 'image/jpeg' || mimeType === 'image/webp')) {
+      return buffer.toString('base64');
+    }
+
+    // Resize larger images
     if (metadata.width && metadata.height) {
       const largerDimension = Math.max(metadata.width, metadata.height);
       if (largerDimension > MAX_DIMENSION) {
@@ -59,44 +173,65 @@ async function processImage(buffer: Buffer): Promise<string> {
           ? { width: MAX_DIMENSION }
           : { height: MAX_DIMENSION };
 
-        const resizedBuffer = await sharp(buffer)
-          .resize(resizeOptions)
-          .jpeg({ quality: JPEG_QUALITY })
-          .toBuffer();
-
-        return resizedBuffer.toString('base64');
+        sharpInstance = sharpInstance.resize(resizeOptions);
       }
     }
 
-    // If no resizing needed, just convert to JPEG
-    const jpegBuffer = await sharp(buffer)
+    // Convert to JPEG for consistency and small size
+    const processedBuffer = await sharpInstance
       .jpeg({ quality: JPEG_QUALITY })
       .toBuffer();
 
-    return jpegBuffer.toString('base64');
+    return processedBuffer.toString('base64');
   } catch (error) {
     console.error('Error processing image:', error);
-    throw error;
+
+    // If sharp processing fails, return the original buffer
+    // This is a fallback to ensure we don't completely fail on processing errors
+    console.error('Returning original image without processing');
+    return buffer.toString('base64');
   }
 }
 
-// Find a suitable free model with vision capabilities
-async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
+/**
+ * Find a suitable free model with vision capabilities, defaulting to Qwen
+ */
+export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
   try {
-    // Query available models with 'free' in their name
-    const modelsResponse = await openai.models.list();
-    if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
-      return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
+    // First try with an exact match for our preferred model
+    const preferredModel = DEFAULT_FREE_MODEL;
+
+    try {
+      // Check if our preferred model is available
+      const modelInfo = await openai.models.retrieve(preferredModel);
+      if (modelInfo && modelInfo.id) {
+        console.error(`Using preferred model: ${preferredModel}`);
+        return preferredModel;
+      }
+    } catch (error) {
+      console.error(`Preferred model ${preferredModel} not available, searching for alternatives...`);
     }
 
-    // Filter models with 'free' in ID and multimodal capabilities
-    const freeModels = modelsResponse.data
-      .filter(model => model.id.includes('free'))
+    // Query available models
+    const modelsResponse = await openai.models.list();
+    if (!modelsResponse?.data || modelsResponse.data.length === 0) {
+      console.error('No models found, using default fallback model');
+      return DEFAULT_FREE_MODEL;
+    }
+
+    // First, try to find free vision models
+    const freeVisionModels = modelsResponse.data
+      .filter(model => {
+        const modelId = model.id.toLowerCase();
+        return modelId.includes('free') &&
+          (modelId.includes('vl') || modelId.includes('vision') || modelId.includes('claude') ||
+           modelId.includes('gemini') || modelId.includes('gpt-4') || modelId.includes('qwen'));
+      })
       .map(model => {
-        // Try to extract context length from the model object
+        // Extract context length if available
        let contextLength = 0;
        try {
-          const modelAny = model as any; // Cast to any to access non-standard properties
+          const modelAny = model as any;
           if (typeof modelAny.context_length === 'number') {
             contextLength = modelAny.context_length;
           } else if (modelAny.context_window) {
@@ -112,21 +247,26 @@ async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
         };
       });
 
-    if (freeModels.length === 0) {
-      return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback if no free models found
+    if (freeVisionModels.length > 0) {
+      // Sort by context length and pick the one with the largest context window
+      freeVisionModels.sort((a, b) => b.contextLength - a.contextLength);
+      const selectedModel = freeVisionModels[0].id;
+      console.error(`Selected free vision model: ${selectedModel} with context length: ${freeVisionModels[0].contextLength}`);
+      return selectedModel;
     }
 
-    // Sort by context length and pick the one with the largest context window
-    freeModels.sort((a, b) => b.contextLength - a.contextLength);
-    console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
-
-    return freeModels[0].id;
+    // If no free vision models found, fallback to our default
+    console.error('No free vision models found, using default fallback model');
+    return DEFAULT_FREE_MODEL;
   } catch (error) {
-    console.error('Error finding suitable free model:', error);
-    return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
+    console.error('Error finding suitable model:', error);
+    return DEFAULT_FREE_MODEL;
   }
 }
 
+/**
+ * Process and analyze multiple images using OpenRouter
+ */
 export async function handleMultiImageAnalysis(
   request: { params: { arguments: MultiImageAnalysisToolRequest } },
   openai: OpenAI,
@@ -150,43 +290,65 @@ export async function handleMultiImageAnalysis(
       text: args.prompt
     }];
 
+    // Track successful and failed images for reporting
+    const successfulImages = [];
+    const failedImages = [];
+
     // Process each image
-    for (const image of args.images) {
+    for (const [index, image] of args.images.entries()) {
       try {
+        console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
+
+        // Get MIME type
+        const mimeType = getMimeType(image.url);
+
         // Fetch and process the image
         const imageBuffer = await fetchImageAsBuffer(image.url);
-        const base64Image = await processImage(imageBuffer);
+        const base64Image = await processImage(imageBuffer, mimeType);
+
+        // Use JPEG as the output format for consistency
+        const outputMimeType = 'image/jpeg';
 
         // Add to content
         content.push({
           type: 'image_url',
           image_url: {
-            url: `data:image/jpeg;base64,${base64Image}`
+            url: `data:${outputMimeType};base64,${base64Image}`
           }
         });
+
+        successfulImages.push(image.url);
       } catch (error) {
-        console.error(`Error processing image ${image.url}:`, error);
+        console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
+        failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
         // Continue with other images if one fails
       }
     }
 
     // If no images were successfully processed
     if (content.length === 1) {
-      throw new Error('Failed to process any of the provided images');
+      const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
+      throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
     }
 
     // Select model with priority:
     // 1. User-specified model
     // 2. Default model from environment
-    // 3. Free model with vision capabilities (selected automatically)
-    let model = args.model || defaultModel;
+    // 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
+    let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
 
-    if (!model) {
-      model = await findSuitableFreeModel(openai);
-      console.error(`Using auto-selected model: ${model}`);
+    // If a model is specified but not our default free model, verify it exists
+    if (model !== DEFAULT_FREE_MODEL) {
+      try {
+        await openai.models.retrieve(model);
+      } catch (error) {
+        console.error(`Specified model ${model} not found, falling back to auto-selection`);
+        model = await findSuitableFreeModel(openai);
+      }
     }
 
     console.error(`Making API call with model: ${model}`);
+    console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
 
     // Make the API call
     const completion = await openai.chat.completions.create({
@@ -197,13 +359,32 @@ export async function handleMultiImageAnalysis(
       }] as any
     });
 
+    // Format the response
+    let responseText = completion.choices[0].message.content || '';
+
+    // Add information about failed images if any
+    if (failedImages.length > 0) {
+      const formattedErrors = args.markdown_response !== false
+        ? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
+        : `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
+
+      responseText += formattedErrors;
+    }
+
+    // Return the analysis result
     return {
       content: [
         {
           type: 'text',
-          text: completion.choices[0].message.content || '',
+          text: responseText,
         },
       ],
+      metadata: {
+        model: completion.model,
+        usage: completion.usage,
+        successful_images: successfulImages.length,
+        failed_images: failedImages.length
+      }
     };
   } catch (error) {
     console.error('Error in multi-image analysis:', error);
@@ -220,6 +401,10 @@ export async function handleMultiImageAnalysis(
         },
       ],
       isError: true,
+      metadata: {
+        error_type: error instanceof Error ? error.constructor.name : 'Unknown',
+        error_message: error instanceof Error ? error.message : String(error)
+      }
     };
   }
 }
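
A minimal client-side sketch (not part of the patch) of how the renamed `mcp_openrouter_analyze_image` tool might be exercised end-to-end, assuming the TypeScript MCP SDK client API and a stdio build of this server. The server command path, the placeholder API key, and the image path below are illustrative assumptions; the tool name and argument shape follow the `inputSchema` added in `src/tool-handlers.ts` above.

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

async function main() {
  // Spawn the server over stdio (assumed build output path and placeholder key)
  const transport = new StdioClientTransport({
    command: 'node',
    args: ['build/index.js'],
    env: { OPENROUTER_API_KEY: 'your-api-key' },
  });

  const client = new Client({ name: 'openrouter-smoke-test', version: '0.0.1' });
  await client.connect(transport);

  // Call the single-image analysis tool; image_path must be absolute per the new validation,
  // and model is optional — the server falls back to the default free vision model.
  const result = await client.callTool({
    name: 'mcp_openrouter_analyze_image',
    arguments: {
      image_path: '/tmp/example.png',
      question: 'What is shown in this image?',
    },
  });

  console.log(JSON.stringify(result, null, 2));
  await client.close();
}

main().catch(console.error);
```

The same call shape applies to `mcp_openrouter_multi_image_analysis`, with an `images` array and a `prompt` instead of a single `image_path` and `question`.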