diff --git a/README.md b/README.md index 3ffa4a5..6000d49 100644 --- a/README.md +++ b/README.md @@ -192,22 +192,6 @@ use_mcp_tool({ }); ``` -### analyze_image - -Analyze a single image with an optional question: - -```javascript -use_mcp_tool({ - server_name: "openrouter", - tool_name: "analyze_image", - arguments: { - image_path: "/absolute/path/to/image.jpg", - question: "What objects are in this image?", // Optional - model: "anthropic/claude-3.5-sonnet" // Optional if default is set - } -}); -``` - ### multi_image_analysis Analyze multiple images with a single prompt: @@ -329,3 +313,10 @@ npm run build ## License MIT License + +## Version 1.2.0 Updates +- Simplified image analysis by consolidating all functionality into the `multi_image_analysis` tool +- Added automatic selection of free models with the largest context window when no model is specified +- Improved handling of various image formats (file://, http://, data:) +- Enhanced error handling and logging for better troubleshooting +- Removed the `analyze_image` tool to eliminate confusion and streamline the interface diff --git a/package.json b/package.json index 101c3e9..b639a74 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@stabgan/openrouter-mcp-multimodal", - "version": "1.1.0", + "version": "1.2.0", "description": "MCP server for OpenRouter providing text chat and image analysis tools", "type": "module", "main": "dist/index.js", diff --git a/src/tool-handlers.ts b/src/tool-handlers.ts index 3c9ced6..943332c 100644 --- a/src/tool-handlers.ts +++ b/src/tool-handlers.ts @@ -15,7 +15,6 @@ import { handleChatCompletion, ChatCompletionToolRequest } from './tool-handlers import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/search-models.js'; import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js'; import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js'; -import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js'; import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js'; export class ToolHandlers { @@ -128,41 +127,10 @@ export class ToolHandlers { maxContextTokens: 200000 }, - // Image Analysis Tool - { - name: 'analyze_image', - description: 'Analyze an image using OpenRouter vision models', - inputSchema: { - type: 'object', - properties: { - image_path: { - type: 'string', - description: 'Path to the image file to analyze (must be an absolute path)', - }, - image_url: { - type: 'string', - description: 'URL or data URL of the image (can be a file:// URL, http(s):// URL, or data: URI)', - }, - question: { - type: 'string', - description: 'Question to ask about the image', - }, - model: { - type: 'string', - description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")', - }, - }, - oneOf: [ - { required: ['image_path'] }, - { required: ['image_url'] } - ] - }, - }, - // Multi-Image Analysis Tool { name: 'multi_image_analysis', - description: 'Analyze multiple images at once with a single prompt and receive detailed responses', + description: 'Analyze one or more images with a prompt and receive detailed responses', inputSchema: { type: 'object', properties: { @@ -174,7 +142,7 @@ export class ToolHandlers { properties: { url: { type: 'string', - description: 'URL or data URL of the image (can be a file:// URL to read from local filesystem)', + description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)', }, alt: { type: 'string', @@ -195,7 +163,7 @@ export class ToolHandlers { }, model: { type: 'string', - description: 'OpenRouter model to use (defaults to claude-3.5-sonnet if not specified)', + description: 'OpenRouter model to use. If not specified, the system will use a free model with vision capabilities or the default model.', }, }, required: ['images', 'prompt'], @@ -308,13 +276,6 @@ export class ToolHandlers { } }, this.openai, this.defaultModel); - case 'analyze_image': - return handleAnalyzeImage({ - params: { - arguments: request.params.arguments as unknown as AnalyzeImageToolRequest - } - }, this.openai, this.defaultModel); - case 'multi_image_analysis': return handleMultiImageAnalysis({ params: { diff --git a/src/tool-handlers/chat-completion.ts b/src/tool-handlers/chat-completion.ts index f961976..56875c7 100644 --- a/src/tool-handlers/chat-completion.ts +++ b/src/tool-handlers/chat-completion.ts @@ -66,6 +66,53 @@ function truncateMessagesToFit( return truncated; } +// Find a suitable free model with the largest context window +async function findSuitableFreeModel(openai: OpenAI): Promise { + try { + // Query available models with 'free' in their name + const modelsResponse = await openai.models.list(); + if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) { + return 'deepseek/deepseek-chat-v3-0324:free'; // Fallback to a known model + } + + // Filter models with 'free' in ID + const freeModels = modelsResponse.data + .filter(model => model.id.includes('free')) + .map(model => { + // Try to extract context length from the model object + let contextLength = 0; + try { + const modelAny = model as any; // Cast to any to access non-standard properties + if (typeof modelAny.context_length === 'number') { + contextLength = modelAny.context_length; + } else if (modelAny.context_window) { + contextLength = parseInt(modelAny.context_window, 10); + } + } catch (e) { + console.error(`Error parsing context length for model ${model.id}:`, e); + } + + return { + id: model.id, + contextLength: contextLength || 0 + }; + }); + + if (freeModels.length === 0) { + return 'deepseek/deepseek-chat-v3-0324:free'; // Fallback if no free models found + } + + // Sort by context length and pick the one with the largest context window + freeModels.sort((a, b) => b.contextLength - a.contextLength); + console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`); + + return freeModels[0].id; + } catch (error) { + console.error('Error finding suitable free model:', error); + return 'deepseek/deepseek-chat-v3-0324:free'; // Fallback to a known model + } +} + export async function handleChatCompletion( request: { params: { arguments: ChatCompletionToolRequest } }, openai: OpenAI, @@ -73,20 +120,6 @@ export async function handleChatCompletion( ) { const args = request.params.arguments; - // Validate model selection - const model = args.model || defaultModel; - if (!model) { - return { - content: [ - { - type: 'text', - text: 'No model specified and no default model configured in MCP settings. Please specify a model or set OPENROUTER_DEFAULT_MODEL in the MCP configuration.', - }, - ], - isError: true, - }; - } - // Validate message array if (args.messages.length === 0) { return { @@ -101,8 +134,21 @@ export async function handleChatCompletion( } try { + // Select model with priority: + // 1. User-specified model + // 2. Default model from environment + // 3. Free model with the largest context window (selected automatically) + let model = args.model || defaultModel; + + if (!model) { + model = await findSuitableFreeModel(openai); + console.error(`Using auto-selected model: ${model}`); + } + // Truncate messages to fit within context window const truncatedMessages = truncateMessagesToFit(args.messages, MAX_CONTEXT_TOKENS); + + console.error(`Making API call with model: ${model}`); const completion = await openai.chat.completions.create({ model, diff --git a/src/tool-handlers/multi-image-analysis.ts b/src/tool-handlers/multi-image-analysis.ts index e9dfa63..2b7b693 100644 --- a/src/tool-handlers/multi-image-analysis.ts +++ b/src/tool-handlers/multi-image-analysis.ts @@ -80,6 +80,53 @@ async function processImage(buffer: Buffer): Promise { } } +// Find a suitable free model with vision capabilities +async function findSuitableFreeModel(openai: OpenAI): Promise { + try { + // Query available models with 'free' in their name + const modelsResponse = await openai.models.list(); + if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) { + return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model + } + + // Filter models with 'free' in ID and multimodal capabilities + const freeModels = modelsResponse.data + .filter(model => model.id.includes('free')) + .map(model => { + // Try to extract context length from the model object + let contextLength = 0; + try { + const modelAny = model as any; // Cast to any to access non-standard properties + if (typeof modelAny.context_length === 'number') { + contextLength = modelAny.context_length; + } else if (modelAny.context_window) { + contextLength = parseInt(modelAny.context_window, 10); + } + } catch (e) { + console.error(`Error parsing context length for model ${model.id}:`, e); + } + + return { + id: model.id, + contextLength: contextLength || 0 + }; + }); + + if (freeModels.length === 0) { + return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback if no free models found + } + + // Sort by context length and pick the one with the largest context window + freeModels.sort((a, b) => b.contextLength - a.contextLength); + console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`); + + return freeModels[0].id; + } catch (error) { + console.error('Error finding suitable free model:', error); + return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model + } +} + export async function handleMultiImageAnalysis( request: { params: { arguments: MultiImageAnalysisToolRequest } }, openai: OpenAI, @@ -128,8 +175,18 @@ export async function handleMultiImageAnalysis( throw new Error('Failed to process any of the provided images'); } - // Select model - const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet'; + // Select model with priority: + // 1. User-specified model + // 2. Default model from environment + // 3. Free model with vision capabilities (selected automatically) + let model = args.model || defaultModel; + + if (!model) { + model = await findSuitableFreeModel(openai); + console.error(`Using auto-selected model: ${model}`); + } + + console.error(`Making API call with model: ${model}`); // Make the API call const completion = await openai.chat.completions.create({