Enhance MCP server (v1.2.0): Consolidate image analysis, add auto model selection

2025-03-27 13:42:13 +05:30
parent 74d2997547
commit 3f9840d884
5 changed files with 130 additions and 75 deletions
--- a/README.md
+++ b/README.md
@@ -192,22 +192,6 @@ use_mcp_tool({
 });
 ```

-### analyze_image
-
-Analyze a single image with an optional question:
-
-```javascript
-use_mcp_tool({
-  server_name: "openrouter",
-  tool_name: "analyze_image",
-  arguments: {
-    image_path: "/absolute/path/to/image.jpg",
-    question: "What objects are in this image?", // Optional
-    model: "anthropic/claude-3.5-sonnet" // Optional if default is set
-  }
-});
-```
-
 ### multi_image_analysis

 Analyze multiple images with a single prompt:
@@ -329,3 +313,10 @@ npm run build
 ## License

 MIT License
+
+## Version 1.2.0 Updates
+- Simplified image analysis by consolidating all functionality into the `multi_image_analysis` tool
+- Added automatic selection of free models with the largest context window when no model is specified
+- Improved handling of image sources supplied as `file://`, `http(s)://`, or `data:` URLs
+- Enhanced error handling and logging for better troubleshooting
+- Removed the `analyze_image` tool to eliminate confusion and streamline the interface
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@stabgan/openrouter-mcp-multimodal",
-  "version": "1.1.0",
+  "version": "1.2.0",
  "description": "MCP server for OpenRouter providing text chat and image analysis tools",
  "type": "module",
  "main": "dist/index.js",
--- a/src/tool-handlers.ts
+++ b/src/tool-handlers.ts
@@ -15,7 +15,6 @@ import { handleChatCompletion, ChatCompletionToolRequest } from './tool-handlers
 import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/search-models.js';
 import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
 import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
-import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
 import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';

 export class ToolHandlers {
@@ -128,41 +127,10 @@ export class ToolHandlers {
          maxContextTokens: 200000
        },
        
-        // Image Analysis Tool
-        {
-          name: 'analyze_image',
-          description: 'Analyze an image using OpenRouter vision models',
-          inputSchema: {
-            type: 'object',
-            properties: {
-              image_path: {
-                type: 'string',
-                description: 'Path to the image file to analyze (must be an absolute path)',
-              },
-              image_url: {
-                type: 'string',
-                description: 'URL or data URL of the image (can be a file:// URL, http(s):// URL, or data: URI)',
-              },
-              question: {
-                type: 'string',
-                description: 'Question to ask about the image',
-              },
-              model: {
-                type: 'string',
-                description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
-              },
-            },
-            oneOf: [
-              { required: ['image_path'] },
-              { required: ['image_url'] }
-            ]
-          },
-        },
-        
        // Multi-Image Analysis Tool
        {
          name: 'multi_image_analysis',
-          description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
+          description: 'Analyze one or more images with a prompt and receive detailed responses',
          inputSchema: {
            type: 'object',
            properties: {
@@ -174,7 +142,7 @@ export class ToolHandlers {
                  properties: {
                    url: {
                      type: 'string',
-                      description: 'URL or data URL of the image (can be a file:// URL to read from local filesystem)',
+                      description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
                    },
                    alt: {
                      type: 'string',
@@ -195,7 +163,7 @@ export class ToolHandlers {
              },
              model: {
                type: 'string',
-                description: 'OpenRouter model to use (defaults to claude-3.5-sonnet if not specified)',
+                description: 'OpenRouter model to use. If not specified, the system will use a free model with vision capabilities or the default model.',
              },
            },
            required: ['images', 'prompt'],
@@ -308,13 +276,6 @@ export class ToolHandlers {
            }
          }, this.openai, this.defaultModel);
        
-        case 'analyze_image':
-          return handleAnalyzeImage({
-            params: {
-              arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
-            }
-          }, this.openai, this.defaultModel);
-        
        case 'multi_image_analysis':
          return handleMultiImageAnalysis({
            params: {
--- a/src/tool-handlers/chat-completion.ts
+++ b/src/tool-handlers/chat-completion.ts
@@ -66,6 +66,53 @@ function truncateMessagesToFit(
  return truncated;
 }

+// Returns the id of the listed model whose id contains 'free' and whose reported context length is largest; resolves to a hard-coded fallback id when the list is empty, has no such model, or the lookup throws (it never rejects).
+async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
+  try {
+    // Fetch the model list; the call itself is unfiltered, the 'free' filter is applied further down
+    const modelsResponse = await openai.models.list();
+    if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
+      return 'deepseek/deepseek-chat-v3-0324:free'; // Hard-coded fallback when the response is missing or empty
+    }
+    
+    // Keep models whose id contains the substring 'free' and pair each id with a context length
+    const freeModels = modelsResponse.data
+      .filter(model => model.id.includes('free'))
+      .map(model => {
+        // Prefer a numeric context_length, otherwise parse context_window; stays 0 when neither is present
+        let contextLength = 0;
+        try {
+          const modelAny = model as any; // Cast to any to access properties not declared on the SDK's model type
+          if (typeof modelAny.context_length === 'number') {
+            contextLength = modelAny.context_length;
+          } else if (modelAny.context_window) {
+            contextLength = parseInt(modelAny.context_window, 10);
+          }
+        } catch (e) {
+          console.error(`Error parsing context length for model ${model.id}:`, e);
+        }
+        
+        return {
+          id: model.id,
+          contextLength: contextLength || 0 // `|| 0` also maps a NaN from parseInt to 0
+        };
+      });
+    
+    if (freeModels.length === 0) {
+      return 'deepseek/deepseek-chat-v3-0324:free'; // Same hard-coded fallback when no id contains 'free'
+    }
+    
+    // Sort in place, descending by context length, so index 0 holds the largest context window
+    freeModels.sort((a, b) => b.contextLength - a.contextLength);
+    console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
+    
+    return freeModels[0].id;
+  } catch (error) {
+    console.error('Error finding suitable free model:', error);
+    return 'deepseek/deepseek-chat-v3-0324:free'; // Same hard-coded fallback when listing models throws
+  }
+}
+
 export async function handleChatCompletion(
  request: { params: { arguments: ChatCompletionToolRequest } },
  openai: OpenAI,
@@ -73,20 +120,6 @@ export async function handleChatCompletion(
 ) {
  const args = request.params.arguments;
  
-  // Validate model selection
-  const model = args.model || defaultModel;
-  if (!model) {
-    return {
-      content: [
-        {
-          type: 'text',
-          text: 'No model specified and no default model configured in MCP settings. Please specify a model or set OPENROUTER_DEFAULT_MODEL in the MCP configuration.',
-        },
-      ],
-      isError: true,
-    };
-  }
-
  // Validate message array
  if (args.messages.length === 0) {
    return {
@@ -101,8 +134,21 @@ export async function handleChatCompletion(
  }

  try {
+    // Select model with priority:
+    // 1. User-specified model
+    // 2. Default model from environment
+    // 3. Free model with the largest context window (selected automatically)
+    let model = args.model || defaultModel;
+    
+    if (!model) {
+      model = await findSuitableFreeModel(openai);
+      console.error(`Using auto-selected model: ${model}`);
+    }
+    
    // Truncate messages to fit within context window
    const truncatedMessages = truncateMessagesToFit(args.messages, MAX_CONTEXT_TOKENS);
+    
+    console.error(`Making API call with model: ${model}`);

    const completion = await openai.chat.completions.create({
      model,
--- a/src/tool-handlers/multi-image-analysis.ts
+++ b/src/tool-handlers/multi-image-analysis.ts
@@ -80,6 +80,53 @@ async function processImage(buffer: Buffer): Promise<string> {
  }
 }

+// Returns the id of the listed model whose id contains 'free' and whose reported context length is largest, or a hard-coded fallback id. NOTE(review): nothing here checks vision/image support, so the selected model may not accept images — confirm.
+async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
+  try {
+    // Fetch the model list; the call itself is unfiltered, the 'free' filter is applied further down
+    const modelsResponse = await openai.models.list();
+    if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
+      return 'qwen/qwen2.5-vl-32b-instruct:free'; // Hard-coded fallback when the response is missing or empty
+    }
+    
+    // Keep models whose id contains the substring 'free'; NOTE(review): no multimodal/vision check is applied here
+    const freeModels = modelsResponse.data
+      .filter(model => model.id.includes('free'))
+      .map(model => {
+        // Prefer a numeric context_length, otherwise parse context_window; stays 0 when neither is present
+        let contextLength = 0;
+        try {
+          const modelAny = model as any; // Cast to any to access properties not declared on the SDK's model type
+          if (typeof modelAny.context_length === 'number') {
+            contextLength = modelAny.context_length;
+          } else if (modelAny.context_window) {
+            contextLength = parseInt(modelAny.context_window, 10);
+          }
+        } catch (e) {
+          console.error(`Error parsing context length for model ${model.id}:`, e);
+        }
+        
+        return {
+          id: model.id,
+          contextLength: contextLength || 0 // `|| 0` also maps a NaN from parseInt to 0
+        };
+      });
+    
+    if (freeModels.length === 0) {
+      return 'qwen/qwen2.5-vl-32b-instruct:free'; // Same hard-coded fallback when no id contains 'free'
+    }
+    
+    // Sort in place, descending by context length, so index 0 holds the largest context window
+    freeModels.sort((a, b) => b.contextLength - a.contextLength);
+    console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
+    
+    return freeModels[0].id;
+  } catch (error) {
+    console.error('Error finding suitable free model:', error);
+    return 'qwen/qwen2.5-vl-32b-instruct:free'; // Same hard-coded fallback when listing models throws
+  }
+}
+
 export async function handleMultiImageAnalysis(
  request: { params: { arguments: MultiImageAnalysisToolRequest } },
  openai: OpenAI,
@@ -128,8 +175,18 @@ export async function handleMultiImageAnalysis(
      throw new Error('Failed to process any of the provided images');
    }
    
-    // Select model
-    const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
+    // Select model with priority:
+    // 1. User-specified model
+    // 2. Default model from environment
+    // 3. Free model with vision capabilities (selected automatically)
+    let model = args.model || defaultModel;
+    
+    if (!model) {
+      model = await findSuitableFreeModel(openai);
+      console.error(`Using auto-selected model: ${model}`);
+    }
+    
+    console.error(`Making API call with model: ${model}`);
    
    // Make the API call
    const completion = await openai.chat.completions.create({