Enhance MCP server (v1.2.0): Consolidate image analysis, add auto model selection
This commit is contained in:
23
README.md
23
README.md
@@ -192,22 +192,6 @@ use_mcp_tool({
|
|||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
### analyze_image
|
|
||||||
|
|
||||||
Analyze a single image with an optional question:
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
use_mcp_tool({
|
|
||||||
server_name: "openrouter",
|
|
||||||
tool_name: "analyze_image",
|
|
||||||
arguments: {
|
|
||||||
image_path: "/absolute/path/to/image.jpg",
|
|
||||||
question: "What objects are in this image?", // Optional
|
|
||||||
model: "anthropic/claude-3.5-sonnet" // Optional if default is set
|
|
||||||
}
|
|
||||||
});
|
|
||||||
```
|
|
||||||
|
|
||||||
### multi_image_analysis
|
### multi_image_analysis
|
||||||
|
|
||||||
Analyze multiple images with a single prompt:
|
Analyze multiple images with a single prompt:
|
||||||
@@ -329,3 +313,10 @@ npm run build
|
|||||||
## License
|
## License
|
||||||
|
|
||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
|
## Version 1.2.0 Updates
|
||||||
|
- Simplified image analysis by consolidating all functionality into the `multi_image_analysis` tool
|
||||||
|
- Added automatic selection of a free model with the largest context window when no model is specified and no default model is configured
|
||||||
|
- Improved handling of various image sources (file://, http(s)://, and data: URLs)
|
||||||
|
- Enhanced error handling and logging for better troubleshooting
|
||||||
|
- Removed the `analyze_image` tool to eliminate confusion and streamline the interface
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@stabgan/openrouter-mcp-multimodal",
|
"name": "@stabgan/openrouter-mcp-multimodal",
|
||||||
"version": "1.1.0",
|
"version": "1.2.0",
|
||||||
"description": "MCP server for OpenRouter providing text chat and image analysis tools",
|
"description": "MCP server for OpenRouter providing text chat and image analysis tools",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ import { handleChatCompletion, ChatCompletionToolRequest } from './tool-handlers
|
|||||||
import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/search-models.js';
|
import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/search-models.js';
|
||||||
import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
|
import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
|
||||||
import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
|
import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
|
||||||
import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
|
|
||||||
import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
|
import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
|
||||||
|
|
||||||
export class ToolHandlers {
|
export class ToolHandlers {
|
||||||
@@ -128,41 +127,10 @@ export class ToolHandlers {
|
|||||||
maxContextTokens: 200000
|
maxContextTokens: 200000
|
||||||
},
|
},
|
||||||
|
|
||||||
// Image Analysis Tool
|
|
||||||
{
|
|
||||||
name: 'analyze_image',
|
|
||||||
description: 'Analyze an image using OpenRouter vision models',
|
|
||||||
inputSchema: {
|
|
||||||
type: 'object',
|
|
||||||
properties: {
|
|
||||||
image_path: {
|
|
||||||
type: 'string',
|
|
||||||
description: 'Path to the image file to analyze (must be an absolute path)',
|
|
||||||
},
|
|
||||||
image_url: {
|
|
||||||
type: 'string',
|
|
||||||
description: 'URL or data URL of the image (can be a file:// URL, http(s):// URL, or data: URI)',
|
|
||||||
},
|
|
||||||
question: {
|
|
||||||
type: 'string',
|
|
||||||
description: 'Question to ask about the image',
|
|
||||||
},
|
|
||||||
model: {
|
|
||||||
type: 'string',
|
|
||||||
description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
oneOf: [
|
|
||||||
{ required: ['image_path'] },
|
|
||||||
{ required: ['image_url'] }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
},
|
|
||||||
|
|
||||||
// Multi-Image Analysis Tool
|
// Multi-Image Analysis Tool
|
||||||
{
|
{
|
||||||
name: 'multi_image_analysis',
|
name: 'multi_image_analysis',
|
||||||
description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
|
description: 'Analyze one or more images with a prompt and receive detailed responses',
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
properties: {
|
properties: {
|
||||||
@@ -174,7 +142,7 @@ export class ToolHandlers {
|
|||||||
properties: {
|
properties: {
|
||||||
url: {
|
url: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'URL or data URL of the image (can be a file:// URL to read from local filesystem)',
|
description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
|
||||||
},
|
},
|
||||||
alt: {
|
alt: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
@@ -195,7 +163,7 @@ export class ToolHandlers {
|
|||||||
},
|
},
|
||||||
model: {
|
model: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'OpenRouter model to use (defaults to claude-3.5-sonnet if not specified)',
|
description: 'OpenRouter model to use. If not specified, the system will use a free model with vision capabilities or the default model.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
required: ['images', 'prompt'],
|
required: ['images', 'prompt'],
|
||||||
@@ -308,13 +276,6 @@ export class ToolHandlers {
|
|||||||
}
|
}
|
||||||
}, this.openai, this.defaultModel);
|
}, this.openai, this.defaultModel);
|
||||||
|
|
||||||
case 'analyze_image':
|
|
||||||
return handleAnalyzeImage({
|
|
||||||
params: {
|
|
||||||
arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
|
|
||||||
}
|
|
||||||
}, this.openai, this.defaultModel);
|
|
||||||
|
|
||||||
case 'multi_image_analysis':
|
case 'multi_image_analysis':
|
||||||
return handleMultiImageAnalysis({
|
return handleMultiImageAnalysis({
|
||||||
params: {
|
params: {
|
||||||
|
|||||||
@@ -66,6 +66,53 @@ function truncateMessagesToFit(
|
|||||||
return truncated;
|
return truncated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find a suitable free model with the largest context window
|
||||||
|
async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
||||||
|
try {
|
||||||
|
// List all available models; those with 'free' in their ID are selected from the response below
|
||||||
|
const modelsResponse = await openai.models.list();
|
||||||
|
if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
|
||||||
|
return 'deepseek/deepseek-chat-v3-0324:free'; // Fallback to a known model
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter models with 'free' in ID
|
||||||
|
const freeModels = modelsResponse.data
|
||||||
|
.filter(model => model.id.includes('free'))
|
||||||
|
.map(model => {
|
||||||
|
// Try to extract context length from the model object
|
||||||
|
let contextLength = 0;
|
||||||
|
try {
|
||||||
|
const modelAny = model as any; // Cast to any to access non-standard properties
|
||||||
|
if (typeof modelAny.context_length === 'number') {
|
||||||
|
contextLength = modelAny.context_length;
|
||||||
|
} else if (modelAny.context_window) {
|
||||||
|
contextLength = parseInt(modelAny.context_window, 10);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`Error parsing context length for model ${model.id}:`, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: model.id,
|
||||||
|
contextLength: contextLength || 0
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
if (freeModels.length === 0) {
|
||||||
|
return 'deepseek/deepseek-chat-v3-0324:free'; // Fallback if no free models found
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by context length and pick the one with the largest context window
|
||||||
|
freeModels.sort((a, b) => b.contextLength - a.contextLength);
|
||||||
|
console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
|
||||||
|
|
||||||
|
return freeModels[0].id;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error finding suitable free model:', error);
|
||||||
|
return 'deepseek/deepseek-chat-v3-0324:free'; // Fallback to a known model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export async function handleChatCompletion(
|
export async function handleChatCompletion(
|
||||||
request: { params: { arguments: ChatCompletionToolRequest } },
|
request: { params: { arguments: ChatCompletionToolRequest } },
|
||||||
openai: OpenAI,
|
openai: OpenAI,
|
||||||
@@ -73,20 +120,6 @@ export async function handleChatCompletion(
|
|||||||
) {
|
) {
|
||||||
const args = request.params.arguments;
|
const args = request.params.arguments;
|
||||||
|
|
||||||
// Validate model selection
|
|
||||||
const model = args.model || defaultModel;
|
|
||||||
if (!model) {
|
|
||||||
return {
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: 'text',
|
|
||||||
text: 'No model specified and no default model configured in MCP settings. Please specify a model or set OPENROUTER_DEFAULT_MODEL in the MCP configuration.',
|
|
||||||
},
|
|
||||||
],
|
|
||||||
isError: true,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate message array
|
// Validate message array
|
||||||
if (args.messages.length === 0) {
|
if (args.messages.length === 0) {
|
||||||
return {
|
return {
|
||||||
@@ -101,9 +134,22 @@ export async function handleChatCompletion(
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Select model with priority:
|
||||||
|
// 1. User-specified model
|
||||||
|
// 2. Default model from environment
|
||||||
|
// 3. Free model with the largest context window (selected automatically)
|
||||||
|
let model = args.model || defaultModel;
|
||||||
|
|
||||||
|
if (!model) {
|
||||||
|
model = await findSuitableFreeModel(openai);
|
||||||
|
console.error(`Using auto-selected model: ${model}`);
|
||||||
|
}
|
||||||
|
|
||||||
// Truncate messages to fit within context window
|
// Truncate messages to fit within context window
|
||||||
const truncatedMessages = truncateMessagesToFit(args.messages, MAX_CONTEXT_TOKENS);
|
const truncatedMessages = truncateMessagesToFit(args.messages, MAX_CONTEXT_TOKENS);
|
||||||
|
|
||||||
|
console.error(`Making API call with model: ${model}`);
|
||||||
|
|
||||||
const completion = await openai.chat.completions.create({
|
const completion = await openai.chat.completions.create({
|
||||||
model,
|
model,
|
||||||
messages: truncatedMessages,
|
messages: truncatedMessages,
|
||||||
|
|||||||
@@ -80,6 +80,53 @@ async function processImage(buffer: Buffer): Promise<string> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find a suitable free model for image analysis (NOTE: selection uses only 'free' in the ID and context length; vision support is not verified)
|
||||||
|
async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
||||||
|
try {
|
||||||
|
// List all available models; those with 'free' in their ID are selected from the response below
|
||||||
|
const modelsResponse = await openai.models.list();
|
||||||
|
if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
|
||||||
|
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter models with 'free' in ID (NOTE: multimodal/vision capability is not checked here)
|
||||||
|
const freeModels = modelsResponse.data
|
||||||
|
.filter(model => model.id.includes('free'))
|
||||||
|
.map(model => {
|
||||||
|
// Try to extract context length from the model object
|
||||||
|
let contextLength = 0;
|
||||||
|
try {
|
||||||
|
const modelAny = model as any; // Cast to any to access non-standard properties
|
||||||
|
if (typeof modelAny.context_length === 'number') {
|
||||||
|
contextLength = modelAny.context_length;
|
||||||
|
} else if (modelAny.context_window) {
|
||||||
|
contextLength = parseInt(modelAny.context_window, 10);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`Error parsing context length for model ${model.id}:`, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: model.id,
|
||||||
|
contextLength: contextLength || 0
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
if (freeModels.length === 0) {
|
||||||
|
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback if no free models found
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by context length and pick the one with the largest context window
|
||||||
|
freeModels.sort((a, b) => b.contextLength - a.contextLength);
|
||||||
|
console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
|
||||||
|
|
||||||
|
return freeModels[0].id;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error finding suitable free model:', error);
|
||||||
|
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export async function handleMultiImageAnalysis(
|
export async function handleMultiImageAnalysis(
|
||||||
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
||||||
openai: OpenAI,
|
openai: OpenAI,
|
||||||
@@ -128,8 +175,18 @@ export async function handleMultiImageAnalysis(
|
|||||||
throw new Error('Failed to process any of the provided images');
|
throw new Error('Failed to process any of the provided images');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select model
|
// Select model with priority:
|
||||||
const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
|
// 1. User-specified model
|
||||||
|
// 2. Default model from environment
|
||||||
|
// 3. Free model with vision capabilities (selected automatically)
|
||||||
|
let model = args.model || defaultModel;
|
||||||
|
|
||||||
|
if (!model) {
|
||||||
|
model = await findSuitableFreeModel(openai);
|
||||||
|
console.error(`Using auto-selected model: ${model}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.error(`Making API call with model: ${model}`);
|
||||||
|
|
||||||
// Make the API call
|
// Make the API call
|
||||||
const completion = await openai.chat.completions.create({
|
const completion = await openai.chat.completions.create({
|
||||||
|
|||||||
Reference in New Issue
Block a user