fix: Improved base64 image handling and Windows compatibility
This commit is contained in:
17
.gitignore
vendored
17
.gitignore
vendored
@@ -48,3 +48,20 @@ ehthumbs_vista.db
|
|||||||
# Testing
|
# Testing
|
||||||
coverage/
|
coverage/
|
||||||
.nyc_output/
|
.nyc_output/
|
||||||
|
|
||||||
|
# Environment variables
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.development.local
|
||||||
|
.env.test.local
|
||||||
|
.env.production.local
|
||||||
|
|
||||||
|
# Runtime data
|
||||||
|
pids
|
||||||
|
*.pid
|
||||||
|
*.seed
|
||||||
|
*.pid.lock
|
||||||
|
|
||||||
|
# OS files
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|||||||
131
convert_to_base64.html
Normal file
131
convert_to_base64.html
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Image to Base64 Converter</title>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
max-width: 800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 20px;
|
||||||
|
}
|
||||||
|
.container {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 20px;
|
||||||
|
}
|
||||||
|
.preview {
|
||||||
|
margin-top: 20px;
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
.preview img {
|
||||||
|
max-width: 100%;
|
||||||
|
max-height: 300px;
|
||||||
|
border: 1px solid #ddd;
|
||||||
|
}
|
||||||
|
.result {
|
||||||
|
margin-top: 20px;
|
||||||
|
}
|
||||||
|
textarea {
|
||||||
|
width: 100%;
|
||||||
|
height: 100px;
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
button {
|
||||||
|
padding: 10px 15px;
|
||||||
|
background-color: #4CAF50;
|
||||||
|
color: white;
|
||||||
|
border: none;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 16px;
|
||||||
|
}
|
||||||
|
button:hover {
|
||||||
|
background-color: #45a049;
|
||||||
|
}
|
||||||
|
.copy-button {
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
.code-block {
|
||||||
|
background-color: #f5f5f5;
|
||||||
|
padding: 15px;
|
||||||
|
border-radius: 4px;
|
||||||
|
overflow-x: auto;
|
||||||
|
margin-top: 20px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Image to Base64 Converter for MCP Testing</h1>
|
||||||
|
<p>Use this tool to convert a local image to a base64 string that can be used with the MCP server's multi_image_analysis tool.</p>
|
||||||
|
|
||||||
|
<div class="container">
|
||||||
|
<div>
|
||||||
|
<label for="imageInput">Select an image:</label><br>
|
||||||
|
<input type="file" id="imageInput" accept="image/*">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="preview" id="preview">
|
||||||
|
<h3>Image Preview:</h3>
|
||||||
|
<div id="imagePreview"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="result" id="result">
|
||||||
|
<h3>Base64 String:</h3>
|
||||||
|
<textarea id="base64Output" readonly></textarea>
|
||||||
|
<button class="copy-button" id="copyButton">Copy to Clipboard</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<h3>How to use with MCP:</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<pre>
|
||||||
|
{
|
||||||
|
"images": [
|
||||||
|
{
|
||||||
|
"url": "PASTE_BASE64_STRING_HERE"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"prompt": "Please describe this image in detail. What does it show?",
|
||||||
|
"model": "qwen/qwen2.5-vl-32b-instruct:free"
|
||||||
|
}
|
||||||
|
</pre>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// When the user picks a file: show a preview and fill the textarea with the
// image encoded as a data URL.
document.getElementById('imageInput').addEventListener('change', function (event) {
    const file = event.target.files[0];
    if (!file) return;

    // Display image preview
    const preview = document.getElementById('imagePreview');
    preview.innerHTML = '';
    const img = document.createElement('img');
    const objectUrl = URL.createObjectURL(file);
    // Object URLs keep the file referenced until they are revoked; release it
    // as soon as the <img> has finished with it (successfully or not).
    const releaseObjectUrl = () => URL.revokeObjectURL(objectUrl);
    img.addEventListener('load', releaseObjectUrl, { once: true });
    img.addEventListener('error', releaseObjectUrl, { once: true });
    img.src = objectUrl;
    preview.appendChild(img);

    // Convert to base64
    const output = document.getElementById('base64Output');
    const reader = new FileReader();
    reader.onload = function (e) {
        // readAsDataURL already yields "data:<mime type>;base64,<payload>"
        output.value = e.target.result;
    };
    reader.onerror = function () {
        // Do not leave the previous image's data in place if this read failed.
        output.value = '';
        console.error('Failed to read the selected file:', reader.error);
    };
    reader.readAsDataURL(file);
});
|
||||||
|
|
||||||
|
// Copy the textarea contents to the clipboard and briefly show the outcome on
// the button itself.
document.getElementById('copyButton').addEventListener('click', async function () {
    const button = this;
    const textarea = document.getElementById('base64Output');
    let copied = false;

    try {
        if (navigator.clipboard && window.isSecureContext) {
            await navigator.clipboard.writeText(textarea.value);
            copied = true;
        } else {
            // Fallback for contexts where the async Clipboard API is not
            // available; execCommand returns false when the copy was refused.
            textarea.select();
            copied = document.execCommand('copy');
        }
    } catch (err) {
        console.error('Copy to clipboard failed:', err);
    }

    // Only claim success when the copy actually happened.
    button.textContent = copied ? 'Copied!' : 'Copy failed';
    setTimeout(() => {
        button.textContent = 'Copy to Clipboard';
    }, 2000);
});
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
89
convert_to_base64.py
Normal file
89
convert_to_base64.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import base64
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def convert_image_to_base64(image_path):
    """Convert an image file to a base64 data URI.

    Args:
        image_path: Path to the image file (str or path-like).

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``, or ``None`` if
        the file could not be read (the error is printed to stderr).
    """
    import mimetypes  # local import: only this function needs it

    # Extensions this tool pins explicitly, independent of the platform's
    # MIME database.
    known_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
        '.bmp': 'image/bmp',
    }

    file_ext = os.path.splitext(image_path)[1].lower()
    # Fall back to the standard MIME database for anything outside the table
    # (e.g. .svg, .tiff) before giving up with the generic binary type.
    mime_type = (
        known_types.get(file_ext)
        or mimetypes.guess_type(str(image_path))[0]
        or 'application/octet-stream'
    )

    # Read binary data and encode to base64
    try:
        with open(image_path, 'rb') as img_file:
            img_data = img_file.read()
        base64_data = base64.b64encode(img_data).decode('utf-8')
        return f"data:{mime_type};base64,{base64_data}"
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def save_base64_to_file(base64_data, output_path):
    """Write the given base64 text to ``output_path``.

    Returns ``True`` after a successful write (a confirmation is printed to
    stdout) and ``False`` if anything goes wrong (the error is printed to
    stderr).
    """
    try:
        with open(output_path, 'w') as handle:
            handle.write(base64_data)
        print(f"Base64 data saved to {output_path}")
    except Exception as err:
        print(f"Error saving file: {err}", file=sys.stderr)
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point.

    Parses ``image_path`` and the optional ``-o/--output`` argument, converts
    the image to a base64 data URI and either saves it to the output file or
    prints a preview plus a sample ``multi_image_analysis`` request.

    Returns:
        0 on success, 1 on any failure (used as the process exit status).
    """
    import json  # local import: only needed to render the sample request

    parser = argparse.ArgumentParser(description='Convert image to base64 for MCP server testing')
    parser.add_argument('image_path', help='Path to the image file')
    parser.add_argument('-o', '--output', help='Output file path (if not provided, output to console)')

    args = parser.parse_args()

    # Check that the path exists and is a regular file (a directory would
    # otherwise only fail later, inside the read).
    image_path = Path(args.image_path)
    if not image_path.exists():
        print(f"Error: File not found: {args.image_path}", file=sys.stderr)
        return 1
    if not image_path.is_file():
        print(f"Error: Not a regular file: {args.image_path}", file=sys.stderr)
        return 1

    # Convert image to base64
    base64_data = convert_image_to_base64(args.image_path)
    if not base64_data:
        return 1

    # Output base64 data
    if args.output:
        if not save_base64_to_file(base64_data, args.output):
            return 1
    else:
        preview = base64_data[:100] + "..." if len(base64_data) > 100 else base64_data
        print("\nBase64 Image Data:")
        print(preview)
        print("\nTotal length:", len(base64_data))
        print("\nTo use with MCP server in multi_image_analysis:")
        # Render the example with json.dumps so it is always well-formed JSON.
        sample_request = {
            "images": [
                {"url": base64_data[:20] + "... (full base64 string)"}
            ],
            "prompt": "Please describe this image in detail. What does it show?",
            "model": "qwen/qwen2.5-vl-32b-instruct:free",
        }
        print()
        print(json.dumps(sample_request, indent=2))
        print()

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
78
encode_image.sh
Normal file
78
encode_image.sh
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
#!/bin/bash
#
# Encode an image file as a base64 data URI.
#
# Usage: encode_image.sh <image_file> [output_file]
#   With output_file: the full data URI is written to that file.
#   Without:          a 100-character preview and a sample request are printed.

# Make a failing `base64` visible even though it runs on the left of a pipe.
set -o pipefail

# Check if an image file is provided
if [ $# -lt 1 ]; then
    echo "Usage: $0 <image_file> [output_file]" >&2
    echo "Example: $0 test.png base64_output.txt" >&2
    exit 1
fi

IMAGE_FILE="$1"
OUTPUT_FILE="${2:-}"  # Use the second argument as output file, if provided

# Check if the image file exists
if [ ! -f "$IMAGE_FILE" ]; then
    echo "Error: Image file '$IMAGE_FILE' does not exist." >&2
    exit 1
fi

# Get the file extension (lower-cased so .Png / .JPG / .jpeg all match) and
# determine the MIME type. `tr` is used instead of ${var,,} so the script also
# runs on bash 3.x.
FILE_EXT="$(printf '%s' "${IMAGE_FILE##*.}" | tr '[:upper:]' '[:lower:]')"
MIME_TYPE="application/octet-stream"  # Default MIME type

case "$FILE_EXT" in
    png)
        MIME_TYPE="image/png"
        ;;
    jpg|jpeg)
        MIME_TYPE="image/jpeg"
        ;;
    gif)
        MIME_TYPE="image/gif"
        ;;
    webp)
        MIME_TYPE="image/webp"
        ;;
    bmp)
        MIME_TYPE="image/bmp"
        ;;
    *)
        echo "Warning: Unknown file extension. Using generic MIME type." >&2
        ;;
esac

# Convert image to base64
echo "Converting '$IMAGE_FILE' to base64..."

# Reading from stdin and stripping line breaks avoids the flag differences
# between base64 implementations (-i on macOS, -w 0 on GNU coreutils).
if ! ENCODED="$(base64 < "$IMAGE_FILE" | tr -d '\r\n')"; then
    echo "Error: Failed to base64-encode '$IMAGE_FILE'." >&2
    exit 1
fi
BASE64_DATA="data:$MIME_TYPE;base64,$ENCODED"

# Output the base64 data
if [ -n "$OUTPUT_FILE" ]; then
    # Save to file if output file is specified
    if ! printf '%s\n' "$BASE64_DATA" > "$OUTPUT_FILE"; then
        echo "Error: Could not write to '$OUTPUT_FILE'." >&2
        exit 1
    fi
    echo "Base64 data saved to '$OUTPUT_FILE'"
    echo "Total length: ${#BASE64_DATA} characters"
else
    # Display a preview and length if no output file
    echo "Base64 Image Data (first 100 chars):"
    echo "${BASE64_DATA:0:100}..."
    echo "Total length: ${#BASE64_DATA} characters"

    echo ""
    echo "To use with MCP server in multi_image_analysis:"
    cat <<EOF
{
  "images": [
    {
      "url": "${BASE64_DATA:0:20}... (full base64 string)"
    }
  ],
  "prompt": "Please describe this image in detail. What does it show?",
  "model": "qwen/qwen2.5-vl-32b-instruct:free"
}
EOF
fi

exit 0
|
||||||
1594
full_base64.txt
Normal file
1594
full_base64.txt
Normal file
File diff suppressed because it is too large
Load Diff
1611
lena_base64.txt
Normal file
1611
lena_base64.txt
Normal file
File diff suppressed because it is too large
Load Diff
183
openrouter-image-python.py
Normal file
183
openrouter-image-python.py
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import base64
|
||||||
|
import os
|
||||||
|
import mimetypes
|
||||||
|
import requests
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Constants
# The API key is taken from the OPENROUTER_API_KEY environment variable when it
# is set (and non-empty) so a real key never has to be written into this file;
# otherwise the placeholder below is used and must be replaced.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY") or "your_openrouter_api_key"
IMAGE_PATH = "path/to/your/image.jpg"  # Replace with your image path
|
||||||
|
|
||||||
|
def image_to_base64(image_path):
    """Return the file at ``image_path`` encoded as a base64 data URI.

    The MIME type is guessed from the file name; when no guess is available
    the generic ``application/octet-stream`` type is used. Any failure is
    reported on stdout and then re-raised to the caller.
    """
    try:
        guessed_type = mimetypes.guess_type(image_path)[0]
        # Fall back to generic binary when the type cannot be determined
        mime_type = guessed_type if guessed_type else "application/octet-stream"

        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        payload = base64.b64encode(raw_bytes).decode("utf-8")

        return "data:{};base64,{}".format(mime_type, payload)
    except Exception as err:
        print(f"Error converting image to base64: {err}")
        raise
|
||||||
|
|
||||||
|
def send_image_direct_api(base64_image, question="What's in this image?"):
    """Send an image to OpenRouter with a direct HTTP call and print the reply.

    Args:
        base64_image: The image as a data URI string, sent as the ``image_url``.
        question: Text prompt sent alongside the image.

    Errors are caught and printed rather than raised; when the failure carries
    an HTTP response, its body is printed as well.
    """
    try:
        print("Sending image via direct API call...")

        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://your-site-url.com",  # Optional
            "X-Title": "Your Site Name"  # Optional
        }

        payload = {
            "model": "anthropic/claude-3-opus",  # Choose an appropriate model with vision capabilities
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": question
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": base64_image
                            }
                        }
                    ]
                }
            ]
        }

        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            # (connect, read) timeouts: without them a stalled connection
            # would block this call indefinitely.
            timeout=(10, 120)
        )

        response.raise_for_status()  # Raise exception for non-200 responses
        data = response.json()

        print("Response from direct API:")
        print(data["choices"][0]["message"]["content"])
    except Exception as e:
        print(f"Error sending image via direct API: {e}")
        # Compare against None explicitly: a requests.Response is falsy for
        # 4xx/5xx statuses, which are exactly the responses worth printing.
        error_response = getattr(e, "response", None)
        if error_response is not None:
            print(f"API error details: {error_response.text}")
|
||||||
|
|
||||||
|
def send_image_openai_sdk(base64_image, question="What's in this image?"):
    """Send an image plus a text prompt to OpenRouter through the OpenAI SDK.

    The model's reply is printed; any exception is caught and printed instead
    of being raised.
    """
    try:
        print("Sending image via OpenAI SDK...")

        # One user turn carrying both the text prompt and the image
        user_turn = {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": base64_image}},
            ],
        }

        # OpenAI client pointed at the OpenRouter base URL
        optional_headers = {
            "HTTP-Referer": "https://your-site-url.com",  # Optional
            "X-Title": "Your Site Name",  # Optional
        }
        router_client = OpenAI(
            api_key=OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1",
            default_headers=optional_headers,
        )

        reply = router_client.chat.completions.create(
            model="anthropic/claude-3-opus",  # Choose an appropriate model with vision capabilities
            messages=[user_turn],
        )

        print("Response from OpenAI SDK:")
        print(reply.choices[0].message.content)
    except Exception as err:
        print(f"Error sending image via OpenAI SDK: {err}")
|
||||||
|
|
||||||
|
def send_image_from_base64_file(base64_file_path, question="What's in this image?"):
    """Send an image whose data URI is stored in a text file (e.g. from the bash script).

    The file content is read, stripped of surrounding whitespace and passed as
    the image URL. The reply is printed; any exception is caught and printed.
    """
    try:
        print("Sending image from base64 file...")

        # Load the pre-encoded data from disk
        with open(base64_file_path, "r") as source:
            stored_uri = source.read().strip()

        # One user turn carrying both the text prompt and the image
        user_turn = {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": stored_uri}},
            ],
        }

        router_client = OpenAI(
            api_key=OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1",
        )
        reply = router_client.chat.completions.create(
            model="anthropic/claude-3-opus",
            messages=[user_turn],
        )

        print("Response when using base64 file:")
        print(reply.choices[0].message.content)
    except Exception as err:
        print(f"Error sending image from base64 file: {err}")
|
||||||
|
|
||||||
|
def main():
    """Run the examples against IMAGE_PATH.

    Returns:
        0 when the image was converted and the examples were invoked, 1 when
        an exception reached this function (it is printed first).
    """
    try:
        # Convert the image to base64
        base64_image = image_to_base64(IMAGE_PATH)
        print("Image converted to base64 successfully")

        # Example 1: Using direct API call
        send_image_direct_api(base64_image)

        # Example 2: Using OpenAI SDK
        send_image_openai_sdk(base64_image)

        # Example 3: Using a base64 file (if you have one)
        # send_image_from_base64_file("path/to/base64.txt")
    except Exception as e:
        print(f"Error in main function: {e}")
        return 1
    return 0


if __name__ == "__main__":
    # Propagate failure to the shell instead of always exiting with status 0.
    raise SystemExit(main())
|
||||||
247
openrouter-image-sdk.js
Normal file
247
openrouter-image-sdk.js
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
/**
|
||||||
|
* OpenRouter Image Analysis using OpenAI SDK
|
||||||
|
*
|
||||||
|
* This script demonstrates how to analyze local images using OpenRouter's API
|
||||||
|
* through the OpenAI SDK. It supports both command-line usage and can be imported
|
||||||
|
* as a module for use in other applications.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* - Direct: node openrouter-image-sdk.js <image_path> [prompt]
|
||||||
|
* - As module: import { analyzeImage } from './openrouter-image-sdk.js'
|
||||||
|
*
|
||||||
|
* Environment variables:
|
||||||
|
* - OPENROUTER_API_KEY: Your OpenRouter API key (required)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import 'dotenv/config';
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import { dirname } from 'path';
|
||||||
|
import { OpenAI } from 'openai';
|
||||||
|
|
||||||
|
// ES modules have no built-in __filename/__dirname; derive them from this
// module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Defaults used by analyzeImage
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free'; // model used when the caller names none
const MAX_RETRIES = 2; // retries after the first attempt
const RETRY_DELAY = 1_000; // base delay between attempts, in milliseconds
|
||||||
|
|
||||||
|
/**
 * Convert a local image file to a base64 data URI.
 *
 * The MIME type is chosen from the (case-insensitive) file extension; unknown
 * extensions produce a warning and fall back to `application/octet-stream`.
 *
 * @param {string} filePath - Path to the image file
 * @returns {Promise<string>} - Base64 encoded image with data URI prefix
 * @throws {Error} "Failed to convert image to base64: ..." with the underlying
 *   error attached as `cause`; a missing file is reported as
 *   "Image file not found: <path>".
 */
export async function imageToBase64(filePath) {
  const mimeTypesByExtension = new Map([
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.gif', 'image/gif'],
  ]);

  try {
    // Read directly instead of probing with fs.access first: the read itself
    // tells us whether the file is missing, and other failures (permissions,
    // directory, ...) keep their real message instead of "not found".
    let imageBuffer;
    try {
      imageBuffer = await fs.readFile(filePath);
    } catch (error) {
      if (error.code === 'ENOENT') {
        throw new Error(`Image file not found: ${filePath}`, { cause: error });
      }
      throw error;
    }

    // Determine MIME type based on file extension
    const fileExt = path.extname(filePath).toLowerCase();
    let mimeType = mimeTypesByExtension.get(fileExt);
    if (mimeType === undefined) {
      console.warn(`Unknown file extension: ${fileExt}, using default MIME type`);
      mimeType = 'application/octet-stream';
    }

    // Convert to base64 and add the data URI prefix
    const base64 = imageBuffer.toString('base64');
    return `data:${mimeType};base64,${base64}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    // Keep the original error reachable for callers that need code/stack.
    throw new Error(`Failed to convert image to base64: ${error.message}`, { cause: error });
  }
}
|
||||||
|
|
||||||
|
/**
 * Pause asynchronous execution for a given duration.
 *
 * @param {number} ms - How long to wait, in milliseconds
 * @returns {Promise<void>} Resolves (with no value) once the timer fires
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
|
||||||
|
|
||||||
|
/**
 * Analyze an image using OpenRouter's API via OpenAI SDK
 *
 * Failed requests are retried up to MAX_RETRIES times with exponential
 * backoff (RETRY_DELAY, then doubling). Client errors that a retry cannot fix
 * (4xx other than 408 and 429) stop the retry loop immediately.
 *
 * @param {Object} options - Options for image analysis
 * @param {string} [options.imagePath] - Path to the local image file
 * @param {string} [options.imageBase64] - Base64 encoded image (alternative to imagePath; takes precedence when both are given)
 * @param {string} [options.prompt="Please describe this image in detail."] - The prompt to send with the image
 * @param {string} [options.model=DEFAULT_MODEL] - The model to use for analysis
 * @param {string} [options.apiKey] - OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
 * @returns {Promise<Object>} - `{ analysis, model, usage, requestId, finishReason }`
 * @throws {Error} When no API key or no image is supplied, or with the last
 *   request error once the attempts are exhausted.
 */
export async function analyzeImage({
  imagePath,
  imageBase64,
  prompt = "Please describe this image in detail.",
  model = DEFAULT_MODEL,
  apiKey
}) {
  // Check for API key
  const openrouterApiKey = apiKey || process.env.OPENROUTER_API_KEY;
  if (!openrouterApiKey) {
    throw new Error('OpenRouter API key is required. Set OPENROUTER_API_KEY in your environment or pass it as an option.');
  }

  // Check that we have either imagePath or imageBase64
  if (!imagePath && !imageBase64) {
    throw new Error('Either imagePath or imageBase64 must be provided.');
  }

  // Get base64 data if not provided
  let base64Data = imageBase64;
  if (!base64Data) {
    console.log(`Converting image at ${imagePath} to base64...`);
    base64Data = await imageToBase64(imagePath);
    console.log('Image converted successfully!');
  }

  // Initialize the OpenAI client with OpenRouter base URL
  const openai = new OpenAI({
    apiKey: openrouterApiKey,
    baseURL: 'https://openrouter.ai/api/v1',
    defaultHeaders: {
      'HTTP-Referer': 'https://github.com/stabgan/openrouter-mcp-multimodal',
      'X-Title': 'OpenRouter Local Image Analysis'
    }
  });

  let lastError = null;
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    try {
      if (attempt > 0) {
        console.log(`Retry attempt ${attempt}/${MAX_RETRIES}...`);
        // Exponential backoff: RETRY_DELAY, 2x, 4x, ...
        await sleep(RETRY_DELAY * 2 ** (attempt - 1));
      }

      console.log(`Sending image analysis request to model: ${model}`);

      // Create the message with text and image
      const completion = await openai.chat.completions.create({
        model,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              { type: 'image_url', image_url: { url: base64Data } }
            ]
          }
        ]
      });

      // Extract the relevant information from the response
      const firstChoice = completion?.choices?.[0];
      if (!firstChoice) {
        throw new Error('Unexpected response structure from OpenRouter API.');
      }

      return {
        analysis: firstChoice.message.content,
        model: completion.model,
        usage: completion.usage,
        requestId: completion.id,
        finishReason: firstChoice.finish_reason
      };
    } catch (error) {
      lastError = error;

      const status = error.status ?? error.response?.status;

      // If this is a 402 Payment Required error, we won't retry
      if (status === 402) {
        console.error('Payment required error. Not retrying.');
        break;
      }

      // Other client errors (bad key, bad request, unknown model, ...) will
      // fail the same way again; only timeouts and rate limits are retried.
      if (status >= 400 && status < 500 && status !== 408 && status !== 429) {
        console.error(`Request rejected with status ${status}. Not retrying.`);
        break;
      }

      if (attempt === MAX_RETRIES) {
        console.error('Maximum retry attempts reached.');
      }
    }
  }

  // If we've exhausted all retries, throw the last error
  throw lastError || new Error('Failed to analyze image after multiple attempts.');
}
|
||||||
|
|
||||||
|
/**
 * Command line interface for image analysis
 *
 * Reads `<image_path> [prompt]` from the command line, runs analyzeImage and
 * prints the analysis, the model used and (when present) token usage. With no
 * arguments it prints usage and exits with status 0; on error it prints the
 * message and exits with status 1.
 */
async function main() {
  try {
    const args = process.argv.slice(2);

    if (args.length === 0) {
      console.log('Usage: node openrouter-image-sdk.js <image_path> [prompt]');
      console.log('Example: node openrouter-image-sdk.js test.png "What objects do you see in this image?"');
      process.exit(0);
    }

    const imagePath = args[0];
    const prompt = args[1] || "Please describe this image in detail. What do you see?";

    console.log(`Analyzing image: ${imagePath}`);
    console.log(`Prompt: ${prompt}`);

    const result = await analyzeImage({ imagePath, prompt });

    console.log('\n----- Analysis Results -----\n');
    console.log(result.analysis);
    console.log('\n----------------------------\n');

    console.log('Model used:', result.model);
    if (result.usage) {
      console.log('Token usage:');
      console.log('- Prompt tokens:', result.usage.prompt_tokens);
      console.log('- Completion tokens:', result.usage.completion_tokens);
      console.log('- Total tokens:', result.usage.total_tokens);
    }
  } catch (error) {
    console.error('Error:', error.message);
    if (error.response) {
      console.error('API error details:', JSON.stringify(error.response, null, 2));
    }
    process.exit(1);
  }
}

/**
 * Tell whether this file is the script Node was started with (as opposed to
 * being imported by another module).
 *
 * @returns {Promise<boolean>}
 */
async function isRunDirectly() {
  if (!process.argv[1]) {
    return false;
  }
  try {
    // Resolve symlinks so the comparison matches the real path behind
    // import.meta.url.
    const entryPoint = await fs.realpath(process.argv[1]);
    return entryPoint === fileURLToPath(import.meta.url);
  } catch {
    return false;
  }
}

// Run the CLI only when executed directly. Running it unconditionally would
// make `import { analyzeImage } from './openrouter-image-sdk.js'` print the
// usage text and terminate the importing process.
if (await isRunDirectly()) {
  main().catch(error => {
    console.error('Fatal error:', error);
    process.exit(1);
  });
}
|
||||||
4
package-lock.json
generated
4
package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@stabgan/openrouter-mcp-multimodal",
|
"name": "@stabgan/openrouter-mcp-multimodal",
|
||||||
"version": "1.2.0",
|
"version": "1.3.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@stabgan/openrouter-mcp-multimodal",
|
"name": "@stabgan/openrouter-mcp-multimodal",
|
||||||
"version": "1.2.0",
|
"version": "1.3.0",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@modelcontextprotocol/sdk": "^1.8.0",
|
"@modelcontextprotocol/sdk": "^1.8.0",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@stabgan/openrouter-mcp-multimodal",
|
"name": "@stabgan/openrouter-mcp-multimodal",
|
||||||
"version": "1.3.0",
|
"version": "1.4.0",
|
||||||
"description": "MCP server for OpenRouter providing text chat and image analysis tools",
|
"description": "MCP server for OpenRouter providing text chat and image analysis tools",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
|
|||||||
259
send_image_to_openrouter.js
Normal file
259
send_image_to_openrouter.js
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
// Send an image to OpenRouter using JavaScript
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { OpenAI } from 'openai';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import { dirname } from 'path';
|
||||||
|
|
||||||
|
console.log("Starting script...");

// Script configuration
// NOTE(review): the key is captured here, at module load. If loadEnv() is only
// called later, a key that exists solely in .env would not be reflected in
// this constant — confirm against the rest of the file.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY || 'your_openrouter_api_key'; // Get from env or replace
const [, , imagePathArgument] = process.argv;
const IMAGE_PATH = imagePathArgument || 'test.png'; // First CLI argument, or the default
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';

console.log(`Arguments: ${process.argv.join(', ')}`);
console.log(`Using image path: ${IMAGE_PATH}`);
|
||||||
|
|
||||||
|
/**
 * Load environment variables from the `.env` file next to this script.
 *
 * Each `KEY=value` line is copied into `process.env` (overwriting an existing
 * entry of the same name). Whitespace around the value is dropped and one
 * pair of matching surrounding quotes (single or double) is removed. Lines
 * that do not look like `KEY=value` — blank lines, `#` comments — are ignored.
 * A missing or unreadable file is reported on stderr and otherwise ignored.
 *
 * @returns {Promise<void>}
 */
async function loadEnv() {
  try {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const envFile = await fs.readFile(path.join(scriptDir, '.env'), 'utf-8');

    // Accept both LF and CRLF line endings.
    for (const line of envFile.split(/\r?\n/)) {
      // Lazy value capture so trailing whitespace is not part of the value.
      const match = line.match(/^\s*([\w.-]+)\s*=\s*(.*?)\s*$/);
      if (!match) {
        continue;
      }

      const key = match[1];
      let value = match[2];

      // Remove one pair of matching quotes if they exist
      const quote = value.charAt(0);
      const isQuoted = value.length >= 2
        && (quote === '"' || quote === "'")
        && value.charAt(value.length - 1) === quote;
      if (isQuoted) {
        value = value.slice(1, -1);
      }

      process.env[key] = value;
    }

    console.log('Environment variables loaded from .env file');
  } catch (error) {
    console.error('Error loading .env file:', error.message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Convert an image file to a base64 data URI.
 *
 * The MIME type is chosen from the (case-insensitive) file extension; any
 * extension not listed below falls back to `application/octet-stream`.
 *
 * @param {string} filePath - Path to the image file
 * @returns {Promise<string>} `data:<mime>;base64,<payload>`
 * @throws Re-throws the read error after logging it.
 */
async function imageToBase64(filePath) {
  try {
    // Read the file
    const imageBuffer = await fs.readFile(filePath);

    // Determine MIME type based on file extension
    const fileExt = path.extname(filePath).toLowerCase();
    let mimeType = 'application/octet-stream';

    switch (fileExt) {
      case '.png':
        mimeType = 'image/png';
        break;
      case '.jpg':
      case '.jpeg':
        mimeType = 'image/jpeg';
        break;
      case '.webp':
        mimeType = 'image/webp';
        break;
      case '.gif':
        // Recognised by the other converters added alongside this script;
        // keep this one in step with them.
        mimeType = 'image/gif';
        break;
      // Add other supported types as needed
    }

    // Convert to base64 and add the data URI prefix
    const base64 = imageBuffer.toString('base64');
    return `data:${mimeType};base64,${base64}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Example 1: Send a base64 image using the MCP server analyze_image tool
 *
 * Nothing is sent: this prints the request an MCP client would send, with the
 * image data truncated to its first 50 characters for display.
 *
 * @param {string} base64Image - Image as a base64 data URL
 * @param {string} [question="What's in this image?"] - Question to ask about the image
 * @returns {Promise<void>}
 */
async function testMcpAnalyzeImage(base64Image, question = "What's in this image?") {
  try {
    console.log('Testing MCP analyze_image tool with base64 image...');

    // This would normally be handled by the MCP server client
    // This is a simulation of how to structure the data for the MCP server
    const request = {
      tool: 'mcp_openrouter_analyze_image',
      arguments: {
        image_path: `${base64Image.substring(0, 50)}...`,
        question,
        model: DEFAULT_MODEL,
      },
    };

    // Serialise with JSON.stringify so quotes or newlines in `question` are
    // escaped and the printed request is always valid JSON.
    console.log(`
To analyze the image using MCP, send this request to the MCP server
(image_path is truncated for display):

${JSON.stringify(request, null, 2)}

The MCP server will convert the image path (which is already a base64 data URL)
and send it to OpenRouter in the correct format.
`);
  } catch (error) {
    console.error('Error testing MCP analyze_image:', error);
  }
}
|
||||||
|
|
||||||
|
/**
 * Example 2: Send multiple base64 images using the MCP server
 * multi_image_analysis tool.
 *
 * Nothing is sent anywhere: the function only prints the request a client
 * would send. At most the first two images are shown, each truncated to its
 * first 50 characters; further images are summarized as `...`.
 *
 * @param {string[]} base64Images - Base64 data URLs of the images (may be empty).
 * @param {string} [prompt] - Prompt describing what to do with the images.
 * @returns {Promise<void>} Errors are logged, not thrown.
 */
async function testMcpMultiImageAnalysis(base64Images, prompt = "Describe these images in detail.") {
  try {
    console.log('Testing MCP multi_image_analysis tool with base64 images...');

    // Build the preview from whatever is present, so an empty list prints
    // "[]" instead of failing on base64Images[0].
    const previewEntries = base64Images
      .slice(0, 2)
      .map((image) => `      { "url": "${image.substring(0, 50)}..." }`);
    if (base64Images.length > 2) {
      previewEntries.push('      ...');
    }
    const imagesJson = previewEntries.length > 0
      ? `[\n${previewEntries.join(',\n')}\n    ]`
      : '[]';

    // This would normally be handled by the MCP server client
    // This is a simulation of how to structure the data for the MCP server
    console.log(`
To analyze multiple images using MCP, send this request to the MCP server:

{
  "tool": "mcp_openrouter_multi_image_analysis",
  "arguments": {
    "images": ${imagesJson}, // Truncated for display
    "prompt": ${JSON.stringify(prompt)},
    "model": "${DEFAULT_MODEL}"
  }
}

The MCP server will process these base64 images and send them to OpenRouter
in the correct format.
`);
  } catch (error) {
    console.error('Error testing MCP multi_image_analysis:', error);
  }
}
|
||||||
|
|
||||||
|
/**
 * Example 3: Direct OpenRouter API call with base64 image (for comparison).
 *
 * Posts a single user message (text question plus image) to the OpenRouter
 * chat-completions endpoint and prints the first choice's content.
 *
 * @param {string} base64Image - Base64 data URL of the image.
 * @param {string} [question] - Question to ask about the image.
 * @param {string} apiKey - OpenRouter API key, sent as a Bearer token.
 * @returns {Promise<void>} Errors are logged, not thrown.
 */
async function sendImageDirectAPI(base64Image, question = "What's in this image?", apiKey) {
  try {
    console.log('Sending image directly to OpenRouter API (for comparison)...');

    const requestBody = {
      model: DEFAULT_MODEL,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: question
            },
            {
              type: 'image_url',
              image_url: {
                url: base64Image
              }
            }
          ]
        }
      ]
    };

    const requestConfig = {
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        'HTTP-Referer': 'https://github.com/yourusername/your-repo',
        'X-Title': 'MCP Server Demo'
      }
    };

    const response = await axios.post(
      'https://openrouter.ai/api/v1/chat/completions',
      requestBody,
      requestConfig
    );

    // A response without choices is reported explicitly rather than
    // surfacing as a TypeError.
    const choices = response.data && response.data.choices;
    if (!Array.isArray(choices) || choices.length === 0 || !choices[0].message) {
      console.error('Unexpected API response (no choices returned):', response.data);
      return;
    }

    console.log('\nDirect API response:');
    console.log(choices[0].message.content);
  } catch (error) {
    // Log only the message: the full axios error object carries the request
    // config, including the Authorization header with the API key.
    const reason = error && error.message ? error.message : error;
    console.error('Error sending image via direct API:', reason);
    if (error && error.response) {
      console.error('API error details:', error.response.data);
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Main function to run the examples.
 *
 * Loads the environment, resolves the API key (environment variable first,
 * then the script constant), checks that the configured image exists,
 * converts it to a base64 data URL and runs the three examples in order.
 * Stops early, with a message, when the key is still the placeholder or the
 * image file cannot be accessed.
 *
 * @returns {Promise<void>} Errors are logged, not thrown.
 */
async function main() {
  try {
    // Load environment variables from .env file
    await loadEnv();

    // Get API key from environment after loading, else from the script constant
    const envApiKey = process.env.OPENROUTER_API_KEY;
    const apiKey = envApiKey || OPENROUTER_API_KEY;
    const isPlaceholderKey = apiKey === 'your_openrouter_api_key';

    // Debug: report where the key came from without printing the key itself
    let keySource;
    if (isPlaceholderKey) {
      keySource = 'Default placeholder (update needed)';
    } else if (envApiKey) {
      keySource = 'From environment';
    } else {
      keySource = 'From script constant';
    }
    console.log(`API key from environment: ${envApiKey ? 'Yes (set)' : 'No (not set)'}`);
    console.log(`Using API key: ${keySource}`);

    // Check if API key is provided
    if (isPlaceholderKey) {
      console.error('Please set the OPENROUTER_API_KEY environment variable or update the script.');
      return;
    }

    console.log(`Converting image: ${IMAGE_PATH}`);

    // Check if the image file exists
    try {
      await fs.access(IMAGE_PATH);
      console.log(`Image file exists: ${IMAGE_PATH}`);
    } catch (err) {
      console.error(`Error: Image file does not exist: ${IMAGE_PATH}`);
      return;
    }

    // Convert the image to base64
    const base64Image = await imageToBase64(IMAGE_PATH);
    console.log('Image converted to base64 successfully.');
    console.log(`Base64 length: ${base64Image.length} characters`);
    console.log(`Base64 starts with: ${base64Image.substring(0, 50)}...`);

    // For multiple images demo, we'll use the same image twice
    const base64Images = [base64Image, base64Image];

    // Example 1: MCP server with analyze_image
    await testMcpAnalyzeImage(base64Image);

    // Example 2: MCP server with multi_image_analysis
    await testMcpMultiImageAnalysis(base64Images);

    // Example 3: Direct API call. The placeholder key already returned
    // above, so a usable key is guaranteed here.
    await sendImageDirectAPI(base64Image, "What's in this image?", apiKey);

    console.log('\nDone! You can now use the MCP server with base64 encoded images.');
  } catch (error) {
    console.error('Error in main function:', error);
  }
}
|
||||||
|
|
||||||
|
// Script entry point: announce start-up, then run main(). The handler below
// reports any rejection that main() itself did not catch.
const reportUnhandledMainError = (failure) => {
  console.error("Unhandled error in main:", failure);
};

console.log("Running main function...");
main().catch(reportUnhandledMainError);
|
||||||
160
send_image_to_openrouter.ts
Normal file
160
send_image_to_openrouter.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
import fs from 'fs/promises';
|
||||||
|
import path from 'path';
|
||||||
|
import OpenAI from 'openai';
|
||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
// Constants
// The key is taken from the OPENROUTER_API_KEY environment variable when it
// is set, so a real key never has to be written into this file. The
// placeholder remains the fallback.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY || 'your_openrouter_api_key'; // Or replace with your actual key
const IMAGE_PATH = 'path/to/your/image.jpg'; // Replace with your image path
|
||||||
|
|
||||||
|
/**
 * Convert an image file to a base64 data URL.
 *
 * The MIME type is derived from the file extension (case-insensitive).
 * Extensions that are not in the table fall back to
 * `application/octet-stream`.
 *
 * @param filePath - Path of the image file to read.
 * @returns A string of the form `data:<mime>;base64,<payload>`.
 * @throws Re-throws whatever reading the file throws, after logging it.
 */
async function imageToBase64(filePath: string): Promise<string> {
  // Extension -> MIME type. GIF is included so that .gif files are labelled
  // as images instead of falling through to the generic binary type.
  const mimeTypesByExtension: Record<string, string> = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.webp': 'image/webp',
    '.gif': 'image/gif',
  };

  try {
    // Read the file
    const imageBuffer = await fs.readFile(filePath);

    // Determine MIME type based on file extension
    const fileExt = path.extname(filePath).toLowerCase();
    const mimeType = mimeTypesByExtension[fileExt] || 'application/octet-stream';

    // Convert to base64 and add the data URI prefix
    const base64 = imageBuffer.toString('base64');
    return `data:${mimeType};base64,${base64}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Method 1: Send an image to OpenRouter using direct API call.
 *
 * Posts a single user message (text question plus image) to the OpenRouter
 * chat-completions endpoint with axios and prints the first choice's content.
 *
 * @param base64Image - Base64 data URL of the image.
 * @param question - Question to ask about the image.
 * @returns Resolves when done; errors are logged, not thrown.
 */
async function sendImageDirectAPI(base64Image: string, question: string = "What's in this image?"): Promise<void> {
  try {
    console.log('Sending image via direct API call...');

    const response = await axios.post(
      'https://openrouter.ai/api/v1/chat/completions',
      {
        model: 'anthropic/claude-3-opus', // Choose an appropriate model with vision capabilities
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: question
              },
              {
                type: 'image_url',
                image_url: {
                  url: base64Image
                }
              }
            ]
          }
        ]
      },
      {
        headers: {
          'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://your-site-url.com', // Optional
          'X-Title': 'Your Site Name' // Optional
        }
      }
    );

    // A response without choices is reported explicitly rather than
    // surfacing as a TypeError.
    const choices = response.data && response.data.choices;
    if (!Array.isArray(choices) || choices.length === 0 || !choices[0].message) {
      console.error('Unexpected API response (no choices returned):', response.data);
      return;
    }

    console.log('Response from direct API:');
    console.log(choices[0].message.content);
  } catch (error) {
    // Log only the message: the full axios error object carries the request
    // config, including the Authorization header with the API key.
    const reason = error instanceof Error ? error.message : String(error);
    console.error('Error sending image via direct API:', reason);
    if (axios.isAxiosError(error) && error.response) {
      console.error('API error details:', error.response.data);
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Method 2: Send an image to OpenRouter using OpenAI SDK.
 *
 * Creates an OpenAI client whose base URL is OpenRouter's API, submits one
 * user message holding the question and the image, and prints the first
 * choice's content.
 *
 * @param base64Image - Base64 data URL of the image.
 * @param question - Question to ask about the image.
 * @returns Resolves when done; errors are logged, not thrown.
 */
async function sendImageOpenAISDK(base64Image: string, question: string = "What's in this image?"): Promise<void> {
  try {
    console.log('Sending image via OpenAI SDK...');

    // OpenAI client configured to talk to OpenRouter
    const client = new OpenAI({
      apiKey: OPENROUTER_API_KEY,
      baseURL: 'https://openrouter.ai/api/v1',
      defaultHeaders: {
        'HTTP-Referer': 'https://your-site-url.com', // Optional
        'X-Title': 'Your Site Name' // Optional
      }
    });

    // One user turn: the text question followed by the image
    const reply = await client.chat.completions.create({
      model: 'anthropic/claude-3-opus', // Choose an appropriate model with vision capabilities
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: base64Image } }
          ]
        }
      ]
    });

    console.log('Response from OpenAI SDK:');
    const [firstChoice] = reply.choices;
    console.log(firstChoice.message.content);
  } catch (failure) {
    console.error('Error sending image via OpenAI SDK:', failure);
  }
}
|
||||||
|
|
||||||
|
/**
 * Main function to run the examples.
 *
 * Encodes the configured image once, then passes the resulting data URL to
 * each sender in turn: the direct API call first, the OpenAI SDK second.
 * Any error is logged rather than thrown.
 */
async function main() {
  try {
    const encodedImage = await imageToBase64(IMAGE_PATH);
    console.log('Image converted to base64 successfully');

    // Example 1 (direct API call), then Example 2 (OpenAI SDK), sequentially
    for (const sendExample of [sendImageDirectAPI, sendImageOpenAISDK]) {
      await sendExample(encodedImage);
    }
  } catch (failure) {
    console.error('Error in main function:', failure);
  }
}
|
||||||
|
|
||||||
|
// Start the examples. main() catches and logs its own errors, so the
// returned promise is deliberately discarded.
void main();
|
||||||
@@ -15,7 +15,7 @@ class OpenRouterMultimodalServer {
|
|||||||
constructor() {
|
constructor() {
|
||||||
// Retrieve API key and default model from environment variables
|
// Retrieve API key and default model from environment variables
|
||||||
const apiKey = process.env.OPENROUTER_API_KEY;
|
const apiKey = process.env.OPENROUTER_API_KEY;
|
||||||
const defaultModel = process.env.DEFAULT_MODEL || DEFAULT_MODEL;
|
const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
|
||||||
|
|
||||||
// Check if API key is provided
|
// Check if API key is provided
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ export class ToolHandlers {
|
|||||||
properties: {
|
properties: {
|
||||||
image_path: {
|
image_path: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'Path to the image file to analyze (must be an absolute path)',
|
description: 'Path to the image file to analyze (can be an absolute file path, URL, or base64 data URL starting with "data:")',
|
||||||
},
|
},
|
||||||
question: {
|
question: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
@@ -167,7 +167,7 @@ export class ToolHandlers {
|
|||||||
properties: {
|
properties: {
|
||||||
url: {
|
url: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
|
description: 'URL or data URL of the image (use http(s):// for web images, absolute file paths for local files, or data:image/xxx;base64,... for base64 encoded images)',
|
||||||
},
|
},
|
||||||
alt: {
|
alt: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
|
|||||||
@@ -1,14 +1,32 @@
|
|||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { promises as fs } from 'fs';
|
import { promises as fs } from 'fs';
|
||||||
import sharp from 'sharp';
|
import fetch from 'node-fetch';
|
||||||
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
||||||
import OpenAI from 'openai';
|
import OpenAI from 'openai';
|
||||||
import fetch from 'node-fetch';
|
|
||||||
import { findSuitableFreeModel } from './multi-image-analysis.js';
|
import { findSuitableFreeModel } from './multi-image-analysis.js';
|
||||||
|
|
||||||
// Default model for image analysis
|
// Default model for image analysis
|
||||||
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||||
|
|
||||||
|
// Load sharp if it can be required; otherwise install a stand-in with the
// same call shape that returns the input buffer unchanged.
// NOTE(review): if this module is emitted as native ESM, `require` is not
// defined and this would always take the stand-in path — confirm the build
// target.
let sharp: any;
try {
  sharp = require('sharp');
} catch (e) {
  console.error('Warning: sharp module not available, using fallback image processing');

  // Final stage of the stand-in pipeline: hand back the untouched bytes
  const passThrough = (buffer: Buffer) => ({
    toBuffer: async () => buffer
  });

  // Stand-in: fixed 800x600 metadata, resize()/jpeg() are no-ops
  sharp = (buffer: Buffer) => ({
    metadata: async () => ({ width: 800, height: 600 }),
    resize: () => ({
      jpeg: () => passThrough(buffer)
    }),
    jpeg: () => passThrough(buffer)
  });
}
|
||||||
|
|
||||||
export interface AnalyzeImageToolRequest {
|
export interface AnalyzeImageToolRequest {
|
||||||
image_path: string;
|
image_path: string;
|
||||||
question?: string;
|
question?: string;
|
||||||
@@ -49,10 +67,34 @@ async function fetchImageAsBuffer(url: string): Promise<Buffer> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Minimal image handling used when sharp isn't available: the bytes are
 * base64-encoded exactly as received, with no resizing or re-encoding.
 *
 * @param buffer - Raw image bytes.
 * @returns The base64 encoding of `buffer`.
 * @throws Re-throws any encoding error after logging it.
 */
async function processImageFallback(buffer: Buffer): Promise<string> {
  let encoded: string;
  try {
    // No processing at all: encode the input as-is
    encoded = buffer.toString('base64');
  } catch (failure) {
    console.error('Error in fallback image processing:', failure);
    throw failure;
  }
  return encoded;
}
|
||||||
|
|
||||||
async function processImage(buffer: Buffer): Promise<string> {
|
async function processImage(buffer: Buffer): Promise<string> {
|
||||||
try {
|
try {
|
||||||
|
if (typeof sharp !== 'function') {
|
||||||
|
console.warn('Using fallback image processing (sharp not available)');
|
||||||
|
return processImageFallback(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
// Get image metadata
|
// Get image metadata
|
||||||
const metadata = await sharp(buffer).metadata();
|
let metadata;
|
||||||
|
try {
|
||||||
|
metadata = await sharp(buffer).metadata();
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Error getting image metadata, using fallback:', error);
|
||||||
|
return processImageFallback(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate dimensions to keep base64 size reasonable
|
// Calculate dimensions to keep base64 size reasonable
|
||||||
const MAX_DIMENSION = 800;
|
const MAX_DIMENSION = 800;
|
||||||
@@ -81,39 +123,56 @@ async function processImage(buffer: Buffer): Promise<string> {
|
|||||||
|
|
||||||
return jpegBuffer.toString('base64');
|
return jpegBuffer.toString('base64');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error processing image:', error);
|
console.error('Error processing image, using fallback:', error);
|
||||||
throw error;
|
return processImageFallback(buffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the image at the given path to a base64 string
|
* Processes an image from a path or base64 string to a proper base64 format for APIs
|
||||||
*/
|
*/
|
||||||
async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
|
async function prepareImage(imagePath: string): Promise<{ base64: string; mimeType: string }> {
|
||||||
try {
|
try {
|
||||||
// Ensure the image path is absolute
|
// Check if already a base64 data URL
|
||||||
if (!path.isAbsolute(imagePath)) {
|
if (imagePath.startsWith('data:')) {
|
||||||
throw new McpError(
|
const matches = imagePath.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
|
||||||
ErrorCode.InvalidParams,
|
if (!matches || matches.length !== 3) {
|
||||||
'Image path must be absolute'
|
throw new McpError(ErrorCode.InvalidParams, 'Invalid base64 data URL format');
|
||||||
);
|
}
|
||||||
|
return { base64: matches[2], mimeType: matches[1] };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if image is a URL
|
||||||
|
if (imagePath.startsWith('http://') || imagePath.startsWith('https://')) {
|
||||||
|
try {
|
||||||
|
const buffer = await fetchImageAsBuffer(imagePath);
|
||||||
|
const processed = await processImage(buffer);
|
||||||
|
return { base64: processed, mimeType: 'image/jpeg' }; // We convert everything to JPEG
|
||||||
|
} catch (error: any) {
|
||||||
|
throw new McpError(ErrorCode.InvalidParams, `Failed to fetch image from URL: ${error.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle file paths
|
||||||
|
let absolutePath = imagePath;
|
||||||
|
|
||||||
|
// Ensure the image path is absolute if it's a file path
|
||||||
|
if (!imagePath.startsWith('data:') && !path.isAbsolute(imagePath)) {
|
||||||
|
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the file exists
|
|
||||||
try {
|
try {
|
||||||
await fs.access(imagePath);
|
// Check if the file exists
|
||||||
|
await fs.access(absolutePath);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
throw new McpError(
|
throw new McpError(ErrorCode.InvalidParams, `File not found: ${absolutePath}`);
|
||||||
ErrorCode.InvalidParams,
|
|
||||||
`File not found: ${imagePath}`
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the file as a buffer
|
// Read the file as a buffer
|
||||||
const buffer = await fs.readFile(imagePath);
|
const buffer = await fs.readFile(absolutePath);
|
||||||
|
|
||||||
// Determine MIME type from file extension
|
// Determine MIME type from file extension
|
||||||
const extension = path.extname(imagePath).toLowerCase();
|
const extension = path.extname(absolutePath).toLowerCase();
|
||||||
let mimeType: string;
|
let mimeType: string;
|
||||||
|
|
||||||
switch (extension) {
|
switch (extension) {
|
||||||
@@ -137,12 +196,11 @@ async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeT
|
|||||||
mimeType = 'application/octet-stream';
|
mimeType = 'application/octet-stream';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert buffer to base64
|
// Process and optimize the image
|
||||||
const base64 = buffer.toString('base64');
|
const processed = await processImage(buffer);
|
||||||
|
return { base64: processed, mimeType };
|
||||||
return { base64, mimeType };
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error converting image to base64:', error);
|
console.error('Error preparing image:', error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -160,23 +218,21 @@ export async function handleAnalyzeImage(
|
|||||||
try {
|
try {
|
||||||
// Validate inputs
|
// Validate inputs
|
||||||
if (!args.image_path) {
|
if (!args.image_path) {
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
|
throw new McpError(ErrorCode.InvalidParams, 'An image path, URL, or base64 data is required');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!args.question) {
|
const question = args.question || "What's in this image?";
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
|
|
||||||
}
|
|
||||||
|
|
||||||
console.error(`Processing image: ${args.image_path}`);
|
console.error(`Processing image: ${args.image_path.substring(0, 100)}${args.image_path.length > 100 ? '...' : ''}`);
|
||||||
|
|
||||||
// Convert the image to base64
|
// Convert the image to base64
|
||||||
const { base64, mimeType } = await imageToBase64(args.image_path);
|
const { base64, mimeType } = await prepareImage(args.image_path);
|
||||||
|
|
||||||
// Create the content array for the OpenAI API
|
// Create the content array for the OpenAI API
|
||||||
const content = [
|
const content = [
|
||||||
{
|
{
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: args.question
|
text: question
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
type: 'image_url',
|
type: 'image_url',
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import fetch from 'node-fetch';
|
import fetch from 'node-fetch';
|
||||||
import sharp from 'sharp';
|
// Remove the sharp import to avoid conflicts with our dynamic import
|
||||||
|
// import sharp from 'sharp';
|
||||||
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
||||||
import OpenAI from 'openai';
|
import OpenAI from 'openai';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
@@ -8,6 +9,26 @@ import { tmpdir } from 'os';
|
|||||||
// Remove uuid import as we'll use a simple random string generator instead
|
// Remove uuid import as we'll use a simple random string generator instead
|
||||||
// import { v4 as uuidv4 } from 'uuid';
|
// import { v4 as uuidv4 } from 'uuid';
|
||||||
|
|
||||||
|
// Setup sharp with fallback: use the real module if it can be required,
// otherwise install a stand-in with the same call shape that returns the
// input buffer unchanged.
// NOTE(review): if this module is emitted as native ESM, `require` is not
// defined and this would always take the stand-in path — confirm the build
// target.
let sharp: any;
try {
  sharp = require('sharp');
} catch (e) {
  console.error('Warning: sharp module not available, using fallback image processing');

  // Final stage of the stand-in pipeline: hand back the untouched bytes
  const passThrough = (buffer: Buffer) => ({
    toBuffer: async () => buffer
  });

  // Stand-in: fixed 800x600 metadata, resize()/jpeg() are no-ops
  sharp = (buffer: Buffer) => ({
    metadata: async () => ({ width: 800, height: 600 }),
    resize: () => ({
      jpeg: () => passThrough(buffer)
    }),
    jpeg: () => passThrough(buffer)
  });
}
|
||||||
|
|
||||||
// Default model for image analysis
|
// Default model for image analysis
|
||||||
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||||
|
|
||||||
@@ -149,13 +170,25 @@ async function fetchImageAsBuffer(url: string): Promise<Buffer> {
|
|||||||
*/
|
*/
|
||||||
async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
|
async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
|
||||||
try {
|
try {
|
||||||
|
if (typeof sharp !== 'function') {
|
||||||
|
console.warn('Using fallback image processing (sharp not available)');
|
||||||
|
return processImageFallback(buffer, mimeType);
|
||||||
|
}
|
||||||
|
|
||||||
// Create a temporary directory for processing if needed
|
// Create a temporary directory for processing if needed
|
||||||
const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
|
const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
|
||||||
await fs.mkdir(tempDir, { recursive: true });
|
await fs.mkdir(tempDir, { recursive: true });
|
||||||
|
|
||||||
// Get image info
|
// Get image info
|
||||||
let sharpInstance = sharp(buffer);
|
let sharpInstance = sharp(buffer);
|
||||||
const metadata = await sharpInstance.metadata();
|
let metadata;
|
||||||
|
|
||||||
|
try {
|
||||||
|
metadata = await sharpInstance.metadata();
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Error getting image metadata, using fallback:', error);
|
||||||
|
return processImageFallback(buffer, mimeType);
|
||||||
|
}
|
||||||
|
|
||||||
// Skip processing for small images
|
// Skip processing for small images
|
||||||
if (metadata.width && metadata.height &&
|
if (metadata.width && metadata.height &&
|
||||||
@@ -177,19 +210,20 @@ async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert to JPEG for consistency and small size
|
try {
|
||||||
const processedBuffer = await sharpInstance
|
// Convert to JPEG for consistency and small size
|
||||||
.jpeg({ quality: JPEG_QUALITY })
|
const processedBuffer = await sharpInstance
|
||||||
.toBuffer();
|
.jpeg({ quality: JPEG_QUALITY })
|
||||||
|
.toBuffer();
|
||||||
|
|
||||||
return processedBuffer.toString('base64');
|
return processedBuffer.toString('base64');
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Error in final image processing, using fallback:', error);
|
||||||
|
return processImageFallback(buffer, mimeType);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error processing image:', error);
|
console.error('Error processing image, using fallback:', error);
|
||||||
|
return processImageFallback(buffer, mimeType);
|
||||||
// If sharp processing fails, return the original buffer
|
|
||||||
// This is a fallback to ensure we don't completely fail on processing errors
|
|
||||||
console.error('Returning original image without processing');
|
|
||||||
return buffer.toString('base64');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -265,7 +299,7 @@ export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process and analyze multiple images using OpenRouter
|
* Main handler for multi-image analysis
|
||||||
*/
|
*/
|
||||||
export async function handleMultiImageAnalysis(
|
export async function handleMultiImageAnalysis(
|
||||||
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
||||||
@@ -276,65 +310,50 @@ export async function handleMultiImageAnalysis(
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// Validate inputs
|
// Validate inputs
|
||||||
if (!args.images || args.images.length === 0) {
|
if (!args.images || !Array.isArray(args.images) || args.images.length === 0) {
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'At least one image is required');
|
throw new McpError(ErrorCode.InvalidParams, 'At least one image is required');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!args.prompt) {
|
if (!args.prompt) {
|
||||||
throw new McpError(ErrorCode.InvalidParams, 'A prompt is required');
|
throw new McpError(ErrorCode.InvalidParams, 'A prompt for analyzing the images is required');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare content array for the message
|
console.error(`Processing ${args.images.length} images`);
|
||||||
const content: Array<any> = [{
|
|
||||||
type: 'text',
|
|
||||||
text: args.prompt
|
|
||||||
}];
|
|
||||||
|
|
||||||
// Track successful and failed images for reporting
|
// Process each image and convert to base64 if needed
|
||||||
const successfulImages = [];
|
const processedImages = await Promise.all(
|
||||||
const failedImages = [];
|
args.images.map(async (image, index) => {
|
||||||
|
try {
|
||||||
// Process each image
|
// Skip processing if already a data URL
|
||||||
for (const [index, image] of args.images.entries()) {
|
if (image.url.startsWith('data:')) {
|
||||||
try {
|
console.error(`Image ${index + 1} is already in base64 format`);
|
||||||
console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
|
return image;
|
||||||
|
|
||||||
// Get MIME type
|
|
||||||
const mimeType = getMimeType(image.url);
|
|
||||||
|
|
||||||
// Fetch and process the image
|
|
||||||
const imageBuffer = await fetchImageAsBuffer(image.url);
|
|
||||||
const base64Image = await processImage(imageBuffer, mimeType);
|
|
||||||
|
|
||||||
// Use JPEG as the output format for consistency
|
|
||||||
const outputMimeType = 'image/jpeg';
|
|
||||||
|
|
||||||
// Add to content
|
|
||||||
content.push({
|
|
||||||
type: 'image_url',
|
|
||||||
image_url: {
|
|
||||||
url: `data:${outputMimeType};base64,${base64Image}`
|
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
successfulImages.push(image.url);
|
console.error(`Processing image ${index + 1}: ${image.url.substring(0, 100)}${image.url.length > 100 ? '...' : ''}`);
|
||||||
} catch (error) {
|
|
||||||
console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
|
|
||||||
failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
|
|
||||||
// Continue with other images if one fails
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no images were successfully processed
|
// Get MIME type
|
||||||
if (content.length === 1) {
|
const mimeType = getMimeType(image.url);
|
||||||
const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
|
|
||||||
throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
|
// Fetch and process the image
|
||||||
}
|
const buffer = await fetchImageAsBuffer(image.url);
|
||||||
|
const base64 = await processImage(buffer, mimeType);
|
||||||
|
|
||||||
|
return {
|
||||||
|
url: `data:${mimeType === 'application/octet-stream' ? 'image/jpeg' : mimeType};base64,${base64}`,
|
||||||
|
alt: image.alt
|
||||||
|
};
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`Error processing image ${index + 1}:`, error);
|
||||||
|
throw new Error(`Failed to process image ${index + 1}: ${image.url}. Error: ${error.message}`);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
// Select model with priority:
|
// Select model with priority:
|
||||||
// 1. User-specified model
|
// 1. User-specified model
|
||||||
// 2. Default model from environment
|
// 2. Default model from environment
|
||||||
// 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
|
// 3. Default free vision model
|
||||||
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
|
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
|
||||||
|
|
||||||
// If a model is specified but not our default free model, verify it exists
|
// If a model is specified but not our default free model, verify it exists
|
||||||
@@ -348,7 +367,30 @@ export async function handleMultiImageAnalysis(
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.error(`Making API call with model: ${model}`);
|
console.error(`Making API call with model: ${model}`);
|
||||||
console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
|
|
||||||
|
// Build content array for the API call
|
||||||
|
const content: Array<{
|
||||||
|
type: string;
|
||||||
|
text?: string;
|
||||||
|
image_url?: {
|
||||||
|
url: string
|
||||||
|
}
|
||||||
|
}> = [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: args.prompt
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
// Add each processed image to the content array
|
||||||
|
processedImages.forEach(image => {
|
||||||
|
content.push({
|
||||||
|
type: 'image_url',
|
||||||
|
image_url: {
|
||||||
|
url: image.url
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// Make the API call
|
// Make the API call
|
||||||
const completion = await openai.chat.completions.create({
|
const completion = await openai.chat.completions.create({
|
||||||
@@ -359,16 +401,19 @@ export async function handleMultiImageAnalysis(
|
|||||||
}] as any
|
}] as any
|
||||||
});
|
});
|
||||||
|
|
||||||
// Format the response
|
// Get response text and format if requested
|
||||||
let responseText = completion.choices[0].message.content || '';
|
let responseText = completion.choices[0].message.content || '';
|
||||||
|
|
||||||
// Add information about failed images if any
|
// Format as markdown if requested
|
||||||
if (failedImages.length > 0) {
|
if (args.markdown_response) {
|
||||||
const formattedErrors = args.markdown_response !== false
|
// Simple formatting enhancements
|
||||||
? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
|
responseText = responseText
|
||||||
: `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
|
// Add horizontal rule after sections
|
||||||
|
.replace(/^(#{1,3}.*)/gm, '$1\n\n---')
|
||||||
responseText += formattedErrors;
|
// Ensure proper spacing for lists
|
||||||
|
.replace(/^(\s*[-*•]\s.+)$/gm, '\n$1')
|
||||||
|
// Convert plain URLs to markdown links
|
||||||
|
.replace(/(https?:\/\/[^\s]+)/g, '[$1]($1)');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the analysis result
|
// Return the analysis result
|
||||||
@@ -381,12 +426,10 @@ export async function handleMultiImageAnalysis(
|
|||||||
],
|
],
|
||||||
metadata: {
|
metadata: {
|
||||||
model: completion.model,
|
model: completion.model,
|
||||||
usage: completion.usage,
|
usage: completion.usage
|
||||||
successful_images: successfulImages.length,
|
|
||||||
failed_images: failedImages.length
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error: any) {
|
||||||
console.error('Error in multi-image analysis:', error);
|
console.error('Error in multi-image analysis:', error);
|
||||||
|
|
||||||
if (error instanceof McpError) {
|
if (error instanceof McpError) {
|
||||||
@@ -397,14 +440,27 @@ export async function handleMultiImageAnalysis(
|
|||||||
content: [
|
content: [
|
||||||
{
|
{
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: `Error analyzing images: ${error instanceof Error ? error.message : String(error)}`,
|
text: `Error analyzing images: ${error.message}`,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
isError: true,
|
isError: true,
|
||||||
metadata: {
|
metadata: {
|
||||||
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
|
error_type: error.constructor.name,
|
||||||
error_message: error instanceof Error ? error.message : String(error)
|
error_message: error.message
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Minimal image encoder for when sharp isn't available: the bytes are not
 * resized or re-encoded, only serialised.
 *
 * @param buffer - Raw image bytes.
 * @param mimeType - MIME type of the image; not consulted by this function.
 * @returns The buffer contents as a base64 string (no data-URI prefix).
 * @throws Re-throws, after logging, any error raised while encoding.
 */
async function processImageFallback(buffer: Buffer, mimeType: string): Promise<string> {
  let encoded: string;
  try {
    // No processing at all: the payload is the untouched input bytes.
    encoded = buffer.toString('base64');
  } catch (encodingError) {
    console.error('Error in fallback image processing:', encodingError);
    throw encodingError;
  }
  return encoded;
}
|
||||||
|
|||||||
65
start-server.js
Normal file
65
start-server.js
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
// Load environment variables and start the MCP server
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import { dirname } from 'path';
|
||||||
|
import { spawn } from 'child_process';
|
||||||
|
|
||||||
|
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The .env file is looked up next to this script
const envPath = path.resolve(__dirname, '.env');
|
||||||
|
|
||||||
|
/**
 * Parse the text of a .env file into a plain key/value object.
 *
 * Lines that are not of the form `KEY=value` (blank lines, `# comments`) are
 * ignored. Surrounding whitespace is trimmed and one pair of matching single
 * or double quotes around the value is removed.
 *
 * @param {string} envContent - Raw contents of the .env file.
 * @returns {Record<string, string>} Parsed variables.
 */
function parseEnvContent(envContent) {
  const parsed = {};

  // Accept both LF and CRLF line endings so files edited on Windows parse cleanly.
  for (const rawLine of envContent.split(/\r?\n/)) {
    const match = rawLine.match(/^\s*([\w.-]+)\s*=\s*(.*)?\s*$/);
    if (!match) {
      continue;
    }

    // The greedy value capture keeps trailing blanks; trim them so the quote
    // check below sees the real last character.
    let value = (match[2] || '').trim();

    const quote = value.charAt(0);
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }

    parsed[match[1]] = value;
  }

  return parsed;
}

/**
 * Load variables from the .env file next to this script into `process.env`,
 * then launch the MCP server (`dist/index.js`) as a child process that
 * inherits this process's stdio.
 *
 * All diagnostics go to stderr: the child shares our stdout, so anything
 * printed there would be interleaved with the server's own output.
 * A missing .env file is not fatal; the server is started with the existing
 * environment. Failures set a non-zero `process.exitCode`, and the child's
 * exit code is propagated.
 *
 * @returns {Promise<void>} Resolves once the child has been spawned (or the
 *   launch has failed and been reported); never rejects.
 */
async function loadEnvAndStartServer() {
  try {
    console.error('Loading environment variables from .env file...');

    let envVars = {};
    try {
      envVars = parseEnvContent(await fs.readFile(envPath, 'utf8'));
      console.error('Environment variables loaded successfully');
    } catch (readError) {
      // Only "file not found" is tolerated; any other read failure is a real error.
      if (!readError || readError.code !== 'ENOENT') {
        throw readError;
      }
      console.error(`No .env file found at ${envPath}; using the existing environment`);
    }

    // Values from the file override what is already set, as before.
    Object.assign(process.env, envVars);
    console.error(`API Key found: ${process.env.OPENROUTER_API_KEY ? 'Yes' : 'No'}`);

    // Start the server with the same Node binary that runs this script, and
    // resolve the entry point against this file (like envPath) rather than the cwd.
    console.error('Starting MCP server...');
    const serverEntry = path.join(__dirname, 'dist', 'index.js');
    const serverProcess = spawn(process.execPath, [serverEntry], {
      env: { ...process.env, ...envVars },
      stdio: 'inherit',
    });

    serverProcess.on('close', (code) => {
      console.error(`MCP server exited with code ${code}`);
      // code is null when the child was killed by a signal; report that as failure.
      process.exitCode = code ?? 1;
    });

    serverProcess.on('error', (err) => {
      console.error('Failed to start MCP server:', err);
      process.exitCode = 1;
    });
  } catch (error) {
    console.error('Error:', error);
    process.exitCode = 1;
  }
}
|
||||||
|
|
||||||
|
// Script entry point: load the environment, then launch the server.
// The returned promise is intentionally not awaited.
void loadEnvAndStartServer();
|
||||||
155
test-openai-sdk.js
Normal file
155
test-openai-sdk.js
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import { dirname } from 'path';
|
||||||
|
import { OpenAI } from 'openai';
|
||||||
|
|
||||||
|
// ES modules do not define __filename/__dirname; rebuild them from import.meta.url
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Image sent to the model by this test script (adjust to your image path)
const TEST_IMAGE_PATH = 'test.png';
|
||||||
|
|
||||||
|
/**
 * Read an image file and encode it as a base64 data URI.
 *
 * The MIME type is chosen from the file extension (.png, .jpg/.jpeg, .webp,
 * case-insensitive). Any other extension is logged and falls back to
 * application/octet-stream.
 *
 * @param {string} filePath - Path of the image file to read.
 * @returns {Promise<string>} A string of the form `data:<mime>;base64,<payload>`.
 * @throws Re-throws, after logging, any error raised while reading or encoding.
 */
async function imageToBase64(filePath) {
  const knownMimeTypes = new Map([
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
  ]);

  try {
    const bytes = await fs.readFile(filePath);

    const extension = path.extname(filePath).toLowerCase();
    const knownType = knownMimeTypes.get(extension);
    if (knownType === undefined) {
      console.log(`Using default MIME type for extension: ${extension}`);
    }
    const mimeType = knownType ?? 'application/octet-stream';

    return `data:${mimeType};base64,${bytes.toString('base64')}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Ask a vision model on OpenRouter a question about an image, via the OpenAI SDK.
 *
 * The full response is dumped for debugging; when it contains a first choice
 * with a message, the answer, the model name and (if present) token usage are
 * printed as well.
 *
 * @param {string} base64Image - Image passed as the `image_url.url` of the request.
 * @param {string} [question="What's in this image?"] - Text prompt sent with the image.
 * @returns {Promise<object>} The completion object returned by the SDK.
 * @throws Re-throws, after logging, any error raised by the SDK call.
 */
async function analyzeImageWithOpenRouter(base64Image, question = "What's in this image?") {
  try {
    console.log('Initializing OpenAI client with OpenRouter...');

    // OpenAI client pointed at the OpenRouter endpoint
    const clientOptions = {
      apiKey: process.env.OPENROUTER_API_KEY,
      baseURL: 'https://openrouter.ai/api/v1',
      defaultHeaders: {
        'HTTP-Referer': 'https://github.com/stabgan/openrouter-mcp-multimodal',
        'X-Title': 'OpenRouter MCP Test',
      },
    };
    const openai = new OpenAI(clientOptions);

    console.log('Sending image for analysis to Qwen free model...');

    // One user message carrying the text prompt followed by the image
    const userMessage = {
      role: 'user',
      content: [
        { type: 'text', text: question },
        { type: 'image_url', image_url: { url: base64Image } },
      ],
    };
    const completion = await openai.chat.completions.create({
      model: 'qwen/qwen2.5-vl-32b-instruct:free', // Qwen free model with vision capabilities
      messages: [userMessage],
    });

    // Dump the raw response structure
    console.log('\n----- Debug: API Response -----');
    console.log(JSON.stringify(completion, null, 2));
    console.log('----- End Debug -----\n');

    // Only read the answer when the response has the expected shape
    const hasMessage = Boolean(
      completion && completion.choices && completion.choices.length > 0 && completion.choices[0].message
    );
    if (!hasMessage) {
      console.log('Unexpected response structure from OpenRouter API.');
      return completion;
    }

    console.log('\n----- Analysis Results -----\n');
    console.log(completion.choices[0].message.content);
    console.log('\n----------------------------\n');

    // Model actually used and token accounting
    console.log('Model used:', completion.model);
    const { usage } = completion;
    if (usage) {
      console.log('Token usage:');
      console.log('- Prompt tokens:', usage.prompt_tokens);
      console.log('- Completion tokens:', usage.completion_tokens);
      console.log('- Total tokens:', usage.total_tokens);
    }

    return completion;
  } catch (error) {
    console.error('Error analyzing image with OpenRouter:');
    // Print the most specific detail the error object carries
    if (error.response) {
      console.error('API error status:', error.status);
      console.error('API error details:', JSON.stringify(error.response, null, 2));
    } else if (error.cause) {
      console.error('Error cause:', error.cause);
    } else {
      console.error(error);
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Script driver: checks that an API key is configured, encodes the test image
 * and sends it for analysis. Any failure is logged and ends the process with
 * exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const apiKeyPresent = Boolean(process.env.OPENROUTER_API_KEY);
    if (!apiKeyPresent) {
      throw new Error('OPENROUTER_API_KEY not found in environment variables. Create a .env file with your API key.');
    }

    console.log(`Converting image at ${TEST_IMAGE_PATH} to base64...`);
    const encodedImage = await imageToBase64(TEST_IMAGE_PATH);
    console.log('Image converted successfully!');

    // Show only the head of the data URI so the format can be checked by eye
    const preview = encodedImage.substring(0, 100);
    console.log('Base64 string preview:', `${preview}...`);

    await analyzeImageWithOpenRouter(encodedImage, "Please describe this image in detail. What do you see?");
  } catch (error) {
    console.error('Error in main function:', error);
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
// Script entry point. The returned promise is intentionally not awaited.
void main();
|
||||||
1
test_base64.txt
Normal file
1
test_base64.txt
Normal file
File diff suppressed because one or more lines are too long
94
test_mcp_server.js
Normal file
94
test_mcp_server.js
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
// Test MCP server with image analysis
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
|
||||||
|
// Image to send: first command-line argument, or test.png when none is given
const [, , cliImagePath] = process.argv;
const IMAGE_PATH = cliImagePath || 'test.png';
|
||||||
|
|
||||||
|
/**
 * Read an image file and return it as a base64 data URI.
 *
 * The MIME type follows the file extension (case-insensitive): .jpg/.jpeg map
 * to image/jpeg, .webp to image/webp, and everything else, including .png,
 * to image/png.
 *
 * @param {string} imagePath - Path of the image file to read.
 * @returns {Promise<string>} A string of the form `data:<mime>;base64,<payload>`.
 * @throws Re-throws, after logging, any error raised while reading or encoding.
 */
async function imageToBase64(imagePath) {
  try {
    const bytes = await fs.readFile(imagePath);

    const extension = path.extname(imagePath).toLowerCase();
    let mimeType = 'image/png'; // '.png' and every unrecognised extension
    if (extension === '.jpg' || extension === '.jpeg') {
      mimeType = 'image/jpeg';
    } else if (extension === '.webp') {
      mimeType = 'image/webp';
    }

    const payload = bytes.toString('base64');
    return `data:${mimeType};base64,${payload}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Build an analyze-image tool request for the MCP server and write it, as a
 * single JSON line, to this process's stdout.
 *
 * stdout carries ONLY the JSON request so that it can be piped straight into
 * the server's stdin; every status and error message goes to stderr.
 * Failures (missing image, conversion error) are reported on stderr and set a
 * non-zero `process.exitCode`.
 *
 * @returns {Promise<void>} Never rejects; errors are logged instead.
 */
async function main() {
  try {
    console.error(`Converting image: ${IMAGE_PATH}`);

    // Fail early, with a clear message, when the image is not there
    try {
      await fs.access(IMAGE_PATH);
      console.error(`Image file exists: ${IMAGE_PATH}`);
    } catch {
      console.error(`Error: Image file does not exist: ${IMAGE_PATH}`);
      process.exitCode = 1;
      return;
    }

    // Convert the image to a base64 data URI
    const base64Image = await imageToBase64(IMAGE_PATH);
    console.error('Image converted to base64 successfully.');
    console.error(`Base64 length: ${base64Image.length} characters`);

    // Create the request for analyze_image
    const analyzeImageRequest = {
      jsonrpc: '2.0',
      id: '1',
      method: 'mcp/call_tool',
      params: {
        tool: 'mcp_openrouter_analyze_image',
        arguments: {
          image_path: base64Image,
          question: "What's in this image?",
          model: 'qwen/qwen2.5-vl-32b-instruct:free',
        },
      },
    };

    // The request is the only thing written to stdout
    console.error('Sending request to MCP server...');
    process.stdout.write(JSON.stringify(analyzeImageRequest) + '\n');

    console.error('Waiting for response...');

    // This script does not read the server's reply; in a real client you
    // would read it from the server's stdout stream.
    console.error('Request sent to MCP server. Check the server logs for the response.');
  } catch (error) {
    console.error('Error in main function:', error);
    process.exitCode = 1;
  }
}
|
||||||
|
|
||||||
|
// Script entry point; anything that escapes main() is reported on stderr
main().catch((unhandled) => console.error("Unhandled error in main:", unhandled));
|
||||||
Reference in New Issue
Block a user