# jarvis-memory/skills/kimi-tts-custom/scripts/voice_reply.py

#!/usr/bin/env python3
"""
Generate voice with Kimi-XXX filename and send via Telegram (voice-only, no text)
Usage: voice_reply.py <chat_id> "Text to speak" [--voice af_bella] [--speed 1.3] [--bot-token TOKEN]
"""

import argparse
import json
import os
import sys
import subprocess
import tempfile
import urllib.request
from datetime import datetime

def generate_voice(
    text,
    voice="af_bella",
    output_dir="/tmp",
    model="tts-1",
    speed=1.3,
    tts_url="http://10.0.0.228:8880/v1/audio/speech",
    timeout=60,
):
    """Synthesize *text* to speech and save it as ``Kimi-YYYYMMDD-HHMMSS.ogg``.

    Sends a JSON POST (``model``, ``input``, ``voice``, ``speed``) to the TTS
    endpoint and writes the raw response body into ``output_dir``.

    Args:
        text: Text to convert to speech.
        voice: Voice ID sent to the TTS server.
        output_dir: Directory the audio file is written to.
        model: Model name sent to the TTS server.
        speed: Speech speed multiplier sent to the TTS server.
        tts_url: URL of the speech endpoint.
        timeout: Seconds to wait for the TTS server before giving up.

    Returns:
        A ``(filepath, filename)`` tuple for the written audio file.

    Exits:
        Prints the error to stderr and calls ``sys.exit(1)`` if the request
        fails, the server returns no data, or the file cannot be written.
    """
    # Timestamp has one-second resolution, so two calls within the same
    # second target the same filename.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"Kimi-{timestamp}.ogg"
    filepath = os.path.join(output_dir, filename)

    # NOTE(review): the request does not specify a response format, so the
    # bytes saved under the .ogg name are whatever the server returns by
    # default -- confirm that this is OGG/Opus.
    payload = json.dumps({
        "model": model,
        "input": text,
        "voice": voice,
        "speed": speed,
    }).encode()

    req = urllib.request.Request(
        tts_url,
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    try:
        # Without a timeout an unresponsive server would block forever.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            audio_data = response.read()

        # An empty body would otherwise be saved (and later sent) as a
        # zero-byte voice message.
        if not audio_data:
            raise ValueError("TTS server returned an empty response")

        with open(filepath, "wb") as f:
            f.write(audio_data)

        return filepath, filename

    except Exception as e:
        # Deliberately broad: every failure here ends the script with a
        # readable message instead of a traceback.
        print(f"Error generating voice: {e}", file=sys.stderr)
        sys.exit(1)

def send_voice_telegram(chat_id, audio_path, bot_token=None, timeout=120):
    """Send an audio file to a Telegram chat through the ``openclaw`` CLI.

    A bot token is resolved from, in order: the ``bot_token`` argument, the
    ``TELEGRAM_BOT_TOKEN`` environment variable, and
    ``openclaw config get channels.telegram.botToken``. The token only acts
    as a precondition check: it is not passed to the ``openclaw message send``
    command that performs the delivery.

    Args:
        chat_id: Telegram chat ID to send to (converted to ``str``).
        audio_path: Path of the audio file to attach.
        bot_token: Optional bot token; looked up as described above if empty.
        timeout: Seconds to wait for each ``openclaw`` invocation.

    Returns:
        ``True`` if the CLI exited with status 0, ``False`` otherwise
        (missing file, CLI error, CLI not runnable, or timeout).

    Exits:
        Calls ``sys.exit(1)`` if no bot token can be found.
    """
    if not bot_token:
        bot_token = os.environ.get("TELEGRAM_BOT_TOKEN")

    if not bot_token:
        # Fall back to the openclaw config; failure here is not fatal by
        # itself, the missing-token check below reports it.
        try:
            lookup = subprocess.run(
                ["openclaw", "config", "get", "channels.telegram.botToken"],
                capture_output=True, text=True, timeout=timeout
            )
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            print(f"Warning: could not read bot token from openclaw config: {e}", file=sys.stderr)
        else:
            # Only trust stdout when the command actually succeeded.
            if lookup.returncode == 0:
                bot_token = lookup.stdout.strip()

    if not bot_token:
        print("Error: No bot token found. Set TELEGRAM_BOT_TOKEN or provide --bot-token", file=sys.stderr)
        sys.exit(1)

    # Fail early with a clear message instead of relying on the CLI's error.
    if not os.path.isfile(audio_path):
        print(f"Error: audio file not found: {audio_path}", file=sys.stderr)
        return False

    # subprocess arguments must be strings; chat IDs are often ints.
    cmd = [
        "openclaw", "message", "send",
        "--channel", "telegram",
        "--target", str(chat_id),
        "--media", str(audio_path)
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: CLI not installed/executable; SubprocessError: timeout;
        # ValueError: undecodable output or invalid argument bytes.
        print(f"Error: {e}", file=sys.stderr)
        return False

    if result.returncode != 0:
        print(f"Error sending voice: {result.stderr}", file=sys.stderr)
        return False

    print(f"✅ Voice sent successfully to {chat_id}")
    return True

def main(argv=None):
    """Parse CLI arguments, generate the voice file, and send it.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use ``sys.argv``.

    Returns:
        Process exit status: 0 if the voice message was sent, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Generate and send voice-only reply")
    parser.add_argument("chat_id", help="Telegram chat ID to send to")
    parser.add_argument("text", help="Text to convert to speech")
    parser.add_argument("--voice", default="af_bella", help="Voice ID (default: af_bella)")
    parser.add_argument("--speed", type=float, default=1.3, help="Speech speed multiplier (default: 1.3)")
    parser.add_argument("--bot-token", help="Telegram bot token (or set TELEGRAM_BOT_TOKEN)")
    parser.add_argument("--keep-file", action="store_true", help="Don't delete temp file after sending")

    args = parser.parse_args(argv)

    print(f"Generating voice for: {args.text[:50]}...")
    filepath, filename = generate_voice(args.text, args.voice, speed=args.speed)
    print(f"Generated: {filename}")

    print(f"Sending to {args.chat_id}...")
    success = False
    try:
        success = send_voice_telegram(args.chat_id, filepath, args.bot_token)
    finally:
        # Runs on failed sends and on sys.exit() from the sender as well, so
        # generated files do not pile up in the output directory.
        if args.keep_file:
            print(f"Kept file at: {filepath}")
        else:
            try:
                os.remove(filepath)
                print("Cleaned up temp file")
            except OSError as e:
                print(f"Warning: could not remove {filepath}: {e}", file=sys.stderr)

    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())