Files
jarvis-memory/skills/kimi-tts-custom/scripts/voice_reply.py

120 lines
3.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Generate voice with Kimi-XXX filename and send via Telegram (voice-only, no text)
Usage: voice_reply.py <chat_id> "Text to speak" [--voice af_bella] [--speed 1.3] [--bot-token TOKEN]
"""
import argparse
import json
import os
import sys
import subprocess
import tempfile
import urllib.request
from datetime import datetime
def generate_voice(text, voice="af_bella", output_dir="/tmp", model="tts-1", speed=1.3,
                   *, tts_url="http://10.0.0.228:8880/v1/audio/speech", timeout=120.0):
    """Synthesize *text* through the TTS HTTP endpoint and save it to disk.

    The request is a JSON POST carrying ``model``, ``input``, ``voice`` and
    ``speed``. The response body is written verbatim to
    ``<output_dir>/Kimi-YYYYMMDD-HHMMSS.ogg``.

    Args:
        text: Text to convert to speech. Must not be empty or whitespace-only.
        voice: Voice identifier forwarded to the TTS server.
        output_dir: Directory the audio file is written to.
        model: Model name forwarded to the TTS server.
        speed: Speech speed multiplier forwarded to the TTS server.
        tts_url: Speech endpoint URL (keyword-only). Defaults to the local
            Kokoro TTS instance.
        timeout: Seconds to wait for the HTTP request before giving up
            (keyword-only). Without it, an unreachable host would block the
            script indefinitely.

    Returns:
        A ``(filepath, filename)`` tuple for the written audio file.

    Raises:
        SystemExit: With exit code 1 (after printing the reason to stderr) if
            the text is empty, the request fails, the server returns no data,
            or the file cannot be written.
    """
    # NOTE(review): the file always gets an .ogg extension, but the request
    # does not ask for a specific audio format -- confirm the server really
    # returns OGG by default.
    if not text or not text.strip():
        print("Error generating voice: text is empty", file=sys.stderr)
        sys.exit(1)

    # The timestamp has one-second resolution, so two calls within the same
    # second resolve to the same path and the later one overwrites the earlier.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"Kimi-{timestamp}.ogg"
    filepath = os.path.join(output_dir, filename)

    payload = json.dumps({
        "model": model,
        "input": text,
        "voice": voice,
        "speed": speed,
    }).encode()
    req = urllib.request.Request(
        tts_url,
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    # Broad catch is deliberate: this is the script's error boundary, and every
    # failure mode (network, HTTP status, disk) is reported the same way.
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            audio_data = response.read()
        if not audio_data:
            # A zero-byte file would otherwise be "sent" as a broken voice note.
            raise ValueError("TTS server returned an empty response")
        with open(filepath, "wb") as f:
            f.write(audio_data)
    except Exception as e:
        print(f"Error generating voice: {e}", file=sys.stderr)
        sys.exit(1)

    return filepath, filename
def send_voice_telegram(chat_id, audio_path, bot_token=None):
    """Send an audio file to a Telegram chat through the ``openclaw`` CLI.

    A bot token is resolved in this order: the ``bot_token`` argument, the
    ``TELEGRAM_BOT_TOKEN`` environment variable, then
    ``openclaw config get channels.telegram.botToken``. The token is only
    checked for presence; it is not passed to the ``openclaw message send``
    command, which is invoked without it.

    Args:
        chat_id: Telegram chat ID to send to (converted to ``str`` for the CLI).
        audio_path: Path of the audio file to attach.
        bot_token: Optional explicit bot token.

    Returns:
        ``True`` if the CLI exited with status 0, ``False`` if the audio file
        is missing, the CLI could not be run, or it exited non-zero.

    Raises:
        SystemExit: With exit code 1 if no bot token can be found.
    """
    if not bot_token:
        bot_token = os.environ.get("TELEGRAM_BOT_TOKEN")

    if not bot_token:
        # Last resort: ask the openclaw config. Failure here is not fatal by
        # itself; the "no token" check below produces the final error.
        try:
            result = subprocess.run(
                ["openclaw", "config", "get", "channels.telegram.botToken"],
                capture_output=True, text=True
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Warning: could not read bot token from openclaw config: {e}", file=sys.stderr)
        else:
            # Only trust stdout when the command succeeded; otherwise an error
            # message printed to stdout would be mistaken for a token.
            if result.returncode == 0:
                bot_token = result.stdout.strip()

    if not bot_token:
        print("Error: No bot token found. Set TELEGRAM_BOT_TOKEN or provide --bot-token", file=sys.stderr)
        sys.exit(1)

    # Fail early with a clear message instead of relying on the CLI's own error.
    if not os.path.isfile(audio_path):
        print(f"Error sending voice: audio file not found: {audio_path}", file=sys.stderr)
        return False

    # subprocess requires string arguments; chat_id may arrive as an int.
    cmd = [
        "openclaw", "message", "send",
        "--channel", "telegram",
        "--target", str(chat_id),
        "--media", str(audio_path)
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return False

    if result.returncode == 0:
        print(f"✅ Voice sent successfully to {chat_id}")
        return True

    print(f"Error sending voice: {result.stderr}", file=sys.stderr)
    return False
def main(argv=None):
    """Parse CLI arguments, generate the voice file, send it, and clean up.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 if the voice message was sent, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Generate and send voice-only reply")
    parser.add_argument("chat_id", help="Telegram chat ID to send to")
    parser.add_argument("text", help="Text to convert to speech")
    parser.add_argument("--voice", default="af_bella", help="Voice ID (default: af_bella)")
    parser.add_argument("--speed", type=float, default=1.3, help="Speech speed multiplier (default: 1.3)")
    parser.add_argument("--bot-token", help="Telegram bot token (or set TELEGRAM_BOT_TOKEN)")
    parser.add_argument("--keep-file", action="store_true", help="Don't delete temp file after sending")
    args = parser.parse_args(argv)

    print(f"Generating voice for: {args.text[:50]}...")
    filepath, filename = generate_voice(args.text, args.voice, speed=args.speed)
    print(f"Generated: {filename}")

    print(f"Sending to {args.chat_id}...")
    success = False
    try:
        success = send_voice_telegram(args.chat_id, filepath, args.bot_token)
    finally:
        # Runs on failure and on SystemExit from the send step too, so the
        # generated file no longer accumulates in the output directory.
        if args.keep_file:
            print(f"Kept file at: {filepath}")
        else:
            try:
                os.remove(filepath)
                print("Cleaned up temp file")
            except OSError as e:
                print(f"Warning: could not remove {filepath}: {e}", file=sys.stderr)

    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())