#!/usr/bin/env python3
"""
Generate voice with custom Kimi-XXX filename using local Kokoro TTS

Usage: generate_voice.py "Text to speak" [--voice af_bella] [--output-dir /tmp]
                         [--model tts-1] [--speed 1.3]
                         [--tts-url URL] [--timeout SECONDS]

On success a JSON object describing the generated file is printed to stdout.
On failure a JSON object with an "error" key is printed to stderr and the
process exits with status 1.
"""

import argparse
import json
import os
import sys
import tempfile
import urllib.request
from datetime import datetime

# Endpoint used when the caller does not supply one.
DEFAULT_TTS_URL = "http://10.0.0.228:8880/v1/audio/speech"

# Upper bound (seconds) for the HTTP request so a stalled server cannot hang
# the script forever. Pass timeout=None to wait indefinitely.
DEFAULT_TIMEOUT_SECONDS = 120.0


def generate_voice(text, voice="af_bella", output_dir="/tmp", model="tts-1", speed=1.3,
                   tts_url=DEFAULT_TTS_URL, timeout=DEFAULT_TIMEOUT_SECONDS):
    """Generate voice file with Kimi-XXX filename.

    POSTs a JSON payload (model, input, voice, speed) to the TTS endpoint,
    writes the response body to ``<output_dir>/Kimi-YYYYMMDD-HHMMSS.ogg`` and
    prints a JSON summary to stdout.

    Args:
        text: Text to convert to speech. Must contain non-whitespace characters.
        voice: Voice ID sent to the server.
        output_dir: Directory for the audio file; created if it does not exist.
        model: Model name sent to the server.
        speed: Speech speed multiplier; must be greater than zero.
        tts_url: URL of the speech endpoint.
        timeout: Seconds to wait for the server, or None to wait indefinitely.

    Returns:
        A dict with the keys ``filepath``, ``filename``, ``size_bytes``,
        ``estimated_duration_seconds``, ``voice``, ``speed`` and ``text``.

    On any failure (invalid arguments, network error, empty response, file
    error) a JSON error object is printed to stderr and ``sys.exit(1)`` is
    called, so callers observe ``SystemExit`` rather than the original error.
    """
    # Generate filename: Kimi-YYYYMMDD-HHMMSS.ogg. The resolution is one
    # second, so two calls within the same second target the same path.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"Kimi-{timestamp}.ogg"
    filepath = os.path.join(output_dir, filename)

    try:
        # Validate before touching the network or the filesystem; a
        # non-positive speed would otherwise fail only after the file was
        # written (division by zero in the duration estimate).
        if not text or not text.strip():
            raise ValueError("text must not be empty")
        if not speed > 0:  # written this way so NaN is rejected as well
            raise ValueError(f"speed must be a positive number, got {speed!r}")

        # NOTE(review): no "response_format" is sent, so the server chooses
        # the audio container. The file is always named *.ogg; confirm the
        # server really returns Ogg data.
        payload = json.dumps({
            "model": model,
            "input": text,
            "voice": voice,
            "speed": speed,
        }).encode()

        req = urllib.request.Request(
            tts_url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )

        with urllib.request.urlopen(req, timeout=timeout) as response:
            audio_data = response.read()

        # An empty body would otherwise be saved and reported as success.
        if not audio_data:
            raise RuntimeError("TTS server returned an empty response")

        # Save to file
        os.makedirs(output_dir, exist_ok=True)
        with open(filepath, "wb") as f:
            f.write(audio_data)

        # Estimate duration (rough: ~150 chars per minute at normal speed,
        # adjusted for speed), never less than one second.
        # NOTE(review): ~150 *words* per minute is the usual speaking rate,
        # so this figure is probably several times too long. Left unchanged
        # because consumers may depend on the current values.
        estimated_duration = max(1, len(text) / 150 * 60 / speed)

        result = {
            "filepath": filepath,
            "filename": filename,
            "size_bytes": len(audio_data),
            "estimated_duration_seconds": round(estimated_duration, 1),
            "voice": voice,
            "speed": speed,
            "text": text,
        }

        print(json.dumps(result))
        return result

    except Exception as e:
        # Deliberately broad: this is the script's error boundary, and every
        # failure must surface as the JSON error object plus exit status 1.
        error_result = {
            "error": str(e),
            "filepath": None,
            "filename": None,
        }
        print(json.dumps(error_result), file=sys.stderr)
        sys.exit(1)


def main(argv=None):
    """Parse command-line arguments and run generate_voice.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Generate voice with Kimi-XXX filename")
    parser.add_argument("text", help="Text to convert to speech")
    parser.add_argument("--voice", default="af_bella", help="Voice ID (default: af_bella)")
    parser.add_argument("--output-dir", default="/tmp", help="Output directory (default: /tmp)")
    parser.add_argument("--model", default="tts-1", help="TTS model (default: tts-1)")
    parser.add_argument("--speed", type=float, default=1.3,
                        help="Speech speed multiplier (default: 1.3)")
    parser.add_argument("--tts-url", default=DEFAULT_TTS_URL,
                        help=f"TTS endpoint URL (default: {DEFAULT_TTS_URL})")
    parser.add_argument("--timeout", type=float, default=DEFAULT_TIMEOUT_SECONDS,
                        help=f"Request timeout in seconds (default: {DEFAULT_TIMEOUT_SECONDS})")

    args = parser.parse_args(argv)
    generate_voice(args.text, args.voice, args.output_dir, args.model, args.speed,
                   tts_url=args.tts_url, timeout=args.timeout)


if __name__ == "__main__":
    main()