AI视频生成从入门到精通:全栈实战指南

1. AI视频生成技术全景图

1.1 技术架构概览

```mermaid
graph TB
    A[AI视频生成系统] --> B[输入层]
    A --> C[处理层]
    A --> D[输出层]

    B --> B1[文本提示]
    B --> B2[图像输入]
    B --> B3[音频输入]
    B --> B4[视频输入]

    C --> C1[文本编码器]
    C --> C2[扩散模型]
    C --> C3[运动模型]
    C --> C4[时空一致性]

    D --> D1[视频输出]
    D --> D2[音频合成]
    D --> D3[字幕生成]
    D --> D4[特效处理]
```

1.2 主流平台对比分析

| 平台名称 | 核心技术 | 最大分辨率 | 时长限制 | 帧率 | 特色功能 | 成本 |
| --- | --- | --- | --- | --- | --- | --- |
| Runway Gen-2 | 扩散模型 | 1280x768 | 18秒 | 24fps | 多模态输入、运动控制 | $15-76/月 |
| Pika 1.0 | 专有模型 | 1024x576 | 3秒免费 | 24fps | 文本+图像视频 | 免费-$58/月 |
| Stable Video Diffusion | 开源模型 | 1024x576 | 25帧 | 3-30fps | 完全开源、可定制 | 本地部署 |
| Kaiber | 多种模型 | 1280x720 | 无限制 | 30fps | 风格转换、音乐视频 | $5-30/月 |
| Luma Dream Machine | 扩散模型 | 1280x720 | 5秒 | 24fps | 超快生成、逼真动画 | 免费试用 |
| HeyGen | 数字人 | 1920x1080 | 5分钟 | 30fps | 数字人播报、语音克隆 | $24-720/月 |
| Sora (OpenAI) | 扩散Transformer | 1920x1080+ | 60秒 | 30fps | 物理模拟、长时一致性 | 未公开 |

2. 入门篇:5分钟快速上手

2.1 Runway Gen-2 零基础入门

# runway_quickstart.py
"""
Runway Gen-2 快速入门脚本
通过API调用生成第一个AI视频
"""

import requests
import json
import time
import os
from pathlib import Path
from dotenv import load_dotenv

class RunwayVideoGenerator:
    """Client for generating videos through the Runway Gen-2 HTTP API.

    Handles authentication, task submission, status polling, and download
    of finished videos into a local ``generated_videos`` directory.
    """

    # Timeout (seconds) applied to every HTTP request so a stalled
    # connection fails fast instead of hanging the whole script.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key=None):
        """Initialize the client.

        Args:
            api_key: Runway API key. Falls back to the RUNWAY_API_KEY
                environment variable (loaded from a local .env) when omitted.

        Raises:
            ValueError: if no API key can be found.
        """
        # Pull variables from a local .env file, if present.
        load_dotenv()
        self.api_key = api_key or os.getenv('RUNWAY_API_KEY')

        if not self.api_key:
            raise ValueError("请设置RUNWAY_API_KEY环境变量或传入api_key参数")

        # REST endpoint and shared auth headers.
        self.base_url = "https://api.runwayml.com/v1"
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # All downloaded videos and reports land here.
        self.output_dir = Path("generated_videos")
        self.output_dir.mkdir(exist_ok=True)

        print(f"✅ Runway视频生成器初始化完成")
        print(f"📁 输出目录: {self.output_dir.absolute()}")

    def generate_from_text(self, prompt, **kwargs):
        """Generate a video from a text prompt.

        Args:
            prompt: natural-language description of the desired clip.
            **kwargs: extra API parameters; they override the defaults
                below (duration, seed, upscale, watermark, interpolate).

        Returns:
            Path of the downloaded MP4, or None on any failure.
        """
        print(f"🎬 正在生成视频: '{prompt[:50]}...'")

        # Defaults first, then prompt, then caller overrides (same
        # precedence as before: kwargs win).
        defaults = {
            "duration": 4,  # seconds
            "seed": int(time.time() * 1000) % 1000000,
            "upscale": True,
            "watermark": False,
            "interpolate": True
        }
        data = {**defaults, "prompt": prompt, **kwargs}

        try:
            # Submit the generation task; bounded timeout so we never hang.
            response = requests.post(
                f"{self.base_url}/video/generate",
                headers=self.headers,
                json=data,
                timeout=self.REQUEST_TIMEOUT
            )

            if response.status_code == 200:
                result = response.json()
                video_id = result.get("id")

                print(f"📤 生成任务已提交,任务ID: {video_id}")

                # Block until the task finishes, fails, or times out.
                video_url = self._poll_generation_status(video_id)

                if video_url:
                    return self._download_video(video_url, prompt)

                print("❌ 视频生成失败")
                return None

            print(f"❌ API请求失败: {response.status_code}")
            print(f"响应: {response.text}")
            return None

        except Exception as e:
            print(f"❌ 生成过程中出错: {str(e)}")
            return None

    def generate_from_image(self, image_path, prompt=None, **kwargs):
        """Generate a video that animates a still image.

        Args:
            image_path: path to the source image file.
            prompt: optional text guidance for the animation.
            **kwargs: extra API parameters passed through unchanged.

        Returns:
            Path of the downloaded MP4, or None on any failure.
        """
        print(f"🖼️ 正在从图像生成视频: {image_path}")

        # Step 1: upload the source image.
        with open(image_path, 'rb') as f:
            upload_response = requests.post(
                f"{self.base_url}/files/upload",
                headers={"Authorization": f"Bearer {self.api_key}"},
                files={'image': f},
                timeout=self.REQUEST_TIMEOUT
            )

        if upload_response.status_code != 200:
            print(f"❌ 图像上传失败: {upload_response.text}")
            return None

        image_url = upload_response.json().get("url")

        # Step 2: submit the image-to-video task.
        data = {
            "image_url": image_url,
            **kwargs
        }
        if prompt:
            data["prompt"] = prompt

        response = requests.post(
            f"{self.base_url}/video/generate/image-to-video",
            headers=self.headers,
            json=data,
            timeout=self.REQUEST_TIMEOUT
        )

        if response.status_code == 200:
            video_id = response.json().get("id")

            # Step 3: wait for completion, then download.
            video_url = self._poll_generation_status(video_id)

            if video_url:
                return self._download_video(
                    video_url,
                    prompt or f"image_{Path(image_path).stem}"
                )

        return None

    def _poll_generation_status(self, task_id, max_attempts=60, interval=5):
        """Poll a task until it completes, fails, or times out.

        Args:
            task_id: identifier returned when the task was submitted.
            max_attempts: number of polls before giving up.
            interval: seconds to sleep between polls.

        Returns:
            The finished video's URL on success, otherwise None.
        """
        print("⏳ 等待视频生成...", end="", flush=True)

        for attempt in range(max_attempts):
            time.sleep(interval)

            status_response = requests.get(
                f"{self.base_url}/tasks/{task_id}",
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT
            )

            if status_response.status_code != 200:
                print(f"\n❌ 状态查询失败: {status_response.status_code}")
                return None

            status_data = status_response.json()
            status = status_data.get("status")

            print(".", end="", flush=True)

            if status == "completed":
                print("\n✅ 视频生成完成!")
                return status_data.get("output", {}).get("video_url")
            if status == "failed":
                print(f"\n❌ 生成失败: {status_data.get('error', '未知错误')}")
                return None
            # Any other status: keep waiting.

        print(f"\n❌ 生成超时({max_attempts * interval}秒)")
        return None

    def _download_video(self, video_url, prompt):
        """Stream a finished video to disk.

        The filename combines a timestamp with a filesystem-safe slug
        derived from the prompt.

        Returns:
            Path of the saved file, or None on failure.
        """
        try:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            # Keep only filesystem-safe characters from the prompt.
            safe_prompt = "".join(c for c in prompt[:50] if c.isalnum() or c in (' ', '-', '_')).rstrip()
            filepath = self.output_dir / f"{timestamp}_{safe_prompt}.mp4"

            # Stream in chunks; context manager closes the connection.
            with requests.get(video_url, stream=True, timeout=self.REQUEST_TIMEOUT) as response:
                if response.status_code != 200:
                    print(f"❌ 视频下载失败: {response.status_code}")
                    return None

                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            print(f"💾 视频已保存: {filepath}")

            video_size = filepath.stat().st_size / (1024 * 1024)  # MB
            print(f"📊 文件大小: {video_size:.2f} MB")

            return filepath

        except Exception as e:
            print(f"❌ 下载过程中出错: {str(e)}")
            return None

    def generate_batch(self, prompts, **kwargs):
        """Generate one video per prompt, pausing between requests.

        Args:
            prompts: iterable of text prompts.
            **kwargs: parameters forwarded to generate_from_text.

        Returns:
            List of dicts with keys ``prompt``, ``video_path``, ``success``.
        """
        results = []

        for i, prompt in enumerate(prompts, 1):
            print(f"\n{'='*50}")
            print(f"批次 {i}/{len(prompts)}: {prompt}")
            print(f"{'='*50}")

            video_path = self.generate_from_text(prompt, **kwargs)
            results.append({
                "prompt": prompt,
                "video_path": video_path,
                "success": video_path is not None
            })

            # Simple rate limiting between consecutive requests.
            if i < len(prompts):
                print("⏸️ 等待10秒继续...")
                time.sleep(10)

        self._generate_batch_report(results)

        return results

    def _generate_batch_report(self, results):
        """Write a Markdown summary of a batch run to the output directory."""
        report_path = self.output_dir / "batch_report.md"

        success_count = sum(1 for r in results if r['success'])
        # Guard against an empty batch so the rate never divides by zero.
        success_rate = success_count / len(results) * 100 if results else 0.0

        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("# AI视频批量生成报告\n\n")
            f.write(f"生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            f.write(f"## 统计信息\n")
            f.write(f"- 总任务数: {len(results)}\n")
            f.write(f"- 成功数: {success_count}\n")
            f.write(f"- 失败数: {len(results) - success_count}\n")
            f.write(f"- 成功率: {success_rate:.1f}%\n\n")

            f.write("## 详细结果\n\n")
            for result in results:
                status = "✅ 成功" if result['success'] else "❌ 失败"
                f.write(f"### {result['prompt']}\n")
                f.write(f"- 状态: {status}\n")
                if result['success']:
                    f.write(f"- 文件: {result['video_path']}\n")
                f.write("\n")

        print(f"\n📊 批量报告已生成: {report_path}")

# 使用示例
def _run_quickstart_demos():
    """Run the three quickstart demos: text-to-video, image-to-video, batch."""
    generator = RunwayVideoGenerator()

    # Demo 1: plain text-to-video generation.
    print("示例1: 文本到视频生成")
    text_prompt = "A beautiful butterfly flying through a magical forest, cinematic lighting, 4K"
    result_path = generator.generate_from_text(text_prompt)

    if result_path:
        print(f"🎉 视频生成成功!文件: {result_path}")

    # Demo 2: animate a still image, if one is available locally.
    print("\n示例2: 图像到视频生成")
    source_image = "input_image.jpg"  # replace with your own image path
    if Path(source_image).exists():
        result_path = generator.generate_from_image(
            source_image,
            prompt="The image comes to life with gentle movement"
        )

    # Demo 3: batch generation over several prompts.
    print("\n示例3: 批量生成")
    batch_prompts = [
        "Sunrise over mountains, time-lapse, cinematic",
        "Underwater coral reef with tropical fish, beautiful lighting",
        "Cyberpunk city at night with flying cars, neon lights"
    ]
    generator.generate_batch(batch_prompts, duration=3)


if __name__ == "__main__":
    _run_quickstart_demos()

2.2 环境配置与API获取

# 环境配置完整指南

## 第一步:获取API密钥

### 1. Runway API
1. 访问 https://runwayml.com
2. 注册账号并登录
3. 进入Account Settings → API Keys
4. 点击"Create New API Key"
5. 复制API密钥

### 2. Pika API (如需)
1. 访问 https://pika.art
2. 加入Discord等待邀请
3. 获取API访问权限

## 第二步:本地环境配置

```bash
# 1. 安装Python 3.8+
python --version

# 2. 创建虚拟环境
python -m venv ai-video-env

# 3. 激活虚拟环境
# Windows:
ai-video-env\Scripts\activate
# Mac/Linux:
source ai-video-env/bin/activate

# 4. 安装依赖包
pip install requests python-dotenv pillow opencv-python

# 5. 创建环境变量文件
cat > .env << EOF
RUNWAY_API_KEY=your_runway_api_key_here
PIKA_API_KEY=your_pika_api_key_here
OPENAI_API_KEY=your_openai_api_key_for_sora
EOF

# 6. 创建项目结构
mkdir -p generated_videos/raw
mkdir -p generated_videos/processed
mkdir -p input_images
mkdir -p scripts

第三步:测试环境

# test_environment.py
import os
from dotenv import load_dotenv

def test_environment():
    """Sanity-check the local setup: API keys and project directories."""
    load_dotenv()

    print("🔍 环境测试开始")
    print("="*40)

    # API keys: must be present, and long enough to plausibly be real.
    for api_name in ('RUNWAY_API_KEY', 'PIKA_API_KEY'):
        value = os.getenv(api_name)
        if not value:
            print(f"❌ {api_name}: 未设置")
        elif len(value) > 20:
            print(f"✅ {api_name}: 已设置")
        else:
            print(f"⚠️  {api_name}: 可能无效(长度过短)")

    # Expected project directories.
    for folder in ('generated_videos', 'input_images', 'scripts'):
        if os.path.exists(folder):
            print(f"✅ 目录 '{folder}': 存在")
        else:
            print(f"❌ 目录 '{folder}': 不存在")

    print("="*40)
    print("环境测试完成")


if __name__ == "__main__":
    test_environment()

## 第四步:快速验证

运行快速开始脚本,确保一切正常:

```bash
python runway_quickstart.py
```

## 3. 进阶篇:提示词工程与参数优化

### 3.1 高级提示词编写技巧
```python
# prompt_engineering.py
"""
AI视频生成提示词工程
"""

class VideoPromptEngineer:
    """Builds, analyses, and storyboards prompts for AI video generation."""

    def __init__(self):
        # Keyword banks per visual style; the first two entries of the
        # selected style are appended to constructed prompts.
        self.styles = {
            "cinematic": [
                "cinematic shot", "film grain", "shallow depth of field",
                "anamorphic lens flare", "color grading", "35mm film"
            ],
            "anime": [
                "anime style", "Japanese animation", "Studio Ghibli",
                "Makoto Shinkai", "sharp lines", "vibrant colors"
            ],
            "cyberpunk": [
                "cyberpunk 2077", "neon lights", "rainy night",
                "futuristic city", "holographic displays", "synthwave"
            ],
            "documentary": [
                "documentary style", "handheld camera", "natural lighting",
                "interview style", "b-roll footage", "voice over"
            ],
            "fantasy": [
                "fantasy art", "magical realism", "ethereal glow",
                "mythical creatures", "enchanted forest", "dreamlike"
            ]
        }

        # Camera-movement presets, keyed by the Chinese labels used in
        # the accompanying tutorial text.
        self.camera_movements = {
            "静态": ["static shot", "locked camera", "tripod"],
            "平移": ["dolly shot", "tracking shot", "camera pan left/right"],
            "推进": ["zoom in", "push in", "dolly zoom"],
            "拉升": ["crane shot", "helicopter shot", "bird's eye view"],
            "手持": ["handheld camera", "shaky cam", "documentary style"],
            "环绕": ["orbit shot", "360 shot", "circular movement"]
        }

        # Lighting presets (Chinese keys); the first two entries of the
        # selected preset are appended to constructed prompts.
        self.lighting_types = {
            "自然光": ["natural lighting", "golden hour", "sunlight"],
            "戏剧光": ["dramatic lighting", "chiaroscuro", "high contrast"],
            "电影光": ["cinematic lighting", "moody", "rim light"],
            "霓虹光": ["neon lighting", "cyberpunk lights", "colorful"],
            "柔和光": ["soft lighting", "diffused", "even lighting"]
        }

    def construct_prompt(self, subject, action, style="cinematic",
                         camera="dolly shot", lighting="cinematic lighting",
                         quality="4K", extra_details=None):
        """Assemble a comma-separated structured prompt.

        Args:
            subject: main subject of the shot.
            action: what the subject is doing.
            style: key into self.styles; unknown styles are ignored.
            camera: preset key into self.camera_movements, or free text.
                Free-text values (including the default "dolly shot",
                which is not a preset key) are appended verbatim instead
                of being silently dropped.
            lighting: preset key into self.lighting_types (its first two
                keywords are used), or free text appended verbatim.
            quality: resolution/quality tag, e.g. "4K".
            extra_details: optional string or list of extra keywords.

        Returns:
            The assembled prompt string.
        """
        prompt_parts = [
            subject,
            action
        ]

        # Style: only known presets contribute keywords.
        if style in self.styles:
            prompt_parts.extend(self.styles[style][:2])

        # Camera: preset key or free text — never silently dropped.
        if camera:
            prompt_parts.append(camera)

        # Lighting: expand known presets, otherwise keep the raw text.
        if lighting in self.lighting_types:
            prompt_parts.extend(self.lighting_types[lighting][:2])
        elif lighting:
            prompt_parts.append(lighting)

        # Quality tags.
        prompt_parts.append(quality)
        prompt_parts.append("high quality")

        # Optional extra details (string or list).
        if extra_details:
            if isinstance(extra_details, list):
                prompt_parts.extend(extra_details)
            else:
                prompt_parts.append(extra_details)

        return ", ".join(prompt_parts)

    def generate_scene_breakdown(self, main_prompt):
        """Analyse a prompt: variants, technical specs, style references.

        Returns:
            Dict with keys main_prompt, suggested_variations (max 3),
            technical_specs, style_references.
        """
        breakdown = {
            "main_prompt": main_prompt,
            "suggested_variations": [],
            "technical_specs": {},
            "style_references": []
        }

        # Up to three alternative phrasings of the prompt.
        variations = self._generate_variations(main_prompt)
        breakdown["suggested_variations"] = variations[:3]

        # Duration / frame-rate / aspect-ratio suggestions.
        breakdown["technical_specs"] = self._suggest_technical_specs(main_prompt)

        # Films / shows with a matching look.
        breakdown["style_references"] = self._suggest_style_references(main_prompt)

        return breakdown

    def _generate_variations(self, prompt):
        """Return style, camera-angle, and time-of-day variants of *prompt*."""
        variations = []

        # Variant group 1: first three style banks.
        for style_name in list(self.styles.keys())[:3]:
            style_desc = self.styles[style_name][0]
            variations.append(f"{prompt}, {style_desc} style")

        # Variant group 2: alternative camera angles.
        for camera in ["low angle shot", "high angle shot", "Dutch angle"]:
            variations.append(f"{prompt}, {camera}")

        # Variant group 3: time-of-day / weather moods.
        # Loop variable renamed so it no longer shadows the `time` module.
        times = ["at sunrise", "at golden hour", "at night", "in the rain"]
        for time_of_day in times[:2]:
            variations.append(f"{prompt}, {time_of_day}")

        return variations

    def _suggest_technical_specs(self, prompt):
        """Suggest duration, frame rate, and aspect ratio from keywords."""
        specs = {}

        # Duration: slow/timelapse content needs longer clips.
        if any(word in prompt.lower() for word in ["slow motion", "timelapse"]):
            specs["duration"] = "8-12 seconds"
        elif any(word in prompt.lower() for word in ["action", "fast", "running"]):
            specs["duration"] = "4-6 seconds"
        else:
            specs["duration"] = "6-8 seconds"

        # Frame rate: higher for slow motion, cinematic 24fps otherwise.
        if "slow motion" in prompt.lower():
            specs["frame_rate"] = "60fps (for slow motion)"
        else:
            specs["frame_rate"] = "24fps (cinematic)"

        # Aspect ratio: widescreen for film-like prompts.
        if any(word in prompt.lower() for word in ["cinematic", "movie", "film"]):
            specs["aspect_ratio"] = "2.35:1 (cinemascope)"
        else:
            specs["aspect_ratio"] = "16:9 (standard)"

        return specs

    def _suggest_style_references(self, prompt):
        """List films/media whose look matches keywords in *prompt*."""
        references = []

        prompt_lower = prompt.lower()

        if any(word in prompt_lower for word in ["cyberpunk", "neon", "futuristic"]):
            references.append("Blade Runner 2049 (2017)")
            references.append("Cyberpunk 2077 (game)")

        if any(word in prompt_lower for word in ["fantasy", "magical", "enchanted"]):
            references.append("Lord of the Rings (movie)")
            references.append("Studio Ghibli films")

        # Keyword lowercased ("japanese") so it can actually match the
        # lowercased prompt; the capitalized form never matched.
        if any(word in prompt_lower for word in ["anime", "japanese"]):
            references.append("Your Name (2016)")
            references.append("Spirited Away (2001)")

        if any(word in prompt_lower for word in ["documentary", "realistic"]):
            references.append("BBC Planet Earth")
            references.append("National Geographic")

        return references

    def create_storyboard(self, scene_descriptions, transition="cut"):
        """Turn scene descriptions into a storyboard dict.

        Args:
            scene_descriptions: iterable of per-scene text descriptions.
            transition: transition style applied between scenes.

        Returns:
            Dict with title, scenes (each annotated with duration, shot
            type, camera movement, lighting), transitions, total_duration.
        """
        storyboard = {
            "title": "AI Generated Video Storyboard",
            "scenes": [],
            "transitions": transition,
            "total_duration": 0
        }

        for i, description in enumerate(scene_descriptions, 1):
            scene = {
                "scene_number": i,
                "description": description,
                "duration": self._estimate_scene_duration(description),
                "shot_type": self._suggest_shot_type(description),
                "camera_movement": self._suggest_camera_movement(description),
                "lighting": self._suggest_lighting(description)
            }

            storyboard["scenes"].append(scene)
            storyboard["total_duration"] += scene["duration"]

        return storyboard

    def _estimate_scene_duration(self, description):
        """Estimate seconds of screen time from description length."""
        word_count = len(description.split())

        if word_count < 10:
            return 3  # seconds
        elif word_count < 20:
            return 5
        else:
            return 8

    def _suggest_shot_type(self, description):
        """Pick a shot type from explicit keywords, defaulting to MCU."""
        if "close up" in description.lower():
            return "Close-up"
        elif "wide shot" in description.lower() or "landscape" in description.lower():
            return "Wide shot"
        elif "medium shot" in description.lower():
            return "Medium shot"
        else:
            return "Medium close-up"

    def _suggest_camera_movement(self, description):
        """Pick a camera movement matching the described motion."""
        desc_lower = description.lower()

        if any(word in desc_lower for word in ["flying", "floating", "orbiting"]):
            return "Orbit shot"
        elif any(word in desc_lower for word in ["walking", "running", "moving"]):
            return "Tracking shot"
        elif "zoom" in desc_lower:
            return "Zoom"
        else:
            return "Static shot"

    def _suggest_lighting(self, description):
        """Pick a lighting scheme matching the described time/mood."""
        desc_lower = description.lower()

        if any(word in desc_lower for word in ["sunset", "sunrise", "golden"]):
            return "Golden hour lighting"
        elif any(word in desc_lower for word in ["night", "dark", "moonlight"]):
            return "Low-key lighting"
        elif any(word in desc_lower for word in ["bright", "sunny", "day"]):
            return "High-key lighting"
        else:
            return "Natural lighting"

# 使用示例
def _demo():
    """Demonstrate prompt construction, breakdown, and storyboarding."""
    engineer = VideoPromptEngineer()

    # Demo 1: structured prompt construction.
    print("示例1: 结构化提示词构建")
    prompt = engineer.construct_prompt(
        subject="A lone astronaut",
        action="floating in space near a colorful nebula",
        style="cinematic",
        camera="slow dolly zoom",
        lighting="dramatic lighting",
        quality="8K",
        extra_details=["stars twinkling", "reflection in visor"]
    )

    print(f"生成的提示词:\n{prompt}\n")

    # Demo 2: analyse the prompt and propose variations.
    print("示例2: 提示词分解分析")
    breakdown = engineer.generate_scene_breakdown(prompt)

    print("提示词分析:")
    print(f"原提示词: {breakdown['main_prompt']}")
    print("\n推荐变体:")
    for idx, alt in enumerate(breakdown['suggested_variations'], 1):
        print(f"{idx}. {alt}")

    print("\n技术规格建议:")
    for spec_name, spec_value in breakdown['technical_specs'].items():
        print(f"{spec_name}: {spec_value}")

    print("\n风格参考:")
    for ref in breakdown['style_references']:
        print(f"- {ref}")

    # Demo 3: turn scene descriptions into a storyboard.
    print("\n示例3: 分镜脚本生成")
    scene_list = [
        "A spaceship approaching a distant planet",
        "Close-up of astronaut's face inside the helmet",
        "Wide shot of the colorful nebula in space",
        "Astronaut reaching out towards the camera"
    ]

    board = engineer.create_storyboard(scene_list, transition="fade")

    print(f"分镜标题: {board['title']}")
    print(f"总时长: {board['total_duration']}秒")
    print(f"转场方式: {board['transitions']}")

    print("\n分镜详情:")
    for shot in board['scenes']:
        print(f"\n场景 {shot['scene_number']}:")
        print(f"  描述: {shot['description']}")
        print(f"  时长: {shot['duration']}秒")
        print(f"  镜头: {shot['shot_type']}")
        print(f"  运动: {shot['camera_movement']}")
        print(f"  光照: {shot['lighting']}")


if __name__ == "__main__":
    _demo()

3.2 参数优化策略

# parameter_optimization.py
"""
AI视频生成参数优化
"""

import json
from dataclasses import dataclass
from typing import Dict, List, Optional
from enum import Enum

class ModelType(Enum):
    """Supported video-generation model backends."""
    RUNWAY_GEN2 = "runway_gen2"    # Runway Gen-2 hosted API
    STABLE_VIDEO = "stable_video"  # Stable Video Diffusion (open source)
    PIKA = "pika"                  # Pika Labs
    LUMA = "luma"                  # Luma Dream Machine

class StylePreset(Enum):
    """Visual style presets; each maps to cfg/steps/negative-prompt bundles."""
    CINEMATIC = "cinematic"    # film-like look
    ANIMATED = "animated"      # animation / cartoon look
    REALISTIC = "realistic"    # photorealistic look
    PAINTERLY = "painterly"    # oil-painting look
    SKETCH = "sketch"          # sketch look (no preset bundle defined below)

@dataclass
class VideoGenerationParams:
    """Parameter bundle for a single video-generation request."""
    # --- basic ---
    prompt: str
    model_type: ModelType = ModelType.RUNWAY_GEN2

    # --- timing ---
    duration_seconds: float = 4.0  # clip length in seconds
    fps: int = 24                  # frames per second

    # --- quality ---
    seed: Optional[int] = None  # None lets the backend choose a seed
    cfg_scale: float = 7.5  # prompt adherence: higher = closer to prompt
    steps: int = 50  # diffusion steps: higher = better quality, slower

    # --- style ---
    style_preset: Optional[StylePreset] = None
    negative_prompt: str = ""

    # --- motion ---
    motion_scale: float = 1.0  # overall motion intensity
    # None sentinel (not a mutable default); filled in by __post_init__.
    camera_control: Optional[Dict[str, float]] = None

    def __post_init__(self):
        # Default to a static camera when no control dict was supplied.
        if self.camera_control is None:
            self.camera_control = {
                "pan": 0.0,
                "tilt": 0.0,
                "zoom": 0.0,
                "rotation": 0.0
            }

class ParameterOptimizer:
    """参数优化器"""
    
    def __init__(self):
        """Set up tunable parameter ranges and per-style preset bundles."""
        # Allowed range and A/B-test step size for each tunable numeric
        # parameter. The "description" strings are user-facing text.
        self.parameter_ranges = {
            "duration_seconds": {
                "min": 2.0,
                "max": 18.0,
                "step": 2.0,
                "description": "视频时长(秒)"
            },
            "cfg_scale": {
                "min": 3.0,
                "max": 15.0,
                "step": 1.0,
                "description": "提示词遵循度,越高越贴近提示词"
            },
            "steps": {
                "min": 20,
                "max": 100,
                "step": 10,
                "description": "扩散步数,越高质量越好但越慢"
            },
            "motion_scale": {
                "min": 0.1,
                "max": 2.0,
                "step": 0.2,
                "description": "运动强度"
            }
        }

        # Baseline cfg_scale / steps / negative-prompt bundle applied when
        # a StylePreset is chosen (note: SKETCH has no bundle here).
        self.style_presets = {
            StylePreset.CINEMATIC: {
                "cfg_scale": 8.0,
                "steps": 60,
                "negative_prompt": "blurry, low quality, distorted, ugly",
                "description": "电影感风格"
            },
            StylePreset.ANIMATED: {
                "cfg_scale": 7.0,
                "steps": 40,
                "negative_prompt": "realistic, photo, photograph",
                "description": "动画风格"
            },
            StylePreset.REALISTIC: {
                "cfg_scale": 9.0,
                "steps": 80,
                "negative_prompt": "cartoon, anime, painting, drawing",
                "description": "写实风格"
            },
            StylePreset.PAINTERLY: {
                "cfg_scale": 6.5,
                "steps": 50,
                "negative_prompt": "photorealistic, sharp, detailed",
                "description": "油画风格"
            }
        }
    
    def optimize_for_scene(self, scene_description: str) -> VideoGenerationParams:
        """Build a parameter set tuned to the detected scene category.

        Args:
            scene_description: prompt text; its keywords decide the
                scene category (action/landscape/portrait/abstract).

        Returns:
            A VideoGenerationParams tuned for that category; the
            "general" category keeps the dataclass defaults.
        """
        scene_type = self._analyze_scene_type(scene_description)

        params = VideoGenerationParams(
            prompt=scene_description,
            model_type=ModelType.RUNWAY_GEN2
        )

        # Per-category tuning: (duration, fps, motion_scale, preset).
        tuning = {
            "action": (3.0, 30, 1.5, StylePreset.CINEMATIC),
            "landscape": (8.0, 24, 0.3, StylePreset.REALISTIC),
            "portrait": (4.0, 24, 0.1, StylePreset.CINEMATIC),
            "abstract": (6.0, 24, 0.8, StylePreset.PAINTERLY),
        }
        if scene_type in tuning:
            (params.duration_seconds,
             params.fps,
             params.motion_scale,
             params.style_preset) = tuning[scene_type]

        # Fold in the chosen preset's adherence settings, if any.
        if params.style_preset:
            preset = self.style_presets[params.style_preset]
            params.cfg_scale = preset["cfg_scale"]
            params.steps = preset["steps"]
            params.negative_prompt = preset["negative_prompt"]

        return params
    
    def _analyze_scene_type(self, description: str) -> str:
        """分析场景类型"""
        desc_lower = description.lower()
        
        action_keywords = ["running", "fighting", "exploding", "fast", "action"]
        landscape_keywords = ["mountain", "ocean", "forest", "landscape", "view"]
        portrait_keywords = ["face", "person", "portrait", "close-up", "character"]
        abstract_keywords = ["abstract", "pattern", "flow", "shape", "color"]
        
        if any(word in desc_lower for word in action_keywords):
            return "action"
        elif any(word in desc_lower for word in landscape_keywords):
            return "landscape"
        elif any(word in desc_lower for word in portrait_keywords):
            return "portrait"
        elif any(word in desc_lower for word in abstract_keywords):
            return "abstract"
        else:
            return "general"
    
    def generate_parameter_grid(self, base_params: VideoGenerationParams,
                                variables: List[str]) -> List[VideoGenerationParams]:
        """Produce A/B-test variants of *base_params*.

        For each named variable, one clone is produced at one step below
        and one step above the current value (clamped to the allowed
        range); clamped values equal to the baseline are skipped.

        Args:
            base_params: the baseline configuration to vary.
            variables: attribute names to vary (unknown names ignored).

        Returns:
            List of cloned parameter objects, one change each.
        """
        grid: List[VideoGenerationParams] = []

        for name in variables:
            range_info = self.parameter_ranges.get(name)
            if range_info is None:
                continue  # not a tunable parameter

            base_value = getattr(base_params, name)
            lower = max(range_info["min"], base_value - range_info["step"])
            upper = min(range_info["max"], base_value + range_info["step"])

            for candidate in (lower, base_value, upper):
                if candidate == base_value:
                    continue  # skip the unchanged baseline
                variant = self._clone_params(base_params)
                setattr(variant, name, candidate)
                grid.append(variant)

        return grid
    
    def _clone_params(self, params: VideoGenerationParams) -> VideoGenerationParams:
        """Return an independent copy of *params*.

        The camera_control dict is shallow-copied so mutating the clone
        never leaks into the original.
        """
        fields = dict(
            prompt=params.prompt,
            model_type=params.model_type,
            duration_seconds=params.duration_seconds,
            fps=params.fps,
            seed=params.seed,
            cfg_scale=params.cfg_scale,
            steps=params.steps,
            style_preset=params.style_preset,
            negative_prompt=params.negative_prompt,
            motion_scale=params.motion_scale,
            camera_control=dict(params.camera_control),
        )
        return VideoGenerationParams(**fields)
    
    def create_camera_animation(self, scene_duration: float, 
                               animation_type: str = "gentle_pan") -> Dict:
        """创建相机动画参数"""
        animations = {
            "gentle_pan": {
                "pan": {"start": -0.1, "end": 0.1},
                "tilt": {"start": 0.0, "end": 0.0},
                "zoom": {"start": 1.0, "end": 1.0},
                "rotation": {"start": 0.0, "end": 0.0}
            },
            "slow_zoom_in": {
                "pan": {"start": 0.0, "end": 0.0},
                "tilt": {"start": 0.0, "end": 0.0},
                "zoom": {"start": 1.0, "end": 1.3},
                "rotation": {"start": 0.0, "end": 0.0}
            },
            "orbit_shot": {
                "pan": {"start": -0.3, "end": 0.3},
                "tilt": {"start": -0.1, "end": 0.1},


与AI交互的提示工程:

AI工具详解教程:


>> AI热点技术目录