Update app.py

app.py CHANGED
@@ -16,23 +16,13 @@ import torch
 # ======================================
 
 # Initialize image captioning pipeline with pretrained model
-# Model source: Hugging Face Model Hub
 _image_caption_pipeline = pipeline(
     task="image-to-text",
     model="noamrot/FuseCap_Image_Captioning"
 )
 
 # Global model configuration constants
-
-_THINKING_TOKEN_ID = 151668  # Special token marking thinking/content separation
-
-# Initialize model components once
-_tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
-_model = AutoModelForCausalLM.from_pretrained(
-    _MODEL_NAME,
-    torch_dtype="auto",
-    device_map="auto"
-)
+_text_generation_pipeline = pipeline("text-generation", model="Qwen/Qwen1.5-0.5B", max_new_tokens=100)
 
 # Initialize TTS components once to avoid reloading
 _SPEECH_PIPELINE = pipeline("text-to-speech", model="microsoft/speecht5_tts")
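For readers of the diff, a minimal sketch (not part of the commit) of how the replacement text-generation pipeline is typically used with chat-style input. It assumes the Qwen/Qwen1.5-0.5B checkpoint ships a chat template that transformers can apply; the prompts are made up:

# Illustrative sketch only: assumes the checkpoint's tokenizer carries a chat
# template, so the pipeline accepts a list of role/content messages.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="Qwen/Qwen1.5-0.5B",  # same checkpoint as in the commit
    max_new_tokens=100,         # generation budget fixed at construction time
)

messages = [
    {"role": "system", "content": "You are a storyteller."},        # made-up prompt
    {"role": "user", "content": "Tell a story about a red kite."},  # made-up prompt
]

outputs = generator(messages)
# With chat input, generated_text holds the whole conversation: both input
# turns plus the model's assistant turn appended at the end.
print(outputs[0]["generated_text"][-1]["content"])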
@@ -95,76 +85,19 @@ def generate_story_content(system_prompt: str, user_prompt: str) -> str:
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": user_prompt}
         ]
-
-        # Apply chat template to format conversation for the model
-        formatted_input = _tokenizer.apply_chat_template(
-            conversation_history,
-            tokenize=False,
-            add_generation_prompt=True,
-            enable_thinking=False
-        )
-
-        # Tokenize and prepare model inputs
-        model_inputs = _tokenizer(
-            [formatted_input],
-            return_tensors="pt"
-        ).to(_model.device)
-
-        # Generate text completion
-        generated_sequences = _model.generate(
-            **model_inputs,
-            max_new_tokens=150
-        )
+
+        # Generate the story
+        story = _text_generation_pipeline(conversation_history)
+
+        # Extract the story result
+        story_result = story[0]["generated_text"][2]["content"]
 
         # Process and clean output
-        return _process_generated_output(
-            generated_sequences,
-            model_inputs.input_ids
-        )
+        return story_result
 
     except Exception as error:
         raise RuntimeError(f"Story generation failed: {str(error)}") from error
 
-def _process_generated_output(generated_sequences: list, input_ids: list) -> str:
-    """
-    Processes raw model output to extract final content.
-
-    Args:
-        generated_sequences: Raw output sequences from model generation
-        input_ids: Original input token IDs used for generation
-
-    Returns:
-        Cleaned final content text
-    """
-    # Extract new tokens excluding original prompt
-    new_tokens = generated_sequences[0][len(input_ids[0]):].tolist()
-
-    # Find separation point between thinking and final content
-    separation_index = _find_thinking_separation(new_tokens)
-
-    # Decode and clean final content
-    return _tokenizer.decode(
-        new_tokens[separation_index:],
-        skip_special_tokens=True
-    ).strip("\n")
-
-def _find_thinking_separation(token_sequence: list) -> int:
-    """
-    Locates the boundary between thinking process and final content.
-
-    Args:
-        token_sequence: List of generated token IDs
-
-    Returns:
-        Index position marking the start of final content
-    """
-    try:
-        # Search from end for separation token
-        reverse_position = token_sequence[::-1].index(_THINKING_TOKEN_ID)
-        return len(token_sequence) - reverse_position
-    except ValueError:
-        return 0  # Return start if token not found
-
 def generate_audio_from_story(story_text: str, output_path: str = "output.wav") -> str:
     """
     Convert text story to speech audio file using text-to-speech synthesis.
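An aside on the extraction in the hunk above (not part of the diff): indexing the assistant turn at the fixed position [2] only holds while the conversation contains exactly one system and one user message. A defensive variant, with a helper name invented for illustration:

# Hypothetical helper, not in the commit: pull the assistant reply out of a
# chat-style text-generation pipeline output without hard-coding its index.
def extract_assistant_reply(pipeline_output: list) -> str:
    conversation = pipeline_output[0]["generated_text"]
    # Walk backwards so the most recent assistant turn wins.
    for turn in reversed(conversation):
        if turn.get("role") == "assistant":
            return turn["content"]
    raise ValueError("no assistant turn found in pipeline output")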
@@ -238,7 +171,6 @@ st.markdown("""
         margin: 20px 0;
         box-shadow: 0 4px 8px rgba(0,0,0,0.1);
     }
-
     /* Image caption styling */
     .image-caption {
         border-left: 4px solid #4CAF50;
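For context on the final hunk: the st.markdown(""" in the hunk header suggests the CSS sits inside a Streamlit markdown call, roughly as sketched below. The unsafe_allow_html=True flag is an assumption, since Streamlit does not render raw HTML/CSS without it.

# Assumed surrounding call (only st.markdown(""" is visible in the hunk
# header): custom CSS injected into the Streamlit page as raw HTML.
import streamlit as st

st.markdown("""
<style>
    /* Image caption styling */
    .image-caption {
        border-left: 4px solid #4CAF50;
    }
</style>
""", unsafe_allow_html=True)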