import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from huggingface_hub import login

model_name_tinyllama = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer_tinyllama = AutoTokenizer.from_pretrained(model_name_tinyllama)
model_tinyllama = AutoModelForCausalLM.from_pretrained(
    model_name_tinyllama, torch_dtype=torch.float32, device_map={"": "cpu"}
)
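
# Streamlit re-runs this script on every interaction, so the module-level load above is
# repeated unless it is cached. A minimal optional sketch (assuming a Streamlit version
# that provides st.cache_resource); the app below keeps the direct load and does not call it.
@st.cache_resource
def load_tinyllama():
    tok = AutoTokenizer.from_pretrained(model_name_tinyllama)
    mod = AutoModelForCausalLM.from_pretrained(
        model_name_tinyllama, torch_dtype=torch.float32, device_map={"": "cpu"}
    )
    return tok, mod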

def summarize_tinyllama(article):
    # Causal models like TinyLlama have no dedicated summarization head the way
    # encoder-decoder models do, so we prompt the model to continue into a summary.
    prompt = "Summarize the following article clearly and concisely:"
    input_text = f"{prompt}\n{article}\nSummary:"
    # Tokenize, truncating to the first 1024 tokens if the article is longer.
    inputs = tokenizer_tinyllama(input_text, return_tensors="pt", max_length=1024, truncation=True)
    # Generate a completion of the prompt; max_new_tokens caps how much new text
    # is produced after the prompt.
    outputs = model_tinyllama.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=500,   # generate up to 500 new tokens for the summary
        do_sample=True,       # sample rather than greedy decode
        temperature=0.7,      # control randomness
        min_new_tokens=150,   # require at least 150 new tokens
        top_p=0.9,            # nucleus sampling
        pad_token_id=tokenizer_tinyllama.eos_token_id,  # pad with EOS token if needed
    )
    # Decode the whole output sequence; it still contains the original prompt.
    generated_text = tokenizer_tinyllama.decode(outputs[0], skip_special_tokens=True)
    # Extract everything after the "Summary:" marker. This simple split may need
    # more robust parsing depending on the prompt and the model's output.
    summary_start_index = generated_text.find("Summary:") + len("Summary:")
    summary = generated_text[summary_start_index:].strip()
    return summary
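
# Optional sketch, not wired into the UI below: TinyLlama-1.1B-Chat was fine-tuned on a
# chat format, so going through the tokenizer's chat template may yield cleaner output
# than the raw "Summary:" prompt above. Assumes the installed transformers version
# provides tokenizer.apply_chat_template.
def summarize_tinyllama_chat(article):
    messages = [{"role": "user", "content": f"Summarize the following article clearly and concisely:\n{article}"}]
    chat_prompt = tokenizer_tinyllama.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer_tinyllama(chat_prompt, return_tensors="pt", max_length=1024, truncation=True)
    outputs = model_tinyllama.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=500,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer_tinyllama.eos_token_id,
    )
    # Decode only the newly generated tokens, so the prompt does not need to be stripped out.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer_tinyllama.decode(new_tokens, skip_special_tokens=True).strip()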

def answer_question_tinyllama(article, question):
    # Build a prompt that asks the model to answer the question based on the provided article.
    input_text = f"From this Article: {article}\n\n Answer the below Question: {question}\n\nAnswer:"
    # Tokenize, truncating if the combined article and question is too long.
    inputs = tokenizer_tinyllama(input_text, return_tensors="pt", max_length=1024, truncation=True)
    # Generate the answer with parameters suited to a fairly concise reply.
    outputs = model_tinyllama.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=500,   # generate up to 500 new tokens for the answer
        do_sample=True,       # sample to get more varied answers
        temperature=0.7,      # control randomness
        top_p=0.9,            # nucleus sampling
        pad_token_id=tokenizer_tinyllama.eos_token_id,  # pad with EOS token if needed
    )
    # Decode the whole output sequence; it still contains the original prompt.
    generated_text = tokenizer_tinyllama.decode(outputs[0], skip_special_tokens=True)
    # Extract everything after the "Answer:" marker. This simple split may need
    # more robust parsing depending on the prompt and the model's output.
    answer_start_index = generated_text.find("Answer:") + len("Answer:")
    answer = generated_text[answer_start_index:].strip()
    # Basic cleanup: drop the question if the model echoed it at the start of the answer.
    if answer.startswith(question):
        answer = answer[len(question):].strip()
    return answer

st.title("Smart Article Insights Generator")
st.markdown("Summarize an article or ask a question about it.")

mode = st.radio("Select Mode", ["Summarize", "Answer Question"])
article_input = st.text_area("Article Text", height=300, placeholder="Paste the article here...")

question_input = None
if mode == "Answer Question":
    question_input = st.text_input("Question", placeholder="Enter your question here...")

if st.button("Process"):
    if mode == "Summarize":
        if article_input:
            with st.spinner("Generating summary..."):
                output = summarize_tinyllama(article_input)
                st.subheader("Summary")
                st.write(output)
        else:
            st.warning("Please provide an article to summarize.")
    elif mode == "Answer Question":
        if article_input and question_input:
            with st.spinner("Generating answer..."):
                output = answer_question_tinyllama(article_input, question_input)
                st.subheader("Answer")
                st.write(output)
        elif not article_input:
            st.warning("Please provide an article to answer the question from.")
        elif not question_input:
            st.warning("Please provide a question to answer.")
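
# For reference, the imports above require: streamlit, transformers, torch, huggingface_hub
# (the Space's requirements.txt presumably pins its own versions).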